I would like to scrape a URL in two steps:
1 request to get a list of elements
1 request on each result to get its details
Here is what I have:
var request = require('request')
  , cheerio = require('cheerio')
  , async = require('async')
  , format = require('util').format;

var baseurl = 'http://magiccards.info';

async.waterfall([
    function (callback) {
        request(baseurl + '/sitemap.html', function (err, response, body) {
            var sets = [];
            var $ = cheerio.load(body);
            $('a[href$="/en.html"]').each(function () {
                sets.push({
                    "name": $(this).text(),
                    "code": $(this).attr('href').match(/\/([^)]+)\//)[1],
                    "path": $(this).attr('href'),
                    "translations": []
                });
            });
            callback(null, sets);
        });
    },
    function (sets, callback) {
        console.log(sets);
        async.eachSeries(sets, function (set, callback) {
            console.log('SET ' + set.code.toUpperCase());
            request(baseurl + set.path, function (err, response, body) {
                var $ = cheerio.load(body);
                $('body > a[href^="/' + set.code + '/"]').each(function () {
                    console.log(' %s (%s)', $(this).text(), $(this).attr('href'));
                });
            });
        });
    }
], function (err, result) {
    console.log('ERR');
    // result now equals 'done'
});
The problem is that with eachSeries the loop in the second waterfall function runs only once; if I replace eachSeries with each, the loop does run X times (but then I cannot wait for the results).
What am I missing?
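For reference, this is the eachSeries flow I based my code on, reduced to a minimal sketch (the item list is a placeholder and setTimeout stands in for the HTTP request; this is not my real scraping code):

var async = require('async');

// Placeholder items; in my real code these would be the card sets.
var items = ['a', 'b', 'c'];

async.eachSeries(items, function (item, done) {
    // setTimeout simulates an asynchronous request.
    setTimeout(function () {
        console.log('processed ' + item);
        // Calling the iterator's callback tells eachSeries to move on
        // to the next item.
        done();
    }, 100);
}, function (err) {
    // Runs once every item has been processed (or on the first error).
    console.log('all items processed', err);
});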