I'm writing an application that scrapes fan sites for characters as a practice exercise. Currently I have an array of URLs that I am looping through and scraping the data I want, then outputting this data to a output.json file to store for later. I am having issues with my formatting when writing to this file.
Maybe I should store my data differently, I am open to suggestions on best practices/other methods. I would just like this data accessible later.
server.js
var express = require('express');
var cheerio = require('cheerio');
var app = express();
var rp = require('request-promise');
var fsp = require('fs-promise');
app.get('/', function(req, res){
urls = [
'fansite.com/boss1', 'fansite.com/boss2'
];
function parse(html) {
var bosses = require('./output.json');
var $ = cheerio.load(html);
$('.page-header__title').filter(function () {
var data = $(this);
name = data.text();
bosses.name = name;
})
return bosses;
}
var append = file => content => fsp.appendFile(file, JSON.stringify(content, null, 2));
urls.forEach(function (url) {
rp(url)
.then(parse)
.then(append('output.json'))
.then(() => console.log('Success'))
.then(res.send('Bosses Updated.'))
.catch(err => console.log('Error:', err));
});
})
app.listen('8081')
console.log('Running on port 8081');
exports = module.exports = app;
output.json
{
}{
"name": "Boss1"
}{
"name": "Boss2"
}