I'm working on a simple app using Node.js which needs to do the following when given a valid URL
- Retrieve the HTML of the remote page, save it locally.
- Spider the HTML (using cheerio) and record all JS and CSS file references.
- Make HTTP request for each JS/CSS file and save it to the server by file name.
- Zip up the html, css, and js files and stream the resulting file to the browser.
I've got 1 and 2 working, and the first half of #3 but I'm running into issues with the synchronous nature of the downloads. My code is running too fast and generating file names for the CSS and JS files, but none of the content. I'm guessing this is because my code isn't synchronous. The problem is that I don't know in advance how many files there might be and all of them have to be there before the ZIP file can be generated.
Here's the flow of my app as it currently exists. I've left out the helper methods as they don't affect synchronicity. Can any of you provide input as to what I should do?
http.get(fullurl, function(res) {
res.on('data', function (chunk) {
var $source = $(''+chunk),
js = getJS($source, domain),
css = getCSS($source, domain),
uniqueName = pw(),
dir = [baseDir,'jsd-', uniqueName, '/'].join(''),
jsdir = dir + 'js/',
cssdir = dir + 'css/',
html = rewritePaths($source);
// create tmp directory
fs.mkdirSync(dir);
console.log('creating index.html');
// save index file
fs.writeFileSync(dir + 'index.html', html);
// create js directory
fs.mkdirSync(jsdir);
// Save JS files
js.forEach(function(jsfile){
var filename = jsfile.split('/').reverse()[0];
request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
console.log('creating ' + filename);
});
// create css directory
fs.mkdirSync(cssdir);
// Save CSS files
css.forEach(function(cssfile){
var filename = cssfile.split('/').reverse()[0];
request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
console.log('creating ' + filename);
});
// write zip file to /tmp
writeZip(dir,uniqueName);
// https://npmjs.org/package/node-zip
// http://stuk.github.com/jszip/
});
}).on('error', function(e) {
console.log("Got error: " + e.message);
});