Why doesn't this PhantomJS code iterate through the whole array?

2019-03-02 04:50发布

问题:

When I run this code, it saves an infinite number of files, each containing the source of the first page ("http://site1.com"). Why doesn't it go through the other links, and why doesn't it stop?

// URLs to visit, in order; next_page() consumes this list from the front.
var args = ["http://site1.com", "http://site2.com", "http://site3.com"];

// File-system module; its write() is used to save each page's source.
var fs = require('fs');
// Counter used to build the output file name (i + '.html').
var i = 0;

// Opens `file` with page.open and, in the open callback, attempts to save the
// page source to "<i>.html", then schedules next_page() after 100 ms.
// NOTE(review): `page` is not defined anywhere in this snippet; presumably it
// is created elsewhere (e.g. via require('webpage').create()) - confirm.
function handle_page(file){
    page.open(file,function(){
        // NOTE(review): page.evaluate runs its function in the sandboxed page
        // context, which cannot see outer variables such as fs, i or page, so
        // this write presumably does not behave as intended - confirm.
        page.evaluate(function(){
            fs.write(i + '.html', page.content, 'w');
        });
        // Move on to the next URL after a short delay.
        setTimeout(next_page,100);
   });
}

// Takes the next URL off the front of args, increments the file counter and
// passes the URL to handle_page(); calls phantom.exit(0) once the list is empty.
function next_page(){
   var file = args.shift();
   // There is no return after phantom.exit(0), so the statements below still
   // follow it in this function. NOTE(review): confirm whether phantom.exit
   // halts execution immediately; if not, handle_page(undefined) is called.
   if(!file){ phantom.exit(0); }
   i++
   handle_page(file);
}
// Kick off processing with the first URL.
next_page();

回答1:

page.evaluate() is the sandboxed page context in PhantomJS. It doesn't have access to any variable defined outside. So you cannot reference fs or page inside of it and you don't need to, because page.content is available in the outer context:

// Write the page source from the outer (PhantomJS) context, where fs, i and
// page.content are accessible, then schedule the next URL after 100 ms.
page.open(file,function(){
    fs.write(i + '.html', page.content, 'w');
    setTimeout(next_page,100);
});

The remaining code looks fine.