Why am I not receiving phantomjs page response?

2019-09-18 07:48发布

问题:

I'm trying to access some QUnit test urls using phantomjs (version 1.9.1, on Windows). I'm behind a proxy at a corporate site but the URLs I'm trying to access are being served from my local development workstation, plus I've even tried accessing the same URLs with two other browsers (Hv3 and Dooble) without the necessary proxy settings, and they get an HTML response, even if they can't execute the QUnit javascript.

So I've even tried adjusting the javascriptEnabled setting (plus another couple of settings, see code below) to false to try to just get the raw HTML, but to no avail. I've wrapped my call to page.open in a try/catch but apparently this is not because of an exception; rather a console.log statement immediately before the final phantom.exit() statement gets executed.

Furthermore I've followed recommendations from https://github.com/ariya/phantomjs/wiki/Network-Monitoring including logging from page.onResourceRequested, page.onError and page.onResourceReceived, and only the callback for onResourceRequested gets executed (see the "Request" entry in the output below). And I'm specifying the --proxy-type=none command line argument, all to no avail.

Code and output below, thanks in advance. I'm at a loss; maybe it's a phantomjs issue? Just want to rule out everything before reporting it though.

CODE:

// PhantomJS script: open a single URL and log network activity, page errors,
// and the effective page settings to the console.
var page = require('webpage').create();

// Log every outgoing resource request as pretty-printed JSON (4-space indent).
page.onResourceRequested = function (request) {
    console.log('Request ' + JSON.stringify(request, undefined, 4));
};

// Log every received resource response as pretty-printed JSON (4-space indent).
page.onResourceReceived = function (response) {
    console.log('Receive ' + JSON.stringify(response, undefined, 4));
};

// Log the error message, followed by one line per stack-trace entry
// giving that entry's file and line.
page.onError = function (msg, trace) {
    console.log(msg);
    trace.forEach(function(item) {
        console.log('  ', item.file, ':', item.line);
    })
}

// Settings adjusted while trying to obtain a response from the page.
page.settings.webSecurityEnabled = false;
page.settings.localToRemoteUrlAccessEnabled = true;
//page.settings.javascriptEnabled = false;

// Dump every page setting as "name: value".
for (var setting in page.settings) {
    console.log(setting + ": " + page.settings[setting]);
}

// The try/catch only covers exceptions thrown by the page.open() call itself;
// the callback logs 'page opened' when it is eventually invoked.
try {
    page.open('http://local.example.com:9001/test/workflow', function() {
        console.log('page opened');
    });
}
catch(xcep) {
    console.log(xcep);
}

// NOTE: these two statements run as soon as page.open() has returned; they do
// not wait for the page.open() callback above to be invoked.
console.log('before exit');
phantom.exit();

OUTPUT:

XSSAuditingEnabled: false
javascriptCanCloseWindows: true
javascriptCanOpenWindows: true
javascriptEnabled: true
loadImages: true
localToRemoteUrlAccessEnabled: true
userAgent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.34 (KHTML, like Gecko) PhantomJS/1.9.1 Safari/534.34
webSecurityEnabled: false
Request {
    "headers": [
        {
            "name": "User-Agent",
            "value": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.34 (KHTML, like Gecko) PhantomJS/1.9.1 Safari/534.34"
        },
        {
            "name": "Accept",
            "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        }
    ],
    "id": 1,
    "method": "GET",
    "time": "2013-07-12T09:49:58.262Z",
    "url": "http://local.example.com:9001/test/workflow"
}
before exit

回答1:

page.open() is asynchronous. So the line with phantom.exit() will be executed before the page has been loaded and this stops the PhantomJS process. Move the phantom.exit() inside the callback for page.open() and place it at the end of the callback (i.e., after your processing code). So basically you will have this:

// page.open() is asynchronous: the callback runs once the load has finished,
// and receives a status string ('success' or 'fail').
page.open('http://local.example.com:9001/test/workflow', function(status) {
      if (status === 'success') {
            console.log('page opened');
      } else {
            // Report a failed load instead of claiming the page opened.
            console.log('failed to open page: ' + status);
      }

      // Exit only from inside the callback, after all processing is done;
      // exiting earlier stops PhantomJS before the page has loaded.
      phantom.exit();
});


回答2:

Just trying to redeem myself here for being such a bonehead and forgetting the callback to page.open executes asynchronously. As mentioned in the comment to the answer I accepted, I was loath to call phantom.exit() from within that callback because I'd actually need to be doing it in a loop.

Well here is the way to do that with a closure, including usage of a try/catch to ensure phantom.exit() will always be called.

NOTE: rather than declaring

page = require('webpage').create()

at the top, we instead merely create a reference to the webpage module, and then pass webpage.create() into the closure upon every loop iteration.

CODE:

// PhantomJS modules: `fs` for listing the test directory, and `webpage` as a
// factory so that each loop iteration gets its own page object.
var fs = require('fs'),
    webpage = require('webpage');

// <working dir>/../public/js is where the *.test.js files are looked up.
var publicJsDir = [fs.workingDirectory, '..', 'public', 'js'].join(fs.separator),
    // Map "<name>.test.js" to "<name>"; entries that do not match become
    // undefined and are dropped by the filter.
    testNames = fs.list(publicJsDir).map(function(file){
        return (file.match(/^(.*)\.test\.js$/) || [])[1];
    }).filter(function(val) {return val});

// With no test files the loop body below never runs, so phantom.exit() would
// never be reached and PhantomJS would hang. Exit explicitly in that case.
if (!testNames.length) {
    console.log('no *.test.js files found in ' + publicJsDir);
    phantom.exit();
}

// Walk the list from the last entry down to index 0. Each iteration runs in
// its own closure so that i, testName and page are bound per test.
for (var i=testNames.length; i--; ) {
    (function(i, testName, page){
        try {
            console.log(testName); //page.open(...
            // Index 0 is the final iteration: exit once it has been handled.
            if (!i) phantom.exit();
        }
        catch(xcep) {
            // Make sure PhantomJS still terminates when a test throws.
            console.log(xcep);
            phantom.exit();
        }
    })(i, testNames[i], webpage.create());
}

OUTPUT:

workflow
workflow.cloning
utils.trains
utils.stations
models.trains.processors
models.stations
gis
fp
flow
ds