can phantomjs work with node.js?

2019-01-07 11:45发布

问题:

I would like to use PhantomJS in my Node.js script. There is a phantomjs-node library, but unfortunately the author used this weird CoffeeScript code to explain what he's doing:

# Load the 'phantom' module.
phantom = require 'phantom'

# Each step hands its result to the next callback:
# instance (ph) -> page -> load status -> evaluated title.
phantom.create (ph) ->
  ph.createPage (page) ->
    page.open "http://www.google.com", (status) ->
      console.log "opened google? ", status
      # The first function returns document.title; its result is delivered
      # to the second callback as `result`.
      page.evaluate (-> document.title), (result) ->
        console.log 'Page title is ' + result
        # Called from the innermost callback, after the title has been logged.
        ph.exit()

now if I were to use phantomjs directly with javascript, it would look something like this:

// Script written against PhantomJS's own 'webpage' module.
var page = require('webpage').create();

// Callback handed to page.open; it receives the load status (unused here).
function reportTitle(status) {
    // The evaluated function returns document.title, which is then logged.
    var title = page.evaluate(function () {
        return document.title;
    });
    console.log('Page title is ' + title);
}

page.open(url, reportTitle);

So basically I'm trying to write the equivalent of the first snippet above in normal JavaScript (by reading the CoffeeScript documentation). This is what I did:

// file name: phantomTest.js

// Attempted plain-JavaScript translation of the CoffeeScript phantomjs-node example.
var phantom = require('phantom');

phantom.create(function(ph) {
    ph.createPage(function(page) {
        page.open('http://www.google.com', function(status) {
            console.log('opened google?', status);
            // NOTE(review): the CoffeeScript example passes a second callback to
            // page.evaluate to receive the result; here the return value is used
            // directly, so `title` may not hold the page title — confirm.
            var title = page.evaluate(function() {
                return document.title;
            });
            console.log('page title is ' + title);              
        });
    });
    // NOTE(review): this runs as soon as createPage has been called, not from the
    // innermost callback as in the CoffeeScript example — confirm whether the
    // instance is shut down before the page callbacks get a chance to fire.
    ph.exit();
});

unfortunately it's not working! If I run

node phantomTest.js

in the shell, nothing happens: nothing is printed and the process never exits. Any ideas?

update:

I just read this in the phantomjs faq:

Q: Why is PhantomJS not written as Node.js module?

A: The short answer: "No one can serve two masters."

A longer explanation is as follows.

As of now, it is technically very challenging to do so.

Every Node.js module is essentially "a slave" to the core of Node.js, i.e. "the master". In its current state, PhantomJS (and its included WebKit) needs to have full control (in a synchronous manner) over everything: event loop, network stack, and JavaScript execution.

If the intention is just about using PhantomJS right from a script running within Node.js, such a "loose binding" can be achieved by launching a PhantomJS process and interact with it.

mmm.. could this have something to do with it? but then that whole library wouldn't make sense!

update 2:

I found this code on the web that does the same thing:

// JavaScript form of the CoffeeScript example: every callback returns the value of
// the call it makes, and the result of page.evaluate arrives in a second callback.
var phantom = require('phantom');
phantom.create(function(ph) {
  return ph.createPage(function(page) {
    return page.open("http://www.google.com", function(status) {
      console.log("opened google? ", status);
      // First argument: function returning document.title.
      // Second argument: callback that receives that value as `result`.
      return page.evaluate((function() {
        return document.title;
      }), function(result) {
        console.log('Page title is ' + result);
        // Exit only after the title has been logged.
        return ph.exit();
      });
    });
  });
});

unfortunately that's not working either.. same result!

回答1:

phantomjs-node isn't an officially supported npm package for PhantomJS. Instead, it implements a "nauseously clever bridge" between Node and Phantom by creating a web server that uses WebSockets as an IPC channel between Node and Phantom. I'm not making this up:

So we communicate with PhantomJS by spinning up an instance of ExpressJS, opening Phantom in a subprocess, and pointing it at a special webpage that turns socket.io messages into alert() calls. Those alert() calls are picked up by Phantom and there you go!

So I wouldn't be surprised if phantomjs-node works, doesn't work, fails silently, or fails spectacularly. Nor would I expect anyone other than the author of phantomjs-node to be able to troubleshoot phantomjs-node.

The answer to your original question is the answer from the phantomjs faq: No. Phantom and node have irreconcilable differences. Both expect to have complete control over fundamental low-level functionality like the event loop, the network stack, and JS execution so they can't cooperate within the same process.



回答2:

You could also give phridge a try. Your example would've been written like this:

// phridge is used below, so it has to be loaded before the first call.
var phridge = require('phridge');

// Holds the spawned PhantomJS process so the final step can dispose of it.
var phantom;

// spawn a new PhantomJS process
phridge.spawn()
    .then(function (ph) {
        phantom = ph;
        return phantom.openPage("http://www.google.com");
    })
    .then(function (page) {
        return page.run(function () {
            // this function runs inside PhantomJS with this bound to a webpage instance
            return this.title;
        });
    })
    .then(function (title) {
        console.log('Page title is ' + title);
    })
    .then(null, function (err) {
        // report the failure instead of leaving the rejection unhandled
        console.error(err);
    })
    .then(function () {
        // terminates the process cleanly, whether or not an earlier step failed;
        // `phantom` is still undefined if spawn() itself was rejected
        if (phantom) {
            return phantom.dispose();
        }
    });


回答3:

I am now the new maintainer of the phantom-node package. It doesn't use CoffeeScript anymore. You can do something like this:

var phantom = require('phantom');

// Kept outside the chain so later steps and the failure handler can reach them.
var instance;
var currentPage;

// Each step returns its promise, so the chain runs strictly in order and a
// rejection from any step reaches the single failure handler at the end.
phantom.create()
  .then(function(ph) {
    instance = ph;
    return ph.createPage();
  })
  .then(function(page) {
    currentPage = page;
    return page.open('https://stackoverflow.com/');
  })
  .then(function(status) {
    console.log(status);
    return currentPage.property('content');
  })
  .then(function(content) {
    console.log(content);
    return currentPage.close();
  })
  .then(function() {
    return instance.exit();
  }, function(err) {
    console.error(err);
    // Still shut the PhantomJS process down so a failed step does not leave it
    // running; `instance` is undefined only if create() itself was rejected.
    if (instance) {
      return instance.exit();
    }
  });

The new version is much faster and more resilient. It also doesn't use WebSockets anymore.



回答4:

Change your code to this, and it will work:

 // Callback-style version: the title is read in page.evaluate's second callback,
 // and ph.exit() is called only from there, after the title has been logged.
 var phantom = require('phantom');
 phantom.create(function(ph) {
   ph.createPage(function(page) {
     page.open("http://www.google.com", function(status) {
       console.log("opened google? ", status);
       // First argument: function returning document.title.
       // Second argument: callback that receives that value as `result`.
       page.evaluate((function() {
         return document.title;
       }), function(result) {
         console.log('Page title is ' + result);
         ph.exit();
       });
     });
   });
 });


回答5:

You could just ditch PhantomJS like I did, because it was too much of a pain with these wrappers not working well, and go with Zombie.js, which is pretty popular too.



回答6:

Seems this is working..

var phantom = require('phantom');

phantom.create().then(function(ph) {
  // Returning every inner promise lets the handlers below see a failure from
  // any step instead of leaving it as an unhandled rejection.
  return ph.createPage().then(function(page) {
    return page.open('https://stackoverflow.com/').then(function(status) {
      console.log(status);
      return page.property('content');
    }).then(function(content) {
      console.log(content);
      return page.close();
    });
  }).then(function() {
    return ph.exit();
  }, function(err) {
    console.error(err);
    // Shut the PhantomJS process down even when a step failed.
    return ph.exit();
  });
}).then(null, function(err) {
  // Reached when create() or exit() is rejected.
  console.error(err);
});

But I am trying to generate an HTML page that uses an external script file, and I am unable to inject the script. I tried the following; the callback passed in the line page.injectJs('./jQuery.min.js',function() { is never invoked:

// Attempt to inject a local script into a freshly created page and print its content.
var phantom = require('phantom');

    phantom.create().then(function(ph) {
      ph.createPage().then(function(page) {
        // NOTE(review): every other call in this snippet consumes its result through
        // .then(); injectJs is the only one given a callback argument. Presumably it
        // should be consumed with .then() as well — confirm against the phantom API.
        page.injectJs('./jQuery.min.js', function() {
          page.property('content').then(function(content) {
            console.log(content);
            page.close();
            ph.exit();
          });
        });
      });
    });


回答7:

I experienced the same problems as you did, and apparently there is a known issue with phantomjs-node and newer versions of Node.js. It seems to have stopped working somewhere around Node 0.9.3, according to the comments in the issue. So until that has been resolved, you either have to downgrade Node.js, try a different module such as node-phantom, or just use exec/spawn.