Using the 'webpage' Phantom module in node

2019-03-10 06:29发布

I am trying to wrap a PhantomJS script in a node.js process. The phantom script grabs a url from the arguments provided on the command line and outputs a pdf (much like the rasterize.js example included with the PhantomJS install).

The phantom script I have works fine, it's just my employer wants a node script if possible. No problem, I can use the node-phantom node module to wrap it.

But now I've hit a stumbling block, my phantom script has:

// 'webpage' is a module built into PhantomJS; it is resolved by the PhantomJS runtime, not by npm.
var page = require('webpage').create();

So node.js tries to find a module called 'webpage', but the 'webpage' module is built into the phantom install, so node can't find it. As far as I can tell, there is no npm module called 'webpage'.

'webpage' is used like this:

// Load `address` and, if it opened successfully, render it to `output`.
page.open(address, function (status) {

    if (status !== 'success') {

        // --- Error opening the webpage ---
        // NOTE(review): phantom.exit() is not called on this path, so the script presumably keeps running after the failure — confirm.
        console.log('Unable to load the address!');

    } else {

        // --- Wait 200 ms once, then render and exit ---
        // (a single delayed call, not a loop)
        window.setTimeout(function () {
            page.render(output);
            phantom.exit();
        }, 200);
    }
});

where address is the url specified on the command line and output is another argument, the name and type of the file.

Can anyone help me out? This is quite an abstract one so I'm not expecting much if I'm honest, worth a try though.

Thanks.

EDIT - Approx 2hrs later

I now have this which throws out a PDF:

// Node wrapper around PhantomJS (via the node-phantom bridge) that loads a URL and renders it to a file.
var phanty = require('node-phantom');

// NOTE(review): 'system' is a PhantomJS built-in module; under Node this require presumably fails unless a
// package of the same name is installed. process.argv looks like the Node equivalent — confirm.
var system = require('system');

phanty.create(function(err,phantom) {

    // NOTE(review): `err` is never checked in this callback — confirm `phantom` is always defined.
    //var page = require('webpage').create();

    var address;
    var output;
    var size;

    // Accept between 4 and 6 entries in system.args (inclusive); anything else is a usage error.
    if (system.args.length < 4 || system.args.length > 6) {

        // --- Bad Input ---

        console.log('Wrong usage, you need to specify the BLAH BLAH BLAH');
        phantom.exit(1);

    } else {

        phantom.createPage(function(err,page){

            // --- Set Variables, Web Address, Output ---
            address = system.args[2];
            output = system.args[3];
            // NOTE(review): `page` here comes from the bridge's createPage, not from PhantomJS's own
            // webpage module. Plain assignments such as this one (and paperSize / zoomFactor below) may
            // never reach the PhantomJS process, which would explain the wrong output size; the
            // phantomjs-node answer further down uses page.set("paperSize", ...) instead — confirm
            // the equivalent call in node-phantom.
            page.viewportSize = { width: 600, height: 600 };


            // --- Paper size: only when system.args has more than 4 entries and the output name ends in ".pdf" ---
            if (system.args.length > 4 && system.args[3].substr(-4) === ".pdf") {

                // --- PDF Specific ---
                // A "W*H" argument yields an explicit width/height with a 0px margin; anything else is
                // passed through as a named format (portrait, 1cm margin).
                size = system.args[4].split('*');
                page.paperSize = size.length === 2 ? { width: size[0], height: size[1], margin: '0px' }
                                                   : { format: system.args[4], orientation: 'portrait', margin: '1cm' };
            }

            // --- Zoom Factor (Should Never Be Set) ---
            // system.args[5] is used as-is (no numeric conversion) when present; otherwise the factor is 1.
            if (system.args.length > 5) {
                page.zoomFactor = system.args[5];
            } else {
                page.zoomFactor = 1;
            }

            //----------------------------------------------------

            page.open(address ,function(err,status){

                if (status !== 'success') {

                    // --- Error opening the webpage ---
                    // NOTE(review): phantom.exit() is not called on this path — confirm the PhantomJS process is not left running.
                    console.log('Unable to load the address!');

                } else {

                    // --- Render on the next tick, then exit ---
                    // NOTE(review): process.nextTick takes no delay; the 200 is not a timeout, so unlike the
                    // setTimeout in the PhantomJS version there is no 200 ms wait before rendering.
                    // NOTE(review): phantom.exit() is called right after page.render() with no callback; if render
                    // is asynchronous over the bridge (the phantomjs-node answer passes it a callback), exit may race it — confirm.
                    process.nextTick(function () {
                        page.render(output);
                        phantom.exit();
                    }, 200);
                }

            });

        });
    }
});

But! It's not the right size! The page object created using the phantom 'webpage' create() function looks like this before it's passed the URL:

phantom returned page

Whereas mine in my node script, looks like this:

my page

Is it possible to hard code the properties to achieve A4 formatting? What properties am I missing?

I'm so close!

2条回答
戒情不戒烟
2楼-- · 2019-03-10 07:17

Using https://github.com/sgentle/phantomjs-node I have made an A4 page in nodejs using phantom with the following code:

// Render http://www.google.com to "google.pdf" as an A4 portrait page using phantomjs-node.
// NOTE(review): `phantom` is presumably the result of require('phantom') — the require line is not shown.
phantom.create(function(ph){
    ph.createPage(function(page) {
        // Page properties are set through page.set(name, value) rather than by direct assignment.
        page.set("paperSize", { format: "A4", orientation: 'portrait', margin: '1cm' });
        // NOTE(review): `status` is not checked before rendering.
        page.open("http://www.google.com", function(status) {
            // ph.exit() is called from inside render's callback, not immediately after issuing the render.
            page.render("google.pdf", function(){
                console.log("page rendered");
                ph.exit();
            })
        })
    })

});

Side Note:

The page.set() function takes any variable that you would set in the rasterize.js example. See how paperSize is set above and compare it to the relevant lines in rasterize.js.

查看更多
老娘就宠你
3楼-- · 2019-03-10 07:25

It should be something like:

// Minimal node-phantom skeleton: create a PhantomJS instance, create a page, then open a URL.
// Each callback receives an error as its first argument and the result as its second.
var phantom=require('../node-phantom');
phantom.create(function(error,ph){
  ph.createPage(function(err,page){
    // `url` is not defined in this snippet; the caller must supply it.
    page.open(url ,function(err,status){
      // do something
    });
  });
});

Your confusion here is because you want to reuse the same concepts and metaphors from your PhantomJS script. It does not work that way. I suggest that you spend some time studying the included tests of node-phantom, see https://github.com/alexscheelmeyer/node-phantom/tree/master/test.

查看更多
登录 后发表回答