Using PhantomJS to embed all images of a webpage p

2019-01-07 21:31发布

I'm trying to convert a webpage into a single file by embedding all the images (and other external resources once I passed this point). Here's how I run PhantomJs:

./phantomjs --web-security=false ./embed_images.js http://localhost/index.html > output.txt

And here's the embed_images.js:

var page = require('webpage').create(),
    system = require('system'),
    address;

if (system.args.length === 1) {
    console.log('Usage: embed_images.js <some URL>');
    phantom.exit(1);
}
else {
    page.onConsoleMessage = function(msg) {
        console.log(msg);
    };
    address = system.args[1];
    page.open(address, function(status) {
        page.evaluate(function() {
            function embedImg(org) {
                var img = new Image();
                img.src = org.src;
                img.onload = function() {
                    var canvas = document.createElement("canvas");
                    canvas.width = this.width;
                    canvas.height = this.height;

                    var ctx = canvas.getContext("2d");
                    ctx.drawImage(this, 0, 0);

                    var dataURL = canvas.toDataURL("image/png");

                    org.src = dataURL;
                    console.log(dataURL);
                }
            }
            var imgs = document.getElementsByTagName("img");
            for (var index=0; index < imgs.length; index++) {
                embedImg(imgs[index]);
            }
        });
        phantom.exit()
    });
}

When I run the mentioned command, it results in a file like this:

Unsafe JavaScript attempt to access frame with URL  from frame with URL file://./embed_images.js. Domains, protocols and ports must match.
Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

There's multiple instances of the above error message. To test what's wrong, I ran the below code in my Chromium's console:

function embedImg(org) {
    var img = new Image();
    img.src = org.src;
    img.onload = function() {
        var canvas = document.createElement("canvas");
        canvas.width = this.width;
        canvas.height = this.height;

        var ctx = canvas.getContext("2d");
        ctx.drawImage(this, 0, 0);

        var dataURL = canvas.toDataURL("image/png");

        org.src = dataURL;
        console.log(dataURL);
    }
}
var imgs = document.getElementsByTagName("img");
for (var index=0; index < imgs.length; index++) {
    embedImg(imgs[index]);
}

And it works just fine (my webpage doesn't reference any cross-domain images)! It will embed all the images into the HTML page. Does anyone know what might the problem be?

Here's my index.html file's content:

<!DOCTYPE html >
<html>
<head>
<meta charset="utf-8" />
</head>

<body>
<img src="1.png" >
</body>
</html>

And actual output (output.txt):

Unsafe JavaScript attempt to access frame with URL  from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL  from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL file://./embed_images.js. Domains, protocols and ports must match.

The strange thing is that while I've got just one image on my page, there are numerous error messages!

I'm using phantomjs-1.9.8-linux-x86_64.

1条回答
相关推荐>>
2楼-- · 2019-01-07 21:59

Those notices are printed when phantom.exit is called. They don't cause any trouble, but are not nice when you need a clean PhantomJS output. In your case you can suppress the notices by "asynchronizing" phantom.exit like this:

setTimeout(function(){
    phantom.exit();
}, 0);

I think the reason this is happening is because a large string is passed from the page context when phantom tries to exit.

I created a github issue for this.

查看更多
登录 后发表回答