Phantom.js login instagram page

2019-01-25 13:10发布

问题:

I am trying to use PhantomJS to log in to the Instagram website. My first approach was:

// First attempt: look up each login input by its `name` attribute and assign
// the credential straight to the element's `value` property.
document.querySelector("input[name='username']").value = "Username";
document.querySelector("input[name='password']").value = "Pass";

But this code doesn't change the DOM.

My second approach:

// Second attempt: select by class names instead; the first statement writes the
// `value` attribute, the second writes the `value` property.
// NOTE(review): both statements index [0], so "Pass" is written to the same
// element that received "Username" — presumably the second line was meant to
// target a different element; confirm.
document.getElementsByClassName("_kp5f7 _qy55y")[0].setAttribute("value", "Username");
document.getElementsByClassName("_kp5f7 _qy55y")[0].value = "Pass";

But when I inspect the network requests, the username and password fields are blank.

Instagram login page: https://www.instagram.com/accounts/login/

回答1:

Update: Please see edit below the answer

I don't really think that

PhantomJS can't handle that page

It's more likely that we are not impersonating a real browser well enough.

A quick search for "instagram login phantomjs" found this neat solution that works: https://github.com/awener/instagram-login-phantomjs/blob/master/phan.js

It uses PhantomJS's mechanism for simulating "real" keypresses and clicks.

Here's a copy of the script just in case.

// PhantomJS script: opens the Instagram login page, locates the username and
// password inputs and the login button, then drives them with native mouse and
// keyboard events (page.sendEvent) instead of assigning to `.value` from
// script. A screenshot is rendered after a fixed delay so the result can be
// inspected.
//
// `var` and function expressions are used on purpose: PhantomJS's JavaScript
// engine predates ES2015.
var page = require('webpage').create();
var username = "myusername";
var password = "password";
page.viewportSize = { width: 1024 , height: 600 };
// Present a desktop Chrome user agent rather than PhantomJS's default one.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36';

page.open('https://instagram.com/accounts/login/', function(status) {

    // Bail out instead of hanging forever when the page could not be loaded.
    if (status !== 'success') {
        console.log('Failed to load the login page (status: ' + status + ')');
        phantom.exit(1);
        return;
    }

    // Runs inside the page; only plain serialisable data can be returned.
    var ig = page.evaluate(function() {
        // Centre of the element's bounding box, in viewport coordinates.
        // The centre is used rather than the top-left corner so the click
        // lands inside the element and not on its edge pixel.
        function getCoords(box) {
            return {
                x: box.left + box.width / 2,
                y: box.top + box.height / 2
            };
        }

        // Returns click coordinates for the first <type> element whose `name`
        // matches, or - when no name is given - the first <type> element that
        // has a class name (used for the login button). Returns null when
        // nothing matches.
        function getPosition(type, name) {
            var elements = document.getElementsByTagName(type);
            for (var i = 0; i < elements.length; i++) {
                if (name && elements[i].name === name) {
                    return getCoords(elements[i].getBoundingClientRect());
                }
                if (!name && elements[i].className) {
                    return getCoords(elements[i].getBoundingClientRect());
                }
            }
            return null;
        }

        return {
            user: getPosition('input', 'username'),
            pass: getPosition('input', 'password'),
            login: getPosition('button')
        };
    });

    // If any control is missing, dereferencing its coordinates would throw
    // inside this callback and phantom.exit() would never be reached.
    if (!ig || !ig.user || !ig.pass || !ig.login) {
        console.log('Could not locate the username/password inputs or the login button');
        phantom.exit(1);
        return;
    }

    // Focus each field with a click, type into it, then press login.
    page.sendEvent('click', ig.user.x, ig.user.y);
    page.sendEvent('keypress', username);

    page.sendEvent('click', ig.pass.x, ig.pass.y);
    page.sendEvent('keypress', password);
    page.sendEvent('click', ig.login.x, ig.login.y);

    // Wait for the response, then capture the outcome and quit.
    setTimeout(function() {
        page.render('/path/to/screenshot.png');
        phantom.exit();
    }, 5000);

});

Edit with explanation on how to run the script on Linux

The reason this did not work on Debian/Ubuntu is SSL certificate issues.

PhantomJS has a verbose mode that reports what it is doing; enable it by running with the --debug=true CLI option. Using that, I found the cause of the problem:

[DEBUG] Network - SSL Error: "The issuer certificate of a locally looked up certificate could not be found"
[DEBUG] Network - SSL Error: "The root CA certificate is not trusted for this purpose"
[DEBUG] Network - Resource request error: QNetworkReply::NetworkError(SslHandshakeFailedError) ( "SSL handshake failed" ) URL: "https://instagramstatic-a.akamaihd.net/h1/scripts/polyfills/es5-sham.min.js/fc3c22cf2d67.js"
...

To avoid this type of problem, you just have to run PhantomJS with another CLI argument telling it to ignore SSL errors:

/path/to/phantomjs --ignore-ssl-errors=true /path/to/script.js