How to get Google's Knowledge Graph "People also search for" content

2019-05-29 20:10发布

问题:

I'm trying to get Google's "People also search for" content from the search results page, and I'm using PhantomJS to scrape the results. However, the Knowledge Graph panel I need does not show up in the body I get back. Does anyone know what I could do to have it included?

Here's the code:

var phantom = require('phantom');

// Opens google.com/ncr, logs the page title, then opens a Google search
// results page and logs whatever the in-page evaluation of document.body
// yields, before shutting PhantomJS down.
phantom.create(function (ph) {
    ph.createPage(function (page) {
        var NCR_URL = "http://www.google.com/ncr";
        var SEARCH_URL = "https://www.google.com/search?gws_rd=ssl&site=&source=hp&q=google&oq=google";

        // These two functions run inside the loaded page, not in Node.
        var readTitle = function () { return document.title; };
        // NOTE(review): this hands back a DOM node; confirm how the bridge
        // serializes it before relying on the logged value.
        var readBody = function () { return document.body; };

        // Final step: print the evaluated body and stop PhantomJS.
        function onBodyRead(body) {
            console.log(body);
            ph.exit();
        }

        // Third step: the search results page has been requested.
        function onSearchOpened(status) {
            console.log("opened google Search Results ", status);
            page.evaluate(readBody, onBodyRead);
        }

        // Second step: report the title, then move on to the search page.
        function onTitleRead(title) {
            console.log('Page title is ' + title);
            page.open(SEARCH_URL, onSearchOpened);
        }

        // First step: the /ncr page has been requested.
        function onNcrOpened(status) {
            console.log("opened google NCR ", status);
            page.evaluate(readTitle, onTitleRead);
        }

        page.open(NCR_URL, onNcrOpened);
    });
});

PS: I have to request `google.com/ncr` first to force-load google.com's results, because I'm based in Germany and the German version does not have the Knowledge Graph. Maybe the requests above can also be simplified...

回答1:

It may be that the page's js hasn't finished by the time you get the body. Try adding this into your page.evaluate.

// Template: delay the page logic so the page's own scripts have time to finish.
// 1000 ms is only a starting value; adjust it as needed.
window.setTimeout( function() { <your page logic> }, 1000);

You may need to fiddle with the time.

You can also use jQuery by calling page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js', function(){<your logic>}); after opening the page but before running the evaluate.



回答2:

Found the answer: I had to manually set the userAgent to that of a regular browser such as Chrome.

Modified code below:

var phantom = require('phantom');

// Scrapes a Google search results page with PhantomJS and prints the markup
// of its <body>.
//
// Flow: set a desktop-browser user agent, open google.com/ncr, log the page
// title, open the search results page, log the body's HTML, then exit
// PhantomJS. If either page fails to load, the error is reported and
// PhantomJS is shut down instead of evaluating an empty page.
phantom.create(function (ph) {
    ph.createPage(function (page) {
        // Reports a failed page load and stops PhantomJS so the process
        // does not hang or go on to scrape a page that never loaded.
        function abort(what, status) {
            console.error("could not open " + what + " (status: " + status + ")");
            ph.exit();
        }

        // Present a Chrome user agent: this was the change that made the
        // Knowledge Graph content appear in the scraped page.
        page.set('settings.userAgent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1');
        page.open("http://www.google.com/ncr", function (status) {
            console.log("opened google NCR ", status);
            if (status !== 'success') {
                abort("google NCR", status);
                return;
            }
            page.evaluate(function () { return document.title; }, function (result) {
                console.log('Page title is ' + result);
                page.open("https://www.google.com/search?gws_rd=ssl&site=&source=hp&q=google&oq=google", function (status) {
                    console.log("opened google Search Results ", status);
                    if (status !== 'success') {
                        abort("google Search Results", status);
                        return;
                    }
                    // Return a string rather than the DOM node itself: values
                    // coming back from page.evaluate must be JSON-serializable,
                    // and DOM nodes are not.
                    page.evaluate(function () { return document.body.innerHTML; }, function (result) {
                        console.log(result);
                        ph.exit();
                    });
                });
            });
        });
    });
});