Phantomjs to scrape webpage function not working

2019-02-26 02:33发布

问题:

I am using phantomjs to learn how to scrape a webpage, so far I have developed the following code below.. I know that I am able to connect to the site, but I am unable to get data from the table at all..am I on the right track?

My goal is to scrape data from the table on this site. I also understand that I need to use includeJs or injectJs to wait for the table to load else I would be scraping an empty html page. I am trying to put these concepts together, but am stuck for over 3 days now..please give some guidance..

var page = require('webpage').create();
console.log('Welcome to scraping...');

page.open('http://www.inc.com/inc5000/index.html', function(status) {
    page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
  if (status !== 'success') {
    console.log('Unable to access network');
  } else {
    var ua = page.evaluate(function() {
      var DATA = [];
      var res= document.getElementById((this).find('td')).eq(0).text();
      console.log(res); //**See output below for what is happening here**
        $('tr.ng-scope').each(function(){
            var $tds = $(this).find('td');

            DATA.push({
                rank:     $tds.eq(0).text(),
                company:  $tds.eq(1).text(),
                growth:   $tds.eq(2).text(),
                revenue:  $tds.eq(3).text(),
                industry: $tds.eq(4).text()
            });
        });

        console.log(DATA); 
      console.log('Got into func');  
      //return document.getElementById('myagent').textContent;
    });
    console.log('Got out of function');
    console.log(ua);
  }
  phantom.exit();
  });

});

The output I am getting is:

Welcome to scraping...

ReferenceError: Can't find variable: angular

http://stage.inc.com/js/Inc5000ListApp.js?UPDATE1:2
http://www.inc.com/inc5000/index.html:2485

TypeError: 'null' is not an object (evaluating 'document.getElementById(this.find('td')).eq')

phantomjs://webpage.evaluate():3 //what does this mean??

phantomjs://webpage.evaluate():20

phantomjs://webpage.evaluate():20

Got out of function

null