I am using phantomjs to learn how to scrape a webpage, so far I have developed the following code below.. I know that I am able to connect to the site, but I am unable to get data from the table at all..am I on the right track?
My goal is to scrape data from the table on this site. I also understand that I need to use includeJs or injectJs to wait for the table to load else I would be scraping an empty html page. I am trying to put these concepts together, but am stuck for over 3 days now..please give some guidance..
var page = require('webpage').create();
console.log('Welcome to scraping...');
page.open('http://www.inc.com/inc5000/index.html', function(status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
if (status !== 'success') {
console.log('Unable to access network');
} else {
var ua = page.evaluate(function() {
var DATA = [];
var res= document.getElementById((this).find('td')).eq(0).text();
console.log(res); //**See output below for what is happening here**
$('tr.ng-scope').each(function(){
var $tds = $(this).find('td');
DATA.push({
rank: $tds.eq(0).text(),
company: $tds.eq(1).text(),
growth: $tds.eq(2).text(),
revenue: $tds.eq(3).text(),
industry: $tds.eq(4).text()
});
});
console.log(DATA);
console.log('Got into func');
//return document.getElementById('myagent').textContent;
});
console.log('Got out of function');
console.log(ua);
}
phantom.exit();
});
});
The output I am getting is:
Welcome to scraping...
ReferenceError: Can't find variable: angular
http://stage.inc.com/js/Inc5000ListApp.js?UPDATE1:2
http://www.inc.com/inc5000/index.html:2485TypeError: 'null' is not an object (evaluating 'document.getElementById(this.find('td')).eq')
phantomjs://webpage.evaluate():3 //what does this mean??
phantomjs://webpage.evaluate():20
phantomjs://webpage.evaluate():20
Got out of function
null