I have created a script in phantomJs. What it does is, it fetches some elements from a specific page which works fine.
Here is the code:
var page = new WebPage(), testindex = 0, loadInProgress = false, fs = require('fs'), i = 0, j = 0, k = 0;
page.onConsoleMessage = function(msg) { console.log(msg); };
page.onLoadStarted = function() { loadInProgress = true; console.log("load started"); };
page.onLoadFinished = function() { loadInProgress = false; console.log("load finished"); };
// Sets the User Agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36';
// Enable/Disable Javascript
// page.settings.javascriptEnabled = false;
var steps = [
function() { //Load Page
page.open("http://www.example.com/mobiles/");
page.injectJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js");
},
function() { //Fetch Products
page.onCallback = function(result) {
var fs = require('fs');
fs.write('product-list.csv', result, 'w+');
};
page.evaluate(function() {
var arr_mainList = new Array();
var arr_innerList = new Array();
try {
for (i = 0; i < (document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length); i++) {
arr_mainList.push(document.getElementsByClassName("lap_thu_box")[i]);
window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].textContent + ", ");
//window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href + ", ");
var myWindow = window.open(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href);
console.log(myWindow.getElementsByClassName("item_desc")[0].textContent);
myWindow.close();
if (arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href.length > 43) {
var innerURL = arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href;
console.log(innerURL);
}
window.callPhantom(arr_mainList[i].getElementsByTagName("img")[0].getAttribute("data-original") + ", ");
arr_innerList.push(arr_mainList[i]);
for (j = 0; j < (document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length); j++) {
if ((j+1) < document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length) {
window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + " | ");
}
else {
window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + ", ");
}
};
//window.callPhantom(", ");
window.callPhantom(arr_innerList[i].getElementsByClassName("cat_price")[0].textContent.replace(/,/g, ""));
window.callPhantom("\n");
};
loadInProgress = true;
console.log("Successful.");
}
catch(ex) {
console.log("Failed: " + ex);
}
});
}
];
interval = setInterval(function() {
if (!loadInProgress && typeof steps[testindex] == "function") {
console.log("step " + (testindex + 1));
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
setTimeout(function() {
//fs.write('product-list.html', page.content, 'w');
console.log("test complete!");
phantom.exit();
}, 100);
}
}, 5000);
Now if i run the program I get all the information in csv file. Except when it goes to window.open, phantomJs stops. I know i can't open a new page inside page.evaluate. But i need to fetch the product description and add it to csv file in place of product link. I have been searching for hours now, any help would be nice. Note: My limitations are that i have to use phantomJs.