I'm having trouble clicking all JavaScript based links in a DOM and saving the
output. The links have the form
<a id="html" href="javascript:void(0);" onclick="goToHtml();">HTML</a>
The following code works great for a single link:
// Click one JavaScript-driven link, wait until the URL changes, then save the page to disk.
var casper = require('casper').create();
var fs = require('fs');
var firstUrl = 'http://www.testurl.com/test.html';
var css_selector = '#jan_html';

// Polled by waitFor(): true once the current URL differs from the start URL.
function hasLeftStartPage() {
    return this.getCurrentUrl() !== firstUrl;
}

// Logs the new URL and writes the page content to "<title with underscores>.html".
function saveCurrentPage() {
    console.log(this.getCurrentUrl());
    var file_title = this.getTitle().replace(/ /g, '_') + '.html';
    fs.write(file_title, this.getPageContent());
}

casper.start(firstUrl);

casper.thenClick(css_selector, function () {
    console.log("whoop");
});

casper.waitFor(hasLeftStartPage, saveCurrentPage);

casper.run();
However, how can I get this to work with a selector of "a", clicking all
available links and saving the content of each? I'm not sure how to get clickWhileSelector to remove nodes from the selector, as is done in the question "Click on all links matching a selector".
I have this script that first gets all links from a page and saves their 'href' attributes to an array, then iterates over this array, opening each link one by one and echoing its URL:
// Collect the href attribute of every <a> on the start page, then open each
// one in turn and echo the URL that was reached.
var casper = require('casper').create({
    // `verbose` is the boolean that prints log messages in real time and
    // `logLevel` is the minimum level to record ("debug" is the most detailed).
    // "verbose" is not a log level and Casper has no `debug` option.
    verbose: true,
    logLevel: "debug"
});
var links;
casper.start('http://localhost:8000');
casper.then(function getLinks(){
    // The function passed to evaluate() runs inside the page; its return
    // value is what ends up in `links`.
    links = this.evaluate(function(){
        var anchors = document.getElementsByTagName('a');
        return Array.prototype.map.call(anchors, function(link){
            return link.getAttribute('href');
        });
    });
});
casper.then(function(){
    // Queue one navigation step per collected href.
    this.each(links, function(self, link){
        self.thenOpen(link, function(){
            this.echo(this.getCurrentUrl());
        });
    });
});
casper.run(function(){
    this.exit();
});
rusln's answer works great if all the links have a meaningful href
attribute (an actual URL). If you want to click every `<a>` element,
including those that only trigger a JavaScript function, you may need to iterate over the elements some other way.
I propose using the XPath generator from stijn de ryck for an element.
- You can then sample all XPaths that are on the page.
- Then you open the page for every `<a>` that you have the XPath for and click it by XPath.
- Wait a little if it is a single page application
- Do something
// This script uses `casper` but the original never created it; create the
// instance here so the script runs on its own.
var casper = require('casper').create();
var startURL = 'http://localhost:8000',
    xPaths, // assigned in the getLinks step from the result of evaluate()
    x = require('casper').selectXPath; // used below to pass XPath strings to thenClick()
casper.start(startURL);
casper.then(function getLinks(){
xPaths = this.evaluate(function(){
// copied from https://stackoverflow.com/a/5178132/1816580
/**
 * Builds an XPath expression for a DOM element by walking from the element
 * up through its ancestors and prepending one path segment per level.
 *
 * Segment rules, per element:
 *  - has an id that occurs exactly once in the document: the path is
 *    anchored with id("...") and returned immediately;
 *  - has an id that is not unique: tag[@id="..."];
 *  - has a class attribute: tag[@class="..."] (no positional index is
 *    added, so siblings sharing the same class yield the same segment);
 *  - otherwise: tag[n], where n is 1 plus the number of preceding siblings
 *    with the same localName.
 *
 * @param {Element} elm - element to describe; a falsy or non-element value yields null.
 * @returns {?string} the XPath string, or null when no segment was produced.
 */
function createXPathFromElement(elm) {
    var allNodes = document.getElementsByTagName('*');
    var segs = [];
    // Every loop variable is declared here so nothing leaks into the page's
    // global scope (the original assigned `i` and `sib` without declaring them).
    var uniqueIdCount;
    var n;
    var position;
    var sib;
    var tag;

    for (; elm && elm.nodeType === 1; elm = elm.parentNode) {
        tag = elm.localName.toLowerCase();
        if (elm.hasAttribute('id')) {
            // Count elements sharing this id, stopping as soon as a duplicate is seen.
            uniqueIdCount = 0;
            for (n = 0; n < allNodes.length; n++) {
                if (allNodes[n].hasAttribute('id') && allNodes[n].id === elm.id) {
                    uniqueIdCount++;
                }
                if (uniqueIdCount > 1) {
                    break;
                }
            }
            if (uniqueIdCount === 1) {
                // A unique id pins the element down; no need to climb further.
                segs.unshift('id("' + elm.getAttribute('id') + '")');
                return segs.join('/');
            }
            segs.unshift(tag + '[@id="' + elm.getAttribute('id') + '"]');
        } else if (elm.hasAttribute('class')) {
            segs.unshift(tag + '[@class="' + elm.getAttribute('class') + '"]');
        } else {
            position = 1;
            for (sib = elm.previousSibling; sib; sib = sib.previousSibling) {
                if (sib.localName === elm.localName) {
                    position++;
                }
            }
            segs.unshift(tag + '[' + position + ']');
        }
    }
    return segs.length ? '/' + segs.join('/') : null;
}
var links = document.getElementsByTagName('a');
var xPaths = Array.prototype.map.call(links, createXPathFromElement);
return xPaths;
});
});
// Queues the steps for a single link: reload the start page, click the
// element addressed by the XPath, pause, then report the resulting URL.
function clickAndReport(instance, linkXPath) {
    instance.thenOpen(startURL);
    instance.thenClick(x(linkXPath));
    // waiting some time may be necessary for single page applications
    instance.wait(1000);
    instance.then(function reportUrl() {
        // do something meaningful here
        this.echo(this.getCurrentUrl());
    });
    // Uncomment the following line in case each click opens a new page instead of staying at the same page
    //instance.back()
}

// Schedule the click sequence once for every XPath that was collected.
casper.then(function clickEveryLink() {
    this.each(xPaths, clickAndReport);
});

// Start the queued steps and quit when they have all finished.
casper.run(function finish() {
    this.exit();
});