I am trying to get all links inside multiple levels of nested iframes in CasperJS. There is an existing solution that handles a single level of iframes. I tried calling getLinksFromIframes from inside getLinksFromIframes to traverse recursively, but it failed.
How should I change this code so that it works for multiple levels of iframes?
/**
 * Gathers the href of every <a> found inside each iframe of the current
 * page (one level deep only) and passes the combined list to `callback`.
 *
 * Must be invoked with a Casper instance as `this`.
 *
 * @param {Function} callback - Invoked through `this.then`, with `this`
 *   bound to the step context and the collected hrefs as its only argument.
 */
function getLinksFromIframes( callback ) {
    var collected = [];

    // Evaluated in the page: yields the positional index of each iframe.
    var frameIndexes = this.evaluate( function() {
        var total = document.querySelectorAll("iframe").length;
        var indexes = [];
        for (var n = 0; n < total; n++) {
            indexes.push( n );
        }
        return indexes;
    });

    for (var k = 0; k < frameIndexes.length; k++) {
        this.withFrame(frameIndexes[k], function() {
            var hrefs = this.getElementsAttribute( 'a', 'href' );
            collected = collected.concat( hrefs );
            console.log("works: " + collected);
        });
    }

    // Hand over the result only after the frame steps above have been queued.
    this.then(function(){
        callback.call(this, collected);
    });
}
// Holds the links delivered by the getLinksFromIframes callback. Declared
// explicitly so the later step reads a real variable rather than relying on
// an implicit global created by the assignment inside the callback.
var thelinks = [];

// NOTE(review): `casper` and `url` are not defined in this snippet; they are
// assumed to be set up earlier in the script.
casper.start(url, function () {
    getLinksFromIframes.call(this, function(links){
        thelinks = links;
        console.log("Links: " + thelinks);
    });
})
.then(function(){
    // Reads the links stored by the callback in the start step.
    console.log("Links later: " + thelinks);
})
.run();
Maybe like this:
// Hard-coded two-level example: prints the href of every link inside
// iframe#test2, which itself sits inside iframe#test of the start page.
// Add `verbose: true` and `logLevel: "debug"` to the options when
// troubleshooting.
// NOTE(review): CasperJS documents webSecurityEnabled under `pageSettings`;
// confirm that it has any effect as a top-level option.
var casper = require("casper").create({ webSecurityEnabled : false });

// Echo every 'remote.message' event so console.log calls made inside
// evaluate() become visible in the CasperJS output.
casper.on('remote.message', function (message) {
    this.echo(message);
});

casper.start("http://domu-test-2/node/1", function () {
    this.evaluate(function () {
        // Walk down one document at a time: page -> first iframe -> second iframe.
        var outerDoc = document.querySelector("iframe#test").contentDocument;
        var innerDoc = outerDoc.querySelector("iframe#test2").contentDocument;
        var anchors = innerDoc.querySelectorAll("a");
        Array.prototype.forEach.call(anchors, function (anchor) {
            console.log(anchor.href);
        });
    });
}).wait(1000).run();
It was hard, but I've created this script:
// Casper instance used by the whole script. Add `verbose: true` and
// `logLevel: "debug"` to the options when troubleshooting.
// NOTE(review): CasperJS documents webSecurityEnabled under `pageSettings`;
// confirm that it has any effect as a top-level option.
var casper = require("casper").create({ webSecurityEnabled : false });

// Script-wide accumulator for the links found in the visited frames.
var links = [];
/**
 * Returns the `href` of every <a> element in the document that `obj`
 * currently evaluates against.
 *
 * @param {Object} obj - Object exposing `evaluate(fn)`; the script passes a
 *   Casper instance.
 * @returns {Array} Whatever `obj.evaluate` returns for the page-side
 *   function, which builds an array of href values in document order.
 */
function get_links(obj) {
    return obj.evaluate(function () {
        var anchors = document.querySelectorAll("a");
        // The query result is array-like, so borrow Array's map.
        return Array.prototype.map.call(anchors, function (anchor) {
            return anchor.href;
        });
    });
}
/**
 * Returns the distinct http/https URLs contained in `arr`, in order of
 * first appearance.
 *
 * Entries that do not start with an `http:` or `https:` scheme (for
 * example `mailto:` or `javascript:` hrefs, or empty strings) are dropped.
 * The scheme test is anchored so that a string which merely contains
 * "http" somewhere is no longer accepted.
 *
 * @param {Array} arr - Candidate URLs; `null`/`undefined` is treated as empty.
 * @returns {Array} De-duplicated list of the accepted URLs.
 */
function unique(arr) {
    var httpScheme = /^https?:/;
    var seen = {};
    var i;
    var item;

    if (!arr) {
        return [];
    }
    for (i = 0; i < arr.length; i++) {
        item = arr[i];
        if (httpScheme.test(item)) {
            // Object keys double as the "already seen" set.
            seen[item] = true;
        }
    }
    return Object.keys(seen);
}
/**
 * Recursively visits every iframe reachable from the current page, at any
 * nesting depth, and appends the links found in each frame to the
 * script-level `links` array (kept de-duplicated through `unique`).
 *
 * Must be invoked with a Casper instance as `this`. The main page's own
 * links are not collected; only frames are visited.
 *
 * @param {Function} callback - Invoked through `this.then` once the frame
 *   steps have been queued. It receives the accumulated `links` array as
 *   its first argument; callers that ignore the argument can still read
 *   the script-level `links` variable.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Lists the positional indexes of the iframes in the document that
    // `obj` currently evaluates against. Falls back to an empty list when
    // evaluate() yields nothing, so the traversal does not crash.
    function to_evaluate(obj) {
        var indexes = obj.evaluate(function () {
            var iframes = [];
            [].forEach.call(document.querySelectorAll("iframe"), function (iframe, i) {
                iframes.push(i);
            });
            return iframes;
        });
        return indexes || [];
    }

    // Enters each child frame of `obj`, records its links, then recurses so
    // that frames nested inside that frame are visited as well.
    function to_frame(obj) {
        to_evaluate(obj).forEach(function (index) {
            this.withFrame(index, function () {
                this.echo("We are here: " + this.getCurrentUrl());
                // Guard against a missing result from the page-side lookup.
                var found = unique(get_links(this) || []);
                var i;
                for (i = 0; i < found.length; i++) {
                    console.log(found[i]);
                    links.push(found[i]);
                }
                links = unique(links);
                console.log("");
                to_frame(this); // descend into the frames of this frame
            });
        }, obj);
    }

    to_frame(this);
    this.then(function () {
        callback.call(this, links);
    });
}
// Entry point: open the start page, walk all nested iframes, then print
// every link gathered in the script-level `links` array.
casper.start("http://domu-test-2/node/1", function () {
    getLinksFromIframes.call(this, function () {
        console.log("Done!\n");
        links.forEach(function (link) {
            console.log(link);
        });
    });
}).then(function () {}).run();
Note:
Now we have full multi-level iframe traversal.
./casperjs test.js >>/dev/stdout
Here we come: http://domu-test-2/node/1
We are here: http://domu-test-2/node/2
http://link_1_inside_iframe(1.1)_from_main_frame
We are here: http://domu-test-2/node/3
http://link_1_inside_iframe(2.1)_from_1.1
We are here: http://domu-test-2/node/5
http://link_1_inside_iframe(2.2)_from_1.1
We are here: http://domu-test-2/node/4
http://link_1_inside_iframe(1.2)_from_main_frame
We are here: http://domu-test-2/node/6
http://link_1_inside_iframe(2.1)_from_1.2