I'm trying to crawl a webpage using Nightmare, and I want to wait for #someelem to be present, but only if it actually exists. Otherwise, I want Nightmare to move on. How can this be done using .wait()?
I can't use .wait(ms). Using .wait(selector) means Nightmare will keep waiting until the element is present, so if the page never has this element, Nightmare will keep waiting forever.
The last option is to use .wait(fn), and I've tried something like this:
// Question's attempt at a polling predicate for .wait(fn): look for #elem,
// retrying up to 10 times at 2000 ms intervals. cheerio is handed over as the
// extra argument to .wait().
.wait(function(cheerio) {
// Parses a one-time snapshot of the body markup; $ is not rebuilt inside
// doEval, so every retry queries this same snapshot.
var $ = cheerio.load(document.body.outerHTML);
var attempt = 0;
function doEval() {
if ( $('#elem').length > 0 ) {
return true;
}
else {
attempt++;
if ( attempt < 10 ) {
// This branch only schedules a later call and falls through without a
// return, so doEval() yields undefined here; whatever the scheduled call
// returns is not passed back to the caller of the predicate.
setTimeout(doEval,2000); //This seems iffy.
}
else {
// Attempts exhausted: also reports true, the same value as the found case.
return true;
}
}
}
return doEval();
},cheerio)
So, wait and attempt again (up to a threshold), and if the element is not found, then just move on. The code seems wrong around setTimeout, because .wait is done at the browser-scope.
Thanks in advance!
I don't think passing the cheerio library as you have it is going to work very well. The arguments get serialized (more or less) to be passed to the child Electron process, so passing an entire library probably won't work.
On the up side, the fn part of .wait(fn) is executed in the page context - meaning you have full access to document and the methods it has (e.g., querySelector). You could also have access to the page's jQuery context if it exists, or you could even use .inject() to inject it if not.
Setting that aside, you're right insofar as .wait() (and .evaluate(), for that matter) expect a synchronous method, at least until something like promises could be used directly in .evaluate().
Until that is available, you could use .action() to mimic the behavior you want:
var Nightmare = require('nightmare');
/**
 * Registers a custom `deferredWait` action on Nightmare.
 *
 * The action polls the page for `#elem`. It completes with `true` as soon as
 * the element exists, with `false` once ten checks have all missed, and with
 * an error if the in-page evaluation itself fails.
 *
 * @param {function(?Error, boolean=)} done - Node-style completion callback
 *   supplied by Nightmare when the action runs.
 */
Nightmare.action('deferredWait', function(done) {
  var SELECTOR = '#elem';
  var MAX_ATTEMPTS = 10;
  var RETRY_DELAY_MS = 2000;

  var attempt = 0;
  var self = this;

  function doEval() {
    self.evaluate_now(function(selector) {
      // Runs inside the page; only the serialized `selector` argument is visible.
      return (document.querySelector(selector) !== null);
    }, function(err, result) {
      // evaluate_now reports back node-style: the first argument is the error
      // slot, so the lookup outcome has to be read from the second one.
      if (err) {
        return done(err);
      }
      if (result) {
        return done(null, true);
      }
      attempt++;
      if (attempt < MAX_ATTEMPTS) {
        // The retry is scheduled in the Node process (not in the page), so
        // `done` stays pending until a later check settles it.
        setTimeout(doEval, RETRY_DELAY_MS);
      } else {
        done(null, false);
      }
    }, SELECTOR);
  }

  doEval();
  return this;
});
// Example run: open the page, poll for the element via the custom action,
// and print whether it showed up (true) or the attempts ran out (false).
var nightmare = Nightmare();
nightmare.goto('http://example.com')
  .deferredWait()
  // Close the Electron process once the queue has drained; the value passed
  // to .then() is still the result of the last queued action.
  .end()
  .then(function(result) {
    console.log(result);
  })
  .catch(function(error) {
    // Surface navigation/evaluation failures instead of leaving the
    // rejection unhandled.
    console.error('deferredWait failed:', error);
  });
- As mentioned in the Nightmare documentation:
.wait(selector)
Wait until the element selector is present, e.g. .wait('#pay-button')
In this case the wait lasts only until the element first becomes visible; if it never does, the wait lasts until the default timeout of 30s.
- wait with a function:
.wait(function () {
return (document.querySelector(selector) === null);
})
where selector is the CSS selector of the element whose existence in the DOM we are waiting on.
Here I created a function that gets the HTML source under different conditions. I'm crawling the TimeWarnerCable page to get info about TV, Internet and Bundle plans, so my function takes some parameters and reacts to each one in different calls. You can use .exists() to check selectors and then continue with Nightmare:
/**
 * Opens `url` in a fresh Nightmare browser, submits `zip` through whichever
 * ZIP form the page shows, applies the filters implied by `serviceQuantity`,
 * and yields the page's HTML.
 *
 * @param {string} url - Address handed to `nightmare.goto()`.
 * @param {number} serviceQuantity - 1 (no filter clicks), 2 (TV + Internet
 *   filters) or 3 (TV + Internet + Phone filters).
 * @param {string} zip - ZIP code inserted into the page's ZIP input.
 * @returns {Promise} Q promise. Resolves with the inner HTML of `<html>`, or
 *   with `''` when the page shows its "not available" notice. Rejects when a
 *   browser step fails or `serviceQuantity` is not 1, 2 or 3 (previously the
 *   promise stayed pending forever in those cases).
 */
function getSource(url, serviceQuantity, zip) {
  var defer = Q.defer();
  var Nightmare = require('nightmare');
  var nightmare = Nightmare({openDevTools:browserDev ,show: browserVisible,'webPreferences':{partition: 'nopersist'}});

  // Carriage return typed into the ZIP field to submit it (the original
  // author's note: nightmare can't find the submit button).
  var ENTER_KEY = '\u000d';

  var ZIP_PROMPT_BOX = 'div.messagebox-wrapper.twc-container[style="display: block;"]';
  var ENTER_ZIP_INPUT = 'fieldset > div > input[placeholder="Enter Your ZIP Code"]';
  var UPDATE_ZIP_LINK = 'div.section.newHeaderIcons > div > ul > li:nth-child(4) > div > a';
  var UPDATE_ZIP_INPUT = 'form.geoLoc > fieldset > div > input[placeholder="Update Your ZIP Code"]';
  var NOT_AVAILABLE_NOTICE = 'div[style="display: block;"] > div > div > div > div > div > div > div.parsys.oof-error-content > div > div > div > div > div > div > p[style="color: #333333;"]';

  // Filter checkboxes to click, in order, for each supported service count.
  var filtersByQuantity = {
    1: [],
    2: ['#tv-filter', '#internet-filter'],
    3: ['#tv-filter', '#internet-filter', '#phone-filter']
  };

  // Logs the failure, closes the browser (best effort) and rejects the
  // promise. Only used on paths where .end() has not been queued yet.
  function abort(error) {
    debug('ERROR >> Search failed:', error);
    function rejectDeferred() {
      defer.reject(error);
    }
    nightmare.end().then(rejectDeferred, rejectDeferred);
  }

  // Clicks each filter (pausing 500 ms after every click), grabs the page
  // HTML, closes the browser and settles the promise.
  function collectHtml(filterSelectors) {
    var chain = nightmare;
    filterSelectors.forEach(function(filterSelector) {
      chain = chain.click(filterSelector).wait(500);
    });
    chain
      .evaluate(function() {
        return document.querySelector('html').innerHTML;
      })
      .end()
      .then(function(result) {
        defer.resolve(result);
      })
      .catch(function(error) {
        debug('ERROR >> Search failed:', error);
        // .end() was already queued on this chain, so no second end() is
        // issued here; only the promise needs settling.
        defer.reject(error);
      });
  }

  nightmare
    .goto(url)
    .cookies.clear()
    .wait(2000)
    .exists(ZIP_PROMPT_BOX)
    .then(function(noZipSet) {
      // The ZIP actions below are only queued here; they execute when the
      // next .then() on this instance is reached.
      if (noZipSet) {
        debug('No zipcode settled down');
        nightmare
          .insert(ENTER_ZIP_INPUT, zip)
          .type(ENTER_ZIP_INPUT, ENTER_KEY);
      } else {
        debug('Zipcode settled down');
        nightmare
          .click(UPDATE_ZIP_LINK)
          .wait(2000)
          .insert(UPDATE_ZIP_INPUT, zip)
          .type(UPDATE_ZIP_INPUT, ENTER_KEY);
      }

      nightmare
        .wait(8500)
        .exists(NOT_AVAILABLE_NOTICE)
        .then(function(zipNotAvailable) {
          if (zipNotAvailable) {
            debug('Service not available in '+zip+' for '+serviceQuantity+' services');
            nightmare
              .end()
              .then(function() {
                defer.resolve('');
              }, function(error) {
                debug('ERROR >> Search failed:', error);
                defer.reject(error);
              });
            return;
          }

          debug('Service available on the zipcode');
          // Checked this late on purpose: an unsupported count combined with
          // an unavailable ZIP still resolves with '' exactly as before.
          if (!Object.prototype.hasOwnProperty.call(filtersByQuantity, serviceQuantity)) {
            abort(new Error('Unsupported serviceQuantity: ' + serviceQuantity));
            return;
          }
          collectHtml(filtersByQuantity[serviceQuantity]);
        })
        .catch(abort);
    })
    .catch(abort);

  return defer.promise;
}