I am tinkering around with request-promise to crawl a friend's webpage. I am using the "Crawl a webpage better" example from their GitHub README. What I have so far is this:
var rp = require('request-promise');
var cheerio = require('cheerio'); // Basically jQuery for node.js
var options = {
  uri: 'https://friendspage.org',
  // Hand the raw response body to Cheerio so the promise resolves with a
  // jQuery-like handle instead of an HTML string.
  transform: (html) => cheerio.load(html),
};

rp(options)
  .then(($) => {
    // Process html like you would with jQuery...
    var nxtPage = $("a[data-url$='nxtPageId']").attr('data');
    // How do I use nxtPage here to go to that site?
  })
  .catch((err) => {
    // Crawling failed or Cheerio choked...
  });
What is the proper way to go to the link I have in `nxtPage`? I still want to be able to use cheerio/jQuery on it. Do I need to repeat the whole `var options = ...` thing inside the current `then` function?
You can just create your own utility function that builds your options and then calls `rp()`, like this:
const rp = require('request-promise');
const cheerio = require('cheerio'); // Basically jQuery for node.js
/**
 * Shared helper: requests a page through `rp` and parses the body with Cheerio.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {*} Whatever `rp` returns for the request; the configured transform
 *   passes the response body through `cheerio.load`.
 */
function getPage(url) {
  // Turn the raw body into a Cheerio handle before callers see it.
  const parseWithCheerio = (html) => cheerio.load(html);

  return rp({ uri: url, transform: parseWithCheerio });
}
// Fetch the first page, read the follow-up link out of it, then fetch that page too.
getPage('https://friendspage.org')
  .then(($) => {
    // Process html like you would with jQuery...
    const nxtPage = $("a[data-url$='nxtPageId']").attr('data');
    // Returning the promise hands the second page to the next .then().
    return getPage(nxtPage);
  })
  .then(($next) => {
    // more processing here
  })
  .catch((err) => {
    console.log(err);
    // error handling here
  });
This is just factoring code that you want to use in multiple places into a shared function. It has nothing in particular to do with `rp()` or `cheerio`; it is just regular code factoring in JavaScript (or any language).
Wrap the request in a function and keep calling it recursively, with a condition so that the recursion stops at some point.
/**
 * Crawls a chain of pages: fetches `uri`, reads the "next page" link from it,
 * and calls itself with that link until the chain ends.
 *
 * @param {string} [uri='https://friendspage.org'] - Page to fetch on this step.
 * @param {Set<string>} [visited] - URLs already requested during this crawl;
 *   shared across the recursive calls so a link cycle cannot loop forever.
 * @returns {*} The promise chain returned by `rp`, settled once this page and
 *   every page followed from it have been handled.
 */
(function repeatUntilAConditionIsMetInThen(uri = 'https://friendspage.org', visited = new Set()) {
  visited.add(uri);

  const options = {
    uri,
    transform: (body) => cheerio.load(body),
  };

  return rp(options)
    .then(($) => {
      const nxtPage = $("a[data-url$='nxtPageId']").attr('data');

      // Stop condition: the page yields no next link, or the link points at a
      // page this crawl has already requested.
      if (!nxtPage || visited.has(nxtPage)) {
        return undefined;
      }

      // Returning the recursive call keeps the whole crawl on one promise chain.
      return repeatUntilAConditionIsMetInThen(nxtPage, visited);
    })
    .catch((err) => {
      // Report the failure instead of swallowing it; the crawl ends at this page.
      console.error(`Crawling ${uri} failed:`, err);
    });
})();