I have the following jQuery selection, helped by LGSon's answer to an earlier question...
// Log the full markup (outerHTML) of every direct child of the article body,
// leaving out any child that matches 'aside'.
$('div.content__article-body > *')
  .not('aside')
  .each(function () {
    // jQuery's .each() invokes the callback with `this` bound to the current
    // DOM element, so the (index, element) arguments are not needed here.
    console.log(this.outerHTML);
  });
However, I need to learn how to write this in a way consistent with the result
portion of my code below...
I guess I need to chain the outerHTML
property on to the jQuery line, but I have not had much success with this.
In this example, the selection is for the "entry" property of the result.
/**
 * Called on every page the crawler visits; use it to extract data from the page.
 *
 * - On pages labelled 'START' or 'LIST' it suppresses output for the page and
 *   enqueues the pagination links (as 'LIST') and the article links (as 'DETAIL').
 * - On pages labelled 'DETAIL' it scrapes the article fields and returns them.
 *
 * @param {Object} context - Object supplied by the crawler. This function reads
 *   `context.jQuery` and `context.request.label`, and calls
 *   `context.skipOutput()` and `context.enqueuePage({ url, label })`.
 * @returns {Object|undefined} For 'DETAIL' pages, an object with the keys
 *   `title`, `excerpt`, `entry`, `datestamp` and `tags`; `undefined` for every
 *   other label.
 */
function pageFunction(context) {
  var $ = context.jQuery;
  var label = context.request.label;

  // Declared locally on purpose: without a declaration, the final
  // `return result` throws a ReferenceError on any page that never assigns it
  // (START/LIST), and the DETAIL assignment would create an implicit global.
  var result;

  // Enqueue the target of every anchor matched by `selector` under `targetLabel`.
  function enqueueLinks(selector, targetLabel) {
    $(selector).each(function () {
      context.enqueuePage({
        url: window.location.origin + $(this).attr('href'),
        label: targetLabel
      });
    });
  }

  // If page is START or a LIST,
  if (label === 'START' || label === 'LIST') {
    context.skipOutput();
    // First, gather LIST pages
    enqueueLinks('ol.pagination li a', 'LIST');
    // Then, gather every DETAIL page
    enqueueLinks('h3>a', 'DETAIL');
  // If page is actually a DETAIL target page
  } else if (label === 'DETAIL') {
    /* context.skipLinks(); */
    var tags = [];
    $('span.tags a').each(function () {
      tags.push($(this).text());
    });

    // .html() returns undefined when the selector matches nothing; fall back
    // to an empty string so .trim() cannot throw on a page without a body.
    var entryHtml = $('div.content-blog__body').html();

    result = {
      "title": $('h1.entry-title').text(),
      "excerpt": $('div.content-blog__body p strong:first').text().trim(),
      "entry": (entryHtml || '').trim(),
      "datestamp": $('meta[property="article:published_time"]').attr('content'),
      tags: tags
    };
  }

  return result;
}
Here is a simplified `pageFunction`, to show how to search a result set using `.find()`.
Now that I understand what you meant (in the linked question), this answer from the same question (which I think should be credited as well, plus 1 from me) would get you a result set to work with, instead of the array of elements that mine gives, though here I used jQuery's `append()` when creating the new object.
Stack snippet