I'm trying to scrape links on my school's course schedule website using Node.js, request, and cheerio. However, my code is not reaching all subject links.
Link to course schedule website here.
Below is my code:
var express = require('express');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
// GET /subjects
// Downloads the course schedule page, collects every anchor found inside an
// element with class "courseList_section", logs each one, and returns the
// collected list to the caller as JSON: [{ text: ..., link: ... }, ...].
// Responds with HTTP 502 when the upstream page cannot be fetched.
app.get('/subjects', function(req, res) {
    var URL = 'http://courseschedules.njit.edu/index.aspx?semester=2016s';

    request(URL, function(error, response, body) {
        // Guard clause: a transport error or a non-200 upstream status means
        // there is no schedule page to parse. Always answer the client so the
        // incoming request does not hang.
        if (error || !response || response.statusCode !== 200) {
            console.log('There was an error!');
            res.status(502).send('Unable to fetch the course schedule page.');
            return;
        }

        var $ = cheerio.load(body);
        var subjects = [];

        $('.courseList_section a').each(function() {
            var anchor = $(this);
            var text = anchor.text();
            var link = anchor.attr('href');

            console.log(text + ' --> ' + link);
            subjects.push({ text: text, link: link });
        });

        // Send the scraped links back instead of leaving the request open.
        res.json(subjects);
    });
});
// Start the HTTP server. The message is logged from the listen callback so it
// is printed only once the server is actually accepting connections, rather
// than unconditionally before the port has been bound.
app.listen('8080', function() {
    console.log('Magic happens on port 8080!');
});
My output can be found here.
As you can see from my output, some links are missing. More specifically, the links from sections 'A', 'I (Continued)', and 'R (Continued)' are missing. These are also the first sections of each column.
Each section is contained in its own div with class name 'courseList_section', so I don't understand why '.courseList_section a' doesn't loop through all links. Am I missing something obvious? Any and all insight is greatly appreciated.
Thank you in advance!