I have the function below where I am trying to scrape 4 websites, and then combine the results into a spreadsheet. Is there a faster way to match over a large array that isn't the INDEX/MATCH formulas. My desired output would be (obv this is an example)
MLBID | FG_ID | PA | K | K% | wOBA
12345 | 12345 | 12 | 5 | 41.7% | .300
While the code I have below works, it takes wayyyy too long reaches the 6-minute limit of Google Script. The matching that I am trying to do is with ~4000 rows. I have commented my code as much as possible.
function minors_batting_stats() {
//this is the spreadsheet where I have a list of all of the IDs -- MLB and FG
var ids = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Player List");
//this is the output sheet
var mb18vR_sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("2018 minors bat vs R");
//various URLs I am trying to scrape
var mb18vR_PA_url = 'https://www.mlb.com/prospects/stats/search?level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=results&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
var mb18vR_SO_url = 'https://www.mlb.com/prospects/stats/search?pa_result=strikeout&level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=results&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
var mb18vR_wOBA_url = 'https://www.mlb.com/prospects/stats/search?level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=woba&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
//creating an array for each scrape
var res = [];
var res1 = [];
var res2 = [];
var res3 = [];
//getting the MLB and FG ids from the spreadsheet
var mlbids = ids.getRange(1, 11, ids.getLastRow()).getValues();
var fgids = ids.getRange(1,9, ids.getLastRow()).getValues();
//scraping SO against RHP
var content_SO = UrlFetchApp.fetch(mb18vR_SO_url).getContentText();
var e_SO = Parser.data(content_SO).from('tbody').to('</tbody>').build();
var rows_SO = Parser.data(e_SO).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows_SO.length; i++) { //rows.length
res1[i] = [];
res1[i][0] = Parser.data(rows_SO[i]).from('/player/').to('/').build();
var SOs = Parser.data(rows_SO[i]).from('<td align="left">').to('</td>').iterate();
res1[i][1] = SOs[1];
}
//scraping wOBA against RHP
var content_wOBA = UrlFetchApp.fetch(mb18vR_wOBA_url).getContentText();
var e_wOBA = Parser.data(content_wOBA).from('tbody').to('</tbody>').build();
var rows_wOBA = Parser.data(e_wOBA).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows_wOBA.length; i++) { //rows.length
res2[i] = [];
res2[i][0] = Parser.data(rows_wOBA[i]).from('/player/').to('/').build();
var wOBAs = Parser.data(rows_wOBA[i]).from('<td align="left">').to('</td>').iterate();
res2[i][1] = wOBAs[2];
}
//scraping PA against RHP
var content = UrlFetchApp.fetch(mb18vR_PA_url).getContentText();
var e = Parser.data(content).from('tbody').to('</tbody>').build();
var rows = Parser.data(e).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows.length; i++) { //rows.length
res[i] = [];
res[i][0] = Parser.data(rows[i]).from('/player/').to('/').build();
res[i][1] = [];
//matching the MLB_ID with FG_ID
var mlbID = res[i][0];
for(var j = 0; j<mlbids.length;j++){
if(mlbids[j] == mlbID){
res[i][1] = fgids[j];
}
}
var PAs = Parser.data(rows[i]).from('<td align="left">').to('</td>').iterate();
res[i][2] = PAs[1];
//matching the MLB_ID from PA (res) with SO (res1)
res[i][3] = 0;
for (var w=0; w<res1.length; w++) {
if (res[i][0] == res1[w][0]) {
res[i][3] = res1[w][1];
}
}
//Calculating K%
res[i][4] = res[i][3] / res[i][2]
//matching the MLB_ID from PA (res) with wOBA (res1)
res[i][5] = 0;
for (var v=0; v<res2.length; v++) {
if (res[i][0] == res2[v][0]) {
res[i][5] = res2[v][1];
}
}
}
//pasting values
mb18vR_sheet.getRange(2, 1, res.length, res[0].length).setValues(res);
}
The issue you have is that you are forcing your script to loop through large datasets many many times for each row of compared data. A better approach is to build a lookup object, which maps between a desired unique identifier and the row of the data array you want to access:
Usage:
By making a lookup object for your data arrays, you no longer have to re-search them and make comparisons, because you did the search once and stored the relationship, rather than discarding it every time. Note that the key that was made is based on a specific (and unique) property of the data. Without that relationship, this particular indexing approach won't work - but a different one will.