I am programming a Google Apps script within a spreadsheet. My use case includes iterating over a large set of folders that are children of a given one. The problem is that the processing takes longer than the maximum that Google allows (6 minutes), so I had to program my script to be able to resume later. I am creating a trigger to resume the task, but that is not part of my problem (at least, not the more important one at this moment).
My code looks like this (reduced to the minimum to illustrate my problem):
/**
 * Entry point for a fresh run.
 *
 * Stores the id of the parent folder in the script properties, discards any
 * continuation token left over from an earlier run, and then performs the
 * first processing slice by calling continueProcess().
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();

  // Persist the folder to walk so that later invocations can read it back.
  props.setProperty(SOURCE_PARENT_FOLDER_KEY, SOURCE_PARENT_FOLDER_ID);

  // No token stored means "start from the beginning" for continueProcess().
  props.deleteProperty(CONTINUATION_TOKEN_KEY);

  continueProcess();
}
/**
 * Processes child folders of the stored parent folder until either the
 * folders run out or the elapsed time reaches MAX_RUNNING_TIME.
 *
 * When a continuation token is stored, iteration resumes from it; otherwise
 * it starts from the parent folder's first child. On exit the token is
 * refreshed if folders remain, or removed if the iteration is complete.
 */
function continueProcess() {
  try {
    var startedAt = Date.now();
    var props = PropertiesService.getScriptProperties();
    var parentFolderId = props.getProperty(SOURCE_PARENT_FOLDER_KEY);
    var savedToken = props.getProperty(CONTINUATION_TOKEN_KEY);

    // Resume from the saved token when there is one, else start over.
    var folders;
    if (savedToken == null) {
      folders = DriveApp.getFolderById(parentFolderId).getFolders();
    } else {
      folders = DriveApp.continueFolderIterator(savedToken);
    }

    var outOfTime = false;
    while (folders.hasNext() && !outOfTime) {
      processFolder_(folders.next());
      // Re-check the clock after every folder so the run stops in time.
      outOfTime = Date.now() - startedAt >= MAX_RUNNING_TIME;
    }

    if (folders.hasNext()) {
      // Work remains: remember where to pick up on the next invocation.
      props.setProperty(CONTINUATION_TOKEN_KEY, folders.getContinuationToken());
    } else {
      props.deleteProperty(CONTINUATION_TOKEN_KEY);
    }
  } catch (e) {
    // (placeholder kept from the original) sends a mail with the error
  }
}
When launchProcess is invoked, it only prepares the program for the other method, continueProcess, that iterates over the set of folders. The iterator is obtained by using the continuation token, when it is present (it will not be there in the first invocation). When the time limit is near, continueProcess obtains the continuation token, saves it in a property and waits for the next invocation.
The problem I have is that the iterator is always returning the same set of folders although it has been built from different tokens (I have printed them, so I know they are different).
Any idea what I am doing wrong?
Thank you in advance.
It appears that your loop was not built correctly. (edit : actually, probably also another issue about how we break the while loop, see my thoughts about that in comments)
Note also that there is no special reason to use a try/catch in this context, since I see no reason why the hasNext() method would throw an error (but if you think it might, you can always add it).
Here is an example that works; I added the trigger creation/deletion lines to implement my test.
EDIT : code updated with logs and counter
// Drive id of the parent folder whose child folders are listed.
var SOURCE_PARENT_FOLDER_ID =
    '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI';

// Time budget for one run, compared against a difference of getTime() values
// (milliseconds). 5*35*6 = 1050 ms: deliberately short for the test.
var MAX_RUNNING_TIME = 5 * 35 * 6;
/**
 * Starts a fresh folder-listing run.
 *
 * Resets the stored state (parent folder id, counter, continuation token),
 * installs a 10-minute time-based trigger that resumes the work, and runs the
 * first slice immediately.
 *
 * The trigger is registered for continueProcessFolder (the handler defined
 * below); triggers left over from an earlier launch are removed first so that
 * repeated launches do not stack up duplicate triggers.
 */
function launchProcessFolder() {
  var scriptProperties = PropertiesService.getScriptProperties();
  scriptProperties.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  // Property values are strings; store the counter as one explicitly.
  scriptProperties.setProperty('counter', String(0));
  scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
  deleteFolderRunTriggers_();
  ScriptApp.newTrigger('continueProcessFolder').timeBased().everyMinutes(10).create();
  continueProcessFolder();
}

/**
 * Runs one time-boxed slice of the folder listing.
 *
 * Resumes from the stored continuation token when there is one, logs each
 * folder name with a running counter, and stops once the elapsed time
 * reaches MAX_RUNNING_TIME or the folders run out.
 *
 * Afterwards exactly one of two things happens:
 *  - folders remain: the continuation token and the counter are saved;
 *  - nothing remains (including an empty parent folder): the token is removed
 *    and this function's trigger(s) are deleted.
 * Deciding this once, after the loop, prevents a stale token from being
 * written back when the time limit trips on the very last folder.
 */
function continueProcessFolder() {
  var startTime = (new Date()).getTime();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  var iterator = continuationToken == null
      ? DriveApp.getFolderById(srcParentFolderId).getFolders()
      : DriveApp.continueFolderIterator(continuationToken);
  var counter = Number(scriptProperties.getProperty('counter'));
  var timeLimitIsNear = false;

  while (!timeLimitIsNear && iterator.hasNext()) {
    var folder = iterator.next();
    counter++;
    Logger.log(counter + ' - ' + folder.getName());
    timeLimitIsNear = ((new Date()).getTime() - startTime >= MAX_RUNNING_TIME);
  }

  if (iterator.hasNext()) {
    // Interrupted by the time limit: remember where to resume.
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', iterator.getContinuationToken());
    scriptProperties.setProperty('counter', String(counter));
    Logger.log('write to scriptProperties');
  } else {
    scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
    deleteFolderRunTriggers_();
    Logger.log('******************no more folders**************');
  }
}

/**
 * Deletes every project trigger whose handler is continueProcessFolder,
 * leaving all other triggers of the project untouched.
 */
function deleteFolderRunTriggers_() {
  var triggers = ScriptApp.getProjectTriggers();
  for (var i = 0; i < triggers.length; i++) {
    if (triggers[i].getHandlerFunction() === 'continueProcessFolder') {
      ScriptApp.deleteTrigger(triggers[i]);
    }
  }
}
EDIT 2 :
(see also last comment)
Here is a test with the script modified to get files in a folder. From my different tests it appears that the operation is very fast and that I needed to set a quite short timeout limit to make it happen before reaching the end of the list.
I added a couple of Logger.log() calls and a counter to see exactly what was happening and to know for sure what was interrupting the while loop.
With the current values I can see that it works as expected, the first (and second) break happens with time limitation and the logger confirms that the token is written. On a third run I can see that all files have been dumped.
// Drive id of the parent folder whose files are listed.
var SOURCE_PARENT_FOLDER_ID =
    '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI';

// Time budget for one run, compared against a difference of getTime() values
// (milliseconds). 5*35*6 = 1050 ms: deliberately short for the test.
var MAX_RUNNING_TIME = 5 * 35 * 6;
/**
 * Starts a fresh file-listing run.
 *
 * Writes the parent folder id and a zeroed counter to the script properties,
 * clears any stored continuation token, registers a time-based trigger that
 * calls continueProcess every 10 minutes, and runs the first slice at once.
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();
  props.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  props.setProperty('counter', 0);
  props.deleteProperty('CONTINUATION_TOKEN_KEY');

  // The trigger re-invokes continueProcess until the listing is complete.
  var clockTrigger = ScriptApp.newTrigger('continueProcess').timeBased();
  clockTrigger.everyMinutes(10).create();

  continueProcess();
}
/**
 * Runs one time-boxed slice of the file listing.
 *
 * Resumes from the stored continuation token when there is one, logs each
 * file name with a running counter, and stops once the elapsed time reaches
 * MAX_RUNNING_TIME or the files run out.
 *
 * Afterwards exactly one of two things happens:
 *  - files remain: the continuation token and the counter are saved;
 *  - nothing remains (including an empty folder): the token is removed and
 *    every trigger whose handler is continueProcess is deleted. Other project
 *    triggers are left alone, and having no trigger at all is not an error.
 * Deciding this once, after the loop, prevents a stale token from being
 * written back when the time limit trips on the very last file.
 */
function continueProcess() {
  var startTime = (new Date()).getTime();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  var iterator = continuationToken == null
      ? DriveApp.getFolderById(srcParentFolderId).getFiles()
      : DriveApp.continueFileIterator(continuationToken);
  var counter = Number(scriptProperties.getProperty('counter'));
  var timeLimitIsNear = false;

  while (!timeLimitIsNear && iterator.hasNext()) {
    var file = iterator.next();
    counter++;
    Logger.log(counter + ' - ' + file.getName());
    timeLimitIsNear = ((new Date()).getTime() - startTime >= MAX_RUNNING_TIME);
  }

  if (iterator.hasNext()) {
    // Interrupted by the time limit: remember where to resume.
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', iterator.getContinuationToken());
    scriptProperties.setProperty('counter', String(counter));
    Logger.log('write to scriptProperties');
    return;
  }

  scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
  // Remove only the trigger(s) that call this function.
  var triggers = ScriptApp.getProjectTriggers();
  for (var i = 0; i < triggers.length; i++) {
    if (triggers[i].getHandlerFunction() === 'continueProcess') {
      ScriptApp.deleteTrigger(triggers[i]);
    }
  }
  Logger.log('******************no more files**************');
}
As of January 1, 2016 this is still a problem. The bug report lists a solution using the Advanced Drive API, which is documented here, under "Listing folders".
If you don't want to use Advanced services, an alternative solution would be to use the Folder Iterator to make an array of folder IDs.
It appears to me that the Folder Iterator misbehaves only when created using DriveApp.continueFolderIterator(). When using this method, only 100 Folders are included in the returned Folder Iterator.
Using DriveApp.getFolders() and only getting folder IDs, I am able to iterate through 694 folders in 2.734 seconds, according to the Execution transcript.
/**
 * Collects the id of every folder returned by DriveApp.getFolders().
 *
 * Logs the total number of ids gathered.
 *
 * @return {Array} The folder ids, in iteration order.
 */
function allFolderIds() {
  var collected = [];
  for (var it = DriveApp.getFolders(); it.hasNext(); ) {
    collected.push(it.next().getId());
  }
  Logger.log('Total folders: %s', collected.length);
  return collected;
}
I used the returned array to work my way through all the folders, using a trigger. The Id array is too big to save in the cache, so I created a temp file and used the cache to save the temp file Id.
This is caused by a bug in GAS:
https://code.google.com/p/google-apps-script-issues/issues/detail?id=4116
It appears you're only storing a single continuation token. If you want to recursively iterate over a set of folders and allow the script to pause at any point (e.g. to avoid the timeout) and resume later, you'll need to store a bunch more continuation tokens (e.g. in an array of objects).
I've outlined a template that you can use here to get it working properly. This worked with thousands of nested files over the course of 30+ runs perfectly.