I am writing the Date and Subject from specific new emails to a new row of a Google Sheet.
- I apply a label to the new mail items with a filter.
- the script processes those labeled emails
- the label is removed
- A new label is applied, so that these emails won't be processed next time.
Problem: When there is a myLabel email, the script processes all emails in the same thread (eg same subject and sender) regardless of their label (even Inbox and Trash).
Question: How to only process new emails i.e. ones with the label myLabel - even when the thread of those messages extends outside the myLabel folder?
My current script:
function fetchmaildata() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName('mySheetName');
var label = GmailApp.getUserLabelByName('myLabel');
var threads = label.getThreads();
for (var i = 0; i < threads.length; i++)
{
var messages = threads[i].getMessages();
for (var j = 0; j < messages.length; j++)
{
var sub = messages[j].getSubject();
var dat = messages[j].getDate();
ss.appendRow([dat, sub])
}
threads[i].removeLabel(label);
threads[i].addLabel(newlabel);
}
}
I hacked a solution for my purposes by changing my for
loop to this:
for (var j = messages.length-1; j > messages.length-2; j--)
This says to process only the latest email in the thread, even when there is more than one email of a thread in the myLabel folder. Oddly, the script still changes the Labels of all the myLabel emails, but only the latest one of a thread gets written to the spreadsheet, so it works for me.
I had to make another change to the code because the above code does not run as a time-triggered scheduled task. I changed the code in this way and it now runs on a time schedule !!
//var ss = SpreadsheetApp.getActiveSpreadsheet();
var ss = SpreadsheetApp.openById("myGoogleSheetID");
A label can be on a thread due to being on a single message in said thread. Your code simply goes label -> all label threads -> all thread messages, rather than accessing only the messages in a thread with a given label. That's not really your fault - it's a limitation of the Gmail Service. There are two approaches that you can use to remedy this behavior:
The (enable-before-use "advanced service") Gmail REST API
The REST API supports detailed querying of messages, including per-message label status, with Gmail.Users.Messages.list
and the labelIds
optional argument. For example:
// Get all messages (not threads) with this label:
function getMessageIdsWithLabel_(labelClass) {
const labelId = labelClass.getId();
const options = {
labelIds: [ labelId ],
// Only retrieve the id metadata from each message.
fields: "nextPageToken,messages/id"
};
const messages = [];
// Could be multiple pages of results.
do {
var search = Gmail.Users.Messages.list("me", options);
if (search.messages && search.messages.length)
Array.prototype.push.apply(messages, search.messages);
options.pageToken = search.nextPageToken;
} while (options.pageToken);
// Return an array of the messages' ids.
return messages.map(function (m) { return m.id; });
}
Once using the REST API, there are other methods you might utilize, such as batch message label adjustment:
function removeLabelFromMessages_(messageIds, labelClass) {
const labelId = labelClass.getId();
const resource = {
ids: messageIds,
// addLabelIds: [ ... ],
removeLabelIds: [ labelId ]
};
// https://developers.google.com/gmail/api/v1/reference/users/messages/batchModify
Gmail.Users.Messages.batchModify(resource, "me");
}
Result:
function foo() {
const myLabel = /* get the Label somehow */;
const ids = getMessageIdsWithLabel_(myLabel);
ids.forEach(function (messageId) {
var msg = GmailApp.getMessageById(messageId);
/* do stuff with the message */
});
removeLabelFromMessages_(ids, myLabel);
}
Recommended Reading:
- Advanced Services
- Gmail Service
Messages#list
Message#batchModify
- Partial responses aka the 'fields' parameter
Tracked Processing
You could also store each message ID somewhere, and use the stored IDs to check if you've already processed a given message. The message Ids are unique.
This example uses a native JavaScript object for extremely fast lookups (vs. simply storing the ids in an array and needing to use Array#indexOf
). To maintain the processed ids between script execution, it uses a sheet on either the active workbook, or a workbook of your choosing:
var MSG_HIST_NAME = "___processedMessages";
function getProcessedMessages(wb) {
// Read from a sheet on the given spreadsheet.
if (!wb) wb = SpreadsheetApp.getActive();
const sheet = wb.getSheetByName(MSG_HIST_NAME)
if (!sheet) {
try { wb.insertSheet(MSG_HIST_NAME).hideSheet(); }
catch (e) { }
// By definition, no processed messages.
return {};
}
const vals = sheet.getSheetValues(1, 1, sheet.getLastRow(), 1);
return vals.reduce(function (acc, row) {
// acc is our "accumulator", and row is an array with a single message id.
acc[ row[0] ] = true;
return acc;
}, {});
}
function setProcessedMessages(msgObject, wb) {
if (!wb) wb = SpreadsheetApp.getActive();
if (!msgObject) return;
var sheet = wb.getSheetByName(MSG_HIST_NAME);
if (!sheet) {
sheet = wb.insertSheet(MSG_HIST_NAME);
if (!sheet)
throw new Error("Unable to make sheet for storing data");
try { sheet.hideSheet(); }
catch (e) { }
}
// Convert the object into a serializable 2D array. Assumes we only care
// about the keys of the object, and not the values.
const data = Object.keys(msgObject).map(function (msgId) { return [msgId]; });
if (data.length) {
sheet.getDataRange().clearContent();
SpreadsheetApp.flush();
sheet.getRange(1, 1, data.length, data[0].length).setValues(data);
}
}
Usage would be something like:
function foo() {
const myLabel = /* get label somehow */;
const processed = getProcessedMessages();
myLabel.getThreads().forEach(function (thread) {
thread.getMessages().forEach(function (msg) {
var msgId = msg.getId();
if (processed[msgId])
return; // nothing to do for this message.
processed[msgId] = true;
// do stuff with this message
});
// do more stuff with the thread
});
setProcessedMessages(processed);
// do other stuff
}
Recommended Reading:
- Is checking an object for a key more efficient than searching an array for a string?
Array#reduce
Array#map
Array#forEach