Google Apps Script Regex exec() returns null

2019-09-16 03:45发布

问题:

I am writing a Google Apps Script to create a calendar event based on automated emails I receive for jobs. I am using regular expressions to extract the information I need to populate the event in Google Calendar. So far, everything functions as expected except for one function, getEndTime(), which should find the end time of the job but presently returns null any time it's called. All of my other functions using exec() work fine.

I have read many other questions regarding exec() returning null and have fixed common issues, such as removing the 'g' flag and resetting lastIndex to 0 before calling exec(). I have also checked my regular expression using regex101.com with the JavaScript option, which shows the match that I expect for my text.

The regular expression that works on regex101 but not in my code is:

/(Substitute\s+Report\s+Times:\s+[0-9_ ]*:[0-9_ ]*\s+[A-Z_ ]*\s+-\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))(\r|\n)/

My code is:

function findJobs() {
//Searches Gmail for substitute jobs and creates an event on the calendar.
//Every message in every thread labelled 'NewJobs' is parsed with the get*()
//helpers; one calendar event is created per message that parses completely.
//Messages that cannot be parsed are logged and skipped so that a single
//unexpected email does not abort the whole run.

  //Gets emails with 'NewJobs' label
  var label = GmailApp.getUserLabelByName("NewJobs");
  if (!label) {
    //NOTE(review): the lookup presumably yields null when the label does not
    //exist - stop here rather than fail on label.getThreads()
    Logger.log("Label 'NewJobs' not found");
    Logger.log("--Done--");
    return;
  }
  var threads = label.getThreads();
  for (var i = 0; i < threads.length; i++){

    var messages = threads[i].getMessages();
    Logger.log("Thread " + i);

    for (var j = 0; j < messages.length; j++) {
      Logger.log("Message " + j);

      //gets email body in plain text
      var body = messages[j].getPlainBody();
      Logger.log("Getting body..." + j);

      var school, starttime, endtime, teacher, address, startdate;
      try {
        //gets school name
        school = getSchool(body);
        Logger.log(school);

        //gets start time
        starttime = getStartTime(body);
        Logger.log(starttime);

        //gets end time
        endtime = getEndTime(body);
        Logger.log(endtime);

        //gets teacher name
        teacher = getTeacher(body);
        Logger.log(teacher);

        //gets school address
        address = getLocation(body);
        Logger.log(address);

        //gets date
        startdate = getDate(body);
        Logger.log(startdate);
      } catch (e) {
        //Only the parsing helpers are guarded; Gmail/Calendar service errors still propagate
        Logger.log("Skipping message " + j + " of thread " + i + ": " + e);
        continue;
      }

      //A helper may also signal "not found" by returning null/undefined
      if (school == null || starttime == null || endtime == null ||
          teacher == null || address == null || startdate == null) {
        Logger.log("Skipping message " + j + " of thread " + i + ": missing job details");
        continue;
      }

      //The strings look like "9/21/2017 8:00 AM"; reject anything Date cannot parse
      //instead of handing an Invalid Date to the calendar
      var start = new Date(startdate + " " + starttime);
      var end = new Date(startdate + " " + endtime);
      if (isNaN(start.getTime()) || isNaN(end.getTime())) {
        Logger.log("Skipping message " + j + " of thread " + i + ": unparsable date/time");
        continue;
      }

      CalendarApp.getDefaultCalendar().createEvent("Subbing - " + school, start, end, {location: address, description: teacher});
      //TODO: to avoid duplicate events on re-runs, remove the label from the
      //processed thread with threads[i].removeLabel(label) - index by thread (i),
      //not by message (j)
    }
  }
  Logger.log("--Done--");
}

function getSchool(text){
  //Gets the school name from an assignment email.
  //text: plain-text body of the email.
  //Returns the text after "School :" up to the end of that line, or null
  //when the body has no such line.

  //Regular expression for school name: label, name (group 2), line break
  var regex = /(School\s+:\s+)([a-zA-Z0-9_ ]*)(\r|\n)/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

function getDate(text){
  //Gets the start date from an assignment email.
  //text: plain-text body of the email.
  //Returns the slash-separated date that follows "Date:" (e.g. "9/21/2017"),
  //or null when the body has no such line.

  //Regular expression for start date: label, date (group 2), line break
  var regex = /(Date:\s+)([0-9_ ]*\/[0-9_ ]*\/[0-9_ ]*)(\r|\n)/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

function getStartTime(text){
  //Gets the start time from an assignment email.
  //text: plain-text body of the email.
  //Returns the first time on the "Substitute Report Times:" line
  //(e.g. "8:00 AM"), or null when the body has no such line.

  //Regular expression for start time: label, then time with AM/PM (group 2)
  var regex = /(Substitute\s+Report\s+Times:\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

function getEndTime(text){
  //Gets the end time from an assignment email.
  //text: plain-text body of the email.
  //Returns the second time on the "Substitute Report Times:" line
  //(e.g. "3:30 PM"), or null when the body has no such line.

  //Regular expression for end time: group 1 skips the label, the start time
  //and the " - " separator; group 2 is the end time with AM/PM.
  //The pattern deliberately does NOT require a line break right after AM/PM:
  //a trailing space before the break, or the line being the last one in the
  //body, made the stricter pattern fail. getStartTime() has no such
  //terminator either.
  var regex = /(Substitute\s+Report\s+Times:\s+[0-9_ ]*:[0-9_ ]*\s+[A-Z_ ]*\s+-\s+)([0-9_ ]*:[0-9_ ]*\s+(AM|PM))/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

function getTeacher(text){
  //Gets the teacher name from an assignment email.
  //text: plain-text body of the email.
  //Returns the comma-separated name that follows "Teacher :"
  //(e.g. "Name, Teacher"), or null when the body has no such line.

  //Regular expression for teacher name: label, "part,part" name (group 2), line break
  var regex = /(Teacher\s+:\s+)([a-zA-Z0-9_ ]*,[a-zA-Z0-9_ ]*)(\r|\n)/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

function getLocation(text){
  //Gets the location from an assignment email.
  //text: plain-text body of the email.
  //Returns everything after "Address:" up to the end of that line,
  //or null when the body has no such line.

  //Regular expression for location: label, rest of the line (group 2), line break
  var regex = /(Address:\s+)(.*)(\r|\n)/;
  var match = regex.exec(text);

  //exec() returns null when nothing matches; indexing that result would throw
  return match ? match[2] : null;
}

Here is a typical email I receive:

You have been assigned as a substitute for a job starting on 9/21/2017.
 The following are the details of the job:
*************
 Job Summary
*************
Starting On                : 9/21/2017
School                     : School Site
Title                      : Pre School Teacher
Teacher                    : Name, Teacher
Substitute                 : Name, Substitute
Confirmation #             : 123456

**********
 Job Days
**********
School

---------------------------------------
School Site
Date: 9/21/2017
Employee Times: 8:00 AM    - 3:30 PM
Substitute Report Times: 8:00 AM    - 3:30 PM

***********************************
School Contact Information
***********************************
School Site
-----------------------------------------------------------
Address: 123 Main Ave    Anytown , USA 555555
Phone: 5555555555
-----------------------------------------------------------
**********************
 Special Instructions
**********************



Please do not reply to this system generated message. If you need help or have additional questions, please send an email to abc@abc.com

Thank you for using the substitute assignment system. Powered by Aesop

回答1:

The pattern you're using seems overly complicated. I can't say for sure what's causing it to fail, but my guess would be the (\r|\n) at the end (note that you can just type [\r\n] instead if you really want to do this).

Give this pattern a try:

Substitute Report Times:.+ - (\d{1,2}:\d{1,2} [AP]M)

This assumes that the end time is always preceded by a hyphen and a space, which looks to be the case from the sample text you provided.