Google Apps Script for creating normalised Data fr

2019-09-23 18:37发布

In a Google Sheet, I have 3 raw data worksheets within one google spreadsheet ('Category1'),('Category2'),('Category3'). These worksheets are constantly updated by people in my business but unfortunately the data isn't in a normailised form to be able to run efficient queries.

I would like to create a script that automatically generates a normailised Output ('Category1 Output'),('Category2 Output'),('Category3 Output') of this raw information that automatically updates itself when someone makes a change in the raw tabs.

In the google sheet below, I have provided an example of what one Category needs to look like. 'Category1' worksheet is the raw that is constantly updated by everyone. 'Category1Output' is the final output worksheet that automatically updates itself when an edit is made in the 'Category1' worksheet.

Google Sheet Link

1条回答
淡お忘
2楼-- · 2019-09-23 19:06

The Questioner has three sheets in a defined columnar layout - essentially several rows per data set and several columns (one for each time period). These sheets are not being replaced, but a summarised version was sought where filtering could be used to focus effectively on relevant data. Thus, each sheet to be translated from columnar format to row-wise format.

The process itself is straightforward. The source data comprised 64 products @ 8 data rows per product. The output records were @1,350.

The questioner's code was hung up on the conversion of data to the output format. The use of 8 rows of data per product is important, and the code includes a check that the quotient the total number of rows of data divided by eight is an integer. In addition, the names of the Source sheet and the Output sheet are called by name (getSheetByName) so that the code can be easily applied to any named Input Sheet and any named Output sheet. The only proviso is that both sheets must exists beforehand.

Initial resolution of the questioner's code hiccup was successful and using the methodology of getDataRange and getValues before the loop greatly improved performance. There are two loops; one with a vertical orientation, moving through the rows of data; and the second with a horizontal orientation moving through the time-related columns. However performance was initially very inefficient and the code was timing out before completion.

I modified the code to build a single 2D array and save it to the Output sheet just once at the end of the loops. This had a dramatic effect on performance. Total time to complete dropped from several minutes to less than 5 seconds.


function so5243560403() {

    // Build a clean Output Sheet
    var ss = SpreadsheetApp.getActiveSpreadsheet();
    var SourceSheet = ss.getSheetByName("Category2");
    var Target = ss.getSheetByName("Category3Output");

    // Various variables
    var SourceHeaderRow = 9;
    var RowsPerProduct = 8;
    var ProductLengthTruncate = 11;
    var SourceArray = [];
    var i = 0;
    var w = 0;

    // get the bottom of the column
    var ColAvals = SourceSheet.getRange("A" + (SourceHeaderRow + 1) + ":A").getValues();
    var ColAlast = ColAvals.filter(String).length;
    //Logger.log("Last row in column A with data"+ColAlast);  //DEBUG
    var NumberofProducts = ColAlast / RowsPerProduct;
    var lastRow = SourceSheet.getLastRow();

    // Count the products and confirm eight rows each
    var prodtest = isInt1(NumberofProducts);
    if (!prodtest) {
        // Logger.log("NOT an integer!");
        SpreadsheetApp.getUi().alert("Number of Rows divided by rows by Product isn't an integer");
        return false;
    }

    // Get data to clear Target ready for new data
    var TargetlastRow = Target.getLastRow();
    var TargetlastColumn = Target.getLastColumn();
    // clear the content before re-building
    Target.getRange(2, 1, TargetlastRow, TargetlastColumn).clear({
        contentsOnly: true
    });

    // Get ALL the data on the SourceSheet
    var SourceRange = SourceSheet.getDataRange();
    var SourceValues = SourceRange.getValues();

    // create loop for rows of data; first row of data in array=9
    for (i = SourceHeaderRow; i < (SourceHeaderRow + ColAlast); i = i + 8) {

        // create loop for weeks (Week 1=Col5, Week 2=Col6... Week 52=Col56, etc) (actual column numbers are +1)
        for (w = 1; w < 53; w++) {

            // Test to see whether there's a value for Display; the only field ALWAYS populated
            if (SourceValues[i + 1][w + 4]) {

                // Get Product and data fields
                var Prodlen = SourceValues[i][3].length;
                var prodedit = SourceValues[i][3].substring(11, (SourceValues[i][3].length));
                var product = prodedit.trim();
                var catalogue = SourceValues[i][w + 4];
                var display = SourceValues[i + 1][w + 4];
                var ESP = SourceValues[i + 3][w + 4];
                var mechanic = SourceValues[i + 6][w + 4];
                var join1 = product+" | "+display+" | "+mechanic;
                var join2 = display+" | "+product+" | "+mechanic;
                // Start building an array
                SourceArray.push([w, product, catalogue, display, ESP, mechanic,join1,join2]);

            } // end if data exists - process this week

        } // end w - this week loop

    } // end i - this row loop 


    // Copy the data from the array to the Target sheet

    // count number of rows
    var SourceArraylen = SourceArray.length;

    // first row is #2, allowing for header row
    // first column = A
    // number of rows = length of array
    // number of columns = 6 (the fields puched to the array
    var TargetRange = Target.getRange(2, 1, SourceArraylen, 8);

    // set the array values on the Target sheet
    TargetRange.setValues(SourceArray);
}

function isInt1(value) {
    return !isNaN(value) && parseInt(Number(value)) == value && !isNaN(parseInt(value, 10));
}

UPDATE

The second element of the Questioner's code deals with updating data to an "Output Sheet" as changes are made to "Category" sheets. The questioner's code for the update was OK, but was missing the translation of the source range on the Category Sheet to establishing the target range on the Output sheet.

The solution involves a rubric based on mathematical number sequence. In this case, the mathematical sequence is the row numbers for products on the source sheet; each product occupies 8 rows, and the first row is #10, so the sequence is 10,18,26,34....

onEdit returns the range of the changed cell and getRow and getColumn can be used to establish the co-ordinates of the changed cell. The challenge is, knowing the actual row number that was changed, to establish what number in the sequence of rows (and hence the product name) that the actual row number represents. It is also extremely unlikely (eight to one) that the changed row will coincide with the first row for a product number.

So it is necessary to apply the algorithm for mathematical sequences - twice. The formula to determine the nth number in a sequence of numbers is An = A1 + (D x(n-1)), where A1 is number for the first row (in our case, 10), D= the difference between each number in the sequence (in our case, 8), and n= the number in the sequence (in our case, the changed row number).

The first pass is to establish the position number in the sequence of numbers(product groups) represented by the actual changed row. It is very likely that the outcome is not an integer, i.e. it does not coincide with the first row for a Product group. So, the result is rounded down to the nearest integer, and the algorithm is processed a second time.

However this time we know the position of the sequence number, and we solve to find the value of the number. In this case, the formula is ((An-A1)/D)+1. This will return the row number in the Source sheet corresponding to the first row for the relevant Product group. We use this to identify what type of field was changed (Category, Display, etc).

The column number indicates the week number. Week 1 starts in Column F, so get column enables us to establish whether the change took place in a week column (or whether it was to left of Column F). If to the left, then "not my problem", if in F or higher, then it needs to be noted.

Lastly, we do a getRangeValue for the Target sheet and look for a match of the week number in Column A AND the truncated Product name in column B. This provides the co-ordinates to setValue for the new value tracked from OnEdit.


 function OnEdit(e) {

    // Update relevant Outputsheets on changes in Category sheets

    var ss = SpreadsheetApp.getActiveSpreadsheet();

    // Establish variables

    var s1 = "Category1";
    var s2 = "Category2";
    var s3 = "Category3";
    var tsuffix = "Output";
    //Logger.log("Sheet information");//DEBUG
    //Logger.log("The sheets to track are s1= "+s1+", s2 = "+s2+", and s3 = "+s3+", and the Output suffix is "+tsuffix+". For example s1output = "+s1+tsuffix);// DEBUG

    var TargetSheet = "";
    var weekscolumnstart = 6; // Column F
    var startrow = 10; // applies to the Source sheet
    var rowsperProduct = 8; // applies to the source sheet
    var changedfield = 0;
    var changedfieldname = "";
    var n = 0;

    // Collect data from the event
    var range = e.range;
    var oldValue = e.oldValue;
    var value = e.value
    var source = e.source;
    var sheet = source.getActiveSheet();
    var ssname = sheet.getName();
    // Logger.log("Range: "+range.getA1Notation()+", old value = "+oldValue+", new value = "+value+", source = "+source+", ss = "+sheet+", sheet name = "+ssname); //DEBUG

    // get the co-ordinates of the change
    var SourceRow = range.getRow();
    var SourceColumn = range.getColumn();
    // Logger.log("the Column is "+SourceColumn+", and the Row is "+SourceRow);// DEBUG


    // the weeks range to the right, from column F (va = weekscolumnstart). So by knowing the column number of the even, we can calculate the week number that applied to the change.
    var weeknumber = (SourceColumn - weekscolumnstart + 1);

    switch (ssname) { // the field references are used in a GetValue statement where the column is a reference to a specific column 
        case s1:
            TargetSheet = s1 + tsuffix;
            //Logger.log("The Source sheet was "+ssname+", so the Target sheet is "+TargetSheet);// DEBUG
            break;
        case s2:
            TargetSheet = s2 + tsuffix;
            //Logger.log("The Source sheet was "+ssname+", so the Target sheet is "+TargetSheet);// DEBUG
            break;
        case s3:
            TargetSheet = s3 + tsuffix;
            //Logger.log("The Source sheet was "+ssname+", so the Target sheet is "+TargetSheet);// DEBUG
            break;
        default:
            TargetSheet = 0;
            //Logger.log("The change was made in a sheet that we don't need to track.");
    } // end switch


    // get product and other change information if the change is on a tracked sheet and in a relevant column.
    // evalue for the event on a non-relevant sheet or in a non-relevant column
    if (TargetSheet == 0 || weeknumber <= 0) {
        // do nothing 
    } else {
        //Logger.log("before calculating line number; the TargetSheet is "+TargetSheet);
        // The source has eight rows of data per Product; there is no predictability about which one of the eight will be chnaged for a given product.
        // However the sequence of all the rows follows a mathenmatical sequence, so by knowing the row, it is possible to determine the product grouping
        // And by knowing the product grouping, it is possible to determine the first row of the product group.
        // 
        // The formula for the position of a number n a mathematical sequence is: = an=a1+d(n-1)
        // where an = the "nth" number in the sequence (equates to the nth Product); a1 = the start row (var=startrow); d = difference between each group (var=rowsperProduct) and n=the actual row number.
        // In the first instance we know the row number from the event data, so we work backwards to solve for the position of that number in the sequence.
        // 
        // 1) calculate the starting row for this product
        // 2) (Row number - starting row) divided by rowsperProduct) plus one.
        // 3) There's only a one-in eight chance that it is an integer, so round down to get first row of this product sequence
        // 4) Then we work forwards; since we know the nth number, we can calculate the row number for the first row for that product.
        // 5) starting row plus (rowsperproduct x (seqwuence number minus 1))
        // By knowing the first row in the product sequence, and the row number that was chnaged, we can calculate which data set was chnaged.
        var productseq = (((SourceRow - startrow) / rowsperProduct) + 1);
        var productseqround = Math.floor(productseq);
        var productline = (startrow + (rowsperProduct * (productseqround - 1)));
        //Logger.log("the row number is "+SourceRow+", but the sequence number for this product is "+productseqround+", and the startrow for this product group = "+productline); //DEBUG

        // identify the field that has changed
        // Source Row number less Productline 
        // if 0 = Catalogue
        // if 1= Display
        // if 3 = ESP
        // if 6 = Mechanic

        var LineNumber = (SourceRow - productline);
        //Logger.log("the calculated Line number = "+LineNumber); //DEBUG

        switch (LineNumber) { // the field references are used in a GetValue statement where the column is a reference to a specific column 
            case 0:
                changedfield = 3;
                changedfieldname = "Catalogue";
                //Logger.log("the changed field was "+changedfieldname); // DEBUG
                break;
            case 1:
                changedfield = 4;
                changedfieldname = "Display";
                //Logger.log("the changed field was "+changedfieldname); // DEBUG
                break;
            case 3:
                changedfield = 5;
                changedfieldname = "ESP";
                //Logger.log("the changed field was "+changedfieldname); // DEBUG
                break;
            case 6:
                changedfield = 6;
                changedfieldname = "Mechanic";
                //Logger.log("the changed field was "+changedfieldname); // DEBUG
                break;
            default:
                //Logger.log("the changed field was none of the above");
                changedfield = 0;
        } //end switch

    } //end if


    // OK, let's get this party started..
    // evaluate the sheet
    if (TargetSheet == 0) {
        //Logger.log("Do nothing because it's not on a sheet we need to worry about"); //DEBUG
    }
    // evaluate the week applying to the change
    else if (weeknumber <= 0) {
        //Logger.log("whatever was changed wasn't one of the key fields"); //DEBUG
    }
    //evaluate the changed field
    else if (changedfield == 0) {
        //Logger.log("Do nothing because it's not a field that we're not worried about"); //DEBUG
    }
    // looks OK to go ahead  
    else {
        //Logger.log("the field was changed for week# "+weeknumber+", lets find the product");

        // trim the Product Code for searching on the Output Sheet  
        var LongProdName = sheet.getRange(productline, 4).getValue();
        var Prodedit = LongProdName.substring(11, (LongProdName.length));
        var ShortProdName = Prodedit.trim();
        //Logger.log("the Product Name is "+LongProdName+", shortened to: "+ShortProdName);// DEBUG

        // test for existence of the TargetSheet  
        var sheeterror = 1; // use this variable as the canary in the coal mine. Set to 1, prima facie error
        var target = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(TargetSheet);
        if (target != null) { // test for exitentce of target; 
            sheeterror = 0; // if target sheet exists, then set sheeterror to zero; that is, sound the all clear
        }
        if (sheeterror != 0) { // now test for a sheet erorr; anything other than zero means there is a problem
            SpreadsheetApp.getUi().alert("WARNING#1: \n\n The Output Sheet: " + TargetSheet + " does NOT exist.\n\n Product: " + LongProdName + ", \nWeek: " + weeknumber + ",\nField: " + changedfieldname + ", \nold value = " + oldValue + " \n new value = " + value + ".\n Date: " + (new Date()));
            // Logger.log("ERROR: The Outout sheet:" + TargetSheet + " doesn't exist. Data changed on sheet:" + ssname + ", Product: " + LongProdName + ", Week# " + weeknumber + ", Field: " + changedfieldname + ", old value=" + oldValue + ", new value=" + value + ", Date " + (new Date())); //DEBUG
            return false;
        }


        // set the data range for the Output sheet and get the data
        var TargetRange = target.getDataRange();
        var TargetValues = TargetRange.getValues();

        // setup the search string
        // Logger.log("target Range = "+TargetRange.getA1Notation()+", search string = '"+ShortProdName+"', week# is "+weeknumber);  // DEBUG
        // Logger.log("TargetValues length = "+TargetValues.length);
        // so lets find a match
        var outputmatch = 1; // use this variable as the canary in the coal mine for not finding a match. Set to 1 = prima facie error
        for (n = 0; n < TargetValues.length; ++n) {
            // iterate row by row and match the week (Column A) and Name (Column B)
            //Logger.log("n = "+n+", product = "+ShortProdName+", week = "+weeknumber);
            if (TargetValues[n][1] == ShortProdName && TargetValues[n][0] == weeknumber) {

                // when we find the result (row number), add plus one to accout for the array starting at zero.
                var result = n + 1;
                // Logger.log("Found a match, n = "+result);  //DEBUG

                // create the co-ordinates for the output cell
                // row number = result, column = chnagedfield calculated earlier
                // Logger.log("update range: row = "+result+", column = "+changedfield); //DEBUG
                var updatecell = target.getRange(result, changedfield);
                //Logger.log("The update cell is "+updatecell.getA1Notation());  // DEBUG

                // Update the cell for the new value
                updatecell.setValue(value);

                // Fix values for Display/Mechanic if they were updated
                if (changedfieldname == "Display") {
                    var displayvalue = value;
                } else {
                    var displayvalue = TargetValues[n][3];
                }
                if (changedfieldname == "Mechanic") {
                    var mechanicvalue = value;
                } else {
                    var mechanicvalue = TargetValues[n][5];
                }

                // define the join1 parameters  
                var join1 = TargetValues[n][1] + " | " + displayvalue + " | " + mechanicvalue; // Bundle, Display, Mechanic
                // set the range for join 1
                var updatejoin1 = target.getRange(result, 7);
                // update join1
                updatejoin1.setValue(join1);

                // define the join2 parameters  
                var join2 = displayvalue + " | " + TargetValues[n][1] + " | " + mechanicvalue; // Display, Bundle, Mechanic
                // set the range for join 2
                var updatejoin2 = target.getRange(result, 8);
                // update join2
                updatejoin2.setValue(join2);

                // the outputmatch value to zero 
                outputmatch = 0;
                //Logger.log("The update cell is "+updatecell.getA1Notation()+", and the new value is "+ value); //DEBUG
                //Logger.log("SUMMARY: Data changed on sheet:" + ssname + ", saved to Output sheet:" + TargetSheet + ", range: " + range.getA1Notation() + ", Product: " + LongProdName + ", Week# " + weeknumber + ", Field: " + changedfieldname + ", old value=" + oldValue + ", new value=" + value + ", Date " + (new Date())); //DEBUG
                return false;

            }

        } // end for n
        if (outputmatch != 0) { // now test for a faliure to update the output sheet; anything other than zero means there is a problem
            // create an error message if we were unable to find a match and could not update the output sheet field
            SpreadsheetApp.getUi().alert("WARNING#2: There was an unidentified problem.\n\n Output Sheet: " + TargetSheet + " does NOT appear to have been updated.\n\n Product: " + LongProdName + ", \nWeek: " + weeknumber + ",\nField: " + changedfieldname + ", \nold value = " + oldValue + " \n new value = " + value + ".\n Date: " + (new Date()));
            return false;
        }
    } // end if

}
查看更多
登录 后发表回答