Are Guids unique when using a U-SQL Extractor?

2019-08-21 05:40发布

As these questions point out, Guid.NewGuid will return the same value for all rows due to the enforced deterministic nature of U-SQL i.e if it's scaled out if an element (vertex) needs retrying then it should return the same value....

Guid.NewGuid() always return same Guid for all rows

auto_increment in U-SQL

However.... the code example in the officials documentation for a User Defined Extractor purposefully uses Guid.NewGuid().

I'm not querying the validity of the answers for the questions above, as they are from an authoritative source (the programme manager for u-sql, so very authoritative!). However, what I'm wondering if the action of using an Extractor means NewGuid can be used as normal? Is it simply within c# expressions in u-sql and User Defined Functions in which NewGuid is unsafe?

[SqlUserDefinedExtractor(AtomicFileProcessing = true)]
public class FullDescriptionExtractor : IExtractor
{
     private Encoding _encoding;
     private byte[] _row_delim;
     private char _col_delim;

    public FullDescriptionExtractor(Encoding encoding, string row_delim = "\r\n", char col_delim = '\t')
    {
         this._encoding = ((encoding == null) ? Encoding.UTF8 : encoding);
         this._row_delim = this._encoding.GetBytes(row_delim);
         this._col_delim = col_delim;

    }

    public override IEnumerable<IRow> Extract(IUnstructuredReader input, IUpdatableRow output)
    {
         string line;
         //Read the input line by line
         foreach (Stream current in input.Split(_encoding.GetBytes("\r\n")))
         {
        using (System.IO.StreamReader streamReader = new StreamReader(current, this._encoding))
         {
             line = streamReader.ReadToEnd().Trim();
             //Split the input by the column delimiter
             string[] parts = line.Split(this._col_delim);
             int count = 0; // start with first column
             foreach (string part in parts)
             {
    if (count == 0)
             {  // for column “guid”, re-generated guid
                 Guid new_guid = Guid.NewGuid();
                 output.Set<Guid>(count, new_guid);
             }
             else if (count == 2)
             {
                 // for column “user”, convert to UPPER case
                 output.Set<string>(count, part.ToUpper());

             }
             else
             {
                 // keep the rest of the columns as-is
                 output.Set<string>(count, part);
             }
             count += 1;
             }

         }
         yield return output.AsReadOnly();
         }
         yield break;
     }
}

https://docs.microsoft.com/en-us/azure/data-lake-analytics/data-lake-analytics-u-sql-programmability-guide#use-user-defined-extractors

0条回答
登录 后发表回答