How to query and extract data from CSV file direct

2019-09-03 14:46发布

I hardly use LINQ and not good in LINQ though.

I have developed output which display CSR wise and time interval wise data like below

+----------+--------------------+----------+----------+---------------+-------------+
| CSR Name |   Time Interval    | Incoming | OutGoing | Call Transfer | Missed Call |
+----------+--------------------+----------+----------+---------------+-------------+
| ACCTS    | 14:30:01--15:00:00 |        0 |        0 |             0 |           0 |
| ACCTS    | 15:00:01--15:30:00 |        0 |        0 |             1 |           0 |
| ACCTS    | 13:30:01--14:00:00 |        5 |        2 |             0 |           2 |
| ACCTS    | 14:00:01--14:30:00 |        1 |        0 |             0 |           0 |
| ACCTS    | 16:30:01--17:00:00 |        0 |        3 |             0 |           0 |
| ACCTS    | 17:00:01--17:30:00 |        4 |        0 |             1 |           2 |
| Christy  | 14:30:01--15:00:00 |        1 |        5 |             0 |           0 |
| Christy  | 15:00:01--15:30:00 |        2 |        7 |             1 |           0 |
| Christy  | 13:30:01--14:00:00 |        0 |       11 |             0 |           2 |
| SUZY     | 14:30:01--15:00:00 |        1 |        0 |             0 |           0 |
| SUZY     | 15:00:01--15:30:00 |        0 |        0 |             2 |           2 |
| SUZY     | 13:30:01--14:00:00 |        2 |        1 |             0 |           0 |
+----------+--------------------+----------+----------+---------------+-------------+

Here I am giving my C# program code which brings the above output.

private void FetchData(string tableName)
{
    TimeSpan tsStart, tsEnd;
    string strSql = "";
    srcTable = new DataTable();
    srcTable.TableName = "data";
    srcTable.Columns.Add("CSR Name");
    srcTable.Columns.Add("Time");
    srcTable.Columns.Add("Incoming Calls");
    srcTable.Columns.Add("Outgoing Calls");
    srcTable.Columns.Add("Calls Transfer");
    srcTable.Columns.Add("Missed Calls");
    DataRow dr = null;
    int incall = 0, outcall = 0, transfercall = 0, totmisscall = 0;
    bool flag = true;
    string StartTime = "", EndTime = "";

    string DayOfWeek = DateTime.Parse(dtVal.Value.ToString()).ToString("ddd");
    string st_Time = string.Empty, end_Time = string.Empty;

    st_Time="08:00:00";
    end_Time="17:30:00";

    // GetAllCSR function return datatable with all distinct CSR name excluding name start with VM and Voice Mail
    DataTable dtCSRName = GetAllCSR(tableName,txtCSRName.Text);
    string strCSRName = "";

    if (dtCSRName != null)
    {
        foreach (DataRow row in dtCSRName.Rows)
        {
            if (st_Time.Trim() != "" && end_Time.Trim() != "")
            {
                tsStart = new TimeSpan(DateTime.Parse(st_Time.Trim()).Hour, DateTime.Parse(st_Time.Trim()).Minute, DateTime.Parse(st_Time.Trim()).Second);
                tsEnd = new TimeSpan(DateTime.Parse(end_Time.Trim()).Hour, DateTime.Parse(end_Time.Trim()).Minute, DateTime.Parse(end_Time.Trim()).Second);
            }
            else
            {
                tsStart = new TimeSpan(09, 00, 00);
                tsEnd = new TimeSpan(17, 30, 0);
            }

    // iterate in all CSR name
            if (row["party1name"] != DBNull.Value)
            {
                strCSRName = row["party1name"].ToString();

                // iterate in all time interval like 08:00:00 to 08:30:00, 08:30:00 to 09:00:00, 09:00:00 to 09:30:00.....17:00:00 to 17:30:00
                while (tsStart <= tsEnd)
                {
                    if (!flag)
                    {
                        tsStart = new TimeSpan(tsStart.Hours, tsStart.Minutes, int.Parse("01"));
                    }
                    flag = false;

                    StartTime = tsStart.ToString();
                    tsStart = tsStart.Add(new TimeSpan(00, 30, 00));
                    EndTime = (tsStart.Hours >= 10 ? tsStart.Hours.ToString() : ("0" + tsStart.Hours.ToString())) + ":" + (tsStart.Minutes >= 10 ? tsStart.Minutes.ToString() : ("0" + tsStart.Minutes.ToString())) + ":00";

                    strSql = "select (select count(*) as incoming from " + tableName + " where direction='I' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and Is_Internal=0 and continuation=0 and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as incoming, ";

                    strSql = strSql + "(select count(*) as OutGoing from " + tableName + " ";
                    strSql = strSql + "where direction='O' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and Is_Internal=0 and continuation=0  and party1name not in ('Voice Mail') ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as OutGoing, ";

                    strSql = strSql + "(select count(*) as CallTransfer from " + tableName + " ";
                    strSql = strSql + "where continuation=1  and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)<> '00:00:00' and party1name not in ('Voice Mail') ";
                    strSql = strSql + "and party1name='" + strCSRName + "') as CallTransfer; ";

                    strSql = strSql + "SELECT count(*) as UnansweredCalls_DuringBusinessHours from ";
                    strSql = strSql + tableName + " where direction='I' and " + Environment.NewLine;
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) >='" + StartTime + "' and ";
                    strSql = strSql + "CONVERT(datetime,right([Call Start],8)) <='" + EndTime + "' ";
                    strSql = strSql + "and RIGHT(convert(varchar,[call duration]),8)= '00:00:00' and [Ring duration]>0 " + Environment.NewLine;
                    strSql = strSql + "and party1name='" + strCSRName + "'" + Environment.NewLine;

                    if (Business.CurrentCountry.CountryCode == "US" || Business.CurrentCountry.CountryCode == "MX" || Business.CurrentCountry.CountryCode == "ES" || Business.CurrentCountry.CountryCode == "NL" || Business.CurrentCountry.CountryCode == "PL")
                    {
                        strSql = strSql + " and Park_Time=0";
                    }

                    strSql = strSql + Environment.NewLine;

                    DataSet oDS = Common.GetDataSet(strSql, "");

                    if (oDS.Tables.Count > 0)
                    {
                        if (oDS.Tables[0].Rows.Count > 0)
                        {
                            dr = srcTable.NewRow();
                            dr[0] = strCSRName;
                            dr[1] = StartTime + "--" + EndTime;
                            if (oDS.Tables[0].Rows[0]["incoming"] != DBNull.Value)
                            {
                                dr[2] = oDS.Tables[0].Rows[0]["incoming"].ToString();
                                incall = incall + int.Parse(oDS.Tables[0].Rows[0]["incoming"].ToString());
                            }
                            else
                            {
                                dr[2] = "0";
                            }

                            if (oDS.Tables[0].Rows[0]["OutGoing"] != DBNull.Value)
                            {
                                dr[3] = oDS.Tables[0].Rows[0]["OutGoing"].ToString();
                                outcall = outcall + int.Parse(oDS.Tables[0].Rows[0]["OutGoing"].ToString());
                            }
                            else
                            {
                                dr[3] = "0";
                            }

                            if (oDS.Tables[0].Rows[0]["CallTransfer"] != DBNull.Value)
                            {
                                dr[4] = oDS.Tables[0].Rows[0]["CallTransfer"].ToString();
                                transfercall = transfercall + int.Parse(oDS.Tables[0].Rows[0]["CallTransfer"].ToString());
                            }
                            else
                            {
                                dr[4] = "0";
                            }
                        }

                        if (oDS.Tables[1].Rows.Count > 0)
                        {
                            if (oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"] != DBNull.Value)
                            {
                                dr[5] = oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString();
                                totmisscall = totmisscall + int.Parse(oDS.Tables[1].Rows[0]["UnansweredCalls_DuringBusinessHours"].ToString());
                            }
                        }
                        srcTable.Rows.Add(dr);
                    }
                }
            }
        }
    }

    txtIncomming.Text = incall.ToString();
    txtOutGoing.Text = outcall.ToString();
    txtCallTransfer.Text = transfercall.ToString();
    txtMissCall.Text = totmisscall.ToString();
    //srcTable.DefaultView.Sort = "[CSR Name], [Time]";

    DataSet oDs = new DataSet();
    oDs.Tables.Add(srcTable);

    this.outlookGrid1.ExpandIcon = global::BBA.Properties.Resources.Collapse;
    this.outlookGrid1.CollapseIcon = global::BBA.Properties.Resources.Expand;

    if (srcTable.Rows.Count > 0)
    {
        outlookGrid1.BindData(oDs, "data");
        View = "BoundCategory";
        DataGridViewCellEventArgs evt = new DataGridViewCellEventArgs(0, -1);
        object sender = new object();
        dgResult_CellClick(sender, evt);
        outlookGrid1.ExpandAll();
    }
    else
    {
        outlookGrid1.Rows.Clear();
        MessageBox.Show("No data found");
    }
}

So what I am doing? I am reading a CSV file first and then insert CSV file data into database table and later fetch those data with condition and form the actual data in 2 loops.

The above code and process works fine but slow. I was told to use LINQ only to read csf file data and form the output. I am weak though in LINQ so need help from you to form the right linq query to have exact output.

This way I tried so far to form the LINQ query but honestly not being able to arrange it properly. Here is my linq query which need to add more to be full code. Here I am pasting my working linq query which is slow but not throwing error.

void Main()
{
    var csvlines = File.ReadAllLines(@"M:\smdr(backup08-06-2015).csv");
    var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());

    var users = csvLinesData.Select(data => new User
    {
        CSRName = data[12],
        Incomming = csvLinesData.Count(w => w[4] == "I" 
                                        &&  w[8] == "0"
                                        &&  w[10] == "0"
                                        &&  w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),

        outgoing = csvLinesData.Count(w => w[4] == "O"
                                        && w[8] == "0"
                                        && w[10] == "0"
                                        &&  w[1].ToString().Substring(w[1].ToString().Length-8)!="00:00:00"),

    })
    .Where(u => !u.CSRName.Contains("VM") && u.CSRName != "Voice Mail").OrderBy(u=> u.CSRName)
    .ToList();
    users.Dump();
}

class User
{
    public string CSRName;
    public int outgoing;
    public int Incomming;

}

So my request to all who are expert in LINQ to please have a look at my C# code and help me to compose the full fledged linq query which bring the same output without iterating in loop.

here is my csv file url http://s000.tinyupload.com/?file_id=22225722172003614028 from where anyone can download. just click on test.csv file link if anyone is interested. thanks

标签: c# linq csv
2条回答
Animai°情兽
2楼-- · 2019-09-03 15:11

I don't personally think that Linq is going to be significantly faster than a well formed for loop. (unless you are taking advantage of PLinq) I think the bigger issue is that your data model may require a different design. What I would do is instead create a new class that represents a row and its important details only.

class CallInformation {
    public DateTime CallStart { get; private set; }
    public Boolean IsOutGoing {get; private set; }
    public String CSRName {get; private set; }
    public int InComingCount { get; set; }
    public int OutgoingCount { get; set; }

    public CallInformation(String[] parts) {
        IsOutGoing = parts[4] == "O";
        CallStart = DateTime.Parse(parts[0]);
        CSRName = parts[12];
    }
    //... Continue with the important properties
}

Now a IO Stream may also improve performance however i will exclude that for clarity. It is often much faster to sort and combine your items as you go instead of searching. If you can avoid heavy string comparison/creation you will often greatly improve performance.

From this part of your

Dictionary<String, CallInformation> callDictionary = new Dictionary<String,CallInformation>();
var csvLinesData = csvlines.Skip(1).Select(l => l.Split(',').ToArray());

foreach(string[] parts in csvLinesData) {
     //Then place this call into a sortedlist or Dictionary. 
     //Here i am counting up the incoming and outgoing calls. 
     if(callDictionary.containsKey(parts[12])) {
         if(parts[4] == "I") {
             callDictionary[parts[12]].InComingCount++;
         } else { 
             callDictionary[parts[12]].OutGoingCount++; 
         }
     } else {
         //Construct your new object based on this row.
         CallInformation call = new CallInformation(parts);
         callDictionary.add(call.CSRName, call);
     }
}

The sorted list can be sorted based on the StartTime or other field. the that governs your output. I am personally having trouble wrapping my head around the exact method you are grouping and processing. I personally believe your performance is based on the fact that you are querying your data set for information that will never be present. But please let me know about how you are grouping the information. I think this is more of how you are going to be grouping information.

If you are trying to bucket calls based on their time duration then you can then use the DateTime part to figure out which "bucket" in the Dictionary to place it in, instead of the CSR name i have done above (by adjusting the call.CSRName, and the containsKey[12]) you can instead create Time duration strings. By using the date time object you can make comparisons based on time differences instead of string comparisons.

To pull the data out of this collection you would actually want to use Linq to perform that

a query like

 callDictionary.Where((keyValuePair) => { return keyValuePair.Value.CSRName != "VM" && keyValuePair.Value.CSRName != "VoiceMail"; }).OrderBy(o.Value.StartTime);

The OrderBy clause is really up to you by the looks you are Since we are "bucking" things already we can just sort by the start time, to get finer control you would want the StartTime to be the start of the TimeDuration. of any 1 particular item and it will be approximately right. You could also add another OrderBy to sort it by the CSRName as well.

查看更多
Emotional °昔
3楼-- · 2019-09-03 15:20

Something like the snippet below:

private static IEnumerable<MyData> Parse(String csvFile, bool hasHeaderLine = false, char separator = ',') {
    return Read(csvFile)
        .Skip(hasHeaderLine ? 1 : 0)
        .Select(line => line.Split(separator, StringSplitOptions.None))
        .Select(parts => new MyData(
            parts[0],
            DateTime.Parse(parts[1]),
            TimeSpan.Parse(parts[2]),
            uint.Parse(parts[3]),
            uint.Parse(parts[4]),
            uint.Parse(parts[5]),
            uint.Parse(parts[6])
        ));
}

private static IEnumerable<String> Read(String csvFile) {
    using (var reader = new StreamReader(csvFile)) {
        String line;
        while ((line = reader.ReadLine()) != null)
            yield return line;
    }
}

private class MyData {
    private readonly string name;
    private readonly DateTime startTime;
    private readonly TimeSpan duration;
    private readonly uint incommingCalls, outgoingCalls, callTransfers, missedCalls;

    public MyData(string name, DateTime startTime, TimeSpan duration, uint incommingCalls, uint outgoingCalls, uint callTransfers, uint missedCalls) {
        this.name = name;
        this.startTime = startTime;
        this.duration = duration;
        this.incommingCalls = incommingCalls;
        this.outgoingCalls = outgoingCalls;
        this.callTransfers = callTransfers;
        this.missedCalls = missedCalls;
    }

    public string Name => this.name;
    public DateTime StartTime => this.startTime;
    public TimeSpan Duration => this.duration;
    public uint IncommingCalls => this.incommingCalls;
    public uint OutgoingCalls => this.outgoingCalls;
    public uint CallTransfers => this.callTransfers;
    public uint MissedCalls => this.missedCalls;

    public DateTime EndTime => this.startTime + this.duration;
}
查看更多
登录 后发表回答