I have gone through the Insert 2 million rows into SQL Server quickly link and found that I can do this by using bulk insert. So I am trying to create the DataTable (code below), but as this is a huge file (more than 300K rows) I am getting an OutOfMemoryException in my code:
string line;
DataTable data = new DataTable();
string[] columns = null;
bool isInserted = false;

using (TextReader tr = new StreamReader(_fileName, Encoding.Default))
{
    if (columns == null)
    {
        line = tr.ReadLine();
        columns = line.Split(',');
    }

    for (int iColCount = 0; iColCount < columns.Count(); iColCount++)
    {
        data.Columns.Add("Column" + iColCount, typeof(string));
    }

    string[] columnVal;
    while ((line = tr.ReadLine()) != null)
    {
        columnVal = line.Split(','); // OutOfMemoryException is thrown on this line
        data.Rows.Add(columnVal);
    }
}
After a lot of rework I modified my code as below, but I still get an OutOfMemoryException when adding the rows to the DataTable:
DataTable data = new DataTable();
string[] columns = null;
var line = string.Empty;

using (TextReader tr = new StreamReader(_fileName, Encoding.Default))
{
    if (columns == null)
    {
        line = tr.ReadLine();
        columns = line.Split(',');
    }

    for (int iColCount = 0; iColCount < columns.Count(); iColCount++)
    {
        data.Columns.Add("Column" + iColCount, typeof(string));
    }
}
// Split the rows into chunks of 20000 rows each
var _fileList = File.ReadLines(_fileName, Encoding.Default).ToList();
var splitChunks = new List<List<string>>();
splitChunks = SplitFile(_fileList, 20000);

Parallel.ForEach(splitChunks, lstChunks =>
{
    foreach (var rows in lstChunks)
    {
        string[] lineFields = rows.Split(',');
        DataRow row = datatbl.NewRow();
        for (int iCount = 0; iCount < lineFields.Count(); iCount++)
        {
            row[iCount] = lineFields[iCount] == string.Empty ? "" : lineFields[iCount].ToString();
        }
        datatbl.Rows.Add(row);
    }
});
For the next step I can do the bulk insert with the code below:
SqlConnection SqlConnectionObj = GetSQLConnection();
SqlBulkCopy bulkCopy = new SqlBulkCopy(SqlConnectionObj, SqlBulkCopyOptions.TableLock | SqlBulkCopyOptions.FireTriggers | SqlBulkCopyOptions.UseInternalTransaction, null);
bulkCopy.DestinationTableName = "TempTable";
bulkCopy.WriteToServer(data);
The file contains data like this:
4714,1370,AUSRICHTEN MASCHINELL
4870,1370,PLATTE STECKEN
0153,1900,CAULK GUN
0154,1900,NEW TERMINATOR
0360,1470,MU 186 MACCH. X LAV. S/A ASTE PS174
9113-H22,1970,MC DRILL BITS
The code needs to convert this into 6 rows and 3 columns.
Is there a faster way to read the file and create the DataTable for the bulk insert, so that I don't run into the OutOfMemoryException?
Thanks in advance.
The reason you are getting an OutOfMemoryException is that you are creating an in-memory DataTable and trying to insert 300K rows into it. That is a lot of data to hold in memory.
Instead, every time you read a certain number of rows from the text file, you should insert them into the database.
How you do it is up to you: you can use plain SQL or bulk copy. Just keep in mind that you can't read the entire text file and keep it in memory, so process it in chunks.
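For illustration, here is a minimal sketch of that idea with SqlBulkCopy (it uses the same namespaces as the code in the question: System, System.Data, System.Data.SqlClient, System.IO, System.Text). The destination table name "TempTable", the batch size, and the GetSQLConnection() helper (assumed to return an open connection) are placeholders taken from the question, so adjust them to your setup:

// Stream the file and flush every N rows instead of
// materializing all 300K+ rows in memory at once.
const int batchSize = 20000;
DataTable buffer = new DataTable();

using (SqlConnection conn = GetSQLConnection())        // assumed to return an open connection
using (var bulkCopy = new SqlBulkCopy(conn) { DestinationTableName = "TempTable" })
using (var reader = new StreamReader(_fileName, Encoding.Default))
{
    // Build one string column per CSV column from the first line.
    string header = reader.ReadLine();
    foreach (var _ in header.Split(','))
        buffer.Columns.Add();

    string line;
    while ((line = reader.ReadLine()) != null)
    {
        buffer.Rows.Add(line.Split(','));
        if (buffer.Rows.Count >= batchSize)
        {
            bulkCopy.WriteToServer(buffer);             // push this chunk to SQL Server
            buffer.Clear();                             // drop the rows we just wrote
        }
    }

    if (buffer.Rows.Count > 0)
        bulkCopy.WriteToServer(buffer);                 // flush the last partial chunk
}

This way memory usage is bounded by the batch size rather than by the file size.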
Here is a solution with SqlBulkCopy.WriteToServer and an IDataReader. I'm using CSV, but I hope it will be easy to adapt to other formats. SqlBulkCopy uses just three members of IDataReader, and we have to implement them:
public int FieldCount {get; }
public bool Read()
public object GetValue(int i)
All other properties and methods can be left unimplemented. There is an interesting article about SqlBulkCopy. Full code: https://dotnetfiddle.net/giG3Ai. Here is a trimmed version:
namespace SqlBulkCopy
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Diagnostics;
    using System.Data;
    using System.Data.SqlClient;
    public class CsvReader : IDataReader
    {
        private readonly char CSV_DELIMITER = ',';
        private readonly StreamReader _sr;
        private readonly Dictionary<string, Func<string, object>> _csv2SqlType;
        private readonly string[] _headers;

        private string _line;
        private string[] _values;

        public int FieldCount { get { return _headers.Length; } }

        public CsvReader(string filePath, Dictionary<string, Func<string, object>> csvColumn2SqlTypeDict)
        {
            if (string.IsNullOrEmpty(filePath))
                throw new ArgumentException("is null or empty", "filePath");
            if (!System.IO.File.Exists(filePath))
                throw new IOException(string.Format("{0} doesn't exist or access denied", filePath));
            if (csvColumn2SqlTypeDict == null)
                throw new ArgumentNullException("csvColumn2SqlTypeDict");

            _sr = new StreamReader(filePath);
            _csv2SqlType = csvColumn2SqlTypeDict;
            _headers = ReadHeaders();
            ValidateHeaders();
        }

        public object GetValue(int i)
        {
            // Get column value
            var colValue = _values[i];
            // Get column name
            var colName = _headers[i];
            // Try to convert to SQL type
            try { return _csv2SqlType[colName](colValue); }
            catch { return null; }
        }

        public bool Read()
        {
            if (_sr.EndOfStream) return false;

            _line = _sr.ReadLine();
            _values = _line.Split(CSV_DELIMITER);
            // If row is invalid, go to next row
            if (_values.Length != _headers.Length)
                return Read();
            return true;
        }

        public void Dispose()
        {
            _sr.Dispose();
        }

        private void ValidateHeaders()
        {
            if (_headers.Length != _csv2SqlType.Keys.Count)
                throw new InvalidOperationException(string.Format("Read {0} columns, but csv2SqlTypeDict contains {1} columns", _headers.Length, _csv2SqlType.Keys.Count));
            foreach (var column in _headers)
            {
                if (!_csv2SqlType.ContainsKey(column))
                    throw new InvalidOperationException(string.Format("There is no converter for column '{0}'", column));
            }
        }

        private string[] ReadHeaders()
        {
            var headerLine = _sr.ReadLine();
            if (string.IsNullOrEmpty(headerLine))
                throw new InvalidDataException("There is no header in CSV!");
            var headers = headerLine.Split(CSV_DELIMITER);
            if (headers.Length == 0)
                throw new InvalidDataException("There is no header in CSV after Split!");
            return headers;
        }
    }
    public class Program
    {
        public static void Main(string[] args)
        {
            // Converter from CSV columns to SQL columns
            var csvColumn2SqlTypeDict = new Dictionary<string, Func<string, object>>
            {
                { "int", (s) => Convert.ToInt32(s) },
                { "str", (s) => s },
                { "double", (s) => Convert.ToDouble(s) },
                { "date", (s) => Convert.ToDateTime(s) },
            };

            Stopwatch sw = Stopwatch.StartNew();
            try
            {
                // example.csv
                /***
                  int,str,double,date
                  1,abcd,2.5,15.04.2002
                  2,dab,2.7,15.04.2007
                  3,daqqb,4.7,14.04.2007
                ***/
                using (var csvReader = new CsvReader("example.csv", csvColumn2SqlTypeDict))
                {
                    // TODO!!! Modify to your Connection string
                    var cs = @"Server=localhost\SQLEXPRESS;initial catalog=TestDb;Integrated Security=true";
                    using (var loader = new SqlBulkCopy(cs, SqlBulkCopyOptions.Default))
                    {
                        // TODO Modify to your Destination table
                        loader.DestinationTableName = "Test";
                        // Write from csvReader to database
                        loader.WriteToServer(csvReader);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Got an exception: {0}", ex);
                Console.WriteLine("Press 'Enter' to quit");
                Console.ReadLine();
                return;
            }
            finally { sw.Stop(); }

            Console.WriteLine("Data has been written in {0}", sw.Elapsed);
            Console.WriteLine("Press 'Enter' to quit");
            Console.ReadLine();
        }

        private static void ShowCsv(IDataReader dr)
        {
            int i = 0;
            while (dr.Read())
            {
                Console.WriteLine("Row# {0}", i);
                for (int j = 0; j < dr.FieldCount; j++)
                {
                    Console.WriteLine("{0} => {1}", j, dr.GetValue(j));
                }
                i++;
            }
        }
    }
}
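The schema of the destination table "Test" isn't shown above. As an assumption to make the example self-contained, a table like the following would work with the sample example.csv, since SqlBulkCopy maps columns by ordinal when no column mappings are given (adjust the types to your data):

// One-time setup sketch (assumed schema matching example.csv).
using (var conn = new SqlConnection(@"Server=localhost\SQLEXPRESS;initial catalog=TestDb;Integrated Security=true"))
using (var cmd = new SqlCommand(@"
    CREATE TABLE Test (
        [int]    INT,
        [str]    NVARCHAR(100),
        [double] FLOAT,
        [date]   DATETIME
    )", conn))
{
    conn.Open();
    cmd.ExecuteNonQuery();
}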
I found that forgetting the DataTable and using plain old SqlClient on a row-by-row basis was quicker, and simpler too. It also beat a streaming SQL function that was supposedly the fastest way to get data into a SQL Server DB.
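Roughly what I mean, as a sketch (the table and column names TempTable(Code, GroupId, Descr) are made up to match the sample data, and GetSQLConnection() is the helper from the question, assumed to return an open connection):

// Row-by-row insert with a single reused, parameterized command.
using (SqlConnection conn = GetSQLConnection())
using (var cmd = new SqlCommand(
    "INSERT INTO TempTable (Code, GroupId, Descr) VALUES (@code, @groupId, @descr)", conn))
{
    // Prepare the parameters once and reuse them for every row.
    cmd.Parameters.Add("@code", SqlDbType.NVarChar, 50);
    cmd.Parameters.Add("@groupId", SqlDbType.NVarChar, 50);
    cmd.Parameters.Add("@descr", SqlDbType.NVarChar, 200);

    foreach (var line in File.ReadLines(_fileName, Encoding.Default))
    {
        var fields = line.Split(',');
        if (fields.Length != 3) continue;               // skip malformed rows

        cmd.Parameters["@code"].Value = fields[0];
        cmd.Parameters["@groupId"].Value = fields[1];
        cmd.Parameters["@descr"].Value = fields[2];
        cmd.ExecuteNonQuery();                          // one insert per row
    }
}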
Try it and measure the speed to see if it's fast enough for you. If it isn't, you could always reformat the file (if necessary) and let SQL Server do the work for you using its BULK INSERT command.
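For reference, a minimal sketch of that last option, assuming the reformatted file sits at a path the SQL Server service account can read (the path C:\data\import.csv and the table name TempTable are placeholders):

// Let SQL Server load the file itself via BULK INSERT.
// Note: the file is read on the server side, not by the client application.
using (SqlConnection conn = GetSQLConnection())         // assumed to return an open connection
using (var cmd = new SqlCommand(@"
    BULK INSERT TempTable
    FROM 'C:\data\import.csv'
    WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '\n')", conn))
{
    cmd.CommandTimeout = 0;                             // large loads can exceed the default timeout
    cmd.ExecuteNonQuery();
}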