Why is Searching Multiple Sheets So Slow?

2019-07-24 17:30发布

I started writing a small wrapper class to take care of my Excel operations:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Excel = Microsoft.Office.Interop.Excel;
using System.Reflection; 

namespace CSVReader
{
    /**
     * Small wrapper around one Excel interop session.
     *
     * Owns an Excel.Application instance, opens workbooks in it and searches
     * the first worksheet of every open workbook for a piece of text.
     * Dispose the manager when finished; otherwise the Excel process that was
     * started by the constructor keeps running in the background.
     */
    class ExcelManager : IDisposable
    {
        // Holds instance of application.
        public Excel.Application application;

        /**
         * Class Constructor.
         *
         * Starts a new Excel application instance through interop.
         */
        public ExcelManager()
        {
            // Create a new application instance.
            application = new Excel.Application();
        }

        /**
         * Helper to open workbooks.
         *
         * filename: path of the workbook to open. Every other argument of
         *           Workbooks.Open is left at its default (Type.Missing).
         */
        public void Open(string filename) {
           application.Workbooks.Open(filename, Type.Missing, Type.Missing, Type.Missing, Type.Missing,
                                                Type.Missing, Type.Missing, Type.Missing, Type.Missing,
                                                Type.Missing, Type.Missing, Type.Missing, Type.Missing,
                                                Type.Missing, Type.Missing);
        }

        /**
         * Searches the first worksheet of every open workbook for a cell whose
         * value contains the given text (partial match, case-insensitive).
         *
         * search:  text to look for; must not be null.
         * returns: the first matching cell found, or null when no open
         *          workbook contains the text.
         * throws:  ArgumentNullException when search is null.
         */
        public Excel.Range Find(string search)
        {
            if (search == null)
            {
                throw new ArgumentNullException("search");
            }

            // Search all workbooks.
            foreach (Excel.Workbook book in application.Workbooks)
            {
                // A workbook made only of chart sheets has no worksheet to search.
                if (book.Worksheets.Count == 0)
                {
                    continue;
                }

                // Get first sheet.
                Excel.Worksheet sheet = (Excel.Worksheet)book.Worksheets.get_Item(1);

                // Get all data for sheet.
                Excel.Range firstCell = sheet.Range["A1", Type.Missing];
                Excel.Range lastCell  = sheet.Cells.SpecialCells(Excel.XlCellType.xlCellTypeLastCell, Type.Missing);
                Excel.Range sheetData = sheet.Range[firstCell, lastCell];

                Excel.Range match = sheetData.Find(search, Type.Missing,
                    Excel.XlFindLookIn.xlValues, Excel.XlLookAt.xlPart,
                    Excel.XlSearchOrder.xlByRows, Excel.XlSearchDirection.xlNext, false,
                    Type.Missing, Type.Missing);

                // One hit answers the question. Walking every match with
                // FindNext costs a COM round trip per cell, and comparing
                // against a "first find" remembered from a different workbook
                // could keep that loop from ever terminating.
                if (match != null)
                {
                    return match;
                }
            }

            return null;
        }

        /**
         * Shuts down the Excel instance owned by this manager.
         *
         * Alerts are suppressed first, so workbooks are closed without a
         * "save changes?" prompt and unsaved changes are discarded.
         * Calling Dispose more than once is harmless.
         */
        public void Dispose()
        {
            if (application == null)
            {
                return;
            }

            try
            {
                application.DisplayAlerts = false;
                application.Quit();
            }
            finally
            {
                // Drop the COM reference so the Excel process can exit.
                System.Runtime.InteropServices.Marshal.ReleaseComObject(application);
                application = null;
            }
        }
    }
}

I instantiate the class and tell it to load two workbooks and search for a string:

// Open both workbooks in a single Excel instance.
ExcelManager manager = new ExcelManager();

manager.Open(@"c:\test\test1.xls");
manager.Open(@"c:\test\test2.XLS");

// The search text must be a string literal (double quotes); single quotes
// denote a char literal in C# and cannot hold more than one character.
Excel.Range result = manager.Find("test cell");
if (result != null)
{
    // Do something funky.
}
else
{
    // Use a log file instead.
    Console.WriteLine("item was not found in the current sheet.");
}

The problem is that when I run this code it is incredibly slow, even with small workbooks. My C# knowledge is minimal, so I've been following tutorials the whole day. Is this a good way to go about searching multiple sheets? Would using OLE be any faster? The purpose of this app is simply to run a check that summarizes the values that don't appear in any of the sheets in my open workbooks.

标签: c# excel
1条回答
我想做一个坏孩纸
2楼-- · 2019-07-24 17:31

My first response would be that interop uses your Excel installation to gather the information. Any initialization logic from the Excel installation will be run and will make for a very slow loading time of the code.

What you can do to test whether this is the case: benchmark which of the function calls makes the search slow — the Find function, the loading of your ExcelManager class, or the Open function.

If it turns out the speed loss is not caused by the find function you might consider a library that parses the file itself instead of using interop.

查看更多
登录 后发表回答