What I want to do
I'm trying to use the Microsoft.Office.Interop.Excel namespace to open an Excel file (XSL or CSV, but sadly not XSLX) and import it into a DataSet. I don't have control over the worksheet or column names, so I need to allow for changes to them.
What I've tried
I've tried the OLEDB method of this in the past, and had a lot of problems with it (buggy, slow, and required prior knowledge of the Excel file's schema), so I want to avoid doing that again. What I'd like to do is use Microsoft.Office.Interop.Excel to import the workbook directly to a DataSet, or loop through the worksheets and load each one into a DataTable.
Believe it or not, I've had trouble finding resources for this. A few searches on StackOverflow have found mostly people trying to do the reverse (DataSet => Excel), or the OLEDB technique. Google hasn't been much more helpful.
What I've got so far
public void Load(string filename, Excel.XlFileFormat format = Excel.XlFileFormat.xlCSV)
{
app = new Excel.Application();
book = app.Workbooks.Open(Filename: filename, Format: format);
DataSet ds = new DataSet();
foreach (Excel.Worksheet sheet in book.Sheets)
{
DataTable dt = new DataTabl开发者_StackOverflow中文版e(sheet.Name);
ds.Tables.Add(dt);
//??? Fill dt from sheet
}
this.Data = ds;
}
I'm fine with either importing the entire book at once, or looping through one sheet at a time. Can I do this with Interop.Excel?
What about using Excel Data Reader (previously hosted here) an open source project on codeplex? Its works really well for me to export data from excel sheets.
The sample code given on the link specified:
FileStream stream = File.Open(filePath, FileMode.Open, FileAccess.Read);
//1. Reading from a binary Excel file ('97-2003 format; *.xls)
IExcelDataReader excelReader = ExcelReaderFactory.CreateBinaryReader(stream);
//...
//2. Reading from a OpenXml Excel file (2007 format; *.xlsx)
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream);
//...
//3. DataSet - The result of each spreadsheet will be created in the result.Tables
DataSet result = excelReader.AsDataSet();
//...
//4. DataSet - Create column names from first row
excelReader.IsFirstRowAsColumnNames = true;
DataSet result = excelReader.AsDataSet();
//5. Data Reader methods
while (excelReader.Read())
{
//excelReader.GetInt32(0);
}
//6. Free resources (IExcelDataReader is IDisposable)
excelReader.Close();
UPDATE
After some search around, I came across this article: Faster MS Excel Reading using Office Interop Assemblies. The article only uses Office Interop Assemblies
to read data from a given Excel Sheet. The source code is of the project is there too. I guess this article can be a starting point on what you trying to achieve. See if that helps
UPDATE 2
The code below takes an excel workbook
and reads all values found, for each excel worksheet
inside the excel workbook
.
private static void TestExcel()
{
ApplicationClass app = new ApplicationClass();
Workbook book = null;
Range range = null;
try
{
app.Visible = false;
app.ScreenUpdating = false;
app.DisplayAlerts = false;
string execPath = Path.GetDirectoryName(Assembly.GetExecutingAssembly().CodeBase);
book = app.Workbooks.Open(@"C:\data.xls", Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value);
foreach (Worksheet sheet in book.Worksheets)
{
Console.WriteLine(@"Values for Sheet "+sheet.Index);
// get a range to work with
range = sheet.get_Range("A1", Missing.Value);
// get the end of values to the right (will stop at the first empty cell)
range = range.get_End(XlDirection.xlToRight);
// get the end of values toward the bottom, looking in the last column (will stop at first empty cell)
range = range.get_End(XlDirection.xlDown);
// get the address of the bottom, right cell
string downAddress = range.get_Address(
false, false, XlReferenceStyle.xlA1,
Type.Missing, Type.Missing);
// Get the range, then values from a1
range = sheet.get_Range("A1", downAddress);
object[,] values = (object[,]) range.Value2;
// View the values
Console.Write("\t");
Console.WriteLine();
for (int i = 1; i <= values.GetLength(0); i++)
{
for (int j = 1; j <= values.GetLength(1); j++)
{
Console.Write("{0}\t", values[i, j]);
}
Console.WriteLine();
}
}
}
catch (Exception e)
{
Console.WriteLine(e);
}
finally
{
range = null;
if (book != null)
book.Close(false, Missing.Value, Missing.Value);
book = null;
if (app != null)
app.Quit();
app = null;
}
}
In the above code, values[i, j]
is the value that you need to be added to the dataset
. i
denotes the row, whereas, j
denotes the column.
Have you seen this one? From http://www.aspspider.com/resources/Resource510.aspx:
public DataTable Import(String path)
{
Microsoft.Office.Interop.Excel.Application app = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.Workbook workBook = app.Workbooks.Open(path, 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
Microsoft.Office.Interop.Excel.Worksheet workSheet = (Microsoft.Office.Interop.Excel.Worksheet)workBook.ActiveSheet;
int index = 0;
object rowIndex = 2;
DataTable dt = new DataTable();
dt.Columns.Add("FirstName");
dt.Columns.Add("LastName");
dt.Columns.Add("Mobile");
dt.Columns.Add("Landline");
dt.Columns.Add("Email");
dt.Columns.Add("ID");
DataRow row;
while (((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 1]).Value2 != null)
{
row = dt.NewRow();
row[0] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 1]).Value2);
row[1] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 2]).Value2);
row[2] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 3]).Value2);
row[3] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 4]).Value2);
row[4] = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)workSheet.Cells[rowIndex, 5]).Value2);
index++;
rowIndex = 2 + index;
dt.Rows.Add(row);
}
app.Workbooks.Close();
return dt;
}
object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
//Get the column names
for (int k = 0; k < valueArray.GetLength(1); )
{
//add columns to the data table.
dt.Columns.Add((string)valueArray[1,++k]);
}
//Load data into data table
object[] singleDValue = new object[valueArray.GetLength(1)];
//value array first row contains column names. so loop starts from 1 instead of 0
for (int i = 1; i < valueArray.GetLength(0); i++)
{
Console.WriteLine(valueArray.GetLength(0) + ":" + valueArray.GetLength(1));
for (int k = 0; k < valueArray.GetLength(1); )
{
singleDValue[k] = valueArray[i+1, ++k];
}
dt.LoadDataRow(singleDValue, System.Data.LoadOption.PreserveChanges);
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Reflection;
using Microsoft.Office.Interop.Excel;
namespace trg.satmap.portal.ParseAgentSkillMapping
{
class ConvertXLStoDT
{
private StringBuilder errorMessages;
public StringBuilder ErrorMessages
{
get { return errorMessages; }
set { errorMessages = value; }
}
public ConvertXLStoDT()
{
ErrorMessages = new StringBuilder();
}
public System.Data.DataTable XLStoDTusingInterOp(string FilePath)
{
#region Excel important Note.
/*
* Excel creates XLS and XLSX files. These files are hard to read in C# programs.
* They are handled with the Microsoft.Office.Interop.Excel assembly.
* This assembly sometimes creates performance issues. Step-by-step instructions are helpful.
*
* Add the Microsoft.Office.Interop.Excel assembly by going to Project -> Add Reference.
*/
#endregion
Microsoft.Office.Interop.Excel.Application excelApp = null;
Microsoft.Office.Interop.Excel.Workbook workbook = null;
System.Data.DataTable dt = new System.Data.DataTable(); //Creating datatable to read the content of the Sheet in File.
try
{
excelApp = new Microsoft.Office.Interop.Excel.Application(); // Initialize a new Excel reader. Must be integrated with an Excel interface object.
//Opening Excel file(myData.xlsx)
workbook = excelApp.Workbooks.Open(FilePath, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
Microsoft.Office.Interop.Excel.Worksheet ws = (Microsoft.Office.Interop.Excel.Worksheet)workbook.Sheets.get_Item(1);
Microsoft.Office.Interop.Excel.Range excelRange = ws.UsedRange; //gives the used cells in sheet
ws = null; // now No need of this so should expire.
//Reading Excel file.
object[,] valueArray = (object[,])excelRange.get_Value(Microsoft.Office.Interop.Excel.XlRangeValueDataType.xlRangeValueDefault);
excelRange = null; // you don't need to do any more Interop. Now No need of this so should expire.
dt = ProcessObjects(valueArray);
}
catch (Exception ex)
{
ErrorMessages.Append(ex.Message);
}
finally
{
#region Clean Up
if (workbook != null)
{
#region Clean Up Close the workbook and release all the memory.
workbook.Close(false, FilePath, Missing.Value);
System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
#endregion
}
workbook = null;
if (excelApp != null)
{
excelApp.Quit();
}
excelApp = null;
#endregion
}
return (dt);
}
/// <summary>
/// Scan the selected Excel workbook and store the information in the cells
/// for this workbook in an object[,] array. Then, call another method
/// to process the data.
/// </summary>
private void ExcelScanIntenal(Microsoft.Office.Interop.Excel.Workbook workBookIn)
{
//
// Get sheet Count and store the number of sheets.
//
int numSheets = workBookIn.Sheets.Count;
//
// Iterate through the sheets. They are indexed starting at 1.
//
for (int sheetNum = 1; sheetNum < numSheets + 1; sheetNum++)
{
Worksheet sheet = (Worksheet)workBookIn.Sheets[sheetNum];
//
// Take the used range of the sheet. Finally, get an object array of all
// of the cells in the sheet (their values). You can do things with those
// values. See notes about compatibility.
//
Range excelRange = sheet.UsedRange;
object[,] valueArray = (object[,])excelRange.get_Value(XlRangeValueDataType.xlRangeValueDefault);
//
// Do something with the data in the array with a custom method.
//
ProcessObjects(valueArray);
}
}
private System.Data.DataTable ProcessObjects(object[,] valueArray)
{
System.Data.DataTable dt = new System.Data.DataTable();
#region Get the COLUMN names
for (int k = 1; k <= valueArray.GetLength(1); k++)
{
dt.Columns.Add((string)valueArray[1, k]); //add columns to the data table.
}
#endregion
#region Load Excel SHEET DATA into data table
object[] singleDValue = new object[valueArray.GetLength(1)];
//value array first row contains column names. so loop starts from 2 instead of 1
for (int i = 2; i <= valueArray.GetLength(0); i++)
{
for (int j = 0; j < valueArray.GetLength(1); j++)
{
if (valueArray[i, j + 1] != null)
{
singleDValue[j] = valueArray[i, j + 1].ToString();
}
else
{
singleDValue[j] = valueArray[i, j + 1];
}
}
dt.LoadDataRow(singleDValue, System.Data.LoadOption.PreserveChanges);
}
#endregion
return (dt);
}
}
}
Quiet Late though!.
This method is properly tested and it converts the excel to DataSet
.
public DataSet Dtl()
{
//Instance reference for Excel Application
Microsoft.Office.Interop.Excel.Application objXL = null;
//Workbook refrence
Microsoft.Office.Interop.Excel.Workbook objWB = null;
DataSet ds = new DataSet();
try
{
objXL = new Microsoft.Office.Interop.Excel.Application();
objWB = objXL.Workbooks.Open(@"Book1.xlsx");//Your path to excel file.
foreach (Microsoft.Office.Interop.Excel.Worksheet objSHT in objWB.Worksheets)
{
int rows = objSHT.UsedRange.Rows.Count;
int cols = objSHT.UsedRange.Columns.Count;
DataTable dt = new DataTable();
int noofrow = 1;
//If 1st Row Contains unique Headers for datatable include this part else remove it
//Start
for (int c = 1; c <= cols; c++)
{
string colname = objSHT.Cells[1, c].Text;
dt.Columns.Add(colname);
noofrow = 2;
}
//END
for (int r = noofrow; r <= rows; r++)
{
DataRow dr = dt.NewRow();
for (int c = 1; c <= cols; c++)
{
dr[c - 1] = objSHT.Cells[r, c].Text;
}
dt.Rows.Add(dr);
}
ds.Tables.Add(dt);
}
//Closing workbook
objWB.Close();
//Closing excel application
objXL.Quit();
return ds;
}
catch (Exception ex)
{
objWB.Saved = true;
//Closing work book
objWB.Close();
//Closing excel application
objXL.Quit();
//Response.Write("Illegal permission");
return ds;
}
}
Years after everyone's answer, I too want to present how I did it for my project
/// <summary>
/// /Reads an excel file and converts it into dataset with each sheet as each table of the dataset
/// </summary>
/// <param name="filename"></param>
/// <param name="headers">If set to true the first row will be considered as headers</param>
/// <returns></returns>
public DataSet Import(string filename, bool headers = true)
{
var _xl = new Excel.Application();
var wb = _xl.Workbooks.Open(filename);
var sheets = wb.Sheets;
DataSet dataSet = null;
if (sheets != null && sheets.Count != 0)
{
dataSet = new DataSet();
foreach (var item in sheets)
{
var sheet = (Excel.Worksheet)item;
DataTable dt = null;
if (sheet != null)
{
dt = new DataTable();
var ColumnCount = ((Excel.Range)sheet.UsedRange.Rows[1, Type.Missing]).Columns.Count;
var rowCount = ((Excel.Range)sheet.UsedRange.Columns[1, Type.Missing]).Rows.Count;
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[1, j + 1];
var column = new DataColumn(headers ? cell.Value : string.Empty);
dt.Columns.Add(column);
}
for (int i = 0; i < rowCount; i++)
{
var r = dt.NewRow();
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[i + 1 + (headers ? 1 : 0), j + 1];
r[j] = cell.Value;
}
dt.Rows.Add(r);
}
}
dataSet.Tables.Add(dt);
}
}
_xl.Quit();
return dataSet;
}
精彩评论