I have written code that reads Excel 2007 files using the Microsoft Access Database Engine (the ACE OLE DB provider). The code snippet below works fine for most Excel files (.xlsx, .xls), but it fails at objConn.Open(); for Excel files that have formatting problems; please refer to the image below.
It fails to open the OLE DB connection with the error "External table is not in the expected format". One more problem with this import procedure is that
OleDbCommand objCmdSelect = new OleDbCommand("SELECT * FROM "+ SelectionSheet, objConn);
is not able to read sheets whose names start with spaces. Any help with solving this issue would be highly appreciated.
/// <summary>
/// Loads the first table reported by the workbook's OLE DB schema, drops empty rows,
/// applies the column and string filters, and returns the remaining data with every
/// column converted to <see cref="String"/>.
/// </summary>
/// <param name="Path">Path of the Excel workbook; inserted into the connection string as the data source.</param>
/// <param name="IgnoreString">
/// Values to filter on: any row with a cell whose text contains one of these values
/// (compared after lower-casing both sides) is removed. May be null or empty.
/// </param>
/// <param name="IgnoreColumn">
/// Zero-based column indexes (boxed <see cref="int"/>) to remove. The list is processed from its
/// last element to its first, so it is presumably expected in ascending order; verify against callers.
/// May be null or empty.
/// </param>
/// <returns>
/// A new table whose columns are all of type string. Values from DateTime columns are
/// formatted as "yyyyMMddhhmmss"; empty DateTime cells become an empty string.
/// </returns>
/// <remarks>
/// Exceptions raised by the provider (for example when the connection cannot be opened)
/// propagate to the caller with their original stack trace.
/// </remarks>
public DataTable ReadExcel(string Path, ArrayList IgnoreString, ArrayList IgnoreColumn)
{
    string sConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" +
        "Data Source=" + Path + ";" + "Extended Properties=\"Excel 12.0 Xml;HDR=YES;IMEX=1;\"";

    DataTable dtReturn;

    // The using blocks guarantee the connection, command and adapter are released
    // even when Open() or Fill() throws.
    using (OleDbConnection objConn = new OleDbConnection(sConnectionString))
    {
        objConn.Open();

        // Column index 2 of the Tables schema rowset holds the table name.
        DataTable dtSheetnames = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        string SelectionSheet = BuildSheetReference(dtSheetnames.Rows[0][2].ToString());

        using (OleDbCommand objCmdSelect = new OleDbCommand("SELECT * FROM " + SelectionSheet, objConn))
        using (OleDbDataAdapter objAdapter = new OleDbDataAdapter(objCmdSelect))
        {
            DataSet objDataSet = new DataSet();
            objAdapter.Fill(objDataSet);
            dtReturn = objDataSet.Tables[0].Copy();
        }
    }

    // Drop empty rows. Walking backwards keeps the remaining indexes valid after a removal.
    for (int rowIndex = dtReturn.Rows.Count - 1; rowIndex >= 0; rowIndex--)
    {
        if (isRowEmpty(dtReturn.Rows[rowIndex]))
        {
            dtReturn.Rows.RemoveAt(rowIndex);
        }
    }

    // Column filter: processed from the end of the list so that, for an ascending list,
    // earlier indexes are not shifted by later removals.
    if (IgnoreColumn != null)
    {
        for (int i = IgnoreColumn.Count - 1; i >= 0; i--)
        {
            dtReturn.Columns.RemoveAt((int)IgnoreColumn[i]);
        }
    }

    // String filter: remove every row that has at least one cell containing an ignore string.
    if (IgnoreString != null)
    {
        for (int i = IgnoreString.Count - 1; i >= 0; i--)
        {
            // Lower-case the search text once per ignore string instead of once per cell.
            string needle = IgnoreString[i].ToString().ToLower();

            // The original loop decremented i here instead of j, so j never advanced.
            for (int j = dtReturn.Rows.Count - 1; j >= 0; j--)
            {
                foreach (DataColumn dCol in dtReturn.Columns)
                {
                    if (dtReturn.Rows[j][dCol].ToString().ToLower().Contains(needle))
                    {
                        dtReturn.Rows.RemoveAt(j);
                        break;
                    }
                }
            }
        }
    }

    return CopyAsStringTable(dtReturn);
}

/// <summary>
/// Wraps a table name taken from the schema rowset in square brackets for use in a FROM clause.
/// </summary>
/// <param name="tableName">Table name as reported by the provider.</param>
/// <returns>The name enclosed in [ and ], with one pair of surrounding single quotes removed if present.</returns>
private static string BuildSheetReference(string tableName)
{
    // Only strip quotes that actually enclose the name; the previous code removed the first
    // and last character whenever an apostrophe appeared anywhere in the name.
    if (tableName.Length >= 2
        && tableName.StartsWith("'", StringComparison.Ordinal)
        && tableName.EndsWith("'", StringComparison.Ordinal))
    {
        tableName = tableName.Substring(1, tableName.Length - 2);
    }

    return "[" + tableName + "]";
}

/// <summary>
/// Builds a copy of <paramref name="source"/> in which every column is typed as string.
/// </summary>
/// <param name="source">Table whose schema and rows are copied.</param>
/// <returns>A new table with the same column names and row count, holding string values only.</returns>
private static DataTable CopyAsStringTable(DataTable source)
{
    // Clone() copies the schema without rows, so the column types can still be changed.
    DataTable result = source.Clone();
    foreach (DataColumn column in result.Columns)
    {
        column.DataType = typeof(String);
    }

    foreach (DataRow sourceRow in source.Rows)
    {
        DataRow newRow = result.NewRow();
        foreach (DataColumn sourceColumn in source.Columns)
        {
            object value = sourceRow[sourceColumn];
            if (sourceColumn.DataType == typeof(DateTime))
            {
                // NOTE(review): "hh" is the 12-hour clock, so 13:05 and 01:05 produce the same
                // text. Kept because callers may depend on the existing output; consider "HH".
                newRow[sourceColumn.ColumnName] = value is DateTime
                    ? ((DateTime)value).ToString("yyyyMMddhhmmss")
                    : "";
            }
            else
            {
                newRow[sourceColumn.ColumnName] = value.ToString();
            }
        }
        result.Rows.Add(newRow);
    }

    return result;
}
I found out that the problem occurs when the .xlsx file is modified manually or programmatically without using Excel. Once the file is modified this way, its format no longer stays intact, and the above-mentioned error comes up because OleDbDataAdapter is unable to handle the modified file, whereas Excel itself can repair the corrupt file and shows it as expected.
精彩评论