using System; using System.Collections.Generic; using System.Diagnostics; using System.Globalization; using System.IO; using System.Linq; using System.Reflection; using System.Text; using System.Text.RegularExpressions; using System.Threading; using mXparser = org.mariuszgromada.math.mxparser; using Excel = Microsoft.Office.Interop.Excel; namespace RobvanderWoude { class ExtractExcel { #region Global Variables static readonly string progver = "1.00"; static bool closewithoutprompt = false; static Excel.Application excelApp; static readonly object readOnly = true; static readonly object missing = Missing.Value; static readonly object noSaveChanges = false; static readonly object saveChanges = true; static Excel.Workbook workbookSource; static Excel.Worksheet workseetSource; static Excel.Workbook workbookTarget; static Excel.Worksheet worksheetTarget; static string sourcefile = string.Empty; static string sourcesheet = string.Empty; static string targetfile = string.Empty; static string targetsheet = string.Empty; static int startrowtarget = 1; static List rows = new List( ); static List columns = new List( ); static string query = string.Empty; static readonly string parentfolder = Path.GetDirectoryName( new System.Uri( System.Reflection.Assembly.GetExecutingAssembly( ).CodeBase ).LocalPath ); #endregion Global Variables static int Main(string[] args) { int rc = 0; #region mXparser License Requirements mXparser.License.iConfirmNonCommercialUse( "Rob van der Woude" ); string resource = "ExtractExcel.MathParser.org-mXparser.LICENSE.txt"; string readme = "MathParser.org-mXparser.LICENSE.txt"; if ( !File.Exists( Path.Combine( parentfolder, resource ) ) ) { ExtractEmbeddedResource( resource, readme ); } #endregion mXparser License Requirements #region Parse Command Line if ( args.Length < 6 || args.Contains( "/?" ) ) { return ShowHelp( ); } for ( int i = 0; i < args.Length - 1; i++ ) { switch (args[i].ToUpper( ) ) { case "/SOURCE": sourcefile = args[i + 1]; if ( args.Length > i + 1 && args[i + 2][0] != '/' ) { sourcesheet = args[i + 2]; } break; case "/TARGET": targetfile = args[i + 1]; if ( args[i + 2][0] != '/' ) { targetsheet = args[i + 2]; } break; case "/ROWS": rows = Range2Rows( args[i + 1] ); break; case "/COLS": case "/COLUMNS": columns = Range2Columns( args[i + 1] ); break; case "/OFFSET": startrowtarget = int.Parse( args[i + 1] ); break; case "/WHERE": query = args[i + 1]; break; } } if ( columns.Count == 0 ) { if ( rows.Count == 0 ) { return ShowHelp( "Specify rows or columns or both" ); } else { columns = Range2Columns( "A..Z" ); } } if ( rows.Count == 0 ) { rows = Range2Rows( "1..100" ); } #endregion Parse Command Line // Make sure Excel is not active KillExcelIfActive( ); excelApp = new Excel.Application( ); #region Prepare Excel Source and Target Files if ( IsOpened( sourcefile, excelApp ) ) { Console.Error.WriteLine( "Excel file \"{0}\" in use.\nPlease close it and try again.", sourcefile ); return -1; } excelApp.Visible = true; workbookSource = excelApp.Workbooks.Open( sourcefile, 0, readOnly, missing, "", "", false, Excel.XlPlatform.xlWindows, "", true, false, 0, true, false, false ); if ( string.IsNullOrEmpty( sourcesheet ) ) { workseetSource = (Excel.Worksheet)workbookSource.Worksheets[1]; } else if ( int.TryParse( sourcesheet, out int sheetindex ) ) { workseetSource = (Excel.Worksheet)workbookSource.Worksheets[sheetindex]; } else { workseetSource = (Excel.Worksheet)workbookSource.Worksheets[sourcesheet]; } workseetSource = (Excel.Worksheet)workbookSource.ActiveSheet; if ( File.Exists( targetfile ) ) { if ( IsOpened( targetfile, excelApp ) ) { Console.Error.WriteLine( "Excel file \"{0}\" in use.\nPlease close it and try again.", targetfile ); return -1; } workbookTarget = excelApp.Workbooks.Open( targetfile, 0, readOnly, missing, "", "", false, Excel.XlPlatform.xlWindows, "", true, false, 0, true, false, false ); } else { // Create target file workbookTarget = excelApp.Workbooks.Add( ); workbookTarget.SaveAs( targetfile ); targetsheet = string.Empty; } if ( string.IsNullOrEmpty( targetsheet ) ) { worksheetTarget = (Excel.Worksheet)workbookTarget.Worksheets[1]; } else if ( int.TryParse( targetsheet, out int sheetindex ) ) { worksheetTarget = (Excel.Worksheet)workbookTarget.Worksheets[sheetindex]; } else { worksheetTarget = (Excel.Worksheet)workbookTarget.Worksheets[targetsheet]; } worksheetTarget = (Excel.Worksheet)workbookTarget.ActiveSheet; #endregion Prepare Excel Source and Target Files try { int targetrow = startrowtarget; foreach ( int sourcerow in rows ) { if ( CopyExcelRow( sourcerow, targetrow ) ) { targetrow++; } else { rc = -1; } } } catch ( Exception ex ) { Console.Error.WriteLine( "Error: {0}\n{1}", ex.Message, ex.StackTrace ); rc = -1; } finally { if ( !ShutdownExcel( ) ) { rc = -1; } } return rc; } static bool CopyExcelRow( int sourcerow, int targetrow ) { bool success = true; if ( !string.IsNullOrWhiteSpace( query ) ) { // Replace cell name with cell value // based on code by Vladimir // https://stackoverflow.com/a/62778031 string currentquery = query; string pattern = @"\b([A-Z]+)\b"; RegexOptions options = RegexOptions.None; // case sensitive Match match; while ( ( match = Regex.Match( currentquery, pattern, options ) ).Success ) { var group = match.Groups[1]; var sb = new StringBuilder( ); // Anything before the match if ( match.Index > 0 ) { sb.Append( currentquery.Substring( 0, match.Index ) ); } // The match itself var startIndex = group.Index - match.Index; var length = group.Length; var original = match.Value; var prior = original.Substring( 0, startIndex ); var trailing = original.Substring( startIndex + length ); sb.Append( prior ); object cellvalue = ReadCell( workseetSource, sourcerow, group.Value ); if ( cellvalue == null ) { return false; } if ( cellvalue.GetType( ) == typeof( Single ) || cellvalue.GetType( ) == typeof( Double ) ) { sb.Append( FormatFloat( cellvalue ) ); } else { sb.Append( cellvalue ); } sb.Append( trailing ); // Anything after the match if ( match.Index + match.Length < currentquery.Length ) { sb.Append( currentquery.Substring( match.Index + match.Length ) ); } currentquery = sb.ToString( ); } org.mariuszgromada.math.mxparser.Expression expression = new org.mariuszgromada.math.mxparser.Expression( currentquery ); if ( !expression.checkSyntax( ) ) { return false; } bool valid = ( expression.calculate( ) == 1.0 ); if ( valid ) { int targetcolumn = 1; foreach ( string sourcecolumn in columns ) { object cellValue = ReadCell( workseetSource, sourcerow, sourcecolumn ); if ( !WriteCell( worksheetTarget, targetrow, targetcolumn, cellValue ) ) { success = false; } targetcolumn++; } } } return success; } static bool Empty( object value ) { if ( value == null ) { return true; } if ( string.IsNullOrEmpty( value.ToString( ) ) ) { return true; } return false; } static void ExtractEmbeddedResource( string resourcename, string filename = null ) { var embeddedResource = Assembly.GetExecutingAssembly( ).GetManifestResourceNames( ).FirstOrDefault( s => string.Compare( s, resourcename, true ) == 0 ); if ( string.IsNullOrWhiteSpace( filename ) ) { filename = resourcename; } // Ensure that the file is in the program's parent folder filename = Path.Combine( parentfolder, Path.GetFileName( filename ) ); resourcename = "ExtractExcel." + resourcename; Stream stream = Assembly.GetExecutingAssembly( ).GetManifestResourceStream( embeddedResource ); byte[] bytes = new byte[(int)stream.Length]; stream.Read( bytes, 0, bytes.Length ); stream.Close( ); File.WriteAllBytes( filename, bytes ); } static string FormatFloat( Double val ) { return val.ToString( new CultureInfo( "en-US" ) ); } static string FormatFloat( object val ) { return ( (Double)val ).ToString( new CultureInfo( "en-US" ) ); } static string FormatFloat( Single val ) { return val.ToString( new CultureInfo( "en-US" ) ); } static bool InRange( char col, string colrange ) { return InRange( col.ToString( ), colrange ); } static bool InRange( string col, string colrange ) { List columns = Range2Columns( colrange ); if ( columns.Contains( col.ToUpper( ) ) ) { return true; } return false; } static bool InRange( int col, string colrange ) { List columns = Range2Columns( colrange ); if ( columns.Contains( col.ToString( ) ) ) { return true; } if ( columns.Contains( ( (char)( col + 64 ) ).ToString( ) ) ) { return true; } return false; } static bool InRange( int col, int[] colrange ) { return ( colrange.Contains( col ) ); } static bool IsOpened( string excelfile, Excel.Application xlAppRef ) { bool isOpened = true; try { // wbook should be: "name-of-the-workbook.xlsx". Otherwise it will always raise the // exception and never return true var test = xlAppRef.Workbooks.Item[Path.GetFileName( excelfile )]; } catch ( Exception ) { isOpened = false; } return isOpened; } static bool IsActive( string progname ) { using ( Process prog = Process.GetProcessesByName( progname ).FirstOrDefault( ) ) { return ( prog != null ); } } static void KillExcel( ) { try { foreach ( Process proc in Process.GetProcessesByName( "excel" ) ) { proc.Kill( ); } } catch { // ignore } } static void KillExcelIfActive( ) { bool excelactive = IsActive( "excel" ); if ( excelactive ) { if ( closewithoutprompt ) { KillExcel( ); } else { Thread.Sleep( 1000 ); if ( !IsActive( "excel" ) ) { return; } string message; message = "Excel is still active.\nClose it before continuing.\n\n"; message += "Press Enter when Excel is closed, or Escape to abort."; Console.WriteLine( message ); bool validkey = false; while ( !validkey ) { ConsoleKey answer = Console.ReadKey( true ).Key; if ( answer == ConsoleKey.Enter ) { closewithoutprompt = true; KillExcel( ); validkey = true; } else if ( answer == ConsoleKey.Escape ) { Environment.Exit( -1 ); validkey = true; } } } } } static List Range2Rows( string range ) { List rows = new List( ); foreach ( string part in range.Split( ',' ) ) { if ( part.Contains( ".." ) ) { char[] separator = ".".ToCharArray( ); StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries; int startrange = int.Parse( part.Split( separator, options )[0] ); int endrange = int.Parse( part.Split( separator, options )[1] ); for ( int i = (int)startrange; i <= (int)endrange; i++ ) { rows.Add( i ); } } else { rows.Add( int.Parse( part ) ); } } return rows; } static List Range2Columns( string range ) { List columns = new List( ); foreach ( string part in range.Split( ',' ) ) { if ( part.Contains( ".." ) ) { char startrange = part[0]; char endrange = part[part.Length - 1]; for ( int i = (int)startrange; i <= (int)endrange; i++ ) { columns.Add( ( (char)i ).ToString( ).ToUpper( ) ); } } else { columns.Add( part.ToUpper( ) ); } } return columns; } static object ReadCell( Excel.Worksheet worksheet, int row, string col ) { int colnum = Convert.ToInt32( col.ToUpper( )[0] ) - 64; Excel.Range cell = (Excel.Range)worksheet.Range[worksheet.Cells[row, colnum], worksheet.Cells[row, colnum]]; return cell.Value; } static object ReadCell( Excel.Worksheet worksheet, int row, int col ) { Excel.Range cell = (Excel.Range)worksheet.Range[worksheet.Cells[row, col], worksheet.Cells[row, col]]; return cell.Value; } static int ShowHelp( params string[] errmsg ) { #region Error Message if ( errmsg.Length > 0 ) { List errargs = new List( errmsg ); errargs.RemoveAt( 0 ); Console.Error.WriteLine( ); Console.ForegroundColor = ConsoleColor.Red; Console.Error.Write( "ERROR:\t" ); Console.ForegroundColor = ConsoleColor.White; Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) ); Console.ResetColor( ); } #endregion Error Message #region Help Text /* ExtractExcel, Version 1.00 Filter values from a source spreadsheet and write them to a target spreadsheet Usage: ExtractExcel /SOURCE src /TARGET tgt /COLS "cols" [ options ] Where: src is the source file name and optional sheet name or index (e.g. source.xlsx "Sheet 2"; default sheet index 1) tgt is the target file name and optional sheet name or index (e.g. target.xlsx 3; default sheet index 1; file is created if it does not exist, in which case sheet will be ignored) cols is a list or range of columns to be read from the source file (letters, e.g. "A..D,F,AA..AG", case insensitive, mandatory unless rows are specified, default if rows specified: "A..Z") Options: /ROWS rows list or range of rows to be used from source file (numeric, e.g. "1,3,5..8,14", default: "1..100") /OFFSET startrow first row in target sheet to be written (numeric, default: 1) /WHERE query filter rule, source row will be skipped if it does not comply (see notes; default: do not skip) Notes: Queries format is "column operator value/column", e.g. "A < 0", "B > C" or "AF != 1.5"; whitespace is allowed; columns must be in upper case! Queries are interpreted by mXparser (https://mathparser.org/) See its tutorial at https://mathparser.org/mxparser-tutorial/ for details on more complex filter expressions, e.g. "(A=10)|(B<0)". Return code ("errorlevel") -1 in case of errors, otherwise 0. Credits: Code to replace column by cell value by Vladimir https://stackoverflow.com/a/62778031 Query parsing by mXparser https://mathparser.org/ Manipulating Excel files by Microsoft.Office.Interop.Excel https://www.nuget.org/packages/Microsoft.Office.Interop.Excel Written by Rob van der Woude https://www.robvanderwoude.com */ #endregion Help Text #region Display Help Text Console.Error.WriteLine( ); Console.Error.WriteLine( "ExtractExcel, Version {0}", progver ); Console.Error.WriteLine( "Filter values from a source spreadsheet and write them to a target spreadsheet" ); Console.Error.WriteLine( ); Console.Error.Write( "Usage: " ); Console.ForegroundColor = ConsoleColor.White; Console.Error.WriteLine( "ExtractExcel /SOURCE src /TARGET tgt /COLS \"cols\" [ options ]" ); Console.ResetColor( ); Console.Error.WriteLine( ); Console.Error.Write( "Where: " ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( "src" ); Console.ResetColor( ); Console.Error.WriteLine( " is the source file name and optional sheet name or index" ); Console.Error.WriteLine( " (e.g. source.xlsx \"Sheet 2\"; default sheet index 1)" ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( " tgt" ); Console.ResetColor( ); Console.Error.WriteLine( " is the target file name and optional sheet name or index" ); Console.Error.WriteLine( " (e.g. target.xlsx 3; default sheet index 1; file is created" ); Console.Error.WriteLine( " if it does not exist, in which case sheet will be ignored)" ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( " cols" ); Console.ResetColor( ); Console.Error.WriteLine( " is a list or range of columns to be read from the source file" ); Console.Error.WriteLine( " (letters, e.g. \"A..D,F,AA..AG\", case insensitive, mandatory" ); Console.Error.WriteLine( " unless rows are specified, default if rows specified: \"A..Z\")" ); Console.Error.WriteLine( ); Console.Error.Write( "Options: " ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( "/ROWS rows" ); Console.ResetColor( ); Console.Error.WriteLine( " list or range of rows to be used from source file" ); Console.Error.WriteLine( " (numeric, e.g. \"1,3,5..8,14\", default: \"1..100\")" ); Console.Error.WriteLine( ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( " /OFFSET startrow" ); Console.ResetColor( ); Console.Error.WriteLine( " first row in target sheet to be written" ); Console.Error.WriteLine( " (numeric, default: 1)" ); Console.ForegroundColor = ConsoleColor.White; Console.Error.Write( " /WHERE query" ); Console.ResetColor( ); Console.Error.WriteLine( " filter rule, source row will be skipped if it does" ); Console.Error.WriteLine( " not comply (see notes; default: do not skip)" ); Console.Error.WriteLine( ); Console.Error.WriteLine( "Notes: Queries format is \"column operator value/column\", e.g. \"A < 0\", \"B > C\"" ); Console.Error.WriteLine( " or \"AF != 1.5\"; whitespace is allowed; columns must be in upper case!" ); Console.Error.Write( " Queries are interpreted by mXparser (" ); Console.ForegroundColor = ConsoleColor.DarkGray; Console.Error.Write( "https://mathparser.org/" ); Console.ResetColor( ); Console.Error.WriteLine( ")" ); Console.Error.Write( " See its tutorial at " ); Console.ForegroundColor= ConsoleColor.DarkGray; Console.Error.Write( "https://mathparser.org/mxparser-tutorial/" ); Console.ResetColor( ); Console.Error.WriteLine( " for" ); Console.Error.WriteLine( " details on more complex filter expressions, e.g. \"(A=10)|(B<0)\"." ); Console.Error.WriteLine( " Return code (\"errorlevel\") -1 in case of errors, otherwise 0." ); Console.Error.WriteLine( ); Console.Error.WriteLine( "Credits: Code to replace column by cell value by Vladimir" ); Console.ForegroundColor = ConsoleColor.DarkGray; Console.Error.WriteLine( " https://stackoverflow.com/a/62778031" ); Console.ResetColor( ); Console.Error.WriteLine( " Query parsing by mXparser" ); Console.ForegroundColor = ConsoleColor.DarkGray; Console.Error.WriteLine( " https://mathparser.org/" ); Console.ResetColor( ); Console.Error.WriteLine( " Manipulating Excel files by Microsoft.Office.Interop.Excel" ); Console.ForegroundColor = ConsoleColor.DarkGray; Console.Error.WriteLine( " https://www.nuget.org/packages/Microsoft.Office.Interop.Excel" ); Console.ResetColor( ); Console.Error.WriteLine( ); Console.Error.WriteLine( "Written by Rob van der Woude" ); Console.Error.WriteLine( "https://www.robvanderwoude.com" ); #endregion Display Help Text return -1; } static bool ShutdownExcel( ) { try { workbookSource.Close( noSaveChanges ); workbookTarget.Close( saveChanges ); excelApp.Quit( ); return true; } catch ( Exception ex ) { Console.Error.WriteLine( "Foutmelding bij afluiten Excel: " + ex.Message ); Console.Error.WriteLine( ex.StackTrace ); return false; } } static bool WriteCell( Excel.Worksheet worksheet, int row, int col, object value ) { Excel.Range cell = (Excel.Range)worksheet.Range[worksheet.Cells[row, col], worksheet.Cells[row, col]]; cell.Value = value; object trueval = ReadCell( worksheet, row, col ); if ( Empty( value ) ) { return Empty( trueval ); } if ( Empty( trueval ) ) { return Empty( value ); } return ( value.ToString( ) == trueval.ToString( ) ); } } }