Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for word2txt.cs

(view source code of word2txt.cs as plain text)

  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.IO.Compression;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Text.RegularExpressions;
  8. using System.Windows.Forms;
  9. using Word = Microsoft.Office.Interop.Word;
  10.  
  11.  
  12. namespace RobvanderWoude
  13. {
  14. 	internal class Word2Txt
  15. 	{
		// Program version, shown in the title line of the help text
		static readonly string progver = "1.05";


		// Text extracted from the document; filled by the Read*File methods and consumed by Main
		static string plaintext = string.Empty;
  20.  
  21.  
		/// <summary>
		/// Program entry point: parses the command line, extracts the text from the
		/// specified document (using Word if installed, otherwise a format specific
		/// fallback reader) and writes the cleaned up text to the console.
		/// </summary>
		/// <param name="args">
		/// The document path, optionally accompanied by an encoding name/codepage or /D;
		/// or /E or /? on their own.
		/// </param>
		/// <returns>
		/// 0 if Word extracted the text, 1 if a fallback reader had to be used,
		/// 2 on command line errors or if no text could be extracted.
		/// </returns>
		static int Main( string[] args )
		{
			int rc = 0;
			string document = string.Empty;
			bool success = false;
			bool usexmlencoding = false;
			Encoding encoding = null;


			#region Parse Command Line

			if ( args.Length == 0 || args.Length > 2 )
			{
				return ShowHelp( );
			}

			foreach ( string arg in args )
			{
				// StartsWith is safe for an empty argument (""), unlike indexing arg[0]
				if ( arg.StartsWith( "/", StringComparison.Ordinal ) )
				{
					if ( arg == "/?" )
					{
						return ShowHelp( );
					}
					else if ( arg.StartsWith( "/D", StringComparison.OrdinalIgnoreCase ) )
					{
						usexmlencoding = true;
					}
					else if ( string.Equals( arg, "/E", StringComparison.OrdinalIgnoreCase ) )
					{
						return ListEncodings( );
					}
					else
					{
						return ShowHelp( "Invalid command line switch {0}", arg );
					}
				}
				else
				{
					if ( string.IsNullOrWhiteSpace( document ) )
					{
						document = arg;
						if ( !File.Exists( document ) )
						{
							return ShowHelp( "File \"{0}\" not found", document );
						}
					}
					else if ( encoding == null )
					{
						encoding = GetEncoding( arg );
						if ( encoding == null )
						{
							// report the argument that was actually rejected
							return ShowHelp( "Invalid encoding \"{0}\"", arg );
						}
					}
					else
					{
						return ShowHelp( "Too many command line arguments" );
					}
				}
			}

			if ( string.IsNullOrWhiteSpace( document ) )
			{
				return ShowHelp( );
			}

			#endregion Parse Command Line


			#region Extract Text

			// First try using Word if possible
			if ( IsWordInstalled( ) )
			{
				// If Word is installed, this program can handle ANY document format that is recognized by Word
				success = ReadWordFile( document );
			}

			// if Word isn't available or could not extract any text, try plan B
			if ( !success || string.IsNullOrWhiteSpace( plaintext ) )
			{
				rc = 1;
				string ext = Path.GetExtension( document ).ToLower( );
				if ( ext == ".doc" )
				{
					success = ReadDocFile( document );
				}
				else if ( ext == ".docx" || ext == ".odt" )
				{
					success = ReadDocxOrOdtFile( document );
				}
				else if ( ext == ".rtf" )
				{
					success = ReadRTFFile( document );
				}
				else if ( ext == ".wpd" )
				{
					success = ReadWPDFile( document );
				}
				else
				{
					return ShowHelp( "If Word is not installed or fails to extract text, this program can only handle .DOC, .DOCX, .ODT, .RTF and .WPD files" );
				}
			}

			#endregion Extract Text


			#region Cleanup Text and Display Result

			if ( success && !string.IsNullOrWhiteSpace( plaintext ) )
			{
				// convert stray carriage returns to carriage return/linefeed pairs
				plaintext = ConvertStrayCarriageReturns( plaintext ).Trim( "\n\r\t ".ToCharArray( ) );

				// /D: use the encoding declared in the document's XML; as stated in the help
				// text this only applies when a fallback reader was used (rc == 1)
				if ( usexmlencoding && rc == 1 )
				{
					encoding = GetEncoding( DetectXmlEncoding( document ) );
				}

				if ( encoding == null )
				{
					// send text to console using default output encoding
					Console.WriteLine( plaintext );
				}
				else
				{
					// temporarily change output encoding and send text to console
					Encoding oldencoding = Console.OutputEncoding;
					try
					{
						Console.OutputEncoding = encoding;
						Console.WriteLine( plaintext );
					}
					finally
					{
						// restore the console even if writing the text failed
						Console.OutputEncoding = oldencoding;
					}
				}
			}
			else
			{
				rc = 2;
			}

			#endregion Cleanup Text and Display Result


			return rc;
		}


		/// <summary>
		/// Reads the encoding named in the XML declaration of the content part
		/// (document.xml or content.xml) of a .DOCX or .ODT file.
		/// </summary>
		/// <param name="docfile">path of the document file</param>
		/// <returns>the declared encoding name, or an empty string if it could not be determined</returns>
		static string DetectXmlEncoding( string docfile )
		{
			string contentfile;
			string ext = Path.GetExtension( docfile ).ToLower( );
			if ( ext == ".odt" )
			{
				contentfile = "content.xml";
			}
			else if ( ext == ".docx" )
			{
				contentfile = "document.xml";
			}
			else
			{
				return string.Empty;
			}

			try
			{
				using ( ZipArchive archive = ZipFile.OpenRead( docfile ) )
				{
					foreach ( ZipArchiveEntry entry in archive.Entries )
					{
						if ( entry.Name.ToLower( ) == contentfile )
						{
							using ( StreamReader sr = new StreamReader( entry.Open( ) ) )
							{
								// the XML declaration, if present, sits at the very start of the file,
								// so the first 100 characters are enough
								char[] buffer = new char[100];
								int count = sr.ReadBlock( buffer, 0, buffer.Length );
								Match match = Regex.Match( new string( buffer, 0, count ), " encoding=\"([^\"]+)\"" );
								return match.Success ? match.Groups[1].Value : string.Empty;
							}
						}
					}
				}
			}
			catch ( IOException )
			{
				// unreadable file: fall through and report "no encoding found"
			}
			catch ( InvalidDataException )
			{
				// not a valid ZIP archive: fall through and report "no encoding found"
			}
			catch ( UnauthorizedAccessException )
			{
				// access denied: fall through and report "no encoding found"
			}
			return string.Empty;
		}
  168.  
  169.  
		/// <summary>
		/// Replaces every "stray" carriage return, i.e. one that is not immediately
		/// followed by a linefeed, with Environment.NewLine.
		/// </summary>
		/// <param name="text">the text to clean up</param>
		/// <returns>the text with all stray carriage returns replaced</returns>
		static string ConvertStrayCarriageReturns( string text )
		{
			// negative lookahead: match \r only when the next character is not \n
			const string straycarriagereturn = "\r(?!\n)";
			return Regex.Replace( text, straycarriagereturn, Environment.NewLine );
		}
  179.  
  180.  
		/// <summary>
		/// Looks up an available encoding by name or by codepage number.
		/// Three attempts are made, in this order: a case-insensitive name match,
		/// a case-insensitive name match ignoring dashes, and a codepage match.
		/// </summary>
		/// <param name="myencoding">encoding name (e.g. "utf-8" or "utf8") or codepage number (e.g. "65001")</param>
		/// <returns>the matching Encoding, or null if the argument is null/empty or nothing matches</returns>
		static Encoding GetEncoding( string myencoding )
		{
			if ( string.IsNullOrEmpty( myencoding ) )
			{
				return null;
			}

			EncodingInfo[] available = Encoding.GetEncodings( );

			// 1: name as specified; 2: name without dashes; 3: codepage number
			EncodingInfo found =
				available.FirstOrDefault( info => info.Name.ToLower( ) == myencoding.ToLower( ) )
				?? available.FirstOrDefault( info => info.Name.Replace( "-", "" ).ToLower( ) == myencoding.Replace( "-", "" ).ToLower( ) )
				?? available.FirstOrDefault( info => info.CodePage.ToString( ) == myencoding );

			if ( found == null )
			{
				// no match at all, giving up
				return null;
			}
			return Encoding.GetEncoding( found.CodePage );
		}
  216.  
  217.  
		/// <summary>
		/// Checks whether the "Word.Application" ProgID resolves to a type on this system.
		/// </summary>
		/// <returns>true if a type is registered for the ProgID, false otherwise</returns>
		static bool IsWordInstalled( )
		{
			// Technique from "How to Check Whether Word is Installed in the System or Not" by Tadit Dash
			// https://www.codeproject.com/Tips/689968/How-to-Check-Whether-Word-is-Installed-in-the-Syst
			Type wordtype = Type.GetTypeFromProgID( "Word.Application" );
			return wordtype != null;
		}
  224.  
  225.  
		/// <summary>
		/// Writes a two column table (encoding name, codepage) of all available
		/// encodings to the console, sorted by name.
		/// </summary>
		/// <returns>always 0</returns>
		static int ListEncodings( )
		{
			try
			{
				Console.Clear( );
			}
			catch
			{
				// Console.Clear( ) throws an IO exception if the output is redirected;
				// deliberately ignored, the list is still written
			}

			// Sort the EncodingInfo objects themselves so each row can print its own
			// codepage, instead of looking every name up again in the full list
			List<EncodingInfo> allencodings = Encoding.GetEncodings( ).OrderBy( info => info.Name ).ToList( );

			// The name column is at least as wide as its header
			int columnwidth = 8;
			foreach ( EncodingInfo info in allencodings )
			{
				columnwidth = Math.Max( columnwidth, info.Name.Length );
			}

			string rowformat = "{0,-" + columnwidth + "}   {1}";
			Console.WriteLine( rowformat, "Encoding", "CodePage" );
			Console.WriteLine( rowformat, "========", "========" );
			foreach ( EncodingInfo info in allencodings )
			{
				Console.WriteLine( rowformat, info.Name, info.CodePage );
			}
			return 0;
		}
  256.  
  257.  
		/// <summary>
		/// Fallback reader for .DOC files: reads the file as text and collects runs of
		/// at least 20 characters that look like plain ASCII text into plaintext.
		/// </summary>
		/// <param name="docfile">path of the .DOC file</param>
		/// <returns>
		/// false if the file could not be read, is empty, or contains no candidate text
		/// runs at all; true otherwise (plaintext may still be empty if every run was rejected)
		/// </returns>
		static bool ReadDocFile( string docfile )
		{
			string doccontent;
			try
			{
				// using guarantees the file is closed even if reading fails halfway
				using ( StreamReader sr = new StreamReader( docfile, false ) )
				{
					doccontent = sr.ReadToEnd( ).Trim( "\n\t ".ToCharArray( ) );
				}
			}
			catch ( IOException )
			{
				ShowHelp( "Access to file \"{0}\" denied", docfile );
				return false;
			}
			catch ( UnauthorizedAccessException )
			{
				// insufficient permissions are reported with this exception, not with an IOException
				ShowHelp( "Access to file \"{0}\" denied", docfile );
				return false;
			}

			if ( doccontent.Length == 0 )
			{
				return false;
			}

			// Ignore everything from the "[Content_Types]" marker onwards
			int marker = doccontent.IndexOf( "[Content_Types]", StringComparison.Ordinal );
			if ( marker >= 0 )
			{
				doccontent = doccontent.Substring( 0, marker );
			}

			// Runs of 20 or more characters, none of which is NUL, CR, 0xF7 or 0xFF (octal escapes)
			Regex regex = new Regex( "[^\\000\\015\\367\\377]{20,}" );
			MatchCollection matches = regex.Matches( doccontent );
			if ( matches.Count == 0 )
			{
				return false;
			}

			StringBuilder text = new StringBuilder( );
			foreach ( Match match in matches )
			{
				string matchingtext = match.Value.Trim( "\n\t ".ToCharArray( ) );
				// Keep only runs consisting purely of single-byte (ASCII) characters
				// that do not contain control character 4
				if ( Encoding.UTF8.GetByteCount( matchingtext ) == matchingtext.Length && matchingtext.IndexOf( (char)4 ) < 0 )
				{
					text.Append( matchingtext ).Append( "\n" );
				}
			}
			plaintext = text.ToString( );
			return true;
		}
  297.  
  298.  
		/// <summary>
		/// Fallback reader for .DOCX and .ODT files: opens the document as a ZIP archive,
		/// reads the XML part holding the text content, converts paragraph/heading ends and
		/// line breaks to newlines, strips all XML tags and stores the result in plaintext.
		/// </summary>
		/// <param name="docfile">path of the .DOCX or .ODT file</param>
		/// <returns>true if the content part was found and read, false otherwise</returns>
		static bool ReadDocxOrOdtFile( string docfile )
		{
			string contentfile;   // file name of the content part
			string preferredpath; // its usual full path inside the archive
			string ext = Path.GetExtension( docfile ).ToLower( );
			if ( ext == ".odt" ) // OpenOffice document
			{
				contentfile = "content.xml";
				preferredpath = "content.xml";
			}
			else if ( ext == ".docx" ) // MS Office document
			{
				contentfile = "document.xml";
				preferredpath = "word/document.xml";
			}
			else
			{
				return false;
			}

			string content = null;

			try
			{
				// Open document as ZIP file and read the XML part containing the text content
				// straight from the archive; no temporary file is needed
				using ( ZipArchive archive = ZipFile.OpenRead( docfile ) )
				{
					ZipArchiveEntry contententry = null;
					foreach ( ZipArchiveEntry entry in archive.Entries )
					{
						// An entry at the usual location always wins, so a part with the same
						// file name in another folder cannot replace the main document
						if ( entry.FullName.Replace( '\\', '/' ).ToLower( ) == preferredpath )
						{
							contententry = entry;
							break;
						}
						// Otherwise fall back to matching the file name only (last match wins)
						if ( entry.Name.ToLower( ) == contentfile )
						{
							contententry = entry;
						}
					}

					if ( contententry != null )
					{
						using ( StreamReader sr = new StreamReader( contententry.Open( ) ) )
						{
							content = sr.ReadToEnd( ).Trim( "\n\r\t ".ToCharArray( ) );
						}
					}
				}
			}
			catch ( IOException )
			{
				ShowHelp( "Access to file \"{0}\" denied", docfile );
				return false;
			}
			catch ( UnauthorizedAccessException )
			{
				// insufficient permissions are reported with this exception, not with an IOException
				ShowHelp( "Access to file \"{0}\" denied", docfile );
				return false;
			}
			catch ( InvalidDataException )
			{
				// the file is not a (valid) ZIP archive
				ShowHelp( "File \"{0}\" is not a valid ZIP based document", docfile );
				return false;
			}

			if ( content == null )
			{
				// the archive does not contain the expected content part
				return false;
			}

			// insert newlines after headers, list items and paragraphs
			Regex regex = new Regex( "</(text|w):(h|p)>" );
			plaintext = regex.Replace( content, "\n\n" );
			regex = new Regex( "<w:br/>" );
			plaintext = regex.Replace( plaintext, "\n\n" );
			// remove all XML tags
			regex = new Regex( "<[^>]+>" );
			plaintext = regex.Replace( plaintext, "" );
			// decode character entities (&amp; &lt; &gt; &quot; &#...;) AFTER the tags are gone,
			// so decoded angle brackets in the text are not mistaken for tags
			plaintext = System.Net.WebUtility.HtmlDecode( plaintext );
			return true;
		}
  369.  
  370.  
		/// <summary>
		/// Fallback reader for .RTF files: loads the RTF into a RichTextBox that is never
		/// shown and stores the control's plain text in plaintext.
		/// </summary>
		/// <param name="rtffile">path of the .RTF file</param>
		/// <returns>true if the file was read and converted, false otherwise</returns>
		static bool ReadRTFFile( string rtffile )
		{
			// Use a hidden RichTextBox to convert RTF to plain text, by Wendy Zang
			// https://social.msdn.microsoft.com/Forums/vstudio/en-US/6e56af9b-d7d3-49f3-9ec4-80edde3fe54b/reading-modifying-rtf-files?forum=csharpgeneral#a64345e9-cfcb-43be-ab18-c08fae02cb2a
			try
			{
				// RichTextBox is a disposable control; using releases it as soon as the text is read
				using ( RichTextBox rtbox = new RichTextBox( ) )
				{
					rtbox.Rtf = File.ReadAllText( rtffile );
					plaintext = rtbox.Text;
				}
			}
			catch ( IOException )
			{
				return false;
			}
			catch ( UnauthorizedAccessException )
			{
				// insufficient permissions to read the file
				return false;
			}
			catch ( ArgumentException )
			{
				// the Rtf property rejects text that is not valid RTF
				return false;
			}
			return true;
		}
  389.  
  390.  
		/// <summary>
		/// Opens the document in an invisible Word instance and stores the text of its
		/// content in plaintext. The document is closed without saving and the Word
		/// instance is quit afterwards.
		/// </summary>
		/// <param name="wordfile">path of the document to open</param>
		/// <returns>true if Word opened the document and its text was read, false on any error</returns>
		static bool ReadWordFile( string wordfile )
		{
			Word.Application wordapp = null;
			object savechanges = Word.WdSaveOptions.wdDoNotSaveChanges;
			bool success = false;
			try
			{
				// Created inside the try block so a failure to start Word is reported as
				// "not successful", allowing the caller to fall back to another reader
				wordapp = new Word.Application( );
				wordapp.Visible = false;
				// Pass an absolute path: Word runs as a separate process and does not resolve
				// relative paths against this program's working directory
				Word.Document worddoc = wordapp.Documents.Open( Path.GetFullPath( wordfile ) );
				wordapp.Selection.WholeStory( );
				plaintext = worddoc.Content.Text;
				worddoc.Close( ref savechanges );
				success = true;
			}
			catch ( Exception )
			{
				success = false;
			}
			finally
			{
				if ( wordapp != null )
				{
					try
					{
						wordapp.Quit( ref savechanges );
					}
					catch ( Exception )
					{
						// Quitting is best effort only: if Word is no longer reachable there is
						// nothing left to clean up, and the result of the extraction still stands
					}
				}
			}
			return success;
		}
  415.  
  416.  
		/// <summary>
		/// Fallback reader for WordPerfect (.WPD) files: reads the file as UTF-8 text,
		/// strips (most of) the file header and translates the remaining characters
		/// one by one, storing the result in plaintext.
		/// </summary>
		/// <param name="wpfile">path of the .WPD file</param>
		/// <returns>true if any non-whitespace text was extracted, false otherwise</returns>
		static bool ReadWPDFile( string wpfile )
		{
			string wpcontent;
			try
			{
				wpcontent = File.ReadAllText( wpfile, Encoding.UTF8 );
			}
			catch ( IOException )
			{
				ShowHelp( "Access to file \"{0}\" denied", wpfile );
				return false;
			}
			catch ( UnauthorizedAccessException )
			{
				// insufficient permissions are reported with this exception, not with an IOException
				ShowHelp( "Access to file \"{0}\" denied", wpfile );
				return false;
			}

			// Remove (most of) the WPD file header - WARNING: regex pattern depends on the Encoding used to read the file!
			Regex regex = new Regex( "^[\\w\\W]*\\000{8,}([^\\w]+[B-HJ-NP-TV-Z\\d])*[^\\w-]+", RegexOptions.IgnoreCase );
			wpcontent = regex.Replace( wpcontent, "" );

			// Collect the text in a StringBuilder; appending to a string once per character
			// would copy the whole text over and over again
			StringBuilder text = new StringBuilder( wpcontent.Length );

			// WPD file format info based on http://justsolve.archiveteam.org/wiki/WordPerfect
			// Modified for spaces, linefeeds and e acute by yours truly
			// More modifications are required for accented characters
			bool skip = false; // true while inside a span that starts and ends with the same code (192..236)
			int resume = -1;   // the code that ends the span currently being skipped
			foreach ( char c in wpcontent )
			{
				int i = (int)c;
				if ( !skip )
				{
					if ( i == 63 || i == 128 || i == 160 || i == 65533 )
					{
						// question mark, codes 128/160 and the Unicode replacement character become a space
						text.Append( ' ' );
					}
					else if ( i >= 169 && i != 172 && i <= 174 )
					{
						// codes 169, 170, 171, 173 and 174 become a dash
						text.Append( '-' );
					}
					else if ( i == 10 || i == 13 || i == 208 )
					{
						text.Append( Environment.NewLine );
					}
					else if ( i >= 192 && i <= 236 )
					{
						// start skipping until the same code is encountered again
						skip = true;
						resume = i;
					}
					else if ( i == 15 )
					{
						// e acute
						text.Append( (char)233 );
					}
					else if ( i <= 31 || ( i >= 129 && i <= 159 ) || ( i >= 161 && i <= 168 ) || i == 172 || ( i >= 175 && i <= 191 ) || ( i >= 237 && i <= 255 ) )
					{
						// control characters, ignore
					}
					else
					{
						text.Append( c );
					}
				}
				else if ( i == resume )
				{
					// closing code found, resume copying with the next character
					skip = false;
					resume = -1;
				}
			}

			plaintext = text.ToString( );
			return !string.IsNullOrWhiteSpace( plaintext );
		}
  475.  
  476.  
		/// <summary>
		/// Writes an optional error message followed by the help text to the standard
		/// error stream.
		/// </summary>
		/// <param name="errmsg">
		/// Optional: a composite format string, followed by the values for its {0}, {1}, ... placeholders.
		/// </param>
		/// <returns>always 2, so callers can use the result as the program's return code</returns>
		static int ShowHelp( params string[] errmsg )
		{
			#region Help Text

			/*
			Word2Txt,  Version 1.05
			Extract plain text from a Word document and send it to the screen

			Usage:   Word2Txt    "wordfile"  [ encoding | /D ]

			or:      Word2Txt    /E

			Where:   wordfile    is the path of the Word document to be read
			                     (no wildcards allowed)
			         encoding    force use of alternative encoding for plain
			                     text, e.g. UTF-8 to preserve accented characters
			                     or IBM437 to convert unicode quotes to ASCII
			         /D          use the encoding specified in the document file
			                     (for .DOCX and .ODT only, if Word isn't available)
			         /E          list all available encodings

			Notes:   If a "regular" (MSI based) Microsoft Word (2007 or later)
			         installation is detected, this program will use Word to read the
			         text from the Word file, which may be ANY file format recognized
			         by Word.
			         If Word was already active when this program is started, any other
			         opened document(s) will be left alone, and only the document opened
			         by this program will be closed.
			         If Word is not available, or if it encounters unreadable content
			         (i.e. the file is corrupted), the text can still be extracted, but
			         only from .DOC, .DOCX, .ODT, .RTF and .WPD files.
			         If the specified encoding does not match any available encoding name,
			         the program will try again, ignoring dashes; if that does not provide
			         a match, the program will try matching the specified encoding with
			         the available encodings' codepages.
			         This program requires .NET 4.5.
			         Return code ("errorlevel") 0 means Word encountered no errors and
			         some text was extracted from the file; 1 means Word is not available
			         or the file was corrupted; 2 means either command line errors or the
			         program failed to extract any text.

			Written by Rob van der Woude
			https://www.robvanderwoude.com
			*/

			#endregion Help Text


			#region Error Message

			if ( errmsg.Length > 0 )
			{
				// errmsg[0] is the format string, the remaining elements are its arguments
				List<string> errargs = new List<string>( errmsg );
				errargs.RemoveAt( 0 );
				Console.Error.WriteLine( );
				Console.ForegroundColor = ConsoleColor.Red;
				Console.Error.Write( "ERROR:\t" );
				Console.ForegroundColor = ConsoleColor.White;
				Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
				Console.ResetColor( );
			}

			#endregion Error Message


			#region Display Help Text

			Console.Error.WriteLine( );
			Console.Error.WriteLine( "Word2Txt,  Version {0}", progver );
			Console.Error.WriteLine( "Extract plain text from a Word document and send it to the screen" );
			Console.Error.WriteLine( );

			Console.Error.Write( "Usage:   " );
			WriteHighlighted( "Word2Txt    \"wordfile\"  [ encoding | /D ]" );
			Console.Error.WriteLine( );
			Console.Error.WriteLine( );

			Console.Error.Write( "or:      " );
			WriteHighlighted( "Word2Txt    /E" );
			Console.Error.WriteLine( );
			Console.Error.WriteLine( );

			Console.Error.Write( "Where:   " );
			WriteHighlighted( "wordfile" );
			Console.Error.WriteLine( "    is the path of the Word document to be read" );
			Console.Error.WriteLine( "                     (no wildcards allowed)" );

			WriteHighlighted( "         encoding" );
			Console.Error.WriteLine( "    force use of alternative encoding for plain" );

			Console.Error.Write( "                     text, e.g. " );
			WriteHighlighted( "UTF-8" );
			Console.Error.WriteLine( " to preserve accented characters" );

			Console.Error.Write( "                     or " );
			WriteHighlighted( "IBM437" );
			Console.Error.WriteLine( " to convert unicode quotes to ASCII" );

			WriteHighlighted( "         /D" );
			Console.Error.WriteLine( "          use the encoding specified in the document file" );
			Console.Error.WriteLine( "                     (for .DOCX and .ODT only, if Word isn't available)" );

			WriteHighlighted( "         /E" );
			Console.Error.WriteLine( "          list all available encodings" );
			Console.Error.WriteLine( );

			// The notes contain no highlighted words, so they are written line by line from a list
			string[] notes =
			{
				"Notes:   If a \"regular\" (MSI based) Microsoft Word (2007 or later)",
				"         installation is detected, this program will use Word to read the",
				"         text from the Word file, which may be ANY file format recognized",
				"         by Word.",
				"         If Word was already active when this program is started, any other",
				"         opened document(s) will be left alone, and only the document opened",
				"         by this program will be closed.",
				"         If Word is not available, or if it encounters unreadable content",
				"         (i.e. the file is corrupted), the text can still be extracted, but",
				"         only from .DOC, .DOCX, .ODT, .RTF and .WPD files.",
				"         If the specified encoding does not match any available encoding name,",
				"         the program will try again, ignoring dashes; if that does not provide",
				"         a match, the program will try matching the specified encoding with",
				"         the available encodings' codepages.",
				"         This program requires .NET 4.5.",
				"         Return code (\"errorlevel\") 0 means Word encountered no errors and",
				"         some text was extracted from the file; 1 means Word is not available",
				"         or the file was corrupted; 2 means either command line errors or the",
				"         program failed to extract any text."
			};
			foreach ( string note in notes )
			{
				Console.Error.WriteLine( note );
			}

			Console.Error.WriteLine( );
			Console.Error.WriteLine( "Written by Rob van der Woude" );
			Console.Error.WriteLine( "https://www.robvanderwoude.com" );

			#endregion Display Help Text


			return 2;
		}


		/// <summary>
		/// Writes text to the standard error stream in white, without a trailing newline,
		/// and resets the console colors afterwards.
		/// </summary>
		/// <param name="text">the text to highlight</param>
		static void WriteHighlighted( string text )
		{
			Console.ForegroundColor = ConsoleColor.White;
			Console.Error.Write( text );
			Console.ResetColor( );
		}
  654. 	}
  655. }

page last modified: 2024-04-16; loaded in 0.0099 seconds