Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for dedup.cs

(view source code of dedup.cs as plain text)

  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Text.RegularExpressions;
  5.  
  6.  
  7. namespace RobvanderWoude
  8. {
  9. 	class DeDup
  10. 	{
  11. 		static string progver = "1.02";
  12.  
  13.  
  14. 		static int Main( string[] args )
  15. 		{
  16. 			#region Initialize Variables
  17.  
  18. 			bool ignorecase = false;
  19. 			bool ignorewhitespace = false;
  20. 			bool isredirected = Console.IsInputRedirected; // Requires .NET Framework 4.5
  21. 			bool returnduplicates = false;
  22. 			bool sortoutput = false;
  23. 			bool trimoutput = false;
  24. 			int redirectnum = ( isredirected ? 1 : 0 );
  25. 			int arguments = args.Length + redirectnum;
  26. 			int rc = 0;
  27. 			string filename = string.Empty;
  28. 			string input = String.Empty;
  29.  
  30. 			#endregion Initialize Variables
  31.  
  32.  
  33. 			#region Command Line Parsing
  34.  
  35. 			if ( arguments == 0 )
  36. 			{
  37. 				return ShowHelp( );
  38. 			}
  39. 			if ( arguments > 1 )
  40. 			{
  41. 				for ( int i = 1 - redirectnum; i < args.Length; i++ )
  42. 				{
  43. 					if ( args[i][0] != '/' || args[i].Length < 2 )
  44. 					{
  45. 						return ShowHelp( "Invalid command line argument \"{0}\"", args[i] );
  46. 					}
  47. 					switch ( args[i][1].ToString( ).ToUpper( ) )
  48. 					{
  49. 						case "C":
  50. 							if ( ignorecase )
  51. 							{
  52. 								return ShowHelp( "Duplicate command line switch /C" );
  53. 							}
  54. 							ignorecase = true;
  55. 							break;
  56. 						case "/R":
  57. 							if ( returnduplicates )
  58. 							{
  59. 								return ShowHelp( "Duplicate command line switch /R" );
  60. 							}
  61. 							returnduplicates = true;
  62. 							break;
  63. 						case "S":
  64. 							if ( sortoutput )
  65. 							{
  66. 								return ShowHelp( "Duplicate command line switch /S" );
  67. 							}
  68. 							sortoutput = true;
  69. 							break;
  70. 						case "T":
  71. 							if ( trimoutput )
  72. 							{
  73. 								return ShowHelp( "Duplicate command line switch /T" );
  74. 							}
  75. 							trimoutput = true;
  76. 							break;
  77. 						case "W":
  78. 							if ( ignorewhitespace )
  79. 							{
  80. 								return ShowHelp( "Duplicate command line switch /W" );
  81. 							}
  82. 							ignorewhitespace = true;
  83. 							break;
  84. 						default:
  85. 							return ShowHelp( "Invalid command line switch {0}", args[i] );
  86. 					}
  87. 				}
  88. 			}
  89. 			if ( isredirected )
  90. 			{
  91. 				// Read the redirected Standard Input
  92. 				input = Console.In.ReadToEnd( );
  93. 			}
  94. 			else
  95. 			{
  96. 				filename = args[0];
  97. 				// Check if the file name is valid
  98. 				if ( filename.IndexOf( "/" ) > -1 )
  99. 				{
  100. 					return ShowHelp( );
  101. 				}
  102.  
  103. 				if ( filename.IndexOfAny( "?*".ToCharArray( ) ) > -1 )
  104. 				{
  105. 					return ShowHelp( "Wildcards not allowed" );
  106. 				}
  107.  
  108. 				// Check if the file exists
  109. 				if ( File.Exists( filename ) )
  110. 				{
  111. 					// Read the file content
  112. 					using ( StreamReader file = new StreamReader( filename ) )
  113. 					{
  114. 						input = file.ReadToEnd( );
  115. 					}
  116. 				}
  117. 				else
  118. 				{
  119. 					return ShowHelp( "File not found: \"" + filename + "\"" );
  120. 				}
  121. 			}
  122.  
  123. 			#endregion Command Line Parsing
  124.  
  125.  
  126. 			#region Check Each Line
  127.  
  128. 			List<string> deduplines = new List<string>( );
  129. 			Regex regex = new Regex( @"[\t ]+" );
  130. 			foreach ( string line in input.Split( "\n\r".ToCharArray( ) ) )
  131. 			{
  132. 				string checkline = line;
  133. 				if ( ignorewhitespace )
  134. 				{
  135. 					checkline = regex.Replace( checkline, " " );
  136. 				}
  137. 				if ( trimoutput )
  138. 				{
  139. 					checkline = checkline.Trim( );
  140. 				}
  141. 				if ( !String.IsNullOrWhiteSpace( checkline ) || !ignorewhitespace )
  142. 				{
  143. 					if ( ignorecase )
  144. 					{
  145. 						bool found = false;
  146. 						foreach ( string storedline in deduplines )
  147. 						{
  148. 							if ( storedline.ToLower( ) == checkline.ToLower( ) )
  149. 							{
  150. 								found = true;
  151. 								rc++;
  152. 							}
  153. 						}
  154. 						if ( !found )
  155. 						{
  156. 							deduplines.Add( checkline );
  157. 						}
  158. 					}
  159. 					else
  160. 					{
  161. 						if ( !deduplines.Contains( checkline ) )
  162. 						{
  163. 							deduplines.Add( checkline );
  164. 							rc++;
  165. 						}
  166. 					}
  167. 				}
  168. 			}
  169.  
  170. 			#endregion Check Each Line
  171.  
  172.  
  173. 			#region Display Results
  174.  
  175. 			if ( sortoutput )
  176. 			{
  177. 				deduplines.Sort( );
  178. 			}
  179.  
  180. 			foreach ( string line in deduplines )
  181. 			{
  182. 				Console.WriteLine( line );
  183. 			}
  184.  
  185. 			#endregion Display Results
  186.  
  187.  
  188. 			return rc;
  189. 		}
  190.  
  191.  
  192. 		// Displays help text
  193. 		static int ShowHelp( params string[] errmsg )
  194. 		{
  195. 			#region Error Message
  196.  
  197. 			if ( errmsg.Length > 0 )
  198. 			{
  199. 				List<string> errargs = new List<string>( errmsg );
  200. 				errargs.RemoveAt( 0 );
  201. 				Console.Error.WriteLine( );
  202. 				Console.ForegroundColor = ConsoleColor.Red;
  203. 				Console.Error.Write( "ERROR:\t" );
  204. 				Console.ForegroundColor = ConsoleColor.White;
  205. 				Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
  206. 				Console.ResetColor( );
  207. 			}
  208.  
  209. 			#endregion Error Message
  210.  
  211. 			#region Help Text
  212.  
  213. 			/*
  214. 			DeDup.exe,  Version 1.02
  215. 			Remove duplicate lines from a text file or from redirected input
  216.  
  217. 			Usage:   DeDup.exe  filename  [ options ]
  218. 			   or:   some_command  |  DeDup.exe  [ options ]
  219.  
  220. 			Where:   filename      file to be investigated
  221. 			         some_command  command whose Standard Output is to be investigated
  222.  
  223. 			Options: /C            ignore Case
  224. 			         /R            Return code equals number of duplicates removed
  225. 			         /S            Sort results
  226. 			         /T            Trim leading and trailing whitespace from output
  227. 			         /W            ignore Whitespace (any combination of tabs and/or
  228. 			                       spaces will be replaced by a single space in output,
  229. 			                       empty lines or lines containing only whitespace will
  230. 			                       be removed from output)
  231.  
  232. 			Notes:   The filtered output is sent to the screen (Standard Output).
  233. 			         In case of duplicate lines, only the first match is returned.
  234. 			         Return code ("errorlevel") equals the number of removed
  235. 			         duplicates if /R is used, -1 in case of errors, or 0 otherwise.
  236. 			         This version of the program requires .NET Framework 4.5.
  237.  
  238. 			Written by Rob van der Woude
  239. 			http://www.robvanderwoude.com
  240. 			*/
  241.  
  242. 			Console.Error.WriteLine( );
  243.  
  244. 			Console.Error.WriteLine( );
  245.  
  246. 			Console.Error.WriteLine( "DeDup,  Version {0}", progver );
  247.  
  248. 			Console.Error.WriteLine( "Remove duplicate lines from a text file or from redirected input" );
  249.  
  250. 			Console.Error.WriteLine( );
  251.  
  252. 			Console.Error.Write( "Usage:   " );
  253. 			Console.ForegroundColor = ConsoleColor.White;
  254. 			Console.Error.WriteLine( "DeDup.exe  filename  [ options ]" );
  255. 			Console.ResetColor( );
  256.  
  257. 			Console.Error.Write( "   or:   some_command  " );
  258. 			Console.ForegroundColor = ConsoleColor.White;
  259. 			Console.Error.WriteLine( "|  DeDup.exe  [ options ]" );
  260. 			Console.ResetColor( );
  261.  
  262. 			Console.Error.WriteLine( );
  263.  
  264. 			Console.Error.Write( "Where:   " );
  265. 			Console.ForegroundColor = ConsoleColor.White;
  266. 			Console.Error.Write( "filename" );
  267. 			Console.ResetColor( );
  268. 			Console.Error.WriteLine( "      file to be investigated" );
  269.  
  270. 			Console.ForegroundColor = ConsoleColor.White;
  271. 			Console.Error.Write( "         some_command" );
  272. 			Console.ResetColor( );
  273. 			Console.Error.WriteLine( "  command whose Standard Output is to be investigated" );
  274.  
  275. 			Console.Error.WriteLine( );
  276.  
  277. 			Console.Error.Write( "Options: " );
  278. 			Console.ForegroundColor = ConsoleColor.White;
  279. 			Console.Error.Write( "/C" );
  280. 			Console.ResetColor( );
  281. 			Console.Error.Write( "            ignore " );
  282. 			Console.ForegroundColor = ConsoleColor.White;
  283. 			Console.Error.Write( "C" );
  284. 			Console.ResetColor( );
  285. 			Console.Error.WriteLine( "ase" );
  286.  
  287. 			Console.ForegroundColor = ConsoleColor.White;
  288. 			Console.Error.Write( "         /R            R" );
  289. 			Console.ResetColor( );
  290. 			Console.Error.WriteLine( "eturn code equals number of duplicates removed" );
  291.  
  292. 			Console.ForegroundColor = ConsoleColor.White;
  293. 			Console.Error.Write( "         /S            S" );
  294. 			Console.ResetColor( );
  295. 			Console.Error.WriteLine( "ort results" );
  296.  
  297. 			Console.ForegroundColor = ConsoleColor.White;
  298. 			Console.Error.Write( "         /T            T" );
  299. 			Console.ResetColor( );
  300. 			Console.ResetColor( );
  301. 			Console.Error.WriteLine( "rim leading and trailing whitespace from output" );
  302.  
  303. 			Console.ForegroundColor = ConsoleColor.White;
  304. 			Console.Error.Write( "         /W" );
  305. 			Console.ResetColor( );
  306. 			Console.Error.Write( "            ignore " );
  307. 			Console.ForegroundColor = ConsoleColor.White;
  308. 			Console.Error.Write( "W" );
  309. 			Console.ResetColor( );
  310. 			Console.Error.WriteLine( "hitespace (any combination of tabs and/or" );
  311.  
  312. 			Console.Error.WriteLine( "                       spaces will be replaced by a single space in output," );
  313.  
  314. 			Console.Error.WriteLine( "                       empty lines or lines containing only whitespace will" );
  315.  
  316. 			Console.Error.WriteLine( "                       be removed from output)" );
  317.  
  318. 			Console.Error.WriteLine( );
  319.  
  320. 			Console.Error.WriteLine( "Notes:   The filtered output is sent to the screen (Standard Output)." );
  321.  
  322. 			Console.Error.WriteLine( "         In case of duplicate lines, only the first match is returned." );
  323.  
  324. 			Console.Error.WriteLine( "         Return code (\"errorlevel\") equals the number of removed" );
  325.  
  326. 			Console.Error.Write( "         duplicates if " );
  327. 			Console.ForegroundColor = ConsoleColor.White;
  328. 			Console.Error.Write( "/R" );
  329. 			Console.ResetColor( );
  330. 			Console.Error.WriteLine( " is used, -1 in case of errors, or 0 otherwise." );
  331.  
  332. 			Console.Error.WriteLine( "         This version of the program requires .NET Framework 4.5." );
  333.  
  334. 			Console.Error.WriteLine( );
  335.  
  336. 			Console.Error.WriteLine( "Written by Rob van der Woude" );
  337.  
  338. 			Console.Error.WriteLine( "http://www.robvanderwoude.com" );
  339.  
  340. 			#endregion Help Text
  341.  
  342. 			return -1;
  343. 		}
  344. 	}
  345. }
  346.  

page last modified: 2024-04-16; loaded in 0.0095 seconds