Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for testsitemap.cs

(view source code of testsitemap.cs as plain text)

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.IO;
  5. using System.Net;
  6. using System.Threading;
  7. using System.Xml;
  8.  
  9.  
  10. namespace RobvanderWoude
  11. {
  12. 	internal class TestSitemap
  13. 	{
  14. 		static readonly string progver = "1.04";
  15.  
  16. 		static int delay = 250;
  17.  
  18.  
		/// <summary>
		/// Program entry point: parses the command line, reads the URLs from the XML
		/// sitemap, tests each of them with WebTest and prints a summary.
		/// </summary>
		/// <param name="args">Sitemap path plus the optional /D:nn, /Q, /R, /XL and /XQ switches.</param>
		/// <returns>
		/// The number of failed URL tests (plus the number of redirections if /R was
		/// specified), or the return value of ShowHelp on command line errors.
		/// </returns>
		static int Main( string[] args )
		{
			#region Initialize Variables

			string sitemap = string.Empty;
			string title = Console.Title;
			int urlcount = 0;
			int errorcount = 0;
			int redirectscount = 0;
			bool countredirectsaserrors = false;
			bool delayspecified = false;
			bool excludequeries = false;
			bool excludelanguage = false;
			bool quietmode = false;
			Stopwatch stopwatch = new Stopwatch( );

			#endregion Initialize Variables


			#region Command Line Arguments

			// At most: the sitemap plus the 5 switches /D:nn, /Q, /R, /XL and /XQ
			if ( args.Length == 0 || args.Length > 6 )
			{
				return ShowHelp( );
			}

			foreach ( string arg in args )
			{
				// An empty argument ("") has no first character to inspect
				if ( string.IsNullOrEmpty( arg ) )
				{
					return ShowHelp( "Empty command line argument" );
				}

				if ( arg[0] != '/' )
				{
					if ( !string.IsNullOrWhiteSpace( sitemap ) )
					{
						return ShowHelp( "Duplicate sitemap argument \"{0}\"", arg );
					}
					if ( !File.Exists( arg ) )
					{
						return ShowHelp( "File \"{0}\" not found", arg );
					}
					if ( !QuickTestXML( arg ) )
					{
						return ShowHelp( "File \"{0}\" is not a valid XML file", arg );
					}
					sitemap = arg;
				}
				else if ( arg == "/?" )
				{
					return ShowHelp( );
				}
				else if ( arg.Length > 3 && arg.StartsWith( "/D:", StringComparison.OrdinalIgnoreCase ) )
				{
					// A dedicated flag detects duplicates even if the value equals the default
					if ( delayspecified )
					{
						return ShowHelp( "Duplicate command line switch /D" );
					}
					if ( !Int32.TryParse( arg.Substring( 3 ), out delay ) )
					{
						return ShowHelp( "Invalid delay value \"{0}\"", arg );
					}
					// Never go below the default delay of 250 ms
					delay = Math.Max( delay, 250 );
					delayspecified = true;
				}
				else if ( string.Equals( arg, "/Q", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( quietmode )
					{
						return ShowHelp( "Duplicate command line switch /Q" );
					}
					quietmode = true;
				}
				else if ( string.Equals( arg, "/R", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( countredirectsaserrors )
					{
						return ShowHelp( "Duplicate command line switch /R" );
					}
					countredirectsaserrors = true;
				}
				else if ( string.Equals( arg, "/XL", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( excludelanguage )
					{
						return ShowHelp( "Duplicate command line switch /XL" );
					}
					excludelanguage = true;
				}
				else if ( string.Equals( arg, "/XQ", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( excludequeries )
					{
						return ShowHelp( "Duplicate command line switch /XQ" );
					}
					excludequeries = true;
				}
				else
				{
					return ShowHelp( "Invalid command line switch \"{0}\"", arg );
				}
			}

			// Switches only, no sitemap: nothing to test
			if ( string.IsNullOrWhiteSpace( sitemap ) )
			{
				return ShowHelp( "No sitemap file specified" );
			}

			// No queries? No language
			excludelanguage = excludelanguage || excludequeries;

			#endregion Command Line Arguments


			stopwatch.Start( );


			#region Read XML and Test URLs

			Console.Title = "Reading sitemap";
			List<string> urls;
			try
			{
				// Single pass: the list that is counted is the list that is tested
				urls = ReadSitemapUrls( sitemap, excludequeries, excludelanguage );
			}
			catch ( XmlException )
			{
				Console.Title = title;
				return ShowHelp( "File \"{0}\" is not a valid XML file", sitemap );
			}
			int matchingurls = urls.Count;

			Console.Title = string.Format( "  0% tested, 0 errors and 0 redirections in 0 of {0} URLs so far", matchingurls );
			SortedList<string, int> results = new SortedList<string, int>( );
			foreach ( string url in urls )
			{
				int result = WebTest( url );
				urlcount++;
				results[url] = result;
				if ( result == 200 )
				{
					if ( !quietmode )
					{
						Console.ForegroundColor = ConsoleColor.Green;
						Console.WriteLine( "{0}\t{1}", result, url );
					}
				}
				else if ( result >= 300 && result < 400 )
				{
					// Any 3xx status, including 300 itself, is a redirection
					redirectscount++;
					Console.ForegroundColor = ConsoleColor.DarkYellow;
					Console.WriteLine( "{0}\t{1}", result, url );
				}
				else
				{
					errorcount++;
					Console.ForegroundColor = ConsoleColor.Red;
					Console.WriteLine( "{0}\t{1}", result, url );
				}
				Console.ResetColor( );

				// matchingurls is at least 1 inside this loop, so the division is safe
				int percentage = urlcount * 100 / matchingurls;
				Console.Title = string.Format( "{0,3}% tested, {1} errors and {2} redirections in {3} of {4} URLs so far", percentage, errorcount, redirectscount, urlcount, matchingurls );
			}

			Console.Title = title;

			#endregion Read XML and Test URLs


			#region Show Summary

			stopwatch.Stop( );
			Console.WriteLine( "\nTesting {0} URLs took {1:0.0} seconds, {2} errors and {3} redirections encountered", urlcount, stopwatch.Elapsed.TotalSeconds, errorcount, redirectscount );

			if ( errorcount > 0 || ( redirectscount > 0 && countredirectsaserrors ) )
			{
				// The heading is kept free of line breaks so the underline matches its length
				string message = string.Format( "{0} error{1} and {2} redirection{3} encountered:", errorcount, ( errorcount == 1 ? "" : "s" ), redirectscount, ( redirectscount == 1 ? "" : "s" ) );
				Console.WriteLine( "\n" );
				Console.WriteLine( );
				Console.WriteLine( message );
				Console.WriteLine( new string( '=', message.Length ) );
				foreach ( KeyValuePair<string, int> result in results )
				{
					if ( result.Value != 200 )
					{
						Console.WriteLine( "{0}\t{1}", result.Value, result.Key );
					}
				}
			}

			#endregion Show Summary


			int rc = errorcount;
			if ( countredirectsaserrors )
			{
				rc += redirectscount;
			}
			return rc;
		}


		/// <summary>
		/// Reads the sitemap and returns, in document order, the value of every text
		/// node that starts with "http" and is not excluded by the given filters.
		/// </summary>
		/// <param name="sitemap">Path of the XML sitemap file.</param>
		/// <param name="excludequeries">Skip every URL containing a "?".</param>
		/// <param name="excludelanguage">Skip every URL containing a lang= query parameter.</param>
		/// <returns>The URLs to be tested; duplicates in the sitemap are preserved.</returns>
		static List<string> ReadSitemapUrls( string sitemap, bool excludequeries, bool excludelanguage )
		{
			List<string> urls = new List<string>( );
			using ( XmlReader xml = XmlReader.Create( sitemap ) )
			{
				while ( xml.Read( ) )
				{
					if ( xml.NodeType != XmlNodeType.Text )
					{
						continue;
					}
					string url = xml.Value;
					if ( url.StartsWith( "http", StringComparison.Ordinal ) && !IsExcludedUrl( url, excludequeries, excludelanguage ) )
					{
						urls.Add( url );
					}
				}
			}
			return urls;
		}


		/// <summary>
		/// Tells whether a URL must be skipped because of the /XQ or /XL switches.
		/// </summary>
		static bool IsExcludedUrl( string url, bool excludequeries, bool excludelanguage )
		{
			if ( excludequeries && url.Contains( "?" ) )
			{
				return true;
			}
			if ( excludelanguage && ( url.Contains( "?lang=" ) || url.Contains( "&lang=" ) || url.Contains( "&amp;lang=" ) ) )
			{
				return true;
			}
			return false;
		}
  233.  
  234.  
		/// <summary>
		/// Quick plausibility check: tells whether the first line of the file starts
		/// with an XML declaration ("&lt;?xml", case-insensitive). The file is not parsed.
		/// </summary>
		/// <param name="file">Path of the file to check.</param>
		/// <returns>True if the first line starts with "&lt;?xml"; false otherwise, including for an empty file.</returns>
		static bool QuickTestXML( string file )
		{
			// "using" closes the file even if reading it throws
			using ( StreamReader test = new StreamReader( file ) )
			{
				string firstline = test.ReadLine( );
				// ReadLine returns null for an empty file
				return ( firstline != null && firstline.StartsWith( "<?xml", StringComparison.OrdinalIgnoreCase ) );
			}
		}
  243.  
  244.  
		/// <summary>
		/// Requests a single URL, without following redirects, and returns the HTTP
		/// status code of the response.
		/// </summary>
		/// <param name="url">The URL to test.</param>
		/// <returns>
		/// The HTTP status code (including 4xx/5xx codes reported by the server),
		/// or 0 if no HTTP response was received at all.
		/// </returns>
		static int WebTest( string url )
		{
			// Honor the delay set with /D:nn instead of a hard-coded 250 ms
			Thread.Sleep( delay );
			try
			{
				// Next 2 lines allow secure connections; the obsolete and
				// insecure SSL 3.0 protocol is intentionally no longer enabled
				ServicePointManager.Expect100Continue = true;
				ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

				HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create( url );
				// Redirects must be reported, not silently followed
				webrequest.AllowAutoRedirect = false;
				using ( HttpWebResponse response = (HttpWebResponse)webrequest.GetResponse( ) )
				{
					int status = (int)response.StatusCode;
					// Only the status is needed, skip downloading the body
					webrequest.Abort( );
					return status;
				}
			}
			catch ( WebException e )
			{
				Console.Error.WriteLine( e.Message );
				// 4xx and 5xx responses arrive as exceptions; report their real status code
				HttpWebResponse errorresponse = e.Response as HttpWebResponse;
				if ( errorresponse != null )
				{
					using ( errorresponse )
					{
						return (int)errorresponse.StatusCode;
					}
				}
				return 0;
			}
			catch ( Exception e )
			{
				Console.Error.WriteLine( e.Message );
				return 0;
			}
		}
  268.  
  269.  
  270. 		#region Error handling
  271.  
		/// <summary>
		/// Writes an optional error message followed by the help text to the
		/// standard error stream.
		/// </summary>
		/// <param name="errmsg">
		/// Optional: a composite format string, followed by the values for its placeholders.
		/// </param>
		/// <returns>Always -1.</returns>
		public static int ShowHelp( params string[] errmsg )
		{
			#region Error Message

			if ( errmsg.Length > 0 )
			{
				// Everything after the first element is an argument for the format string
				string[] formatargs = new string[errmsg.Length - 1];
				Array.Copy( errmsg, 1, formatargs, 0, formatargs.Length );

				Console.Error.WriteLine( );
				Console.ForegroundColor = ConsoleColor.Red;
				Console.Error.Write( "ERROR:\t" );
				Console.ForegroundColor = ConsoleColor.White;
				Console.Error.WriteLine( errmsg[0], formatargs );
				Console.ResetColor( );
			}

			#endregion Error Message


			#region Help Text

			/*
			TestSitemap.exe,  Version 1.04
			Test all URLs encountered in an XML sitemap

			Usage:   TestSitemap.exe  sitemap  [ options ]

			Where:   sitemap    path of XML sitemap file

			Options: /D:nn      Delay of nn milliseconds between URL tests
			                    (default: 250 ms)
			         /Q         Quiet mode: display errors and redirections only
			                    (default: show all)
			         /R         Redirects count as errors (default: redirects are
			                    displayed as such but not counted as errors)
			         /XL        eXclude Language specifications, e.g. ?lang=en
			         /XQ        eXclude all Queries, i.e. "?" and everything after that
			                    (/XQ automatically implies /XL as well)

			Note:    Return code equals the number of failed URL tests, or -1 in case
			         of command line errors.

			Written by Rob van der Woude
			https://www.robvanderwoude.com
			*/

			#endregion Help Text


			#region Display help

			TextWriter stderr = Console.Error;

			stderr.WriteLine( );
			stderr.WriteLine( "TestSitemap.exe,  Version {0}", progver );
			stderr.WriteLine( "Test all URLs encountered in an XML sitemap" );
			stderr.WriteLine( );

			// Usage
			stderr.Write( "Usage:   " );
			Console.ForegroundColor = ConsoleColor.White;
			stderr.WriteLine( "TestSitemap.exe  sitemap  [ options ]" );
			Console.ResetColor( );
			stderr.WriteLine( );

			// Where
			stderr.Write( "Where:   " );
			WriteBright( "sitemap" );
			stderr.Write( "    path of XML " );
			WriteBright( "sitemap" );
			stderr.WriteLine( " file" );
			stderr.WriteLine( );

			// Options: /D:nn
			stderr.Write( "Options: " );
			WriteBright( "/D:nn      D" );
			stderr.Write( "elay of " );
			WriteBright( "nn" );
			stderr.WriteLine( " milliseconds between URL tests" );
			stderr.WriteLine( "                    (default: 250 ms)" );

			// /Q
			WriteBright( "         /Q         Q" );
			stderr.WriteLine( "uiet mode: display errors and redirections only" );
			stderr.WriteLine( "                    (default: show all)" );

			// /R
			WriteBright( "         /R         R" );
			stderr.WriteLine( "edirects count as errors (default: redirects are" );
			stderr.WriteLine( "                    displayed as such but not counted as errors)" );

			// /XL
			WriteBright( "         /XL" );
			stderr.Write( "        e" );
			WriteBright( "X" );
			stderr.Write( "clude " );
			WriteBright( "L" );
			stderr.WriteLine( "anguage specifications, e.g. ?lang=en" );

			// /XQ
			WriteBright( "         /XQ" );
			stderr.Write( "        e" );
			WriteBright( "X" );
			stderr.Write( "clude all " );
			WriteBright( "Q" );
			stderr.WriteLine( "ueries, i.e. \"?\" and everything after that" );
			stderr.Write( "                    (" );
			WriteBright( "/XQ" );
			stderr.Write( " automatically implies " );
			WriteBright( "/XL" );
			stderr.WriteLine( " as well)" );
			stderr.WriteLine( );

			// Note and credits
			stderr.WriteLine( "Note:    Return code equals the number of failed URL tests, or -1 in case" );
			stderr.WriteLine( "         of command line errors." );
			stderr.WriteLine( );
			stderr.WriteLine( "Written by Rob van der Woude" );
			stderr.WriteLine( "https://www.robvanderwoude.com" );

			#endregion Display Help


			return -1;
		}


		// Writes text to standard error in white, then restores the console colors
		private static void WriteBright( string text )
		{
			Console.ForegroundColor = ConsoleColor.White;
			Console.Error.Write( text );
			Console.ResetColor( );
		}
  429.  
  430. 		#endregion Error handling
  431. 	}
  432. }

page last modified: 2024-04-16; loaded in 0.0123 seconds