Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for wgetie.cs

(view source code of wgetie.cs as plain text)

  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Text;
  5. using System.Threading;
  6.  
  7.  
  8. namespace RobvanderWoude
  9. {
  10. 	class WGetIE
  11. 	{
  12. 		static string progver = "1.02";
  13.  
  14.  
  15. 		static int Main( string[] args )
  16. 		{
  17. 			string url = null;
  18. 			string file = null;
  19. 			int timeout = 5;
  20. 			int width = 1000000;
  21. 			int height = 1000000;
  22.  
  23.  
  24. 			#region Command Line Parsing
  25.  
  26. 			if ( args.Length > 1 && args.Length < 5 )
  27. 			{
  28. 				url = args[0];
  29. 				file = args[1];
  30. 				if ( !url.StartsWith( "http://" ) && !url.StartsWith( "https://" ) )
  31. 				{
  32. 					return ErrorMessage( "Invalid URL specified:\n\t\'{0}\"", url );
  33. 				}
  34. 				try
  35. 				{
  36. 					file = Path.GetFullPath( file );
  37. 					string parentfolder = Directory.GetParent( file ).FullName;
  38. 					if ( !Directory.Exists( parentfolder ) )
  39. 					{
  40. 						return ErrorMessage( "Invalid folder specified:\n\t\"{0}\"", parentfolder );
  41. 					}
  42. 				}
  43. 				catch
  44. 				{
  45. 					return ErrorMessage( "Invalid file specified:\n\t\"{0}\"", file );
  46. 				}
  47. 				if ( args.Length > 2 )
  48. 				{
  49. 					try
  50. 					{
  51. 						timeout = Convert.ToInt32( args[2] );
  52. 					}
  53. 					catch ( Exception )
  54. 					{
  55. 						return ErrorMessage( "Invalid timeout: \"{0}\"", args[2] );
  56. 					}
  57. 					if ( timeout < 5 )
  58. 					{
  59. 						return ErrorMessage( "Invalid timeout: \"{0}\"", timeout.ToString( ) );
  60. 					}
  61. 				}
  62. 				if ( args.Length > 3 )
  63. 				{
  64. 					try
  65. 					{
  66. 						width = Convert.ToInt32( args[3] );
  67. 					}
  68. 					catch ( Exception )
  69. 					{
  70. 						return ErrorMessage( "Invalid width: \"{0}\"", args[3] );
  71. 					}
  72. 					if ( width < 240 )
  73. 					{
  74. 						return ErrorMessage( "Invalid width: \"{0}\"", width.ToString( ) );
  75. 					}
  76. 				}
  77. 			}
  78. 			else
  79. 			{
  80. 				return ErrorMessage( );
  81. 			}
  82. 			if ( String.IsNullOrEmpty( file ) || String.IsNullOrEmpty( url ) )
  83. 			{
  84. 				return 1;
  85. 			}
  86.  
  87. 			#endregion Command Line Parsing
  88.  
  89.  
  90. 			#region Read URL
  91.  
  92. 			SHDocVw.InternetExplorer ie = new SHDocVw.InternetExplorer( );
  93. 			string useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";
  94. 			string userlanguage = "en-US,en-UK;q=0.5";
  95. 			string html = String.Empty;
  96. 			int digits = timeout.ToString( ).Length;
  97. 			try
  98. 			{
  99. 				ie.Navigate( url, Type.Missing, Type.Missing, Type.Missing, String.Format( "User-Agent: {0}; Accept-Language: {1};", useragent, userlanguage ) );
  100. 				ie.Height = height;
  101. 				ie.Width = width;
  102. 				Console.Write( new String( ' ', digits ) );
  103. 				for ( int i = timeout; i > 0; i-- )
  104. 				{
  105. 					Console.Write( new String( '\b', digits ) );
  106. 					Console.Write( new String( ' ', digits ) );
  107. 					Console.Write( new String( '\b', digits ) );
  108. 					Console.Write( String.Format( "{0,-" + digits + "}", i ) );
  109. 					Thread.Sleep( 1000 );
  110. 				}
  111. 				Console.Write( new String( '\b', digits ) );
  112. 				Console.Write( new String( ' ', digits ) );
  113. 				Console.Write( new String( '\b', digits ) );
  114. 				html = ie.Document.Body.innerHTML;
  115. 			}
  116. 			catch ( Exception e )
  117. 			{
  118. 				return ErrorMessage( e.Message );
  119. 			}
  120. 			ie.Quit( );
  121.  
  122. 			#endregion Read URL
  123.  
  124.  
  125. 			#region Write to File
  126.  
  127. 			if ( String.IsNullOrWhiteSpace( html ) )
  128. 			{
  129. 				return ErrorMessage( "No text could be retrieved from the specified URL" );
  130. 			}
  131. 			else
  132. 			{
  133. 				StreamWriter sw = new StreamWriter( file, false, Encoding.UTF8 );
  134. 				sw.Write( html );
  135. 				sw.Close( );
  136. 				return 0;
  137. 			}
  138.  
  139. 			#endregion Write to File
  140. 		}
  141.  
  142.  
  143. 		static int ErrorMessage( params string[] errmsg )
  144. 		{
  145. 			/*
  146. 			WGetIE.exe,  Version 1.02
  147. 			Save a web page to a file, using Internet Explorer
  148.  
  149. 			Usage:    WGetIE.exe  url  file  [ seconds  [ width ] ]
  150.  
  151. 			Where:    url       is URL of the page to save
  152. 			          file      is the output file
  153. 			          seconds   is the timeout in seconds
  154. 			                    (minimum: 5; default: 5)
  155. 			          width     is the virtual browser window width
  156. 			                    (minimum: 240; default: 1,000,000)
  157.  
  158. 			Written by Rob van der Woude
  159. 			http://www.robvanderwoude.com
  160. 			*/
  161. 			if ( errmsg.Length > 0 )
  162. 			{
  163. 				List<string> errargs = new List<string>( errmsg );
  164. 				errargs.RemoveAt( 0 );
  165. 				Console.Error.WriteLine( );
  166. 				Console.ForegroundColor = ConsoleColor.Red;
  167. 				Console.Error.Write( "ERROR:\t" );
  168. 				Console.ForegroundColor = ConsoleColor.White;
  169. 				Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
  170. 				Console.ResetColor( );
  171.  
  172. 			}
  173.  
  174. 			Console.Error.WriteLine( );
  175.  
  176. 			Console.Error.WriteLine( "WGetIE.exe,  Version {0}", progver );
  177.  
  178. 			Console.Error.WriteLine( "Save a web page to a file, using Internet Explorer" );
  179.  
  180. 			Console.Error.WriteLine( );
  181.  
  182. 			Console.Error.Write( "Usage:    " );
  183. 			Console.ForegroundColor = ConsoleColor.White;
  184. 			Console.Error.WriteLine( "WGetIE.exe  url  file  [ seconds  [ width ] ]" );
  185. 			Console.ResetColor( );
  186.  
  187. 			Console.Error.WriteLine( );
  188.  
  189. 			Console.Error.Write( "Where:    " );
  190. 			Console.ForegroundColor = ConsoleColor.White;
  191. 			Console.Error.Write( "url" );
  192. 			Console.ResetColor( );
  193. 			Console.Error.WriteLine( "       is URL of the page to save" );
  194.  
  195. 			Console.ForegroundColor = ConsoleColor.White;
  196. 			Console.Error.Write( "          file" );
  197. 			Console.ResetColor( );
  198. 			Console.Error.WriteLine( "      is the output file" );
  199.  
  200. 			Console.ForegroundColor = ConsoleColor.White;
  201. 			Console.Error.Write( "          seconds" );
  202. 			Console.ResetColor( );
  203. 			Console.Error.WriteLine( "   is the timeout in seconds" );
  204.  
  205. 			Console.Error.WriteLine( "                    (minimum: 5; default: 5)" );
  206.  
  207. 			Console.ForegroundColor = ConsoleColor.White;
  208. 			Console.Error.Write( "          width" );
  209. 			Console.ResetColor( );
  210. 			Console.Error.WriteLine( "     is the virtual browser window width" );
  211.  
  212. 			Console.Error.WriteLine( "                    (minimum: 240; default: 1,000,000)" );
  213.  
  214. 			Console.Error.WriteLine( );
  215.  
  216. 			Console.Error.WriteLine( "Written by Rob van der Woude" );
  217.  
  218. 			Console.Error.WriteLine( "http://www.robvanderwoude.com" );
  219.  
  220. 			return 1;
  221. 		}
  222. 	}
  223. }
  224.  

page last modified: 2024-04-16; loaded in 0.0071 seconds