// Source code of sitemap.cs (plain-text view)
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text.RegularExpressions;
namespace RobvanderWoude
{
class SiteMap
{
// Program version, printed in the help banner by ShowHelp
static string progver = "1.03";
// PHP interpreter to launch in PHPRender; starts as the bare file name and is
// replaced by a full path in Main once PHP.EXE has been located (/P switch)
static string phpexe = "php.exe";
/// <summary>
/// Program entry point: parses the command line, lists the files in the working
/// directory that match the file filter, drops the files excluded by
/// "sitemap.exclude" and (unless /I is used) "robots.txt", determines a
/// last-modified date for every remaining file and writes "sitemap.xml" to the
/// working directory.
/// </summary>
/// <param name="args">Command line arguments; ShowHelp prints the accepted syntax.</param>
/// <returns>0 on success, or the ShowHelp return value when the command line is invalid or a required file is not found.</returns>
static int Main( string[] args )
{
    bool usefilefilter = false;
    bool usephp = false;
    bool userobots = true;
    bool userooturl = false;
    bool usewhatsnew = false;
    bool useworkingdir = false;
    bool verbose = true;
    string filefilter = "*.html *.php";
    string phpfilter = "*.php";
    string startdir = Directory.GetCurrentDirectory( ); // Program will return to this directory when done
    string workingdir = startdir; // Default working directory is the current directory
    string rooturl = String.Empty;
    string whatsnew = "whatsnew.*";

    if ( args.Length == 0 || args.Length > 7 )
    {
        return ShowHelp( );
    }

    foreach ( string arg in args )
    {
        if ( arg == "/?" || arg.Length < 2 )
        {
            return ShowHelp( );
        }
        // Switches are compared ordinally so that matching does not depend on the
        // current culture's casing rules (e.g. Turkish dotted/dotless i for /I)
        if ( arg.StartsWith( "/I", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( !userobots )
            {
                return ShowHelp( "Duplicate command line switch /I" );
            }
            userobots = false;
        }
        else if ( arg.StartsWith( "/P", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( usephp )
            {
                return ShowHelp( "Duplicate command line switch /P" );
            }
            usephp = true;
            if ( arg.Length > 3 && arg[2] == ':' )
            {
                phpfilter = arg.Substring( 3 );
            }
        }
        else if ( arg.Equals( "/Q", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( !verbose )
            {
                return ShowHelp( "Duplicate command line switch /Q" );
            }
            verbose = false;
        }
        else if ( arg.StartsWith( "/W", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( usewhatsnew )
            {
                return ShowHelp( "Duplicate command line switch /W" );
            }
            usewhatsnew = true;
            if ( arg.Length > 3 && arg[2] == ':' )
            {
                whatsnew = arg.Substring( 3 );
            }
            // The file's existence is checked after the loop, because the working
            // directory may be specified later on the command line
        }
        else if ( arg[0] == '*' )
        {
            if ( usefilefilter )
            {
                return ShowHelp( "Duplicate file filters: \"{0}\" and \"{1}\"", filefilter, arg );
            }
            filefilter = arg;
            usefilefilter = true;
        }
        else if ( Directory.Exists( arg ) )
        {
            if ( useworkingdir )
            {
                return ShowHelp( "Duplicate working directories: \"{0}\" and \"{1}\"", workingdir, arg );
            }
            // Store an absolute path: the current directory is changed further down,
            // which would make a relative path point to the wrong location
            workingdir = Path.GetFullPath( arg );
            useworkingdir = true;
        }
        else if ( arg.StartsWith( "http://", StringComparison.OrdinalIgnoreCase ) || arg.StartsWith( "https://", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( userooturl )
            {
                return ShowHelp( "Duplicate domain prefixes: \"{0}\" and \"{1}\"", rooturl, arg );
            }
            rooturl = arg;
            userooturl = true;
        }
        else if ( arg.Contains( ":\\" ) )
        {
            return ShowHelp( "Invalid working directory: \"{0}\"", arg );
        }
        else
        {
            return ShowHelp( "Invalid command line argument: \"{0}\"", arg );
        }
    }

    // Domain prefix is a mandatory command line argument
    if ( String.IsNullOrEmpty( rooturl ) )
    {
        return ShowHelp( "Please specify a domain prefix" );
    }

    // Now that the working directory is final, check the "What's new" file
    if ( usewhatsnew )
    {
        if ( Directory.GetFiles( workingdir, whatsnew ).Length == 0 )
        {
            return ShowHelp( "WhatsNew file not found: \"{0}\"", whatsnew );
        }
        // Only the name without extension is compared against the listed files
        whatsnew = Path.GetFileNameWithoutExtension( whatsnew );
    }

    // Go to the specified working directory (required for PHP includes)
    Directory.SetCurrentDirectory( workingdir );
    try
    {
        string excludefile = Path.Combine( workingdir, "sitemap.exclude" );
        string robotsfile = Path.Combine( workingdir, "robots.txt" );
        string sitemapfile = Path.Combine( workingdir, "sitemap.xml" );

        // Find the location of PHP.EXE in case /P switch is used
        if ( usephp )
        {
            string found = FindPhpExe( workingdir );
            if ( found == null )
            {
                return ShowHelp( "PHP.EXE not found in %PATH%" );
            }
            phpexe = found;
        }

        // List all files matching filespec
        List<string> matchingfiles = GetMatchingFileNames( workingdir, filefilter );
        if ( matchingfiles.Count == 0 )
        {
            return ShowHelp( "No matching files found for \"{0}\"", filefilter );
        }

        // List the files whose content must be rendered by PHP
        HashSet<string> phpfiles = new HashSet<string>( );
        if ( usephp )
        {
            phpfiles.UnionWith( GetMatchingFileNames( workingdir, phpfilter ) );
        }

        // List all files to be excluded
        HashSet<string> excludedfiles = new HashSet<string>( );
        if ( File.Exists( excludefile ) )
        {
            foreach ( string line in File.ReadLines( excludefile ) )
            {
                string pattern = line.Trim( );
                // Skip blank lines: an empty search pattern is not a meaningful exclusion
                if ( pattern.Length == 0 )
                {
                    continue;
                }
                foreach ( string file in Directory.GetFiles( workingdir, pattern ) )
                {
                    excludedfiles.Add( Path.GetFileName( file ) );
                }
            }
        }
        if ( userobots && File.Exists( robotsfile ) )
        {
            Regex regex = new Regex( @"^\s*Disallow\s*:\s*/([^\n\r]+[^\n\r/])$", RegexOptions.IgnoreCase );
            foreach ( string line in File.ReadLines( robotsfile ) )
            {
                Match match = regex.Match( line );
                if ( !match.Success )
                {
                    continue;
                }
                string filespec = match.Groups[1].Value.Replace( '/', '\\' );
                // Disallowed directories are ignored, only files in the working directory are listed
                if ( Directory.Exists( Path.Combine( workingdir, filespec ) ) )
                {
                    continue;
                }
                try
                {
                    foreach ( string file in Directory.GetFiles( workingdir, filespec ) )
                    {
                        excludedfiles.Add( Path.GetFileName( file ) );
                    }
                }
                catch ( Exception )
                {
                    // Deliberately best-effort: robots.txt rules that cannot be used as a
                    // file search pattern (e.g. URL wildcards or query strings) are skipped
                }
            }
        }

        // Determine lastmod for each file that is not excluded
        Dictionary<string, string> allfiles = new Dictionary<string, string>( );
        foreach ( string filename in matchingfiles )
        {
            if ( excludedfiles.Contains( filename ) )
            {
                continue;
            }
            string fullpath = Path.Combine( workingdir, filename );
            if ( usephp && phpfiles.Contains( filename ) )
            {
                // Use PHP to generate content, then extract lastmod from generated content
                allfiles[filename] = PHPRender( fullpath );
            }
            else
            {
                // Use the file's last modified date
                DateTime modified = File.GetLastWriteTime( fullpath );
                allfiles[filename] = String.Format( "{0:0000}-{1:00}-{2:00}", modified.Year, modified.Month, modified.Day );
            }
        }

        // whatsnew.* gets the timestamp of the last modified file
        string latest = String.Empty;
        if ( usewhatsnew && allfiles.Count > 0 )
        {
            latest = allfiles.Values.Max<string>( );
        }

        // Quick and dirty: write list to XML
        System.Text.StringBuilder xml = new System.Text.StringBuilder( );
        xml.Append( "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" );
        foreach ( KeyValuePair<string, string> entry in allfiles )
        {
            string file = entry.Key;
            string filename = file;
            string lastmod = entry.Value;
            if ( usewhatsnew && String.Equals( Path.GetFileNameWithoutExtension( file ), whatsnew, StringComparison.OrdinalIgnoreCase ) )
            {
                lastmod = latest;
            }
            if ( verbose )
            {
                Console.WriteLine( "{0}\t{1}", lastmod, file );
            }
            // The index page is listed as the bare domain prefix
            if ( Path.GetFileNameWithoutExtension( file ) == "index" )
            {
                filename = String.Empty;
            }
            // Entity-escape the URL parts so characters like '&' cannot break the XML
            xml.AppendFormat( " <url>\n <loc>{0}{1}</loc>\n <lastmod>{2}</lastmod>\n </url>\n",
                System.Security.SecurityElement.Escape( rooturl ),
                System.Security.SecurityElement.Escape( filename ),
                lastmod );
        }
        xml.Append( "</urlset>" );

        // Write XML to sitemap file
        File.WriteAllText( sitemapfile, xml.ToString( ) );

        if ( verbose )
        {
            Console.WriteLine( "\nHandled {0} files", allfiles.Count );
        }
        return 0;
    }
    finally
    {
        // Go back to the original starting directory, also on early exit or error
        Directory.SetCurrentDirectory( startdir );
    }
}

// Returns the full path of PHP.EXE, looked up in the working directory first and
// in the folders of the PATH environment variable next; null if it is not found
static string FindPhpExe( string workingdir )
{
    string candidate = Path.Combine( workingdir, "php.exe" );
    if ( File.Exists( candidate ) )
    {
        return candidate;
    }
    string pathvar = Environment.GetEnvironmentVariable( "PATH" ) ?? String.Empty;
    foreach ( string entry in pathvar.Split( new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries ) )
    {
        // PATH entries may be quoted or padded with whitespace
        string folder = entry.Trim( ).Trim( '"' );
        if ( folder.Length == 0 )
        {
            continue;
        }
        try
        {
            candidate = Path.Combine( folder, "php.exe" );
            if ( File.Exists( candidate ) )
            {
                return candidate;
            }
        }
        catch ( ArgumentException )
        {
            // PATH entry contains characters that are not valid in a path: skip it
        }
    }
    return null;
}

// Returns the names (without path) of the files in folder that match at least one
// of the space-separated wildcard patterns in filter; Directory.GetFiles accepts
// only a single pattern per call, so each pattern is searched separately
static List<string> GetMatchingFileNames( string folder, string filter )
{
    List<string> names = new List<string>( );
    HashSet<string> seen = new HashSet<string>( );
    foreach ( string pattern in filter.Split( new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries ) )
    {
        foreach ( string file in Directory.GetFiles( folder, pattern ) )
        {
            string name = Path.GetFileName( file );
            // A file matching several patterns is listed only once
            if ( seen.Add( name ) )
            {
                names.Add( name );
            }
        }
    }
    return names;
}
// Candidate dates in yyyy-mm-dd notation (ASCII digits only) that are not part of
// a longer run of digits; calendar validity is checked separately in PHPRender
static readonly Regex isodatepattern = new Regex( @"(?<![0-9])[12][0-9]{3}-[01][0-9]-[0-3][0-9](?![0-9])" );

/// <summary>
/// Runs the PHP interpreter (phpexe) on the specified file and returns the latest
/// valid yyyy-mm-dd date found in the generated output, or the file's own last
/// write date if that is later or if the output contains no valid date.
/// </summary>
/// <param name="file">Path of the PHP file to render.</param>
/// <returns>A date string in yyyy-mm-dd format.</returns>
static string PHPRender( string file )
{
    DateTime filetime = File.GetLastWriteTime( file );
    string lastmod = String.Format( "{0:0000}-{1:00}-{2:00}", filetime.Year, filetime.Month, filetime.Day );
    string phptext = String.Empty;

    // Use PHP to render content
    ProcessStartInfo phpproc = new ProcessStartInfo( );
    phpproc.UseShellExecute = false;
    phpproc.CreateNoWindow = true;
    phpproc.RedirectStandardOutput = true;
    phpproc.FileName = phpexe;
    phpproc.Arguments = "-f \"" + file + "\"";
    using ( Process process = Process.Start( phpproc ) )
    {
        // Read the output before waiting, otherwise a full output buffer could block PHP
        using ( StreamReader reader = process.StandardOutput )
        {
            phptext = reader.ReadToEnd( );
        }
        process.WaitForExit( );
    }

    // Extract last modified date from rendered content
    foreach ( Match match in isodatepattern.Matches( phptext ) )
    {
        string candidate = match.Value;
        DateTime parsed;
        // Skip text that merely looks like a date (e.g. month 19 or day 39), it would
        // otherwise end up as an invalid <lastmod> value
        if ( !DateTime.TryParseExact( candidate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out parsed ) )
        {
            continue;
        }
        // Fixed-width ISO dates sort chronologically when compared ordinally
        if ( String.CompareOrdinal( candidate, lastmod ) > 0 )
        {
            lastmod = candidate;
        }
    }
    return lastmod;
}
/// <summary>
/// Writes an optional error message followed by the program's help text to the
/// standard error stream; keywords are highlighted in white.
/// </summary>
/// <param name="errmsg">
/// Optional: the first element is a composite format string for the error
/// message, any further elements are its format arguments.
/// </param>
/// <returns>Always 1, which the callers in Main use as the program's return code.</returns>
static int ShowHelp( params string[] errmsg )
{
    if ( errmsg.Length > 0 )
    {
        // Everything after the first element is a format argument for the message
        object[] errargs = new object[errmsg.Length - 1];
        Array.Copy( errmsg, 1, errargs, 0, errargs.Length );
        Console.Error.WriteLine( );
        Console.ForegroundColor = ConsoleColor.Red;
        Console.Error.Write( "ERROR:\t" );
        Console.ForegroundColor = ConsoleColor.White;
        Console.Error.WriteLine( errmsg[0], errargs );
        Console.ResetColor( );
    }

    Console.Error.WriteLine( );
    Console.Error.WriteLine( "SiteMap, Version {0}", progver );
    Console.Error.WriteLine( "Create a Google sitemap for your website source directory" );
    Console.Error.WriteLine( );

    Console.Error.Write( "Usage: " );
    WriteHighlighted( "SITEMAP.EXE domain [ workingdir ] [ filespec ] [ options ]" );
    Console.Error.WriteLine( );
    Console.Error.WriteLine( );

    Console.Error.Write( "Where: " );
    WriteHighlighted( "\"domain\"" );
    Console.Error.WriteLine( " the domain prefix to be added, including protocol and" );
    Console.Error.WriteLine( " trailing forward slash, e.g. \"http://www.example.com/\"" );
    WriteHighlighted( " \"workingdir\"" );
    Console.Error.WriteLine( " the source files' location (default: current directory)" );
    WriteHighlighted( " \"filespec\"" );
    Console.Error.WriteLine( " the source file filter (default: \"*.html *.php\")" );

    Console.Error.Write( "Options: " );
    WriteHighlighted( "/I I" );
    Console.Error.WriteLine( "gnore \"robots.txt\" (see Notes below)" );
    // Highlighted like the other option labels
    WriteHighlighted( " /P[:filter]" );
    Console.Error.Write( " use " );
    WriteHighlighted( "P" );
    Console.Error.WriteLine( "HP to generate file content for files matching " );
    WriteHighlighted( " \"filter\"" );
    Console.Error.WriteLine( ", then search the generated content for the" );
    Console.Error.WriteLine( " latest date in yyyy-mm-dd format" );
    WriteHighlighted( " /Q Q" );
    Console.Error.WriteLine( "uiet mode: do not display matching file names" );
    WriteHighlighted( " /W[:file]" );
    Console.Error.Write( " specify a \"" );
    WriteHighlighted( "W" );
    Console.Error.Write( "hat's new\" " );
    WriteHighlighted( "file" );
    Console.Error.WriteLine( " which will be listed" );
    Console.Error.WriteLine( " with the timestamp of the last modified file" );
    Console.Error.Write( " (default " );
    WriteHighlighted( "file" );
    Console.Error.WriteLine( " name: \"whatsnew.*\")" );
    Console.Error.WriteLine( );

    Console.Error.Write( "Notes: To use the " );
    WriteHighlighted( "/P" );
    Console.Error.WriteLine( " switch, PHP.EXE must be found in the PATH." );
    Console.Error.Write( " If no " );
    WriteHighlighted( "\"filter\"" );
    Console.Error.Write( " is specified with the " );
    WriteHighlighted( "/P" );
    Console.Error.WriteLine( " switch, the *.php part of " );
    WriteHighlighted( " \"filespec\"" );
    Console.Error.Write( " will be used (if " );
    WriteHighlighted( "\"filespec\"" );
    Console.Error.WriteLine( " isn't specified either," );
    Console.Error.WriteLine( " its default value \"*.php\" is used)." );
    Console.Error.WriteLine( " The program looks for a list of excluded files in an optional" );
    Console.Error.WriteLine( " file named \"sitemap.exclude\", and for \"disallowed\" files in" );
    Console.Error.WriteLine( " \"robots.txt\", both located in the working directory. Use /I" );
    Console.Error.WriteLine( " to completely ignore \"robots.txt\"." );
    Console.Error.WriteLine( );

    Console.Error.WriteLine( "Written by Rob van der Woude" );
    Console.Error.WriteLine( "http://www.robvanderwoude.com" );
    return 1;
}

// Writes text to the standard error stream in white, then restores the console's default colors
static void WriteHighlighted( string text )
{
    Console.ForegroundColor = ConsoleColor.White;
    Console.Error.Write( text );
    Console.ResetColor( );
}
}
}
// Page last modified: 2024-04-16