// (view source code of splittextfile.cs as plain text)
# define DEBUG
#undef DEBUG
using System;
using System.IO;
using System.Text;
namespace RobvanderWoude
{
class SplitTextFile
{
/// <summary>
/// Program entry point: splits a text file into numbered chunk files
/// (name.1.ext, name.2.ext, ...) written to the current directory.
/// </summary>
/// <param name="args">
/// Command line arguments: the file to split, the chunk size (a number, optionally
/// followed by KB or MB), then any of /BREAK, /COUNT:nnn and /ENC:encoding.
/// </param>
/// <returns>0 on success; otherwise the value returned by WriteError.</returns>
static int Main( string[] args )
{
    #region Command Line Parsing

    bool linebreak = false;
    int maxfiles = 0;
    Encoding enc = null;

    if ( args.Length < 2 || args[0] == "/?" )
    {
        return WriteError( );
    }
    if ( !File.Exists( args[0] ) )
    {
        return WriteError( "File not found" );
    }

    string bigfile = args[0];
    int chunksize;

    for ( int i = 2; i < args.Length; i++ )
    {
        string option = args[i];
        // StartsWith/Equals instead of Substring: options shorter than the
        // prefix being tested must be rejected, not crash the program.
        if ( option.Equals( "/BREAK", StringComparison.OrdinalIgnoreCase ) )
        {
            linebreak = true;
        }
        else if ( option.StartsWith( "/COUNT:", StringComparison.OrdinalIgnoreCase ) )
        {
            // 0 keeps its meaning of "no limit"; negative counts make no sense
            if ( !int.TryParse( option.Substring( 7 ), out maxfiles ) || maxfiles < 0 )
            {
                return WriteError( "Invalid command line argument(s)" );
            }
        }
        else if ( option.StartsWith( "/ENC:", StringComparison.OrdinalIgnoreCase ) )
        {
            if ( !TryParseEncodingName( option.Substring( 5 ), out enc ) )
            {
                return WriteError( "Invalid encoding" );
            }
        }
        else
        {
            return WriteError( "Invalid command line argument(s)" );
        }
    }

    if ( !TryParseChunkSize( args[1], out chunksize ) )
    {
        return WriteError( "Invalid chunk size" );
    }

    // Try to get proper encoding of bigfile
    if ( enc == null )
    {
        try
        {
            enc = GetEncoding( bigfile );
        }
        catch ( Exception e )
        {
            // e.g. the file is locked or access is denied
            return WriteError( e.Message );
        }
    }

#if DEBUG
    Console.WriteLine( );
    Console.WriteLine( "File name : {0}", bigfile );
    Console.WriteLine( "Chunk size : {0} ({1} Bytes)", args[1], chunksize );
    Console.WriteLine( "Break at line end : {0}", linebreak );
    Console.WriteLine( "File encoding : {0}", enc.BodyName );
    Console.WriteLine( "Maximum # chunks : {0}", maxfiles );
    Console.ReadKey( );
#endif

    #endregion Command Line Parsing

    try
    {
        using ( FileStream fsi = File.Open( bigfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite ) )
        using ( BufferedStream bsi = new BufferedStream( fsi ) )
        using ( StreamReader sri = new StreamReader( bsi, enc ) )
        {
            // The buffer holds decoded characters, so the chunk size is counted in characters
            char[] buffer = new char[chunksize];
            string chunkname = Path.Combine( Directory.GetCurrentDirectory( ), Path.GetFileNameWithoutExtension( bigfile ) );
            string chunkext = Path.GetExtension( bigfile );
            int count = 0;

            // Stop as soon as the requested number of chunks has been written,
            // instead of reading the remainder of the file for nothing.
            while ( maxfiles == 0 || count < maxfiles )
            {
                // ReadBlock keeps reading until the buffer is full or the end of the
                // file is reached; its return value is the number of valid characters.
                int charsread = sri.ReadBlock( buffer, 0, chunksize );
                if ( charsread <= 0 )
                {
                    break;
                }

                count += 1;
                string chunkout = chunkname + "." + count + chunkext;
                using ( FileStream fso = File.Open( chunkout, FileMode.Create, FileAccess.ReadWrite, FileShare.Read ) )
                using ( BufferedStream bso = new BufferedStream( fso ) )
                using ( StreamWriter swo = new StreamWriter( bso, enc ) )
                {
                    // Write only what was actually read, never stale buffer content
                    swo.Write( buffer, 0, charsread );
                    if ( linebreak )
                    {
                        // Extend the chunk up to the next line end. ReadLine strips the
                        // line terminator, WriteLine appends the platform default one.
                        string remainder = sri.ReadLine( );
                        if ( remainder != null )
                        {
                            swo.WriteLine( remainder );
                        }
                    }
                }
            }
        }
        return 0;
    }
    catch ( Exception e )
    {
        return WriteError( e.Message );
    }
}

/// <summary>
/// Maps an encoding name from the /ENC: switch to an Encoding (case-insensitive).
/// </summary>
/// <param name="name">The text following "/ENC:".</param>
/// <param name="enc">The matching encoding, or null if the name is not recognized.</param>
/// <returns>true if the name was recognized, otherwise false.</returns>
private static bool TryParseEncodingName( string name, out Encoding enc )
{
    // Invariant upper-casing keeps the match independent of the current culture
    switch ( name.ToUpperInvariant( ) )
    {
        case "ANSI":
        case "ASCII":
            enc = Encoding.ASCII;
            return true;
        case "UNICODE":
            enc = Encoding.Unicode;
            return true;
        case "UNICODEBE":
        case "UNICODE-BE":
            enc = Encoding.BigEndianUnicode;
            return true;
        case "UTF7":
        case "UTF-7":
            enc = Encoding.UTF7;
            return true;
        case "UTF8":
        case "UTF-8":
            enc = Encoding.UTF8;
            return true;
        case "UTF32":
        case "UTF-32":
            enc = Encoding.UTF32;
            return true;
        default:
            enc = null;
            return false;
    }
}

/// <summary>
/// Parses a chunk size such as "2048", "2KB" or "64MB" (case-insensitive suffix).
/// </summary>
/// <param name="text">The chunk size as typed on the command line.</param>
/// <param name="chunksize">The resulting size, or 0 if parsing failed.</param>
/// <returns>true if the text is a positive size that fits in an int, otherwise false.</returns>
private static bool TryParseChunkSize( string text, out int chunksize )
{
    chunksize = 0;
    string number = text.ToUpperInvariant( );
    long multiplier = 1;
    if ( number.EndsWith( "KB", StringComparison.Ordinal ) )
    {
        multiplier = 1024;
        number = number.Substring( 0, number.Length - 2 );
    }
    else if ( number.EndsWith( "MB", StringComparison.Ordinal ) )
    {
        multiplier = 1024 * 1024;
        number = number.Substring( 0, number.Length - 2 );
    }

    long parsed;
    if ( !long.TryParse( number, out parsed ) )
    {
        return false;
    }
    // Reject zero/negative sizes and anything that would overflow an int after scaling
    if ( parsed <= 0 || parsed > int.MaxValue / multiplier )
    {
        return false;
    }
    chunksize = (int) ( parsed * multiplier );
    return true;
}
/// <summary>
/// Determines a text file's encoding by analyzing its byte order mark (BOM).
/// Falls back to <see cref="Encoding.Default"/> when no known BOM is found.
/// </summary>
/// <param name="filename">The text file to analyze.</param>
/// <returns>The detected encoding.</returns>
public static Encoding GetEncoding( string filename )
{
    // Based on code found on http://stackoverflow.com/a/19283954
    // Read the BOM; the file is only read, so do not request write access
    // (which would fail on read-only files).
    byte[] bom = new byte[4];
    int read = 0;
    using ( FileStream file = new FileStream( filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite ) )
    {
        int n;
        while ( read < bom.Length && ( n = file.Read( bom, read, bom.Length - read ) ) > 0 )
        {
            read += n;
        }
    }

    // Analyze the BOM; "read" guards against mistaking the zero padding of a
    // short file for real 0x00 bytes.
    if ( read >= 3 && bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 )
    {
        return Encoding.UTF7;
    }
    if ( read >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf )
    {
        return Encoding.UTF8;
    }
    // UTF-32LE (FF FE 00 00) must be tested before UTF-16LE (FF FE),
    // because the UTF-16LE BOM is a prefix of the UTF-32LE BOM.
    if ( read >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0 )
    {
        return Encoding.UTF32; // UTF-32LE
    }
    if ( read >= 2 && bom[0] == 0xff && bom[1] == 0xfe )
    {
        return Encoding.Unicode; // UTF-16LE
    }
    if ( read >= 2 && bom[0] == 0xfe && bom[1] == 0xff )
    {
        return Encoding.BigEndianUnicode; // UTF-16BE
    }
    if ( read >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff )
    {
        // 00 00 FE FF is the big-endian BOM; Encoding.UTF32 is little-endian
        return new UTF32Encoding( true, true ); // UTF-32BE
    }
    return Encoding.Default;
}
#region Error Handling
/// <summary>
/// Reports an exception (if any) and shows the help text.
/// Forwards the exception's message, or null when no exception is given,
/// to the string overload of WriteError.
/// </summary>
/// <param name="e">The exception to report; may be omitted or null.</param>
/// <returns>The value returned by the string overload of WriteError.</returns>
public static int WriteError( Exception e = null )
{
    string message = null;
    if ( e != null )
    {
        message = e.Message;
    }
    return WriteError( message );
}
/// <summary>
/// Writes an optional error message followed by the program's help text
/// to the standard error stream.
/// </summary>
/// <param name="errorMessage">The error to report; null or empty shows the help text only.</param>
/// <returns>Always 1, for use as the program's exit code.</returns>
public static int WriteError( string errorMessage )
{
    if ( !string.IsNullOrEmpty( errorMessage ) )
    {
        Console.Error.WriteLine( );
        Console.ForegroundColor = ConsoleColor.Red;
        Console.Error.Write( "ERROR: " );
        Console.ForegroundColor = ConsoleColor.White;
        Console.Error.WriteLine( errorMessage );
        Console.ResetColor( );
    }

    /*
    SplitTextFile, Version 0.50 beta
    Split really big files in manageable chunks
    Usage: SPLITTEXTFILE bigfilename chunksize [ options ]
    Where: bigfilename is the file to be split up
    chunksize is the size of the split off chunks
    (e.g. 2048 or 2KB or 64MB)
    Options: /BREAK split at line break (slightly increases chunk size)
    /COUNT:nnn limit chunk count to first nnn files
    /ENC:encoding force encoding (ASCII, UTF-7, UTF-8, UTF-32,
    Unicode or UnicodeBE)
    Note: Output chunks will be located in the current directory and have the
    same name and extension as the (big) input file, with an added index
    number between the file name and extension (e.g. bigfilename.1.txt).
    Written by Rob van der Woude
    http://www.robvanderwoude.com
    */

    // The first command line argument is the program as it was started, which may
    // lack an extension or a directory part; Path copes with all of these, whereas
    // Substring( 0, IndexOf( '.' ) ) throws when there is no dot in the name.
    string exeName = Path.GetFileNameWithoutExtension( Environment.GetCommandLineArgs( )[0] );

    Console.Error.WriteLine( );
    Console.Error.WriteLine( "{0}, Version 0.50 beta", exeName );
    Console.Error.WriteLine( "Split really big files in manageable chunks" );
    Console.Error.WriteLine( );

    Console.Error.Write( "Usage: " );
    WriteHighlighted( string.Format( "{0} bigfilename chunksize [ options ]", exeName.ToUpper( ) ) );
    Console.Error.WriteLine( );
    Console.Error.WriteLine( );

    Console.Error.Write( "Where: " );
    WriteHighlighted( "bigfilename" );
    Console.Error.WriteLine( " is the file to be split up" );
    WriteHighlighted( " chunksize" );
    Console.Error.WriteLine( " is the size of the split off chunks" );
    Console.Error.WriteLine( " (e.g. 2048 or 2KB or 64MB)" );
    Console.Error.WriteLine( );

    Console.Error.Write( "Options: " );
    WriteHighlighted( "/BREAK" );
    Console.Error.Write( " split at line " );
    WriteHighlighted( "break" );
    Console.Error.WriteLine( " (slightly increases chunk size)" );

    WriteHighlighted( " /COUNT:nnn" );
    Console.Error.Write( " limit chunk " );
    WriteHighlighted( "count" );
    Console.Error.Write( " to first " );
    WriteHighlighted( "nnn" );
    Console.Error.WriteLine( " files" );

    WriteHighlighted( " /ENC:encoding" );
    Console.Error.Write( " force " );
    WriteHighlighted( "encoding" );
    Console.Error.WriteLine( " (ASCII, UTF-7, UTF-8, UTF-32," );
    Console.Error.WriteLine( " Unicode or UnicodeBE)" );
    Console.Error.WriteLine( );

    Console.Error.WriteLine( "Note: Output chunks will be located in the current directory and have the" );
    Console.Error.WriteLine( " same name and extension as the (big) input file, with an added index" );
    Console.Error.WriteLine( " number between the file name and extension (e.g. bigfilename.1.txt)." );
    Console.Error.WriteLine( );

    Console.Error.WriteLine( "Written by Rob van der Woude" );
    Console.Error.Write( "http://www.robvanderwoude.com" );
    return 1;
}

/// <summary>
/// Writes text to the standard error stream in white, then restores the console colors.
/// </summary>
/// <param name="text">The text to emphasize.</param>
private static void WriteHighlighted( string text )
{
    Console.ForegroundColor = ConsoleColor.White;
    Console.Error.Write( text );
    Console.ResetColor( );
}
#endregion Error Handling
}
}
// page last modified: 2024-04-16; loaded in 0.0116 seconds