@ECHO OFF
:: Use OCR to extract the text of a *.pages document.
::
:: Usage:    this_script  file.pages  [ languagecode ]
::
:: Steps:    1. validate the command line
::           2. extract preview.jpg from the .pages file with 7-Zip
::           3. run Tesseract OCR on preview.jpg, writing a .txt file next to
::              the input file, with the same base name
::           4. delete preview.jpg and open the text in MS Word if available
::
:: Returns:  errorlevel 0 on success, 1 on any error or when help is shown
::
:: NOTE(review): in the copy this was restored from, several ECHO commands had
:: no visible argument, which would print "ECHO is off."; they are written as
:: ECHO. here. If the original argument was an invisible control character
:: such as a bell, restore it.

:: Keep the helper variables set below out of the caller's environment
SETLOCAL

:: Check command line
IF "%~1"=="" GOTO Syntax
IF NOT "%~3"=="" GOTO Syntax
ECHO.%* | FIND "?" >NUL && GOTO Syntax
IF /I NOT "%~x1"==".pages" GOTO Syntax
IF NOT EXIST "%~1" (
    ECHO File not found: "%~1"
    GOTO Syntax
)

:: Resolve the input and output paths BEFORE changing directory: the path
:: modifiers on argument 1 are expanded relative to the current directory,
:: so a relative file name would otherwise be looked up in the temp folder.
SET "InputFile=%~f1"
SET "OutputBase=%~dpn1"

:: Make %TEMP% the working directory
PUSHD "%TEMP%"

:: Check if files already exist
IF EXIST preview.jpg (
    ECHO File preview.jpg already exists.
    CHOICE.EXE /D N /T 10 /M "Do you want to delete it?"
    REM Errorlevel 1 means Yes, anything higher means No or failure
    IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
        ECHO.
        DEL preview.jpg
    ) ELSE (
        ECHO Please move or rename preview.jpg and try again.
        POPD
        EXIT /B 1
    )
)
IF EXIST "%OutputBase%.txt" (
    ECHO File "%~n1.txt" already exists.
    CHOICE.EXE /D N /T 10 /M "Do you want to delete it?"
    IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
        ECHO.
        DEL "%OutputBase%.txt"
    ) ELSE (
        ECHO Please move or rename "%~n1.txt" and try again.
        POPD
        EXIT /B 1
    )
)

:: Extract preview.jpg from .pages file
FOR /F "tokens=*" %%A IN ('DIR /AD /B "%ProgramFiles%\7*"') DO (
    FOR /F "tokens=*" %%B IN ('DIR /B /S "%ProgramFiles%\%%~A\7z.exe"') DO (
        "%%~B" e "%InputFile%" preview.jpg
    )
)
IF NOT EXIST preview.jpg (
    ECHO This batch file requires 7zip, available at
    ECHO https://7-zip.org/
    CHOICE /D N /T 10 /M "Do you want to download it?"
    IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
        START "" https://7-zip.org/
    )
    ECHO.
    POPD
    EXIT /B 1
)

:: Perform OCR on extracted preview.jpg and save it with same name as specified input file and .txt extension
FOR /F "tokens=*" %%A IN ('DIR /AD /B "%ProgramFiles%\tesseract*"') DO (
    REM Check if language code is specified, and if it is valid
    IF NOT "%~2"=="" (
        IF NOT EXIST "%ProgramFiles%\%%~A\tessdata\%~2.*data*" (
            ECHO Unsupported Tesseract language code: "%~2"
            FOR /F %%B IN ('DIR /B "%ProgramFiles%\%%~A\tessdata\???.*data*" ^| FIND.EXE /C "data"') DO (
                IF %%B GTR 1 (
                    ECHO Use one of the following language codes:
                    REM SET /P with input from NUL prints text without a line break
                    FOR %%C IN ("%ProgramFiles%\%%~A\tessdata\???.*data*") DO (
                        IF /I NOT "%%~nC"=="osd" (
                            SET /P "=%%~nC, " < NUL
                        )
                    )
                    SET /P "=or omit the language code to use the default (eng)" < NUL
                )
            )
            ECHO.
            REM Do not leave the temporary image behind when aborting
            IF EXIST preview.jpg DEL preview.jpg
            POPD
            EXIT /B 1
        )
    )
    FOR /F "tokens=*" %%B IN ('DIR /B /S "%ProgramFiles%\%%~A\tesseract.exe"') DO (
        IF "%~2"=="" (
            "%%~B" preview.jpg "%OutputBase%" -l eng
        ) ELSE (
            "%%~B" preview.jpg "%OutputBase%" -l %~2
        )
    )
)
IF NOT EXIST "%OutputBase%.txt" (
    ECHO This batch file requires Tesseract OCR, available at
    ECHO https://github.com/UB-Mannheim/tesseract/wiki
    CHOICE /D N /T 10 /M "Do you want to download it?"
    IF ERRORLEVEL 1 IF NOT ERRORLEVEL 2 (
        START "" https://github.com/UB-Mannheim/tesseract/wiki
    )
    ECHO.
    REM Do not leave the temporary image behind when aborting
    IF EXIST preview.jpg DEL preview.jpg
    POPD
    EXIT /B 1
)
ECHO Extracted text successfully saved as "%OutputBase%.txt"

:: Delete temporary file
DEL preview.jpg

:: Open extracted text in Word, if available
IF EXIST "%ProgramFiles%\Microsoft Office\" (
    FOR /F "tokens=*" %%A IN ('DIR /B /S "%ProgramFiles%\Microsoft Office\winword.exe"') DO (
        START "" "%%~A" /t "%OutputBase%.txt"
    )
)

:: Restore working directory
POPD

:: Done
EXIT /B 0


:Syntax
:: Show the help text and return errorlevel 1
ECHO.
ECHO %~nx0, Version 1.00
ECHO Use OCR to extract text from a *.pages document.
ECHO.
ECHO Usage: %~nx0 file.pages [ languagecode ]
ECHO.
ECHO Where: file.pages *.pages file from which text is to be extracted
ECHO languagecode optional Tesseract 3 letter language code (default: eng)
ECHO.
ECHO Notes: This program requires 7-zip as well as Tesseract OCR.
ECHO The extracted text will be saved as plain text in the .pages file's
ECHO parent folder, using the specified file's name, and .txt extension.
ECHO If the specified file name contains multiple dots, the output file
ECHO name will be truncated at the first dot. If the output file already
ECHO exists, you will be prompted to delete it or abort.
ECHO If an invalid language code is specified, the batch file will abort
ECHO after showing a list of available language codes.
ECHO A temporary file preview.jpg will be created. If it already exists,
ECHO you will be prompted to delete it or abort.
ECHO If MS Word is available, the extracted text will be opened in Word.
ECHO The batch file's return code ("Errorlevel") will equal 0 if the
ECHO specified file was successfully converted, otherwise it will equal 1.
ECHO.
ECHO Written by Rob van der Woude
ECHO https://www.robvanderwoude.com
EXIT /B 1