diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..333276e --- /dev/null +++ b/.gitignore @@ -0,0 +1,176 @@ +# reference, old shit +1_reference/ + +# macOS Junks +.DS_Store + +# VSCode Junks +.vscode +.vscode/* + +# Microsoft Office Junks +~$*.* + +# C/C++ Junks +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Python Junks +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +share/ +bin/ +bin/* +include/ +include/* + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# install all needed dependencies. +Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/README.md b/README.md index d24214d..a9841b4 100644 --- a/README.md +++ b/README.md @@ -44,13 +44,14 @@ PDFMtEd Editor is an easy-to-use graphical metadata editor that supports viewing **Screenshot** -![screenshot of PDFMtEd Editor](https://github.com/Glutanimate/PDFMtEd/blob/master/screenshots/pdfmted-editor.png) +![screenshot of PDFMtEd Editor](https://github.com/uriel1998/PDFMtEd/blob/7f96c7170e6fc66992635cb1bd94ba824ffa790d/screenshots/pdfmted-editor.png) **Features:** - easily process multiple files and folders right from your file manager -- preview your documents in your default PDF viewer and file manager before editing them +- preview your documents in your default PDF viewer before editing them - rename files based on metadata +- perform OCR on files by calling ocrmypdf ### PDFMtEd Inspector @@ -88,19 +89,22 @@ The following instructions are provided for most Linux distributions. 
*Optional but recommended*: [sejda-console](https://github.com/torakiki/sejda/releases) -#### Ubuntu 16.04 or lower +#### Ubuntu -Install all dependencies on Ubuntu with the following commands: - - sudo add-apt-repository ppa:webupd8team/y-ppa-manager - sudo apt-get update - sudo apt-get install yad libimage-exiftool-perl qpdf +Install all required dependencies on Ubuntu with the following commands: +``` +sudo add-apt-repository ppa:webupd8team/y-ppa-manager +sudo apt-get update +sudo apt-get install yad libimage-exiftool-perl qpdf +``` + #### Most other Linux distribution based on Debian or Ubuntu sudo apt install libimage-exiftool-perl yad qpdf +If you do not have Gnome installed, you may need `gir1.2-gnomedesktop-3.0` and `gir1.2-gnomedesktop-4.0` for thumbnail generation. + #### Fedora and most other Linux distribution based on Fedora sudo dnf install perl-Image-ExifTool yad qpdf @@ -120,7 +124,17 @@ Install all dependencies on Ubuntu with the following commands: **Recommended:** -- [Sejda](http://www.sejda.org/): extensible and configurable PDF manipulation layer library; used to restore broken PDF files; more reliable than qpdf in this, but unfortunately not available in any repository; please use the [debfile release](ttps://github.com/torakiki/sejda/releases) to install `sejda-console` +- [Sejda](http://www.sejda.org/): extensible and configurable PDF manipulation layer library; used to restore broken PDF files; more reliable than qpdf in this, but unfortunately not available in any repository; please use the [debfile release](https://github.com/torakiki/sejda/releases) to install `sejda-console` + +These three are available in Debian, Ubuntu, and distros based on them with + + sudo apt install ocrmypdf poppler-utils imagemagick + +- [ocrmypdf](https://github.com/ocrmypdf/OCRmyPDF): Adds OCR text layer to scanned PDF files; used to OCR +- [poppler-utils](https://poppler.freedesktop.org/): Precompiled command-line utilities (based on Poppler) for manipulating 
PDF files and converting them to other formats. Used for detecting text layer in PDFs and generating larger thumbnail. +- [imagemagick](https://imagemagick.org/index.php): Image manipulation, used to resize and convert larger thumbnail. + + ### Installation @@ -128,17 +142,13 @@ Install all dependencies on Ubuntu with the following commands: 2. Clone this repository or download the latest zipfile and extract it -3. `cd` to the cloned/extracted directory, e.g.: +3. `cd` to the cloned/extracted directory, e.g.: `cd PDFMtEd` - cd PDFMtEd - -4. Run the provided installer: - - sudo ./install.sh +4. Run the provided installer: `sudo ./install.sh` (If your file manager supports custom scripts you can just copy the `PDFMtEd` folder to the default scripts path instead ([instructions for Nautilus](http://askubuntu.com/a/236415))) -After the installation PDFMtEd Editor and Inspector should be available as entries in the *Open with* context menu: + After the installation PDFMtEd Editor and Inspector should be available as entries in the *Open with* context menu: ![file manager context menu with PDFMtEd entries](https://github.com/Glutanimate/PDFMtEd/blob/master/screenshots/pdfmted_usage.png) @@ -148,21 +158,26 @@ If you can't find the entries click on *Other application* (arrow) and navigate Follow these steps to uninstall the PDFMted utilities: -1. `cd` to the project directory, e.g.: +1. `cd` to the project directory, e.g.: `cd PDFMtEd` - cd PDFMtEd +2. Run `uninstall.sh` as root: `sudo ./uninstall.sh` -2. 
# Temporary image that holds the enlarged PDF preview. It is declared at top
# level so the end-of-script cleanup can remove it. - Steven Saus
TMPIMG1="${TMPDIR}/lg_preview.png"

# Optional features start out enabled and are switched off below when one of
# their helper programs is missing. They are deliberately not part of
# checkdeps: they are enhancements, not hard dependencies.
LARGEPREVIEW=TRUE
OCRPOSSIBLE=TRUE

# Succeeds when "$1" resolves to a runnable command.
# `command -v` replaces the former `[ ! -f $(which tool) ]` probe: with a
# missing tool the substitution is empty, the test degenerates to `[ ! -f ]`
# (which is false), and the feature was therefore never disabled.
_pdfmted_have_cmd() {
  command -v "$1" >/dev/null 2>&1
}

# Larger previews need pdftocairo (poppler-utils) and convert (imagemagick).
if ! _pdfmted_have_cmd pdftocairo || ! _pdfmted_have_cmd convert; then
  LARGEPREVIEW=FALSE
fi

# Detecting an existing text layer needs pdffonts (poppler-utils).
if ! _pdfmted_have_cmd pdffonts; then
  OCRPOSSIBLE=FALSE
fi
# Print the path of one cached thumbnail for the file "$1"; print nothing
# when no thumbnail is cached.
#
# The cache file name is the md5 of the file's "file://" URI, as described in
# https://askubuntu.com/questions/1084640/where-are-the-thumbnails-of-a-new-image-located
# Both ~/.thumbnails and ~/.cache/thumbnails are searched. An existing but
# empty directory no longer hides a hit in the other one, and only a single
# path is printed even when several sizes are cached.
#
# Arguments: $1 - path of the file whose thumbnail is wanted
# Outputs:   thumbnail path on stdout (empty when nothing was found)
find_thumbnail(){
  local full_path md5name hash dir size hit
  full_path="file://$(realpath -s "$1")"
  # NOTE(review): only blanks are percent-encoded here, so names containing
  # other reserved characters will presumably hash differently from the
  # thumbnailer's URI and simply not be found.
  md5name=$(printf %s "${full_path// /%20}" | md5sum)
  hash="${md5name%% *}"

  for dir in "$HOME/.thumbnails" "$HOME/.cache/thumbnails"; do
    [[ -d "$dir" ]] || continue
    # Prefer the bigger rendition when several sizes are cached.
    for size in large normal; do
      if [[ -f "$dir/$size/$hash.png" ]]; then
        printf '%s\n' "$dir/$size/$hash.png"
        return 0
      fi
    done
    # Any other sub-directory: take the first match only.
    hit="$(find "$dir" -type f -name "$hash.png" -print -quit)"
    if [[ -n "$hit" ]]; then
      printf '%s\n' "$hit"
      return 0
    fi
  done
  return 0
}

# assign thumbnail
#
# Chooses the image shown for "$FILE" in the dialog and in notifications.
# Globals read:    FILE, ICON, LARGEPREVIEW, TMPDIR, TMPIMG1
# Globals written: PREVIEWIMG, IMAGE, NOTIFY_ICON
gui_thumbnail_assign(){
  local mimetype raw_png
  mimetype="$(file -ib "$FILE" | cut -d';' -f1)"

  # Replaced finding thumbnail image with a bash-specific one because the
  # python version was throwing an error - Steven Saus
  PREVIEWIMG="$(find_thumbnail "$FILE")"

  if [[ -z "$PREVIEWIMG" ]]; then
    # Last resort: ask GIO for the thumbnail path. The previous snippet used
    # Python 2 syntax and the Python 2-only "gio" module, so it could never
    # run under python3. This assumes PyGObject is installed; when it is not,
    # the error is ignored and the icon fallback below applies.
    PREVIEWIMG="$(python3 -c '
import sys
from gi.repository import Gio
info = Gio.File.new_for_path(sys.argv[1]).query_info(
    "thumbnail::path", Gio.FileQueryInfoFlags.NONE, None)
print(info.get_attribute_byte_string("thumbnail::path") or "")
' "$FILE" 2>/dev/null)"
  fi

  #TODO - Toggle this instead of always on for someone who might want it off

  # Enlarged temporary "thumbnail" for PDFs using poppler-utils and imagemagick
  if [[ "$LARGEPREVIEW" == "TRUE" && "$mimetype" == "application/pdf" ]]; then
    raw_png="${TMPDIR}/ss_pdfmted_preview"
    # Drop leftovers first so a failed render can never show the preview of
    # the previously edited file.
    rm -f -- "${raw_png}.png" "${TMPIMG1}"
    # -singlefile renders only the first page and writes exactly
    # "<prefix>.png"; without it every page is rendered and the numeric
    # suffix is zero-padded on longer documents.
    if pdftocairo -png -singlefile "${FILE}" "${raw_png}" \
        && convert "${raw_png}.png" -resize "464x600" "${TMPIMG1}"; then
      PREVIEWIMG="${TMPIMG1}"
    fi
    rm -f -- "${raw_png}.png"
  fi

  # Non-PDF input, or no usable image found: fall back to the generic icon.
  if [[ "$mimetype" != "application/pdf" || ! -f "$PREVIEWIMG" ]]; then
    PREVIEWIMG="$ICON"
  fi
  IMAGE="$PREVIEWIMG"
  NOTIFY_ICON="$PREVIEWIMG"
}
# Run OCR on "$FILE" in place using ocrmypdf.
#
# Globals read: FILE, ERRORPIPE1
# Outputs:      status lines prefixed with "# " on stdout
# Side effects: writes a status code to "$ERRORPIPE1" (99 when ocrmypdf is
#               not installed, otherwise ocrmypdf's exit status) and then
#               calls error_check_write, which presumably evaluates it.
update_ocr(){
  local ocr_status=0
  # `command -v` instead of `[ ! -f $(which ocrmypdf) ]`: with the tool
  # missing the old test collapsed to `[ ! -f ]`, which is false, so this
  # branch was unreachable and a bare "command not found" (127) was reported.
  if ! command -v ocrmypdf >/dev/null 2>&1; then
    echo "# OCR chosen but ocrmypdf not available!"
    echo "99" > "$ERRORPIPE1"
  else
    gui_notify "Performing OCR"
    echo "# Performing OCR"
    # Input and output are the same path: the file is rewritten in place.
    ocrmypdf --clean --rotate-pages "${FILE}" "${FILE}" || ocr_status=$?
    echo "$ocr_status" > "$ERRORPIPE1"
  fi
  error_check_write
}
-f "$TARGETFOLDER/$UPDATED_NAME.pdf" ]; then + break + else + ((COUNTER++)) + fi + done + fi mv "$FILE" "$TARGETFOLDER/$UPDATED_NAME.pdf" ARGS[$ARGID]="$TARGETFOLDER/$UPDATED_NAME.pdf" # make sure to update file array with # new location @@ -683,6 +783,7 @@ main_iteration(){ process_yad_formchoices print_updated_metadata [[ "$SAVECHOICE" = "TRUE" ]] && update_metadata + [[ "$OCRCHOICE" = "TRUE" ]] && update_ocr [[ "$SAVECHOICE" = "TRUE" && "$RENAMECHOICE" = "TRUE" ]] && update_filename } @@ -724,6 +825,7 @@ trap "cleanup; exit" EXIT TOP_PID="$$" mkdir -p "$TMPDIR" PROGPIPE="$(mktemp -u --tmpdir="$TMPDIR" XXXXXXXX)" +OCRPIPE="$(mktemp -u --tmpdir="$TMPDIR" XXXXXXXX)" ERRORPIPE1="$(mktemp -u --tmpdir="$TMPDIR" XXXXXXXX)" ERRORPIPE2="$(mktemp -u --tmpdir="$TMPDIR" XXXXXXXX)" @@ -740,3 +842,8 @@ arg_compose_filearray "${FILEARGS[@]}" ################ MAIN #################### main + +# cleaning up mouse dropping +if [ -f "${TMPIMG1}" ];then + rm "${TMPIMG1}" +fi diff --git a/screenshots/pdfmted-editor.png b/screenshots/pdfmted-editor.png index fbbdb2f..c4493a2 100644 Binary files a/screenshots/pdfmted-editor.png and b/screenshots/pdfmted-editor.png differ