diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..d6786ac --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,37 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.13"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 256ab09..1007a4a 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -6,7 +6,7 @@ # separate terms of service, privacy policy, and support # documentation. 
-name: filemac +name: filewarp on: release: @@ -26,6 +26,7 @@ jobs: uses: actions/setup-python@v3 with: python-version: '3.x' + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da623e6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Ignore the entire src directory (if not needed) +src/** + +# Ignore this .gitignore file itself (not necessary, but can be included for clarity) +#.gitignore + +# Ignore Python cache directories +__pycache__/ +*.py[cod] + +# Ignore temporary files +*~ +filewarp.egg-info/** +# Ignore build directories +**/build/ +**/dist/ + +# Ignore IDE and editor files +.vscode/ +.idea/ +*.vscode/ +*.idea/ + +# Ignore operating system files +.DS_Store +Thumbs.db +*.db +*.sqlite* + +# Ignore log files +*.log + +# Ignore node_modules (if applicable) +node_modules/ + +# Ignore virtual environment directories (if applicable) +env/ +venv/ +*.docx +*.doc diff --git a/.kateproject.notes b/.kateproject.notes new file mode 100644 index 0000000..fb772b1 --- /dev/null +++ b/.kateproject.notes @@ -0,0 +1,2 @@ +commit_message: +Migrate django templates to react components \ No newline at end of file diff --git a/.pyproject.toml b/.pyproject.toml new file mode 100644 index 0000000..cbebcc8 --- /dev/null +++ b/.pyproject.toml @@ -0,0 +1,83 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "filewarp" +version = "2.0.0" # Will be overridden by version.txt in setup.py unless removed from there +description = "Open source Python CLI toolkit for conversion, manipulation, analysis of files (All major file operations)" +readme = "README.md" +requires-python = ">=3.6" +license = { file = "LICENSE" } +authors = [ + { name = "wambua", email = "swskye17@gmail.com" }, +] +keywords = [ + "file-conversion", + "file-analysis", + "file-manipulation", + "ocr", + "image-conversion", + "audio_effects", + 
"voice_shift", + "pdf", + "docx", +] +classifiers = [ + "Environment :: Console", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +dependencies = [ + "argparse", + "pdfminer.six", + "python-docx", + "python-pptx", + "gTTS", + "pypandoc", + "fitz", # Consider replacing with "PyMuPDF" if that's what's actually used + "pydub", + "Pillow", + "pandas", + "opencv-python", + "pytesseract", + "PyPDF2", + "pdf2docx", + "requests", + "moviepy", + "reportlab", + "numpy", + "pdf2image", + "openpyxl", + "rich", + "tqdm", + "ffmpeg-python", + "librosa", + "python-magic", + "matplotlib", + "soundfile", + "SpeechRecognition", + "colorama", + "scipy", + "PyMuPDF", + "pyautogui", + "imageio", + "pynput", + "pyaudio", +] + +[project.urls] +Homepage = "https://pypi.org/project/filewarp/" +Source = "https://github.com/skye-cyber/filewarp" +Issues = "https://github.com/skye-cyber/filewarp/issues" + +[project.scripts] +filewarp = "filewarp:main" +Filewarp = "filewarp:main" +warp = "filewarp:main" diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..b9f925f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,27 @@ +############################# +# MANIFEST.in for “filewarp” # +############################# + +# ---------- Top‑level metadata ---------- +include README.md +include LICENSE* +include version.txt + +# ---------- Package‑wide data ---------- +# (Any files your code loads at runtime – templates, models, config files…) + +# ---------- Documentation (optional) ---------- +# Comment out if you don’t publish docs with the package +# recursive-include docs * + +# ---------- Type information (if you add stubs) ---------- +# include py.typed + +# ---------- Exclude common cruft ---------- +exclude *.py[cod] __pycache__ *.so *.dll 
*.dylib +prune build +prune dist +prune .git +prune .idea +prune .pytest_cache +prune .~ diff --git a/README.md b/README.md index b5014e0..fbaca7b 100644 --- a/README.md +++ b/README.md @@ -1,133 +1,299 @@ -# fconverter -A python file `conversion`, `manipulation`, `Analysis` toolkit -`This is a Linux command-line interface (CLI) utility that coverts documents from one format to another, -analyzes files, manipulates files. -Your can also convert text file to mp3 formart using google Text to speech library (gTTS). +# FileWarp + +A Python file **conversion**, **manipulation**, and **analysis** toolkit. +This is a Linux command-line interface (CLI) utility that converts documents from one format to another, analyzes files, manipulates files, and more. + +## Name Variations + +```shell +filewarp -h +Filewarp -h +warp -h +``` ## Installation -1. using pip - ```shell - pip install filemac - ``` -2. Install from github +### Using pip + +```shell +pip install filewarp +``` + +### Install from GitHub + +```shell +pip install git+https://github.com/skye-cyber/FileWarp.git +``` - ```shell - pip install git+https://github.com/skye-cyber/FileMAC.git - ``` ## Usage To run the CLI app, use the following command: ```shell -FileMAC [options] stdin format +filewarp [OPTIONS] COMMAND [ARGS]... ``` -Replace `[options]` with the appropriate command-line options based on the functionality you want to execute. +Replace `[OPTIONS]` with global options and `COMMAND` with the specific operation you want to execute. -## Available Options +## Available Commands -- `1`: --convert_doc. -- `2`: --convert_audio. -- `3`: --convert_video. -- `4`: --convert_image. -- `5`: --extract_audio. -- `6`: --Analyze_video -- `7`: --OCR +| Command | Description | +|---------|-------------| +| `convert-doc` | Convert documents between formats (PDF, DOCX, etc.) | +| `convert-audio` | Convert audio files between formats (MP3, WAV, etc.) | +| `convert-video` | Convert video files between formats (MP4, MKV, etc.) 
| +| `convert-image` | Convert image files between formats (PNG, JPG, etc.) | +| `ocr` | Extract text from images using OCR | +| `pdf-join` | Join multiple PDF files | +| `extract-audio` | Extract audio from video files | +| `extract-pages` | Extract specific pages from PDF | +| `extract-images` | Extract images from PDF | +| `scan-pdf` | Scan PDF and extract text | +| `scan-as-image` | Scan PDF as images then extract text | +| `scan-long` | Scan document as long image (effective for complex layouts) | +| `pdf2long-image` | Convert PDF to long image | +| `doc-to-image` | Convert documents to images | +| `images-to-pdf` | Convert images to PDF | +| `images-to-word` | Convert images to Word document | +| `grayscale` | Convert images to grayscale | +| `resize-image` | Resize or compress images | +| `join-audio` | Join multiple audio files into one | +| `analyze-video` | Analyze video file properties | +| `edit-video` | Edit videos (trim, cut, etc.) | +| `convert-svg` | Convert SVG files to other formats | +| `html2word` | Convert HTML files to Word documents | +| `markdown2word` | Convert Markdown to Word with Mermaid rendering | +| `text2word` | Convert styled text to Word document | +| `record` | Record audio from microphone | +| `voice-type` | Use voice to type text | +| `audio-effects` | Apply audio effects and voice changes | ## Examples -1. Example command 1: - - ```shell - filemac --convert_doc example.docx -t pdf - ``` - ``Supported formats For document conversion`` - `1`. PDF to DOCX - `2`. PDF to TXT - `3`. PDF to Audio - `4`. DOCX to PDF - `5`. DOCX to pptx - `6`. DOCX to TXT - `7`. DOCX to Audio - `8`. TXT to PDF - `9`. TXT to DOCX - `10`' TXT to Audio - `11`. PPTX to DOCX - `12`. XLSX to Sql - `13`. XLSX to CSV - `14`. XLSX to TXT - `15`. 
XLSX to DOCX - - This promt parses convert_doc signifying that the inteded operation id document conversion then parses ```example.docx``` as the input file(file path can also be provided) to be converted to format ```pdf```. -the output file assumes the base name of the input file but the extension conforms to the parsed format```pdf``` - -2. converting text mp3 to wav - ```shell - filemac --convert_audio example.mp3 -t wav - ``` - ``Supported formats For audio conversion`` - `1`. wav - `2`. mp3 - `3`. ogg - `4`. flv - `5`. avi - `6`. ogv - `7`. matroska - `8`. mov - `9`. webm - -3. Extract text from images - ```shell - filemac --OCR image.jpg - ``` - - 2. converting videos - ```shell - filemac --convert_video example.mp4 -t wav - ``` - ``Supported formats For video conversion`` - `1`. MP4 - `2`. AVI - `3`. OGV - `4`. WEBM - `5`. MOV - `6`. MKV - `7`. FLV - `8`. WMV - -2. converting images - ```shell - filemac --convert_image example.png -t jpg - ``` - ``Supported formats For audio conversion`` - `1`.JPEG: `.jpg` - `2`.PNG": `.png` - `3`.GIF": `.gif` - `4`.BM": `.bmp` - `5`.TIFF: `.tiff` - `6`.EXR `.exr` - `7`.PDF: `.pdf` - `8`.WebP: `.webp` - `9`.ICNS: `.icns` - `10`.PSD: `.psd` - `11`.SVG: `.svg` - `12`.EPS: `.eps` +### 1. Document Conversion + +```shell +filewarp convert-doc example.docx --to pdf +``` + +**Supported Formats for Document Conversion:** +- PDF to (Word, TXT, Audio[TTS]) +- PDF to TXT +- PDF to Audio (Ogg, MP3, WAV, etc.) +- DOCX to (PDF, PPTX/PPT, TXT, Audio) +- TXT to (PDF, Word, Audio) +- PPTX to DOCX +- XLSX to (SQL, CSV, TXT, Word) + +This command converts `example.docx` to PDF. The output file retains the base name of the input file but uses the specified extension (`pdf`). + +### 2. Audio Conversion + +```shell +filewarp convert-audio example.mp3 --to wav +``` + +**Supported Audio Formats:** +- WAV, MP3, Ogg, FLV, OGV, AVI, MKV, MOV, WebM + +### 3. 
Optical Character Recognition (OCR) + +```shell +filewarp ocr image.jpg +``` + +Extracts text from the specified image. + +### 4. Video Conversion + +```shell +filewarp convert-video example.mp4 --to avi +``` + +**Supported Video Formats:** +- MP4, AVI, OGV, WebM, MOV, MKV, FLV, WMV + +### 5. Image Conversion + +```shell +filewarp convert-image example.png --to jpg +``` + +**Supported Image Formats:** +- JPEG: `.jpg` +- PNG: `.png` +- GIF: `.gif` +- BMP: `.bmp` +- TIFF: `.tiff` +- EXR: `.exr` +- PDF: `.pdf` +- WebP: `.webp` +- ICNS: `.icns` +- PSD: `.psd` +- SVG: `.svg` +- EPS: `.eps` + +### 6. Join PDF Files + +```shell +filewarp pdf-join file1.pdf file2.pdf +``` + +Joins multiple PDF files into a single PDF. + +### 7. Extract Audio from Video + +```shell +filewarp extract-audio video.mp4 +``` + +Extracts audio from the specified video file. + +### 8. Extract Pages from PDF + +```shell +filewarp extract-pages document.pdf 1 3 5 +``` + +Extracts pages 1, 3, and 5 from the PDF. + +### 9. Convert Images to PDF + +```shell +filewarp images-to-pdf image1.jpg image2.png +``` + +Converts multiple images into a single PDF. + +### 10. Convert Images to Word + +```shell +filewarp images-to-word image1.jpg image2.png +``` + +Converts multiple images into a Word document. + +### 11. Convert to Grayscale + +```shell +filewarp grayscale image.jpg +``` + +Converts the image to grayscale. + +### 12. Resize Image + +```shell +filewarp resize-image image.jpg --size 800x600 +``` + +Resizes the image to the specified dimensions. + +### 13. Join Audio Files + +```shell +filewarp join-audio audio1.mp3 audio2.mp3 +``` + +Joins multiple audio files into one. + +### 14. Analyze Video + +```shell +filewarp analyze-video video.mp4 +``` + +Analyzes the video file properties. + +### 15. Edit Video + +```shell +filewarp edit-video video.mp4 --trim_start 10 +``` + +Trims the first 10 seconds from the video. + +### 16. 
Convert SVG + +```shell +filewarp convert-svg image.svg --to png +``` + +Converts SVG to PNG. + +### 17. HTML to Word + +```shell +filewarp html2word document.html +``` + +Converts HTML to Word document. + +### 18. Markdown to Word + +```shell +filewarp markdown2word document.md +``` + +Converts Markdown to Word document. + +### 19. Text to Word + +```shell +filewarp text2word document.txt +``` + +Converts text to Word document. + +### 20. Record Audio + +```shell +filewarp record +``` + +Records audio from the microphone. + +### 21. Voice Typing + +```shell +filewarp voice-type +``` + +Uses voice to type text. + +### 22. Audio Effects + +```shell +filewarp audio-effects audio.mp3 --effect high +``` + +Applies audio effects to the file. ## Help -in any case you can pass the string help to an option to see its supported operations or inputs nd output formats. + +To see supported operations or input/output formats for a specific command, use: + +```shell +filewarp COMMAND --help +``` + +For example: + ```shell - filemac --convert_doc help +filewarp convert-doc --help ``` -The above command displays the surported input and output formats for document conversion. + +This displays detailed help for the `convert-doc` command. + ## Contributing Contributions are welcome! If you encounter any issues or have suggestions for improvements, please open an issue or submit a pull request. ## License -This project is an open source software. Under GPL-3.0 license - +This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -Feel free to modify and customize this template according to your specific project requirements and add any additional sections or information that you think would be helpful for users. 
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +You should have received a copy of the GNU General Public License along with this program. If not, see [https://www.gnu.org/licenses/](https://www.gnu.org/licenses/). diff --git a/__pycache__/Analyzer.cpython-311.pyc b/__pycache__/Analyzer.cpython-311.pyc deleted file mode 100644 index d502d8b..0000000 Binary files a/__pycache__/Analyzer.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/AudioExtractor.cpython-311.pyc b/__pycache__/AudioExtractor.cpython-311.pyc deleted file mode 100644 index 1a8d426..0000000 Binary files a/__pycache__/AudioExtractor.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/OCRTextExtractor.cpython-311.pyc b/__pycache__/OCRTextExtractor.cpython-311.pyc deleted file mode 100644 index f4c49b9..0000000 Binary files a/__pycache__/OCRTextExtractor.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/Simple_v_Analyzer.cpython-311.pyc b/__pycache__/Simple_v_Analyzer.cpython-311.pyc deleted file mode 100644 index 2e9d71a..0000000 Binary files a/__pycache__/Simple_v_Analyzer.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/converter.cpython-311.pyc b/__pycache__/converter.cpython-311.pyc deleted file mode 100644 index d22d623..0000000 Binary files a/__pycache__/converter.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/formarts.cpython-311.pyc b/__pycache__/formarts.cpython-311.pyc deleted file mode 100644 index 15358ab..0000000 Binary files a/__pycache__/formarts.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/formats.cpython-311.pyc b/__pycache__/formats.cpython-311.pyc deleted file mode 100644 index 514208e..0000000 Binary files a/__pycache__/formats.cpython-311.pyc and /dev/null differ diff --git a/__pycache__/show_progress.cpython-311.pyc 
b/__pycache__/show_progress.cpython-311.pyc deleted file mode 100644 index cf779b2..0000000 Binary files a/__pycache__/show_progress.cpython-311.pyc and /dev/null differ diff --git a/assets/audio_effects/chipmunk_demo_v.mp4 b/assets/audio_effects/chipmunk_demo_v.mp4 new file mode 100644 index 0000000..8e44250 Binary files /dev/null and b/assets/audio_effects/chipmunk_demo_v.mp4 differ diff --git a/assets/audio_effects/demo.mp3 b/assets/audio_effects/demo.mp3 new file mode 100644 index 0000000..b23cec6 Binary files /dev/null and b/assets/audio_effects/demo.mp3 differ diff --git a/assets/audio_effects/demo_v.mp4 b/assets/audio_effects/demo_v.mp4 new file mode 100644 index 0000000..6ea564d Binary files /dev/null and b/assets/audio_effects/demo_v.mp4 differ diff --git a/assets/audio_effects/high_demo.mp3 b/assets/audio_effects/high_demo.mp3 new file mode 100644 index 0000000..7150d0c Binary files /dev/null and b/assets/audio_effects/high_demo.mp3 differ diff --git a/assets/demo.html b/assets/demo.html new file mode 100644 index 0000000..05c5460 --- /dev/null +++ b/assets/demo.html @@ -0,0 +1,96 @@ + + + + + + Audio and Video Examples + + + +

Audio

+
filewarp audio-effects 'demo.mp3' --effect high
+

Original:

+ + +

Result:

+ + +
+ +

Video

+
filewarp audio-effects 'demo_v.mp4' --effect high
+

Original:

+ + +

Result:

+ + + diff --git a/assets/init.css b/assets/init.css new file mode 100644 index 0000000..8a196d1 --- /dev/null +++ b/assets/init.css @@ -0,0 +1,316 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; +.scrollbar-hide { + /* Hide scrollbar for Chrome, Safari, and Edge */ + -ms-overflow-style: none; /* Internet Explorer 10+ */ + scrollbar-width: none; /* Firefox */ + overflow: -moz-scrollbars-none; /* Older Firefox */ + overflow-y: scroll; /* Add this to ensure the content is scrollable */ + &::-webkit-scrollbar { + display: none; /* Hide scrollbar for Chrome, Safari, and Edge */ + } +} +/* global.css or within a Tailwind plugin */ +@layer utilities { + h1, + h2, + h3, + h4, + h5, + h6 { + margin: 0; /* Reset margin for consistency */ + } + + h1 { + font-size: 2.5rem; /* 40px */ + font-weight: 700; /* bold */ + } + + h2 { + font-size: 2rem; /* 32px */ + font-weight: 600; /* semi-bold */ + } + + h3 { + font-size: 1.75rem; /* 28px */ + font-weight: 500; /* medium */ + } + + h4 { + font-size: 1.5rem; /* 24px */ + font-weight: 400; /* normal */ + } + + h5 { + font-size: 1.25rem; /* 20px */ + font-weight: 300; /* light */ + } + + h6 { + font-size: 1rem; /* 16px */ + font-weight: 200; /* extra light */ + } +} +.pulse { + display: inline-block; + transition: transform 0.3s ease-in-out; +} + +.pulse:hover { + transform: scale(1.1); +} + +@keyframes pulse { + 0% { + transform: scale(1); + } + 50% { + transform: scale(1.5); + } + 100% { + transform: scale(1); + } +} + +.pulse-hover { + display: inline-block; +} + +.pulse-hover:hover { + animation: pulse 1s infinite; +} +/* Reset default scrollbar */ +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +/* Light theme scrollbar */ +::-webkit-scrollbar-track { + background: #2c2c2c; + border-radius: 4px; + opacity: 0.5; +} + +.dark ::-webkit-scrollbar-track { + background: #24486b; + border-radius: 4px; + opacity: 0.5; +} + +::-webkit-scrollbar-thumb { + background: linear-gradient(145deg, #00aa7f, #aaffff, 
#00aaff); + border-radius: 4px; + transition: background-color 0.3s ease; +} + +::-webkit-scrollbar-thumb:hover { + background: #555500; +} + +.dark ::-webkit-scrollbar-thumb { + background: #ffffff; +} + +::-webkit-scrollbar-thumb:active { + background: linear-gradient(135deg, #aa55ff, #aaaaff, #ff55ff); +} + +/* Optional: Add transitions for more natural feel */ +ython.assistant-unused .note:-webkit-scrollbar { + scroll-behavior: smooth; +} + +/* Simulate a placeholder on the contenteditable div */ +#userInput:empty:before { + content: attr(data-placeholder); + color: #9ca3af; +} +/* Always ensure an extra empty row at the bottom */ +#userInput::after { + content: "\A"; /* Inserts a newline */ + white-space: pre; + display: block; + visibility: hidden; + height: 2.4em; /* Adjust this value to match the height of an empty row */ +} + +@keyframes modalEnter { + from { + transform: scale(0); + opacity: 0; + } + to { + transform: scale(1); + opacity: 1; + } +} + +@keyframes modalExit { + from { + transform: scale(1); + opacity: 1; + } + to { + transform: scale(0); + opacity: 0; + } +} + +.animate-enter { + animation: modalEnter 0.4s ease-out forwards; +} + +.animate-exit { + animation: modalExit 0.3s ease-in forwards; +} + +@keyframes singleRipple { + 0% { + transform: scale(0.8); + opacity: 1; + } + 100% { + transform: scale(2.5); + opacity: 0; + } +} +.ripple-single-1 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-2 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} +.ripple-single-3 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-radius: 50%; + width: 80%; + height: 80%; + 
animation: singleRipple 1.8s infinite; + pointer-events: none; +} +.ripple-single-1 { + animation-delay: 0s; +} +.ripple-single-2 { + animation-delay: 0.6s; +} +.ripple-single-3 { + animation-delay: 1.2s; +} + +/* Light code theme*/ + +/* Dark code theme*/ +.hljs { + background-color: #282c34; + color: #abb2bf; + padding: 15px; + border-radius: 8px; + line-height: 1.5; + font-family: "Fira Code", monospace; +} + +.hljs-keyword { + color: #c678dd; + font-weight: bold; +} + +.hljs-built_in { + color: #e06c74; +} + +.hljs-string { + color: #98c379; +} + +.hljs-number { + color: #d19a66; +} + +.hljs-comment { + color: #5c6370; + font-style: italic; +} + +.hljs-function { + color: #61afef; +} + +.hljs-params { + color: #abb2bf; +} + +.hljs-variable { + color: #d19a66; +} + +.hljs-class { + color: #e5c07b; +} + +.hljs-title { + color: #61afef; +} + +.hljs-attribute { + color: #d19a66; +} + +.hljs-symbol { + color: #61afef; +} + +.hljs-bullet { + color: #abb2bf; +} + +.hljs-meta { + color: #5c6370; +} + +.hljs-link { + color: #61afef; + text-decoration: underline; +} + +.hljs-doctag { + color: #c678dd; + font-weight: bold; +} + +.hljs-tag { + color: #e06c74; +} + +.hljs-name { + color: #61afef; +} + +.hljs-attr { + color: #d19a66; +} + +.hljs-attr { + color: #00aaff; +} + +.hljs-literal { + color: #d19a66; +} diff --git a/assets/styles.css b/assets/styles.css new file mode 100644 index 0000000..51d299d --- /dev/null +++ b/assets/styles.css @@ -0,0 +1,1094 @@ +*, ::before, ::after { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + --tw-numeric-spacing: ; + 
--tw-numeric-fraction: ; + --tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + --tw-contain-size: ; + --tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +::backdrop { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + --tw-numeric-spacing: ; + --tw-numeric-fraction: ; + --tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + 
--tw-contain-size: ; + --tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +/* +! tailwindcss v3.4.17 | MIT License | https://tailwindcss.com +*/ + +/* +1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4) +2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116) +*/ + +*, +::before, +::after { + box-sizing: border-box; + /* 1 */ + border-width: 0; + /* 2 */ + border-style: solid; + /* 2 */ + border-color: #e5e7eb; + /* 2 */ +} + +::before, +::after { + --tw-content: ''; +} + +/* +1. Use a consistent sensible line-height in all browsers. +2. Prevent adjustments of font size after orientation changes in iOS. +3. Use a more readable tab size. +4. Use the user's configured `sans` font-family by default. +5. Use the user's configured `sans` font-feature-settings by default. +6. Use the user's configured `sans` font-variation-settings by default. +7. Disable tap highlights on iOS +*/ + +html, +:host { + line-height: 1.5; + /* 1 */ + -webkit-text-size-adjust: 100%; + /* 2 */ + -moz-tab-size: 4; + /* 3 */ + -o-tab-size: 4; + tab-size: 4; + /* 3 */ + font-family: ui-sans-serif, system-ui, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; + /* 4 */ + font-feature-settings: normal; + /* 5 */ + font-variation-settings: normal; + /* 6 */ + -webkit-tap-highlight-color: transparent; + /* 7 */ +} + +/* +1. Remove the margin in all browsers. +2. Inherit line-height from `html` so users can set them as a class directly on the `html` element. +*/ + +body { + margin: 0; + /* 1 */ + line-height: inherit; + /* 2 */ +} + +/* +1. Add the correct height in Firefox. +2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655) +3. Ensure horizontal rules are visible by default. 
+*/ + +hr { + height: 0; + /* 1 */ + color: inherit; + /* 2 */ + border-top-width: 1px; + /* 3 */ +} + +/* +Add the correct text decoration in Chrome, Edge, and Safari. +*/ + +abbr:where([title]) { + -webkit-text-decoration: underline dotted; + text-decoration: underline dotted; +} + +/* +Remove the default font size and weight for headings. +*/ + +h1, +h2, +h3, +h4, +h5, +h6 { + font-size: inherit; + font-weight: inherit; +} + +/* +Reset links to optimize for opt-in styling instead of opt-out. +*/ + +a { + color: inherit; + text-decoration: inherit; +} + +/* +Add the correct font weight in Edge and Safari. +*/ + +b, +strong { + font-weight: bolder; +} + +/* +1. Use the user's configured `mono` font-family by default. +2. Use the user's configured `mono` font-feature-settings by default. +3. Use the user's configured `mono` font-variation-settings by default. +4. Correct the odd `em` font sizing in all browsers. +*/ + +code, +kbd, +samp, +pre { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + /* 1 */ + font-feature-settings: normal; + /* 2 */ + font-variation-settings: normal; + /* 3 */ + font-size: 1em; + /* 4 */ +} + +/* +Add the correct font size in all browsers. +*/ + +small { + font-size: 80%; +} + +/* +Prevent `sub` and `sup` elements from affecting the line height in all browsers. +*/ + +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} + +sub { + bottom: -0.25em; +} + +sup { + top: -0.5em; +} + +/* +1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297) +2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016) +3. Remove gaps between table borders by default. 
+*/ + +table { + text-indent: 0; + /* 1 */ + border-color: inherit; + /* 2 */ + border-collapse: collapse; + /* 3 */ +} + +/* +1. Change the font styles in all browsers. +2. Remove the margin in Firefox and Safari. +3. Remove default padding in all browsers. +*/ + +button, +input, +optgroup, +select, +textarea { + font-family: inherit; + /* 1 */ + font-feature-settings: inherit; + /* 1 */ + font-variation-settings: inherit; + /* 1 */ + font-size: 100%; + /* 1 */ + font-weight: inherit; + /* 1 */ + line-height: inherit; + /* 1 */ + letter-spacing: inherit; + /* 1 */ + color: inherit; + /* 1 */ + margin: 0; + /* 2 */ + padding: 0; + /* 3 */ +} + +/* +Remove the inheritance of text transform in Edge and Firefox. +*/ + +button, +select { + text-transform: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. Remove default button styles. +*/ + +button, +input:where([type='button']), +input:where([type='reset']), +input:where([type='submit']) { + -webkit-appearance: button; + /* 1 */ + background-color: transparent; + /* 2 */ + background-image: none; + /* 2 */ +} + +/* +Use the modern Firefox focus style for all focusable elements. +*/ + +:-moz-focusring { + outline: auto; +} + +/* +Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737) +*/ + +:-moz-ui-invalid { + box-shadow: none; +} + +/* +Add the correct vertical alignment in Chrome and Firefox. +*/ + +progress { + vertical-align: baseline; +} + +/* +Correct the cursor style of increment and decrement buttons in Safari. +*/ + +::-webkit-inner-spin-button, +::-webkit-outer-spin-button { + height: auto; +} + +/* +1. Correct the odd appearance in Chrome and Safari. +2. Correct the outline style in Safari. 
+*/ + +[type='search'] { + -webkit-appearance: textfield; + /* 1 */ + outline-offset: -2px; + /* 2 */ +} + +/* +Remove the inner padding in Chrome and Safari on macOS. +*/ + +::-webkit-search-decoration { + -webkit-appearance: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. Change font properties to `inherit` in Safari. +*/ + +::-webkit-file-upload-button { + -webkit-appearance: button; + /* 1 */ + font: inherit; + /* 2 */ +} + +/* +Add the correct display in Chrome and Safari. +*/ + +summary { + display: list-item; +} + +/* +Removes the default spacing and border for appropriate elements. +*/ + +blockquote, +dl, +dd, +h1, +h2, +h3, +h4, +h5, +h6, +hr, +figure, +p, +pre { + margin: 0; +} + +fieldset { + margin: 0; + padding: 0; +} + +legend { + padding: 0; +} + +ol, +ul, +menu { + list-style: none; + margin: 0; + padding: 0; +} + +/* +Reset default styling for dialogs. +*/ + +dialog { + padding: 0; +} + +/* +Prevent resizing textareas horizontally by default. +*/ + +textarea { + resize: vertical; +} + +/* +1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300) +2. Set the default placeholder color to the user's configured gray 400 color. +*/ + +input::-moz-placeholder, textarea::-moz-placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +input::placeholder, +textarea::placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +/* +Set the default cursor for buttons. +*/ + +button, +[role="button"] { + cursor: pointer; +} + +/* +Make sure disabled buttons don't get the pointer cursor. +*/ + +:disabled { + cursor: default; +} + +/* +1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14) +2. Add `vertical-align: middle` to align replaced elements more sensibly by default. 
(https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210) + This can trigger a poorly considered lint error in some tools but is included by design. +*/ + +img, +svg, +video, +canvas, +audio, +iframe, +embed, +object { + display: block; + /* 1 */ + vertical-align: middle; + /* 2 */ +} + +/* +Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14) +*/ + +img, +video { + max-width: 100%; + height: auto; +} + +/* Make elements with the HTML hidden attribute stay hidden by default */ + +[hidden]:where(:not([hidden="until-found"])) { + display: none; +} + +.container { + width: 100%; +} + +@media (min-width: 640px) { + .container { + max-width: 640px; + } +} + +@media (min-width: 768px) { + .container { + max-width: 768px; + } +} + +@media (min-width: 1024px) { + .container { + max-width: 1024px; + } +} + +@media (min-width: 1280px) { + .container { + max-width: 1280px; + } +} + +@media (min-width: 1536px) { + .container { + max-width: 1536px; + } +} + +.mx-auto { + margin-left: auto; + margin-right: auto; +} + +.my-8 { + margin-top: 2rem; + margin-bottom: 2rem; +} + +.mb-4 { + margin-bottom: 1rem; +} + +.mb-6 { + margin-bottom: 1.5rem; +} + +.mt-2 { + margin-top: 0.5rem; +} + +.mt-8 { + margin-top: 2rem; +} + +.list-inside { + list-style-position: inside; +} + +.list-disc { + list-style-type: disc; +} + +.rounded-lg { + border-radius: 0.5rem; +} + +.bg-gray-200 { + --tw-bg-opacity: 1; + background-color: rgb(229 231 235 / var(--tw-bg-opacity, 1)); +} + +.bg-gray-800 { + --tw-bg-opacity: 1; + background-color: rgb(31 41 55 / var(--tw-bg-opacity, 1)); +} + +.bg-white { + --tw-bg-opacity: 1; + background-color: rgb(255 255 255 / var(--tw-bg-opacity, 1)); +} + +.bg-gradient-to-r { + background-image: linear-gradient(to right, var(--tw-gradient-stops)); +} + +.from-blue-800 { + --tw-gradient-from: #1e40af var(--tw-gradient-from-position); + --tw-gradient-to: rgb(30 64 175 / 
0) var(--tw-gradient-to-position); + --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to); +} + +.to-blue-600 { + --tw-gradient-to: #2563eb var(--tw-gradient-to-position); +} + +.p-4 { + padding: 1rem; +} + +.p-6 { + padding: 1.5rem; +} + +.py-4 { + padding-top: 1rem; + padding-bottom: 1rem; +} + +.py-8 { + padding-top: 2rem; + padding-bottom: 2rem; +} + +.text-center { + text-align: center; +} + +.text-3xl { + font-size: 1.875rem; + line-height: 2.25rem; +} + +.text-5xl { + font-size: 3rem; + line-height: 1; +} + +.text-lg { + font-size: 1.125rem; + line-height: 1.75rem; +} + +.font-bold { + font-weight: 700; +} + +.font-semibold { + font-weight: 600; +} + +.leading-normal { + line-height: 1.5; +} + +.tracking-normal { + letter-spacing: 0em; +} + +.text-blue-800 { + --tw-text-opacity: 1; + color: rgb(30 64 175 / var(--tw-text-opacity, 1)); +} + +.text-gray-800 { + --tw-text-opacity: 1; + color: rgb(31 41 55 / var(--tw-text-opacity, 1)); +} + +.text-white { + --tw-text-opacity: 1; + color: rgb(255 255 255 / var(--tw-text-opacity, 1)); +} + +.shadow-lg { + --tw-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1); + --tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color); + box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); +} + +.shadow-md { + --tw-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1); + --tw-shadow-colored: 0 4px 6px -1px var(--tw-shadow-color), 0 2px 4px -2px var(--tw-shadow-color); + box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); +} + +h1, + h2, + h3, + h4, + h5, + h6 { + margin: 0; + /* Reset margin for consistency */ +} + +h1 { + font-size: 2.5rem; + /* 40px */ + font-weight: 700; + /* bold */ +} + +h2 { + font-size: 2rem; + /* 32px */ + font-weight: 600; + /* semi-bold */ +} + +h3 { + font-size: 1.75rem; + /* 28px */ + font-weight: 
500; + /* medium */ +} + +h4 { + font-size: 1.5rem; + /* 24px */ + font-weight: 400; + /* normal */ +} + +h5 { + font-size: 1.25rem; + /* 20px */ + font-weight: 300; + /* light */ +} + +h6 { + font-size: 1rem; + /* 16px */ + font-weight: 200; + /* extra light */ +} + +.scrollbar-hide { + /* Hide scrollbar for Chrome, Safari, and Edge */ + -ms-overflow-style: none; + /* Internet Explorer 10+ */ + scrollbar-width: none; + /* Firefox */ + overflow: -moz-scrollbars-none; + /* Older Firefox */ + overflow-y: scroll; + /* Add this to ensure the content is scrollable */ + &::-webkit-scrollbar { + display: none; + /* Hide scrollbar for Chrome, Safari, and Edge */ + } +} + +/* global.css or within a Tailwind plugin */ + +.pulse { + display: inline-block; + transition: transform 0.3s ease-in-out; +} + +.pulse:hover { + transform: scale(1.1); +} + +@keyframes pulse { + 0% { + transform: scale(1); + } + + 50% { + transform: scale(1.5); + } + + 100% { + transform: scale(1); + } +} + +.pulse-hover { + display: inline-block; +} + +.pulse-hover:hover { + animation: pulse 1s infinite; +} + +/* Reset default scrollbar */ + +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +/* Light theme scrollbar */ + +::-webkit-scrollbar-track { + background: #2c2c2c; + border-radius: 4px; + opacity: 0.5; +} + +.dark ::-webkit-scrollbar-track { + background: #24486b; + border-radius: 4px; + opacity: 0.5; +} + +::-webkit-scrollbar-thumb { + background: linear-gradient(145deg, #00aa7f, #aaffff, #00aaff); + border-radius: 4px; + -webkit-transition: background-color 0.3s ease; + transition: background-color 0.3s ease; +} + +::-webkit-scrollbar-thumb:hover { + background: #555500; +} + +.dark ::-webkit-scrollbar-thumb { + background: #ffffff; +} + +::-webkit-scrollbar-thumb:active { + background: linear-gradient(135deg, #aa55ff, #aaaaff, #ff55ff); +} + +/* Optional: Add transitions for more natural feel */ + +ython.assistant-unused .note:-webkit-scrollbar { + scroll-behavior: smooth; +} + +/* 
Simulate a placeholder on the contenteditable div */ + +#userInput:empty:before { + content: attr(data-placeholder); + color: #9ca3af; +} + +/* Always ensure an extra empty row at the bottom */ + +#userInput::after { + content: "\A"; + /* Inserts a newline */ + white-space: pre; + display: block; + visibility: hidden; + height: 2.4em; + /* Adjust this value to match the height of an empty row */ +} + +@keyframes modalEnter { + from { + transform: scale(0); + opacity: 0; + } + + to { + transform: scale(1); + opacity: 1; + } +} + +@keyframes modalExit { + from { + transform: scale(1); + opacity: 1; + } + + to { + transform: scale(0); + opacity: 0; + } +} + +.animate-enter { + animation: modalEnter 0.4s ease-out forwards; +} + +.animate-exit { + animation: modalExit 0.3s ease-in forwards; +} + +@keyframes singleRipple { + 0% { + transform: scale(0.8); + opacity: 1; + } + + 100% { + transform: scale(2.5); + opacity: 0; + } +} + +.ripple-single-1 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-2 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-3 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-radius: 50%; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-1 { + animation-delay: 0s; +} + +.ripple-single-2 { + animation-delay: 0.6s; +} + +.ripple-single-3 { + animation-delay: 1.2s; +} + +/* Light code theme*/ + +/* Dark 
code theme*/ + +.hljs { + background-color: #282c34; + color: #abb2bf; + padding: 15px; + border-radius: 8px; + line-height: 1.5; + font-family: "Fira Code", monospace; +} + +.hljs-keyword { + color: #c678dd; + font-weight: bold; +} + +.hljs-built_in { + color: #e06c74; +} + +.hljs-string { + color: #98c379; +} + +.hljs-number { + color: #d19a66; +} + +.hljs-comment { + color: #5c6370; + font-style: italic; +} + +.hljs-function { + color: #61afef; +} + +.hljs-params { + color: #abb2bf; +} + +.hljs-variable { + color: #d19a66; +} + +.hljs-class { + color: #e5c07b; +} + +.hljs-title { + color: #61afef; +} + +.hljs-attribute { + color: #d19a66; +} + +.hljs-symbol { + color: #61afef; +} + +.hljs-bullet { + color: #abb2bf; +} + +.hljs-meta { + color: #5c6370; +} + +.hljs-link { + color: #61afef; + text-decoration: underline; +} + +.hljs-doctag { + color: #c678dd; + font-weight: bold; +} + +.hljs-tag { + color: #e06c74; +} + +.hljs-name { + color: #61afef; +} + +.hljs-attr { + color: #d19a66; + color: #00aaff; +} + +.hljs-literal { + color: #d19a66; +} + +.hover\:underline:hover { + text-decoration-line: underline; +} diff --git a/audiobot/__init__.py b/audiobot/__init__.py new file mode 100644 index 0000000..3b1561f --- /dev/null +++ b/audiobot/__init__.py @@ -0,0 +1,50 @@ +""" + ///////] /// /// ///////] (O) //////] ///// //////] ///////// + // // /// /// // // /// /// /// // / /// /// /// + ///////// /// /// // / /// /// /// ///// /// /// /// + // // /// /// // / /// // // // / // // /// +// // ////////// /////// / /// /////// /////// /////// /// +Perform audio modifications such as adding voice effect to an audio or video file\n +Operation: + +""" + +from .cli import cli, ArgumentsProcessor +from .utils.logging_utils import LoggingFormatter, colored_logger +from .utils.visualizer import audiowave_visualizer +from .utils.metadata_utils import get_audio_bitrate +from .core.codec import AudioSegmentArrayCodec +from .core.effects import VoiceEffectProcessor +from 
class ArgumentsProcessor:
    """Route parsed audiobot command-line arguments to the media processors.

    The instance keeps the parsed ``args`` namespace, the ``parser`` (used to
    print help), a libmagic MIME detector and the resolved output directory.
    """

    def __init__(self, args, parser):
        """Store the parsed arguments and resolve the output directory.

        Args:
            args: Namespace produced by the audiobot argument parser.
            parser: The parser itself, kept so help can be printed later.
        """
        self.args = args
        self.parser = parser
        self.mime = magic.Magic(mime=True)
        # Fall back to the current working directory when no --output was given.
        self.output_dir = os.getcwd() if not self.args.output else self.args.output

    def process(self):
        """Run the requested operation (single file or batch)."""
        # NOTE(review): help is printed (and nothing is processed) whenever the
        # hidden --audio_effect flag is set; confirm callers strip that flag
        # before handing the arguments to audiobot.
        if not self.args or self.args.audio_effect:
            self.parser.print_help()
            return

        if self.args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)

        if self.args.output and not os.path.exists(self.args.output):
            # exist_ok closes the window between the check above and the
            # creation (another process may create the directory meanwhile).
            os.makedirs(self.args.output, exist_ok=True)

        if self.args.batch:
            self.batch_processor()
        else:
            self.mono_processor()

    def _process_path(self, path):
        """Detect the MIME type of *path* and forward it to the matching processor.

        Returns:
            tuple: ``(handled, file_type)`` where ``handled`` is False when the
            detected type is neither audio nor video.
        """
        file_type = self.mime.from_file(path)
        Clogger.info(f"{fg.BLUE}Detected file type: {file_type}{RESET}")

        if file_type.startswith("audio"):
            if self.args.transcribe:
                transcribe_audio(path)
            run = AudioProcessor().process_audio_file
        elif file_type.startswith("video"):
            run = VideoProcessor().process_video_file
        else:
            return False, file_type

        run(
            path,
            self.args.effect,
            self.output_dir,
            self.args.verbose,
            self.args.visualize,
        )
        return True, file_type

    def mono_processor(self):
        """Process the single file named by ``args.file``; errors are logged."""
        try:
            handled, file_type = self._process_path(self.args.file)
        except Exception as e:  # CLI boundary: report instead of crashing
            Clogger.error(e)
            return

        if not handled:
            Clogger.warning(
                f"Unsupported file type: {file_type}. Only audio and video files are supported."
            )

    def batch_processor(self):
        """Process every file found under the directory ``args.file``.

        Each file is handled independently: a failure is logged as an error
        for that file and the walk continues with the next one.
        """
        for root, _, files in os.walk(self.args.file):
            for file in files:
                full_path = os.path.join(root, file)
                try:
                    handled, _ = self._process_path(full_path)
                except Exception as e:  # keep going with the remaining files
                    Clogger.error(f"{full_path}: {e}")
                    continue

                if not handled:
                    Clogger.warning(f"Ignoring unsupported file type: {file}")
"high", + "echo", + "reverb", + "whisper", + "demonic", + "chipmunk", + "hacker", + "lowpass", + "highpass", + "distortion", + "denoise", + ], + help=f"{fg.CYAN}The voice effect to apply.{RESET}", + ) + parser.add_argument( + "--cutoff", + type=int, + help=f"Cutoff frequency for denoise operation, defualt={fg.YELLOW}200{RESET}", + ) + parser.add_argument( + "-N", + "--noise", + choices=["low", "high", "both"], + type=str, + default="low", + help=f"Specifies the type of noise to remove choices:[{fg.BLUE}low, high, both{RESET}] defualt={fg.YELLOW}low{RESET}", + ) + parser.add_argument( + "-o", + "--output", + help=f"{fg.CYAN}Output directory for modified files.{RESET}", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help=f"{fg.CYAN}Increase output verbosity.{RESET}", + ) + parser.add_argument( + "-b", + "--batch", + action="store_true", + help=f"{fg.CYAN}Batch process all files in a directory.{RESET}", + ) + parser.add_argument( + "--visualize", + action="store_true", + help=f"{fg.CYAN}Visualize the audio waveform before and after modification.{RESET}", + ) + parser.add_argument( + "--transcribe", + action="store_true", + help=f"{fg.CYAN}Transcribe the audio content before applying the effect.{RESET}", + ) + parser.add_argument("--audio_effect", action="store_true", help=argparse.SUPPRESS) + + args = parser.parse_args(argsv) if argsv else parser.parse_args() + if args.cutoff: + config = Config() + config.options["cutoff"] = args.cutoff + config.options["noise"] = args.noise + # Call argument processor + ArgumentsProcessor(args, parser).process() + + +if __name__ == "__main__": + cli() diff --git a/audiobot/config/__init__.py b/audiobot/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/audiobot/config/core.py b/audiobot/config/core.py new file mode 100644 index 0000000..5efa61d --- /dev/null +++ b/audiobot/config/core.py @@ -0,0 +1,8 @@ +class Config: + _instance = None + + def __new__(cls): + if cls._instance is 
class AudioModulator:
    """Voice and audio effects for pydub segments and NumPy sample arrays.

    Methods taking ``audio_segment`` work on pydub ``AudioSegment`` objects;
    methods taking ``samples`` work on NumPy arrays whose first axis is time.
    """

    def __init__(self):
        # Optional user-supplied cutoff (Hz) read from the shared Config
        # options; None when it was never set.
        self._cutoff = config.options.get("cutoff")

    def _resolve_cutoff(self, default):
        """Return the configured cutoff when one is set, else *default*."""
        return self._cutoff if self._cutoff else default

    @staticmethod
    def _mix_delayed(samples, delay_samples, decay):
        """Return a float64 copy of *samples* plus a delayed, scaled copy of itself.

        The delay is applied along the first axis, so 1-D (mono) and 2-D
        (frames x channels) arrays are both supported. The output has the same
        length as the input; a delay longer than the signal adds nothing.

        Raises:
            ValueError: If *delay_samples* is negative.
        """
        if delay_samples < 0:
            raise ValueError("delay must be non-negative")

        dry = np.asarray(samples)
        mixed = dry.astype(np.float64)  # astype copies, the input is untouched
        n = len(dry)
        if delay_samples < n:
            mixed[delay_samples:] += decay * dry[: n - delay_samples]
        return mixed

    def pitch_shift(self, audio_segment, n_steps):
        """Shift the pitch of *audio_segment* by *n_steps* semitones.

        Stereo input is down-mixed to mono before shifting, so the returned
        segment is mono in that case.

        Returns:
            AudioSegment: The pitch-shifted audio.
        """
        # Down-mix BEFORE extracting samples: the sample array of a stereo
        # segment is interleaved and must not be shifted as one mono stream.
        if audio_segment.channels == 2:
            audio_segment = audio_segment.set_channels(1)

        samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)

        shifted_samples = librosa.effects.pitch_shift(
            samples, sr=audio_segment.frame_rate, n_steps=n_steps
        )

        # Clip so overshoot from resampling saturates instead of wrapping
        # around in the int16 cast below.
        shifted_samples = np.clip(shifted_samples, -32768, 32767)

        # NOTE(review): the data is written as int16 while sample_width is
        # taken from the input; confirm inputs are always 16-bit.
        shifted_audio = AudioSegment(
            shifted_samples.astype(np.int16).tobytes(),
            frame_rate=audio_segment.frame_rate,
            sample_width=audio_segment.sample_width,
            channels=audio_segment.channels,
        )

        return shifted_audio

    def hacker(self, audio_segment):
        """Applies a deep, robotic voice effect used for anonymity.

        Returns:
            AudioSegment | None: The processed audio, or None when a step fails.
        """

        # Step 1: Pitch shift down (lower the pitch)
        Clogger.info("Applying deep pitch shift for hacker voice")
        deep_voice = self.pitch_shift(audio_segment, n_steps=-10)

        # Step 2: Speed up for robotic effect
        Clogger.info("Speeding up for robotic effect")
        robotic_voice = effects.speedup(deep_voice, playback_speed=1.1)
        if robotic_voice is None:
            Clogger.error("Speedup failed")
            return None

        # Step 3: Overlay a delayed copy of the voice
        Clogger.info("Adding subtle echo for distortion")
        if not isinstance(robotic_voice, AudioSegment):
            Clogger.error("Robotic voice generation failed")
            return None

        delay = AudioSegment.silent(duration=500)
        Clogger.info("Overlaying echo effect")
        try:
            # NOTE(review): in pydub, subtracting a number from a segment
            # lowers its gain in dB, so "- 5000" looks like it silences the
            # echo entirely; confirm the intended attenuation.
            echo_effect = robotic_voice.overlay(delay + robotic_voice - 5000)
        except Exception as e:
            Clogger.error(f"Error during overlay: {e}")
            return None

        # Step 4: Apply low-pass filter
        hacker_voice_effect = (
            effects.low_pass_filter(echo_effect, cutoff=2500) if echo_effect else None
        )
        if hacker_voice_effect is None:
            Clogger.error("Low pass filter failed")
            return None

        return hacker_voice_effect

    def echo(self, samples, delay=0.2, decay=0.5, sample_rate=44100):
        """Apply echo effect with a specified delay and decay.

        Args:
            samples (numpy.ndarray): Audio samples, time on the first axis.
            delay (float): Echo delay in seconds.
            decay (float): Gain applied to the delayed copy.
            sample_rate (int): Sample rate in Hz.

        Returns:
            numpy.ndarray: float64 array with the same shape as *samples*.
        """
        return self._mix_delayed(samples, int(sample_rate * delay), decay)

    def reverb(self, samples, decay=0.7, delay=0.05, sample_rate=44100):
        """Apply a reverb effect by adding a delayed and attenuated copy of the signal.

        The dry signal is kept for the whole duration (including the first
        ``delay`` seconds) and the result is returned in the input dtype;
        integer output saturates instead of overflowing.

        Returns:
            numpy.ndarray | None: The processed samples, or None when an error
            occurred (the error is logged).
        """
        try:
            dry = np.asarray(samples)
            wet = self._mix_delayed(dry, int(sample_rate * delay), decay)

            if np.issubdtype(dry.dtype, np.integer):
                limits = np.iinfo(dry.dtype)
                wet = np.clip(wet, limits.min, limits.max)

            return wet.astype(dry.dtype)
        except Exception as e:  # best-effort contract: log and return None
            Clogger.error(e)
            return None

    def lowpass_filter(self, samples, cutoff=200, sample_rate=44100):
        """
        Apply a low-pass filter to remove frequencies higher than the specified cutoff.

        This function uses a 6th-order Butterworth filter, evaluated as
        second-order sections for numerical stability, to attenuate frequencies
        above the cutoff frequency. A cutoff configured through the shared
        options takes precedence over the ``cutoff`` argument.

        Args:
            samples (numpy.ndarray): The audio samples as a NumPy array.
            cutoff (int, optional): The cutoff frequency in Hz. Defaults to 200.
                Typical cutoff values:
                - Voice: 1000-2000 Hz
                - Music: 5000-8000 Hz
                - Hiss/noise removal: 200-500 Hz
            sample_rate (int, optional): The sample rate of the audio in Hz. Defaults to 44100.

        Returns:
            numpy.ndarray: The filtered audio samples as a NumPy array.
        """

        cutoff = self._resolve_cutoff(cutoff)
        Clogger.debug(f"{fg.BLUE}cutoff: {fg.CYAN}{cutoff}{RESET}")
        Clogger.info("Apply a low-pass filter to remove frequencies higher than cutoff")

        nyquist = 0.5 * sample_rate
        if cutoff >= nyquist:
            # butter() rejects normalized cutoffs >= 1; clamp the same way
            # AudioDenoiser.lowpass_filter does.
            Clogger.warning(f"Cutoff frequency must be less than Nyquist ({nyquist} Hz)")
            cutoff = nyquist - (nyquist * 0.1)

        sos = butter(6, cutoff / nyquist, btype="low", analog=False, output="sos")
        return sosfilt(sos, samples)

    def distort(self, samples, gain=10, threshold=0.3):
        """Apply distortion by clipping the waveform."""
        Clogger.info("Apply distortion by clipping the waveform.")
        samples = samples * gain
        samples = np.clip(samples, -threshold, threshold)  # Clip at threshold
        return samples

    def whisper(self, audio_segment):
        """Low-pass *audio_segment* at 70 Hz and reduce its gain by 10 dB."""
        return effects.low_pass_filter(audio_segment, 70).apply_gain(-10)

    def highpass(self, audio_segment, cutoff: int = 200):
        """High-pass *audio_segment*; a configured cutoff overrides *cutoff*."""
        cutoff = self._resolve_cutoff(cutoff)
        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
        return effects.high_pass_filter(audio_segment, cutoff=cutoff)

    def lowpass(self, audio_segment, cutoff: int = 2200):
        """Low-pass *audio_segment*; a configured cutoff overrides *cutoff*."""
        cutoff = self._resolve_cutoff(cutoff)
        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
        return effects.low_pass_filter(audio_segment, cutoff=cutoff)

    def normalize(self, audio_segment):
        """Return *audio_segment* normalized with pydub's ``effects.normalize``."""
        return effects.normalize(audio_segment)
Defaults to 6. + + Returns: + np.ndarray: The high-pass filtered audio samples. + """ + + cutoff = self._cutoff if self._cutoff else cutoff + + if not isinstance(samples, np.ndarray): + raise ValueError("Input samples must be a NumPy array") + + nyquist = 0.5 * self.sample_rate + if cutoff <= 0: + raise ValueError("Cutoff frequency must be positive") + + if cutoff not in self._sos_high: + self._sos_high[cutoff] = butter( + order, cutoff / nyquist, btype="high", analog=False, output="sos" + ) + + return sosfilt(self._sos_high[cutoff], samples) + + def denoise( + self, + samples: np.ndarray, + lowpass_cutoff: int = 2200, + highpass_cutoff: int = 200, + order: int = 6, + ) -> np.ndarray: + """ + Denoise the audio by sequentially applying a low-pass filter and a high-pass filter. + This combination effectively acts as a band-pass filter, + removing both high-frequency noise (hiss) and low-frequency rumble. + + Args: + samples (np.ndarray): The input audio samples. + lowpass_cutoff (int, optional): Cutoff frequency for low-pass filtering. Defaults to 2200 Hz. + highpass_cutoff (int, optional): Cutoff frequency for high-pass filtering. Defaults to 200 Hz. + order (int, optional): Order of the filters. Defaults to 6. + + Returns: + np.ndarray: The denoised audio samples. 
+ """ + noise = config.options.get("noise") if config.options.get("noise") else "low" + + Clogger.info( + f"{fg.BLUE}Noise: {fg.CYAN}{config.options.get('noise')}{RESET}" + ) + if noise == "low": + # Remove high-frequency noise + return self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order) + if noise == "high": + # Remove low-frequency noise + return self.highpass_filter(samples, cutoff=highpass_cutoff, order=order) + if noise == "both": + # Remove high-frequency noise + filtered = self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order) + # Remove low-frequency noise + return self.highpass_filter(filtered, cutoff=highpass_cutoff, order=order) diff --git a/audiobot/core/codec.py b/audiobot/core/codec.py new file mode 100644 index 0000000..e107d65 --- /dev/null +++ b/audiobot/core/codec.py @@ -0,0 +1,67 @@ +from pydub import AudioSegment +import numpy as np + + +class AudioSegmentArrayCodec: + """ + This class provides functionality to convert between pydub AudioSegments and NumPy arrays. + + It allows for the following conversions:\n + 1. AudioSegments to NumPy arrays. + 2. NumPy arrays to AudioSegments. + """ + + def __init__(self): + """ + Initializes the AudioSegmentArrayCodec object. + Currently, this constructor does not perform any specific operations. + """ + self = self # Note: This line has no effect and can be removed. + + def numpy_to_audiosegment(self, samples, sample_rate, sample_width, channels): + """ + Converts a NumPy array to a pydub AudioSegment. + + Args: + samples (numpy.ndarray): The NumPy array representing the audio samples. + sample_rate (int): The sample rate of the audio in Hz. + sample_width (int): The sample width in bytes (e.g., 2 for 16-bit audio). + channels (int): The number of audio channels (1 for mono, 2 for stereo). + + Returns: + pydub.AudioSegment: An AudioSegment object created from the NumPy array. 
+ """ + # Flatten the array if it has 2 channels (stereo) + if len(samples.shape) == 2 and channels == 2: + samples = samples.flatten() + + # Convert the NumPy array to raw audio data + raw_data = samples.tobytes() + + # Create a new AudioSegment using the raw audio data + return AudioSegment( + data=raw_data, + sample_width=sample_width, + frame_rate=sample_rate, + channels=channels, + ) + + def audiosegment_to_numpy(self, audio_segment): + """ + Converts a pydub AudioSegment to a NumPy array. + + Args: + audio_segment (pydub.AudioSegment): The AudioSegment object to convert. + + Returns: + tuple: A tuple containing: + - numpy.ndarray: The NumPy array representing the audio samples. + - int: The sample rate of the audio in Hz. + """ + samples = np.array(audio_segment.get_array_of_samples()) + + # If stereo, reshape to (n_samples, 2) + if audio_segment.channels == 2: + samples = samples.reshape((-1, 2)) + + return samples, audio_segment.frame_rate diff --git a/audiobot/core/effects.py b/audiobot/core/effects.py new file mode 100644 index 0000000..36e14da --- /dev/null +++ b/audiobot/core/effects.py @@ -0,0 +1,115 @@ +from pydub import effects +from .codec import AudioSegmentArrayCodec +from .audio.core import AudioModulator +from ..utils.logging_utils import colored_logger +from pydub import AudioSegment + +# logger = colored_logger() + + +class VoiceEffectProcessor: + def __init__(self, audio_segment, effect: str, verbosity: bool = False): + self.effect = effect.lower() + self.audio_segment = audio_segment + self.verbosity = verbosity + self.handler = AudioSegmentArrayCodec() + self.logger = colored_logger() + + def _apply_chipmunk(self): + return AudioModulator().pitch_shift( + effects.speedup(self.audio_segment, 1.01), n_steps=9 + ) + + def _apply_high(self): + return AudioModulator().pitch_shift(self.audio_segment, n_steps=4) + + def _apply_lowpass(self): + return AudioModulator().lowpass(self.audio_segment) + + def _apply_highpass(self): + return 
AudioModulator().highpass(self.audio_segment) + + def _apply_robotic(self): + return AudioModulator().pitch_shift( + effects.speedup(self.audio_segment, 1.01), n_steps=-10 + ) + + def _apply_demonic(self): + return ( + AudioModulator() + .pitch_shift(effects.speedup(self.audio_segment, 1.01), n_steps=-10) + .overlay( + AudioSegment.silent(duration=700) + self.audio_segment.fade_out(500) + ) + ) + + def _apply_hacker(self): + return AudioModulator().hacker(self.audio_segment) + + def _apply_distortion(self): + samples, sample_rate = self.handler.audiosegment_to_numpy(self.audio_segment) + distorted_samples = AudioModulator().distort(samples) + return self.handler.numpy_to_audiosegment( + distorted_samples, + sample_rate, + self.audio_segment.sample_width, + self.audio_segment.channels, + ) + + def _apply_deep(self): + return AudioModulator().pitch_shift(self.audio_segment, n_steps=-4) + + def _apply_echo(self): + delay = AudioSegment.silent(duration=1000) + return self.audio_segment.overlay(delay + self.audio_segment) + + def _apply_whisper(self): + return AudioModulator().whisper(self.audio_segment) + + def _apply_reverb(self): + samples, sample_rate = self.handler.audiosegment_to_numpy(self.audio_segment) + reverbed_samples = AudioModulator().reverb(samples) + return self.handler.numpy_to_audiosegment( + reverbed_samples, + sample_rate, + self.audio_segment.sample_width, + self.audio_segment.channels, + ) + + def denoise(self): + from .modulator import AudioDenoiser + + sample, sample_rate = self.handler.audiosegment_to_numpy(self.audio_segment) + denoised_sample = AudioDenoiser().denoise(sample) + audio_segment = self.handler.numpy_to_audiosegment( + denoised_sample, + sample_rate, + self.audio_segment.sample_width, + self.audio_segment.channels, + ) + return audio_segment + + def _get_effects(self): + return { + "chipmunk": self._apply_chipmunk, + "high": self._apply_high, + "lowpass": self._apply_lowpass, + "robotic": self._apply_robotic, + "demonic": 
self._apply_demonic, + "hacker": self._apply_hacker, + "distortion": self._apply_distortion, + "deep": self._apply_deep, + "echo": self._apply_echo, + "whisper": self._apply_whisper, + "reverb": self._apply_reverb, + "denoise": self.denoise, + "highpass": self._apply_highpass, + } + + def apply_effect(self): + effect_handler = self._get_effects().get(self.effect) + if effect_handler: + return effect_handler() + elif self.verbosity: + self.logger.critical(f"Unknown voice effect: {self.effect}") + return self.audio_segment # Return unmodified audio if effect is unknown diff --git a/audiobot/core/processor.py b/audiobot/core/processor.py new file mode 100644 index 0000000..88a50ed --- /dev/null +++ b/audiobot/core/processor.py @@ -0,0 +1,154 @@ +import os +from .audio.core import AudioModulator +from moviepy import AudioFileClip, VideoFileClip +from ..utils.logging_utils import colored_logger +from pydub import AudioSegment +from ..utils.visualizer import audiowave_visualizer +from ..utils.metadata_utils import get_audio_bitrate +from .effects import VoiceEffectProcessor +from filewarp.utils.colors import fg, rs +import sys +# import io + +RESET = rs + +Clogger = colored_logger() + + +class VideoProcessor: + def __init__(self): + pass + + def process_video_file( + self, + input_file, + effect, + output_dir, + verbosity: bool = False, + visualize: bool = False, + ): + """ + Process video file by applying audio effects and retaining original bitrate. 
+ """ + + Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}") + Clogger.info(f"Processing video file: {input_file}") + + try: + # Get the original video bitrate + original_bitrate = get_audio_bitrate(input_file, verbosity) + if verbosity and original_bitrate: + Clogger.info( + f"Original video bitrate: {fg.YELLOW}{original_bitrate}{RESET}" + ) + + # Capture stdout and stderr + old_stdout = sys.stdout + old_stderr = sys.stderr + # sys.stdout = captured_stdout = io.StringIO() + # sys.stderr = captured_stderr = io.StringIO() + + # Load the video + try: + video = VideoFileClip(input_file) + finally: + sys.stdout = old_stdout # Restore stdout + sys.stderr = old_stderr # Restore stder + audio_file = "temp_audio.wav" + + # Extract audio and save it to a file + if verbosity: + Clogger.info("Extract audio and write it to file") + video.audio.write_audiofile(audio_file) + audio_segment = AudioSegment.from_file(audio_file) + + # Apply the selected voice effect + Clogger.info( + f"Applying the [{fg.BBWHITE}{effect}{RESET}{fg.GREEN}] effect" + ) + modified_audio = VoiceEffectProcessor(audio_segment, effect).apply_effect() + + # Normalize the modified audio + modified_audio = AudioModulator().normalize(modified_audio) + + # Export the modified audio to a WAV file + if verbosity: + Clogger.info("Export the modified audio to a WAV file") + modified_audio.export("modified_audio.wav", format="wav") + + # Load the modified audio file back into an AudioFileClip + new_audio = AudioFileClip("modified_audio.wav") + + # Set the video to use the modified audio + if verbosity: + Clogger.info("Set the video audio to the new modified audio") + final_video = video.with_audio(new_audio) + + # Define the output file path + output_file = os.path.join( + output_dir, f"{effect}_{os.path.basename(input_file)}" + ) + + # Use the original bitrate or default to 5000k if unavailable + if verbosity: + Clogger.info( + f"Set:\n\tCodec = [{fg.MAGENTA}libx264{fg.GREEN}\n" + f"\tCodec type = 
[{fg.MAGENTA}aac{fg.GREEN}\n" + f"\tBitrate = [{fg.MAGENTA}{original_bitrate or '5000k'}{RESET}]" + ) + + final_video.write_videofile( + output_file, + codec="libx264", + audio_codec="aac", + bitrate=original_bitrate or "5000k", + ) + + Clogger.info(f"Modified video saved as: {output_file}") + Clogger.debug(f"Final bitrate = {get_audio_bitrate(output_file)}") + # Optional: visualize the before and after audio + if visualize: + audiowave_visualizer(audio_file, "modified_audio.wav") + + # Clean up temporary files + if os.path.exists(audio_file): + os.remove(audio_file) + os.remove("modified_audio.wav") + + except KeyboardInterrupt: + Clogger.info("Quit") + sys.exit(1) + except Exception as e: + Clogger.error(f"Error processing video file {input_file}: {e}") + # raise + + +class AudioProcessor: + def __init__(self): + pass + + def process_audio_file( + self, input_file, effect, output_dir, verbosity, visualize=False + ): + Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}") + + Clogger.info(f"Processing audio file: {fg.MAGENTA}{input_file}{RESET}") + + try: + audio_segment = AudioSegment.from_file(input_file) + if verbosity: + print(f"- INFO - Audio channels: {audio_segment.channels}") + print(f"- INFO - Audio sample width: {audio_segment.sample_width}") + modified_audio = VoiceEffectProcessor(audio_segment, effect).apply_effect() + modified_audio = AudioModulator().normalize(modified_audio) + output_file = os.path.join( + output_dir, f"{effect}_{os.path.basename(input_file)}" + ) + modified_audio.export(output_file, format="wav") + Clogger.info(f"Modified audio saved as: {output_file}") + + if visualize: + audiowave_visualizer(input_file, output_file) + + except Exception as e: + Clogger.error(f"Error processing audio file {input_file}: {e}") diff --git a/audiobot/utils/logging_utils.py b/audiobot/utils/logging_utils.py new file mode 100644 index 0000000..09914a5 --- /dev/null +++ b/audiobot/utils/logging_utils.py @@ -0,0 +1,40 @@ +import logging +from 
filewarp.utils.colors import fg, rs + +RESET = rs + + +class LoggingFormatter(logging.Formatter): + COLORS = { + logging.DEBUG: fg.BBLUE, + logging.INFO: fg.GREEN, + logging.WARNING: fg.YELLOW, + logging.ERROR: fg.RED, + logging.CRITICAL: fg.MAGENTA, + } + + def format(self, record): + log_color = self.COLORS.get(record.levelno, fg.WHITE) + log_message = super().format(record) + return f"{log_color}{log_message}{RESET}" + + +def colored_logger(logger_name="colored_logger") -> logging.Logger: + """ + Sets up a colored logger with a single handler. + + Returns: + logging.Logger: The configured logger. + """ + logger = logging.getLogger(logger_name) + + if not logger.handlers: # Check if handlers already exist + handler = logging.StreamHandler() + handler.setFormatter(LoggingFormatter("- %(levelname)s - %(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + # Prevent log messages from propagating to the root logger. + logger.propagate = False + + return logger diff --git a/audiobot/utils/metadata_utils.py b/audiobot/utils/metadata_utils.py new file mode 100644 index 0000000..1763fec --- /dev/null +++ b/audiobot/utils/metadata_utils.py @@ -0,0 +1,55 @@ +import speech_recognition as sr +import ffmpeg +from .logging_utils import colored_logger +from filewarp.utils.colors import fg, rs + +RESET = rs + +Clogger = colored_logger() + + +def get_audio_bitrate(input_file, verbosity=False): + """ + Probes a media file using ffmpeg and returns its metadata. + + Args: + input_file (str): The path to the media file. + + Returns: + int: bitrate + + Raises: + ffmpeg.Error: If ffmpeg returns a non-zero exit code. + FileNotFoundError: If the input file does not exist. + Exception: For other errors during probing. + """ + if verbosity: + Clogger.info( + f"Fetch the original bitrate of the video file using {fg.YELLOW}ffmpeg{RESET}." 
+ ) + try: + metadata = ffmpeg.probe(input_file) + bitrate = None + # Iterate over the streams and find the video stream + for stream in metadata["streams"]: + if stream["codec_type"] == "video": + bitrate = stream.get("bit_rate", None) + break + return bitrate + except ffmpeg.Error or Exception as e: + Clogger.error(f"Error fetching bitrate for {input_file}: {e}") + return None + + +def transcribe_audio(input_file): + Clogger.info(f"Transcribing audio: {input_file}") + try: + recognizer = sr.Recognizer() + with sr.AudioFile(input_file) as source: + audio = recognizer.record(source) + transcription = recognizer.recognize_google(audio) + Clogger.info(f"Transcription: {transcription}") + return transcription + except Exception as e: + Clogger.error(f"Error transcribing audio file {input_file}: {e}") + return None diff --git a/audiobot/utils/visualizer.py b/audiobot/utils/visualizer.py new file mode 100644 index 0000000..d4a5baa --- /dev/null +++ b/audiobot/utils/visualizer.py @@ -0,0 +1,25 @@ +import matplotlib.pyplot as plt +import soundfile as sf +from .logging_utils import colored_logger + + +Clogger = colored_logger() + + +def audiowave_visualizer(original_file, modified_file): + Clogger.info(f"Visualizing audio: {original_file} and {modified_file}") + try: + original_data, original_sr = sf.read(original_file) + modified_data, modified_sr = sf.read(modified_file) + + plt.figure(figsize=(14, 5)) + plt.subplot(2, 1, 1) + plt.plot(original_data) + plt.title("Original Audio Waveform") + plt.subplot(2, 1, 2) + plt.plot(modified_data) + plt.title("Modified Audio Waveform") + plt.show() + + except Exception as e: + Clogger.error(f"Error visualizing audio: {e}") diff --git a/audiobot/version.txt b/audiobot/version.txt new file mode 100644 index 0000000..0c62199 --- /dev/null +++ b/audiobot/version.txt @@ -0,0 +1 @@ +0.2.1 diff --git a/build/lib/filemac/AudioExtractor.py b/build/lib/filemac/AudioExtractor.py deleted file mode 100644 index 65172b1..0000000 --- 
a/build/lib/filemac/AudioExtractor.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import sys -from moviepy.editor import VideoFileClip -import logging -import logging.handlers -############################################################################### -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class ExtractAudio: - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - ls = ["mp4", "mkv"] - if os.path.isfile(file_path) and any(file_path.lower().endswith(ext) for ext in ls): - files_to_process.append(file_path) - - return files_to_process - except Exception as e: - print(e) - - def moviepyextract(self): - try: - video_list = self.preprocess() - for input_video in video_list: - print("\033[1;33mExtracting..\033[1;36m") - video = VideoFileClip(input_video) - audio = video.audio - basename, _ = os.path.splitext(input_video) - outfile = basename + ".wav" - audio.write_audiofile(outfile) - # print(f"\033[1;32mFile saved as \033[36m{outfile}\033[0m") - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - print(e) - - -if __name__ == "__main__": - vi = ExtractAudio( - "/home/skye/Music/Melody in My Mind.mp4") - vi.moviepyextract() diff --git a/build/lib/filemac/OCRTextExtractor.py b/build/lib/filemac/OCRTextExtractor.py deleted file mode 100644 index 392ff6d..0000000 --- a/build/lib/filemac/OCRTextExtractor.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import sys -import cv2 -import pytesseract -from PIL import Image -import logging -import logging.handlers 
-############################################################################### -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) -############################################################################### -'''Do OCR text extraction from a given image file and display the extracted - text - to the screen finally save it to a text file assuming the name of the input - file''' - -############################################################################### - - -class ExtractText: - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def OCR(self): - image_list = self.preprocess() - ls = ['png', 'jpg'] - image_list = [ - item for item in image_list if any(item.lower().endswith(ext) - for ext in ls)] - - def ocr_text_extraction(image_path): - '''Load image using OpenCV''' - img = cv2.imread(image_path) - - logger.info(f"\033[2;95mprocessing {image_path}...\033[0m") - - try: - '''Preprocess image for better OCR results''' - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - thresh = cv2.threshold( - gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] - img_pil = Image.fromarray(thresh) - - '''Perform OCR using pytesseract''' - config = ("-l eng --oem 3 --psm 6") - text = pytesseract.image_to_string((img_pil), config=config) - - '''Remove extra whitespaces and newlines - text = ' '.join(text.split()).strip()''' - logger.info("\033[36mFound:\n\033[0m") - print(text) - current_path = os.getcwd() - file_path = os.path.join(current_path, 
OCR_file) - ''' Save the extracted text to specified file ''' - logger.info("\033[1;92mGenerating text file for the extracted \ -text..\033[0m") - - with open(file_path, 'w') as file: - file.write(text) - logger.info( - f"File saved as \033[1;93m{OCR_file}\033[0m:") - '''If there are multiple candidate images for text extraction, - wait for key press before proceeding to the next - image otherwise don't wait - size = [i for i in enumerate(image_list)]''' - if len(image_list) >= 2: - input("\033[5;97mPress Enter to continue\033[0m") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(0) - except FileNotFoundError as e: - logger.error(f"Error: {str(e)}") - except IOError as e: - logger.error( - f"Could not write to output file '{OCR_file}'. \ -Reason: {str(e)}\033[0m") - except Exception as e: - logger.error(f"Error: {type(e).__name__}: {str(e)}") - except Exception as e: - logger.error(f"Error:>>\033[31m{e}\033[0m") - return text - - for image_path in image_list: - OCR_file = image_path[:-4] + ".txt" - ocr_text_extraction(image_path) diff --git a/build/lib/filemac/Simple_v_Analyzer.py b/build/lib/filemac/Simple_v_Analyzer.py deleted file mode 100644 index 3b492bc..0000000 --- a/build/lib/filemac/Simple_v_Analyzer.py +++ /dev/null @@ -1,60 +0,0 @@ -import sys -import cv2 -import numpy as np - - -class SA: - - def __init__(self, video): - self.video = video - - def SimpleAnalyzer(self): - try: - # Read the video file - cap = cv2.VideoCapture(self.video) - print("\033[1;33mInitializing..\033[0m") - # Initialize variables - frame_count = 0 - total_area = 0 - duration = 0 - - print("\033[1;36mWorking on it") - while True: - ret, frame = cap.read() - - if not ret: - break - # Increase frame count and accumulate area - frame_count += 1 - total_area += np.prod(frame.shape[:2]) - - # Calculate current frame duration - fps = cap.get(cv2.CAP_PROP_FPS) - duration += 1 / fps - - # Display the resulting frame - cv2.imshow('Frame', frame) - - # Break the loop after 
pressing 'q' - if cv2.waitKey(1) == ord('q'): - break - - # Release the video capture object and close all windows - cap.release() - cv2.destroyAllWindows() - - # Print results - print(f"Total Frames: \033[1;32m{frame_count}\033[0m") - print(f"Average Frame Area: \033[1;32m{total_area / frame_count}\033[0m") - print(f"Duration: \033[1;32m{duration}\033[0m seconds") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print(e) - sys.exit(1) - - -if __name__ == "__main__": - vi = SA("/home/skye/Music/Melody in My Mind.mp4") - vi.SimpleAnalyzer() diff --git a/build/lib/filemac/__init__.py b/build/lib/filemac/__init__.py deleted file mode 100644 index e32c40a..0000000 --- a/build/lib/filemac/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .fmac import main diff --git a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc deleted file mode 100644 index 36b350c..0000000 Binary files a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc and /dev/null differ diff --git a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc deleted file mode 100644 index 2e0efeb..0000000 Binary files a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc and /dev/null differ diff --git a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc b/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc deleted file mode 100644 index a29f114..0000000 Binary files a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc and /dev/null differ diff --git a/build/lib/filemac/__pycache__/converter.cpython-311.pyc b/build/lib/filemac/__pycache__/converter.cpython-311.pyc deleted file mode 100644 index cbc7e1f..0000000 Binary files a/build/lib/filemac/__pycache__/converter.cpython-311.pyc and /dev/null differ diff --git a/build/lib/filemac/__pycache__/formats.cpython-311.pyc 
b/build/lib/filemac/__pycache__/formats.cpython-311.pyc deleted file mode 100644 index d2b6f26..0000000 Binary files a/build/lib/filemac/__pycache__/formats.cpython-311.pyc and /dev/null differ diff --git a/build/lib/filemac/colors.py b/build/lib/filemac/colors.py deleted file mode 100644 index 7e03e49..0000000 --- a/build/lib/filemac/colors.py +++ /dev/null @@ -1,40 +0,0 @@ -import os - -from colorama import Fore, Style, init - -init(autoreset=True) - -if os.name == "posix": - RESET = '\033[0m' - RED = '\033[91m' - DRED = '\033[1;91m' - GREEN = '\033[92m' - DGREEN = '\033[1;92m' - YELLOW = '\033[93m' - DYELLOW = '\033[1;93m' - BLUE = '\033[94m' - DBLUE = '\033[1;94m' - MAGENTA = '\033[95m' - DMAGENTA = '\033[1;95m' - CYAN = '\033[96m' - DCYAN = '\033[1;96m' - ICYAN = '\033[3;96m' - -elif os.name == "nt": - RESET = Style.RESET_ALL - RED = Fore.LIGHTRED_EX - DRED = Fore.RED - GREEN = Fore.LIGHTGREEN_EX - DGREEN = Fore.GREEN - YELLOW = Fore.LIGHTYELLOW_EX - DYELLOW = Fore.YELLOW - BLUE = Fore.LIGHTBLUE_EX - DBLUE = Fore.BLUE - MAGENTA = Fore.LIGHTMAGENTA_EX - DMAGENTA = Fore.MAGENTA - CYAN = Fore.LIGHTCYAN_EX - DCYAN = Fore.CYAN - ICYAN = Fore.WHITE - -#return RESET, RED, DRED, GREEN, DGREEN, YELLOW, DYELLOW, BLUE, DBLUE, -#MAGENTA, DMAGENTA, CYAN, DCYAN diff --git a/build/lib/filemac/converter.py b/build/lib/filemac/converter.py deleted file mode 100644 index a46a46f..0000000 --- a/build/lib/filemac/converter.py +++ /dev/null @@ -1,1027 +0,0 @@ -############################################################################# -import logging -import logging.handlers -# import math -import os -import re -import sqlite3 -import subprocess -import sys -import time -import traceback -# import pdfminer.high_level -# from typing import Iterable -from pdf2image import convert_from_path -import cv2 -import pandas as pd -import pydub -import PyPDF2 -# import pytesseract -import requests -import speedtest -from docx import Document -# from pydub.playback import play -from gtts 
import gTTS -# from PyPDF2 import PdfFileReader -from moviepy.editor import VideoFileClip -from pdf2docx import parse -from PIL import Image -from pptx import Presentation -from pydub import AudioSegment -from .colors import (RESET, GREEN, DGREEN, YELLOW, DYELLOW, CYAN, BLUE, DBLUE, - MAGENTA, DMAGENTA, RED, DRED, ICYAN) -from reportlab.lib.pagesizes import letter -from reportlab.platypus import Paragraph, SimpleDocTemplate - -from .formats import (SUPPORTED_AUDIO_FORMATS, SUPPORTED_IMAGE_FORMATS, - SUPPORTED_VIDEO_FORMATS) - -# import pygame -# from aspose.words import Document as aspose_document -# from aspose.slides import Presentation as aspose_presentation -# from show_progress import progress_show -# from PIL import ImageDraw, ImageFont -############################################################################### - -PYGAME_DETECT_AVX2 = 1 -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class MakeConversion: - - '''Initialize the class''' - - def __init__(self, input_file): - self.input_file = input_file - - '''Check input object whether it's a file or a directory if a file append - the file to a set and return it otherwise append directory full path - content to the set and return the set file. 
The returned set will be - evaluated in the next step as required on the basis of requested operation - For every requested operation, the output file if any is automatically - generated on the basis of the input filename and saved in the sam - directory as the input file - ''' - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - except Exception as e: - print(e) - -############################################################################### -# Convert word file to pdf document (docx) -############################################################################### - def word_to_pdf(self): - word_list = self.preprocess() - ls = ["doc", "docx"] - word_list = [ - item for item in word_list if any(item.lower().endswith(ext) for ext in ls)] - for word_file in word_list: - if word_file.lower().endswith("doc"): - pdf_file = word_file[:-3] + "pdf" - elif word_file.lower().endswith("docx"): - pdf_file = word_file[:-4] + "pdf" - - try: - print( - f'{BLUE}Converting: {RESET}{word_file} {BLUE}to {RESET}{pdf_file}') - if os.name == 'posix': # Check if running on Linux - # Use subprocess to run the dpkg and grep commands - result = subprocess.run( - ['dpkg', '-l', 'libreoffice'], stdout=subprocess.PIPE, text=True) - if result.returncode != 0: - print( - "Please install libreoffice to use this functionality !") - sys.exit(1) - subprocess.run(['soffice', '--convert-to', - 'pdf', word_file, pdf_file]) - # print(f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}") - elif os.name == "nt": - try: - from docx2pdf import convert - except ImportError: - 
print("Run pip install docx2pdf for this function to work") - sys.exit(1) - convert(word_file, pdf_file) - print( - f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}") - - except Exception as e: - print(f"Error converting {word_file} to {pdf_file}: {e}") - -############################################################################### -# Convert pdf file to word document (docx) -############################################################################### - def pdf_to_word(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - for pdf_file in pdf_list: - if pdf_file.lower().endswith("pdf"): - word_file = pdf_file[:-3] + "docx" - - try: - - parse(pdf_file, word_file, start=0, end=None) - - print(f'{GREEN}Converting to word..{RESET}', end='\r') - - logger.info(f"{DMAGENTA} Successfully converted{pdf_file} \ -to {word_file}{RESET}") - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - logger.info(f'{DRED}All conversion attempts have failed: \ -{e}{RESET}') - -############################################################################### -# Convert text file(s) to pdf document (docx) -############################################################################### - def txt_to_pdf(input_file, output_file): - """Convert a .txt file to a PDF.""" - - # Read the contents of the input .txt file - with open(input_file, 'r', encoding='utf-8') as file: - text_contents = file.readlines() - - # Initialize the PDF document - doc = SimpleDocTemplate(output_file, pagesize=letter) - - # Create a story to hold the elements of the PDF - story = [] - - # Iterate through each line in the input .txt file and add it to the PDF - for line in text_contents: - story.append(Paragraph(line.strip(), style="normalText")) - - # Build and write the PDF document - doc.build(story) - -############################################################################### -# Convert word 
file(s) to pptx document (pptx/ppt) -############################################################################### - def word_to_pptx(self): - word_list = self.preprocess() - word_list = [item for item in word_list if item.lower().endswith( - "docx") or item.lower().endswith("doc")] - - for word_file in word_list: - - if word_list is None: - print("Please provide appropriate file type") - sys.exit(1) - if word_file.lower().endswith("docx"): - pptx_file = word_file[:-4] + "pptx" - elif word_file.lower().endswith("doc"): - pptx_file = word_file[:-3] + "pptx" - try: - # Load the Word document - print(F"{DYELLOW}Load the Word document..{RESET}") - doc = Document(word_file) - - # Create a new PowerPoint presentation - print(F"{DYELLOW}Create a new PowerPoint presentation..{RESET}") - prs = Presentation() - - # Iterate through each paragraph in the Word document - print( - f"{DGREEN}Populating pptx slides with {DYELLOW}{len(doc.paragraphs)}{DGREEN} entries..{RESET}") - count = 0 - for paragraph in doc.paragraphs: - count += 1 - perc = (count/len(doc.paragraphs))*100 - print( - f"{DMAGENTA}Progress:: \033[1;36m{perc:.2f}%{RESET}", end="\r") - # Create a new slide in the PowerPoint presentation - slide = prs.slides.add_slide(prs.slide_layouts[1]) - - # Add the paragraph text to the slide - slide.shapes.title.text = paragraph.text - - # Save the PowerPoint presentation - prs.save(pptx_file) - print(f"\n{DGREEN}Done{RESET}") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - logger.error(e) - -############################################################################### -# Convert word file to txt file''' -############################################################################### - - def word_to_txt(self): - word_list = self.preprocess() - word_list = [item for item in word_list if item.lower().endswith( - "docx") or item.lower().endswith("doc")] - for file_path 
in word_list: - if file_path.lower().endswith("docx"): - txt_file = file_path[:-4] + "txt" - elif file_path.lower().endswith("doc"): - txt_file = file_path[:-3] + "txt" - try: - doc = Document(file_path) - print("INFO Processing...") - - with open(txt_file, 'w', encoding='utf-8') as f: - Par = 0 - for paragraph in doc.paragraphs: - f.write(paragraph.text + '\n') - Par += 1 - - print(f"Par:{BLUE}{Par}/{len(doc.paragraphs)}{RESET}", end='\r') - logger.info(f"{DMAGENTA}Conversion of file to txt success{RESET}") - - except KeyboardInterrupt: - print("\nExit") - sys.exit() - except Exception as e: - logger.error( - f"Dear user something went amiss while attempting the conversion:\n {e}") - with open("conversion.log", "a") as log_file: - log_file.write(f"Couldn't convert {file_path} to {txt_file}:\ -REASON->{e}") - -############################################################################### -# Convert pdf file to text file -############################################################################### - def pdf_to_txt(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - for file_path in pdf_list: - txt_file = file_path[:-3] + "txt" - try: - with open(file_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - text = '' - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - with open(txt_file, 'w', encoding='utf-8') as f: - f.write(text) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ -{txt_file}{RESET}") - except Exception as e: - logger.error( - f"Oops somethin went astray while converting {file_path} \ -to {txt_file}: {e}") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Error converting {file_path} to {txt_file}: {e}\n") - -############################################################################### -# Convert ppt file to word document 
-############################################################################### - def ppt_to_word(self): - ppt_list = self.preprocess() - ppt_list = [item for item in ppt_list if item.lower().endswith( - "pptx") or item.lower().endswith("ppt")] - for file_path in ppt_list: - if file_path.lower().endswith("pptx"): - word_file = file_path[:-4] + "docx" - elif file_path.lower().endswith("ppt"): - word_file = file_path[:-3] + "docx" - try: - presentation = Presentation(file_path) - document = Document() - - for slide in presentation.slides: - for shape in slide.shapes: - if shape.has_text_frame: - text_frame = shape.text_frame - for paragraph in text_frame.paragraphs: - new_paragraph = document.add_paragraph() - for run in paragraph.runs: - new_run = new_paragraph.add_run(run.text) - # Preserve bold formatting - new_run.bold = run.font.bold - # Preserve italic formatting - new_run.italic = run.font.italic - # Preserve underline formatting - new_run.underline = run.font.underline - # Preserve font name - new_run.font.name = run.font.name - # Preserve font size - new_run.font.size = run.font.size - try: - # Preserve font color - new_run.font.color.rgb = run.font.color.rgb - except AttributeError: - # Ignore error and continue without - # setting the font color - pass - # Add a new paragraph after each slide - document.add_paragraph() - document.save(word_file) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ - {word_file}{RESET}") - except Exception as e: - logger.error( - f"Oops somethin gwent awry while attempting to convert \ - {file_path} to {word_file}:\n>>>{e}") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Oops something went astray while attempting \ - convert {file_path} to {word_file}:{e}\n") - -############################################################################### -# Convert text file to word -############################################################################### - def text_to_word(self): - flist = 
self.preprocess() - flist = [item for item in flist if item.lower().endswith("txt")] - for file_path in flist: - if file_path.lower().endswith("txt"): - word_file = file_path[:-3] + "docx" - - try: - # Read the text file - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: - text_content = file.read() - - # Filter out non-XML characters - filtered_content = re.sub( - r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]+', '', text_content) - - # Create a new Word document - doc = Document() - # Add the filtered text content to the document - doc.add_paragraph(filtered_content) - - # Save the document as a Word file - doc.save(word_file) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ - {word_file}{RESET}") - except FileExistsError as e: - logger.error(f"{str(e)}") - except Exception as e: - logger.error( - f"Oops Unable to perfom requested conversion: {e}\n") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Error converting {file_path} to {word_file}: \ -{e}\n") - -############################################################################### -# Convert xlsx file(s) to word file(s) -############################################################################### - def convert_xls_to_word(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [item for item in xls_list if any( - item.lower().endswith(ext) for ext in ls)] - print(F"{DGREEN}Initializing conversion sequence{RESET}") - for xls_file in xls_list: - if xls_file.lower().endswith("xlsx"): - word_file = xls_file[:-4] + "docx" - elif xls_file.lower().endswith("xls"): - word_file = xls_file[:-3] + "docx" - try: - '''Read the XLS file using pandas''' - - df = pd.read_excel(xls_file) - - '''Create a new Word document''' - doc = Document() - - '''Iterate over the rows of the dataframe and add them to the - Word document''' - logger.info(f"{ICYAN}Converting {xls_file}..{RESET}") - # time.sleep(2) - total_rows = df.shape[0] - for _, row in 
df.iterrows(): - current_row = _ + 1 - percentage = (current_row / total_rows)*100 - for value in row: - doc.add_paragraph(str(value)) - print(f"Row {DYELLOW}{current_row}/{total_rows} \ -{DBLUE}{percentage:.1f}%{RESET}", end="\r") - # print(f"\033[1;36m{row}{RESET}") - - # Save the Word document - doc.save(word_file) - print(F"{DGREEN}Conversion successful!{RESET}", end="\n") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print("Oops Conversion failed:", str(e)) - -############################################################################### - '''Convert xlsx/xls file/files to text file format''' -############################################################################### - - def convert_xls_to_text(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [ - item for item in xls_list if any(item.lower().endswith(ext) - for ext in ls)] - print(F"{DGREEN}Initializing conversion sequence{RESET}") - for xls_file in xls_list: - if xls_file .lower().endswith("xlsx"): - txt_file = xls_file[:-4] + "txt" - elif xls_file .lower().endswith("xls"): - txt_file = xls_file[:-3] + "txt" - try: - # Read the XLS file using pandas - logger.info(f"Converting {xls_file}..") - df = pd.read_excel(xls_file) - - # Convert the dataframe to plain text - text = df.to_string(index=False) - chars = len(text) - words = len(text.split()) - lines = len(text.splitlines()) - - print( - f"Preparing to write: {DYELLOW}{chars} \033[1;30m \ -characters{DYELLOW} {words}\033[1;30m words {DYELLOW}{lines}\033[1;30m \ -lines {RESET}", end="\n") - # Write the plain text to the output file - with open(txt_file, 'w') as file: - file.write(text) - - print(F"{DGREEN}Conversion successful!{RESET}", end="\n") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print("Oops Conversion failed:", str(e)) - -############################################################################### - '''Convert xlsx/xls file to 
csv(comma seperated values) format''' -############################################################################### - - def convert_xlsx_to_csv(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [ - item for item in xls_list if any(item.lower().endswith(ext) - for ext in ls)] - for xls_file in xls_list: - if xls_file.lower().endswith("xlsx"): - csv_file = xls_file[:-4] + "csv" - elif xls_file.lower().endswith("xls"): - csv_file = xls_file[:-3] + "csv" - try: - '''Load the Excel file''' - print(F"{DGREEN}Initializing conversion sequence{RESET}") - df = pd.read_excel(xls_file) - logger.info(f"Converting {xls_file}..") - total_rows = df.shape[0] - print(f"Writing {DYELLOW}{total_rows} rows {RESET}", end="\n") - for i in range(101): - print(f"Progress: {i}%", end="\r") - '''Save the DataFrame to CSV''' - df.to_csv(csv_file, index=False) - print(F"{DMAGENTA} Conversion successful{RESET}") - except KeyboardInterrupt: - print("Exiting") - sys.exit(1) - except Exception as e: - print(e) - -############################################################################### -# Convert xlsx file(s) to sqlite -############################################################################### - - def convert_xlsx_to_database(self): - xlsx_list = self.preprocess() - ls = ["xlsx", "xls"] - xlsx_list = [ - item for item in xlsx_list if any(item.lower().endswith(ext) - for ext in ls)] - for xlsx_file in xlsx_list: - if xlsx_file.lower().endswith("xlsx"): - sqlfile = xlsx_file[:-4] - elif xlsx_file.lower().endswith("xls"): - sqlfile = xlsx_file[:-3] - try: - db_file = input( - F"{DBLUE}Please enter desired sql filename: {RESET}") - table_name = input( - "Please enter desired table name: ") - # res = ["db_file", "table_name"] - if any(db_file) == "": - db_file = sqlfile + "sql" - table_name = sqlfile - if not db_file.endswith(".sql"): - db_file = db_file + ".sql" - column = 0 - for i in range(20): - column += 0 - # Read the Excel file into a pandas DataFrame - 
print(f"Reading {xlsx_file}...") - df = pd.read_excel(xlsx_file) - print(f"{DGREEN}Initializing conversion sequence{RESET}") - print(f"{DGREEN} Connected to sqlite3 database::{RESET}") - # Create a connection to the SQLite database - conn = sqlite3.connect(db_file) - print(F"{DYELLOW} Creating database table::{RESET}") - # Insert the DataFrame into a new table in the database - df.to_sql(table_name, column, conn, - if_exists='replace', index=False) - print( - f"Operation successful{RESET} file saved as \033[32{db_file}{RESET}") - # Close the database connection - conn.close() - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - logger.error(f"{e}") - -############################################################################### -# Create image objects from given files -############################################################################### - def doc2image(self, outf="png"): - outf_list = ['png', 'jpg'] - if outf not in outf_list: - outf = "png" - path_list = self.preprocess() - ls = ["pdf", "doc", "docx"] - file_list = [ - item for item in path_list if any(item.lower().endswith(ext) - for ext in ls)] - imgs = [] - for file in file_list: - if file.lower().endswith("pdf"): - # Convert the PDF to a list of PIL image objects - print("Generate image objects ..") - images = convert_from_path(file) - - # Save each image to a file - fname = file[:-4] - print(f"{YELLOW}Target images{BLUE} {len(images)}{RESET}") - for i, image in enumerate(images): - print(f"{DBLUE}{i}{RESET}", end="\r") - yd = f"{fname}_{i+1}.{outf}" - image.save(yd) - imgs.append(yd) - print(f"{GREEN}Ok{RESET}") - - return imgs - - -class Scanner: - - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - for file in os.listdir(self.input_file): - file_path = 
os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def scanPDF(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - - for pdf in pdf_list: - out_f = pdf[:-3] + 'txt' - print(f"{YELLOW}Read pdf ..{RESET}") - - with open(pdf, 'rb') as f: - reader = PyPDF2.PdfReader(f) - text = '' - - pg = 0 - for page_num in range(len(reader.pages)): - pg += 1 - - print(f"{DYELLOW}Progress:{RESET}", end="") - print(f"{CYAN}{pg}/{len(reader.pages)}{RESET}", end="\r") - page = reader.pages[page_num] - text += page.extract_text() - - print(f"\n{text}") - print(F"\n{YELLOW}Write text to {GREEN}{out_f}{RESET}") - with open(out_f, 'w') as f: - f.write(text) - - print(F"{DGREEN}Ok{RESET}") - - def scanAsImgs(self): - file = self.input_file - mc = MakeConversion(file) - img_objs = mc.doc2image() - # print(img_objs) - from .OCRTextExtractor import ExtractText - text = '' - for i in img_objs: - extract = ExtractText(i) - tx = extract.OCR() - if tx is not None: - text += tx - print(text) - print(f"{GREEN}Ok{RESET}") - return text - - -class FileSynthesis: - - def __init__(self, input_file): - self.input_file = input_file - # self.CHUNK_SIZE = 20_000 - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - @staticmethod - def join_audios(files, output_file): - masterfile = output_file + "_master.mp3" - print( - f"{DBLUE}Create a master file {DMAGENTA}{masterfile}{RESET}", end='\r') - # Create a list to store files - ogg_files = [] - # loop through the directory while adding the ogg files to the list - print(files) - for filename in files: - 
print(f"Join {DBLUE}{len(files)}{RESET} files") - # if filename.endswith('.ogg'): - # ogg_file = os.path.join(path, filename) - ogg_files.append(AudioSegment.from_file(filename)) - - # Concatenate the ogg files - combined_ogg = ogg_files[0] - for i in range(1, len(files)): - combined_ogg += ogg_files[i] - - # Export the combined ogg to new mp3 file or ogg file - combined_ogg.export(output_file + "_master.ogg", format='ogg') - print(F"{DGREEN}Master file:Ok {RESET}") - - def Synthesise(self, text: str, output_file: str, CHUNK_SIZE: int = 20_000, ogg_folder: str = 'tempfile', retries: int = 5) -> None: - """Converts given text to speech using Google Text-to-Speech API.""" - out_ls = [] - try: - if not os.path.exists(ogg_folder): - os.mkdir(ogg_folder) - print(f"{DYELLOW}Get initial net speed..{RESET}") - st = speedtest.Speedtest() # get initial network speed - st.get_best_server() - download_speed: float = st.download() # Keep units as bytes - logger.info( - - f"{GREEN} Conversion to mp3 sequence initialized start\ -speed {CYAN}{download_speed/1_000_000:.2f}Kbps{RESET}") - - for attempt in range(retries): - try: - '''Split input text into smaller parts and generate - individual gTTS objects''' - counter = 0 - for i in range(0, len(text), CHUNK_SIZE): - chunk = text[i:i+CHUNK_SIZE] - output_filename = f"{output_file}_{counter}.ogg" - counter += 1 - # print(output_filename) - if os.path.exists(output_filename): - output_filename = f"{output_file}_{counter+1}.ogg" - # print(output_filename) - tts = gTTS(text=chunk, lang='en', slow=False) - tts.save(output_filename) - out_ls.append(output_filename) - break - # print(out_ls) - '''Handle any network related issue gracefully''' - except Exception in (ConnectionError, ConnectionAbortedError, - ConnectionRefusedError, - ConnectionResetError) as e: - logger.error(f"Sorry boss connection problem encountered: {e} in {attempt+1}/{retries}:") - time.sleep(5) # Wait 5 seconds before retrying - - # Handle connectivity/network error 
- except requests.exceptions.RequestException as e: - logger.error(f"{e}") - except Exception as e: - logger.error(f'{DRED} Error during conversion attempt \ -{attempt+1}/{retries}:{e}{RESET}') - tb = traceback.extract_tb(sys.exc_info()[2]) - logger.info("\n".join([f" > {line}" - for line in map(str, tb)])) - time.sleep(3) # Wait 5 seconds before retrying - pass - - if attempt >= retries: - logger.error( - f"Conversion unsuccessful after {retries} attempts.") - sys.exit(2) - - finally: - # print(out_ls) - # Combine generated gTTS objects - if len(out_ls) >= 1: - FileSynthesis.join_audios(out_ls, output_file) - - st = speedtest.Speedtest() - logger.info("Done") - print("Get final speed ...") - logger.info( - - f"{YELLOW}Final Network Speed: {st.download()/(10**6):.2f} Kbps{RESET}") - - @staticmethod - def pdf_to_text(pdf_path): - logger.info('''Processing the file...\n''') - logger.info( - F'{GREEN} Initializing pdf to text conversion sequence...{RESET}') - try: - with open(pdf_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - text = '' - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - print(F"{DGREEN}Ok{RESET}") - return text - except Exception as e: - logger.error( - f"{DRED}Failed to extract text from '{YELLOW}{pdf_path}'{RESET}:\n {e}") - - @staticmethod - def text_file(input_file): - try: - with open(input_file, 'r', errors='ignore') as file: - text = file.read().replace('\n', ' ') - return text - except FileNotFoundError: - logger.error("File '{}' was not found.".format(input_file)) - except Exception as e: - logger.error( - F"{DRED}Error converting {input_file} to text: {str(e)}\ -{RESET}") - - @staticmethod - def docx_to_text(docx_path): - try: - logger.info(f"{BLUE} Converting {docx_path} to text...{RESET}") - doc = Document(docx_path) - paragraphs = [paragraph.text for paragraph in doc.paragraphs] - return '\n'.join(paragraphs) - except FileNotFoundError: - logger.error(f"File 
'{docx_path}' was not found.") - except Exception as e: - logger.error( - F"{DRED}Error converting {docx_path} to text: {e}\ -{RESET}") - - '''Handle input files based on type to initialize conversion sequence''' - - def audiofy(self): - input_list = self.preprocess() - extdoc = ["docx", "doc"] - ls = {"pdf", "docx", "doc", "txt"} - input_list = [item for item in input_list if item.lower().endswith(tuple(ls))] - for input_file in input_list: - if input_file.endswith('.pdf'): - text = FileSynthesis.pdf_to_text(input_file) - output_file = input_file[:-4] - - elif input_file.lower().endswith(tuple(extdoc)): - - text = FileSynthesis.docx_to_text(input_file) - output_file = input_file[:-5] - - elif input_file.endswith('.txt'): - text = FileSynthesis.text_file(input_file) - output_file = input_file[:-4] - - else: - logger.error('Unsupported file format. Please provide \ -a PDF, txt, or Word document.') - sys.exit(1) - try: - FileSynthesis.Synthesise(None, text, output_file) - except KeyboardInterrupt: - sys.exit(1) - - -############################################################################### -# Convert video file to from one format to another''' -############################################################################### - - -class VideoConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def CONVERT_VIDEO(self): - try: - input_list = self.preprocess() - out_f = self.out_format.upper() - input_list = [item for item in input_list if any( - 
item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)] - print(F"{DYELLOW}Initializing conversion..{RESET}") - - for file in input_list: - if out_f.upper() in SUPPORTED_VIDEO_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + '.' + out_f.lower() - print(output_filename) - else: - print("Unsupported output format") - sys.exit(1) - format_codec = { - "MP4": "mpeg4", - "AVI": "rawvideo", - # "OGV": "avc", - "WEBM": "libvpx", - "MOV": "mpeg4", - "MKV": "MPEG4", - "FLV": "flv" - # "WMV": "WMV" - } - '''Load the video file''' - print(f"{DBLUE}oad file{RESET}") - video = VideoFileClip(file) - '''Export the video to a different format''' - print(f"{DMAGENTA}Converting file to {output_filename}{RESET}") - video.write_videofile( - output_filename, codec=format_codec[out_f]) - '''Close the video file''' - print(f"{DGREEN}Done{RESET}") - video.close() - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - print(e) - - -############################################################################### -# Convert Audio file to from one format to another''' -############################################################################### - - -class AudioConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def pydub_conv(self): - input_list = self.preprocess() - out_f = self.out_format - input_list = [item for item in input_list if any( - item.lower().endswith(ext) for ext in SUPPORTED_AUDIO_FORMATS)] - 
print(F"{DYELLOW}Initializing conversion..{RESET}") - for file in input_list: - if out_f.lower() in SUPPORTED_AUDIO_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + '.' + out_f - else: - print("Unsupported output format") - sys.exit(1) - fmt = ext[1:] - print(fmt, out_f) - audio = pydub.AudioSegment.from_file(file, fmt) - print(f"{DMAGENTA}Converting to {output_filename}{RESET}") - audio.export(output_filename, format=out_f) - # new_audio = pydub.AudioSegment.from_file('output_audio.') - print(f"{DGREEN}Done{RESET}") - # play(new_audio) - # new_audio.close() - - -############################################################################### -# Convert images file to from one format to another -############################################################################### - - -class ImageConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - except FileNotFoundError: - print("File not found") - sys.exit(1) - - def convert_image(self): - try: - input_list = self.preprocess() - out_f = self.out_format.upper() - - input_list = [item for item in input_list if any( - item.lower().endswith(ext) for ext in SUPPORTED_IMAGE_FORMATS[out_f])] - for file in input_list: - print(file) - if out_f.upper() in SUPPORTED_IMAGE_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + \ - SUPPORTED_IMAGE_FORMATS[out_f].lower() - else: - print("Unsupported output format") - sys.exit(1) - '''Load the image using OpenCV: ''' - 
print(F"{DYELLOW}Reading input image..{RESET}") - img = cv2.imread(file) - '''Convert the OpenCV image to a PIL image: ''' - print(f"{DMAGENTA}Converting to PIL image{RESET}") - pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - '''Save the PIL image to a different format: ''' - print(f"\033[1;36mSaving image as {output_filename}{RESET}") - pil_img.save(output_filename, out_f) - print(f"{DGREEN}Done{RESET}") - '''Load the image back into OpenCV: ''' - print(f"{DMAGENTA}Load and display image{RESET}") - opencv_img = cv2.imread(output_filename) - '''Display the images: ''' - cv2.imshow('OpenCV Image', opencv_img) - # pil_img.show() - '''Wait for the user to press a key and close the windows: ''' - cv2.waitKey(0) - cv2.destroyAllWindows() - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) diff --git a/build/lib/filemac/dd.py b/build/lib/filemac/dd.py deleted file mode 100644 index 90fbe1f..0000000 --- a/build/lib/filemac/dd.py +++ /dev/null @@ -1,10 +0,0 @@ -from OCRTextExtractor import ExtractText -img_objs = ['/home/skye/Software Engineering/Y2/SEM2/RV/SPE 2210 Client Side Programming Year II Semester II_1.png'] -text = '' -for i in img_objs: - extract = ExtractText(i) - tx = extract.OCR() - print(tx) - if tx is not None: - text += tx -print(text) diff --git a/build/lib/filemac/fmac.py b/build/lib/filemac/fmac.py deleted file mode 100644 index 91b28ba..0000000 --- a/build/lib/filemac/fmac.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python3.11.7 -# multimedia_cli/main.py -import argparse -import logging -import logging.handlers -import sys - -from . 
import handle_warnings -from .AudioExtractor import ExtractAudio -from .colors import (RESET, DYELLOW) -from .converter import (AudioConverter, FileSynthesis, ImageConverter, - MakeConversion, Scanner, VideoConverter) -from .formats import (SUPPORTED_AUDIO_FORMATS_SHOW, SUPPORTED_DOC_FORMATS, - SUPPORTED_IMAGE_FORMATS_SHOW, - SUPPORTED_VIDEO_FORMATS_SHOW) -from .image_op import Compress_Size -from .OCRTextExtractor import ExtractText -from .Simple_v_Analyzer import SA - -# from .formats import SUPPORTED_INPUT_FORMATS, SUPPORTED_OUTPUT_FORMATS -handle_warnings -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class Eval: - - def __init__(self, file, outf): - self.file = file - self.outf = outf - - def document_eval(self): - ls = ["docx", "doc"] - sheetls = ["xlsx", "xls"] - try: - conv = MakeConversion(self.file) - if self.file.lower().endswith(tuple(sheetls)): - if self.outf.lower() == "csv": - conv.convert_xlsx_to_csv() - elif self.outf.lower() == "txt": - conv.convert_xls_to_text() - elif self.outf.lower() == "doc" or self.outf == "docx": - conv.convert_xls_to_word() - elif self.outf.lower() == "db": - conv.convert_xlsx_to_database() - - elif self.file.lower().endswith(tuple(ls)): - if self.outf.lower() == "txt": - conv.word_to_txt() - elif self.outf.lower() == "pdf": - conv.word_to_pdf() - elif self.outf.lower() == "pptx": - conv.word_to_pptx() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - elif self.file.endswith('txt'): - if self.outf.lower() == "pdf": - conv.txt_to_pdf() - elif self.outf.lower() == "doc" or self.outf == "docx" or self.outf == "word": - conv.text_to_word() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - elif self.file.lower().endswith('ppt') or self.file.lower().endswith('pptx'): - if self.outf.lower() == "doc" or 
self.outf.lower() == "docx" or self.outf == "word": - conv.ppt_to_word() - - elif self.file.lower().endswith('pdf'): - if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word": - conv.pdf_to_word() - elif self.outf.lower() == "txt": - conv.pdf_to_txt() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - else: - print(f"{DYELLOW}Unsupported Conversion type{RESET}") - except Exception as e: - logger.error(e) - - -def main(): - parser = argparse.ArgumentParser( - description="Multimedia Element Operations") - - parser.add_argument( - "--convert_doc", help=f"Converter document file(s) to different format ie pdf_to_docx.\ - example {DYELLOW}filemac --convert_doc example.docx -t pdf{RESET}") - - parser.add_argument( - "--convert_audio", help=f"Convert audio file(s) to and from different format ie mp3 to wav\ - example {DYELLOW}filemac --convert_audio example.mp3 -t wav{RESET}") - - parser.add_argument( - "--convert_video", help=f"Convert video file(s) to and from different format ie mp4 to mkv.\ - example {DYELLOW}filemac --convert_video example.mp4 -t mkv{RESET}") - - parser.add_argument( - "--convert_image", help=f"Convert image file(s) to and from different format ie png to jpg.\ - example {DYELLOW}filemac --convert_image example.jpg -t png{RESET}") - - parser.add_argument( - - "--convert_doc2image", help=f"Convert documents to images ie png to jpg.\ - example {DYELLOW}filemac --convert_doc2image example.pdf -t png{RESET}") - - parser.add_argument("-xA", "--extract_audio", - help=f"Extract audio from a video.\ - example {DYELLOW}filemac -xA example.mp4 {RESET}") - - parser.add_argument( - "-Av", "--Analyze_video", help=f"Analyze a given video.\ - example {DYELLOW}filemac --analyze_video example.mp4 {RESET}") - - parser.add_argument("-t", "--target_format", - help="Target format for conversion (optional)") - - parser.add_argument( - "--resize_image", help=f"change size of 
an image compress/decompress \ - example {DYELLOW}filemac --resize_image example.png -t png {RESET}") - - parser.add_argument("-t_size", help="used in combination with resize_image \ - to specify target image size") - - parser.add_argument( - "-S", "--scan", help=f"Scan pdf file and extract text\ - example {DYELLOW}filemac --scan example.pdf {RESET}") - - parser.add_argument( - "-SA", "--scanAsImg", help=f"Scan pdf file and extract text\ - example {DYELLOW}filemac --scanAsImg example.pdf {RESET}") - - parser.add_argument("--OCR", help=f"Extract text from an image.\ - example {DYELLOW}filemac --OCR image.png{RESET}") - - args = parser.parse_args() - - -# Call function to handle document conversion inputs before begining conversion - if args.convert_doc == 'help': - print(SUPPORTED_DOC_FORMATS) - sys.exit(1) - if args.convert_doc: - ev = Eval(args.convert_doc, args.target_format) - ev.document_eval() - - -# Call function to handle video conversion inputs before begining conversion - elif args.convert_video: - if args.convert_video == 'help' or args.convert_video is None: - print(SUPPORTED_VIDEO_FORMATS_SHOW) - sys.exit(1) - ev = VideoConverter(args.convert_video, args.target_format) - ev.CONVERT_VIDEO() -# Call function to handle image conversion inputs before begining conversion - - elif args.convert_image: - if args.convert_image == 'help' or args.convert_image is None: - print(SUPPORTED_IMAGE_FORMATS_SHOW) - sys.exit(1) - conv = ImageConverter(args.convert_image, args.target_format) - conv.convert_image() - -# Handle image resizing - elif args.resize_image: - res = Compress_Size(args.resize_image) - res.resize_image(args.t_size) - -# Handle documents to images conversion - elif args.convert_doc2image: - conv = MakeConversion(args.convert_doc2image) - conv.doc2image(args.target_format) - -# Call function to handle audio conversion inputs before begining conversion - elif args.convert_audio: - if args.convert_audio == 'help' or args.convert_audio is None: - 
print(SUPPORTED_AUDIO_FORMATS_SHOW) - sys.exit(1) - ev = AudioConverter(args.convert_audio, args.target_format) - ev.pydub_conv() - - -# Call module to evaluate audio files before making audio extraction from input video files conversion - elif args.extract_audio: - vi = ExtractAudio(args.extract_audio) - vi.moviepyextract() - -# Call module to scan the input and extract text - elif args.scan: - sc = Scanner(args.scan) - sc.scanPDF() - -# Call module to scan the input FILE as image object and extract text - elif args.scanAsImg: - sc = Scanner(args.scanAsImg) - tx = sc.scanAsImgs() -# Call module to handle Candidate images for text extraction inputs before begining conversion - elif args.OCR: - conv = ExtractText(args.OCR) - conv.OCR() - - elif args.Analyze_video: - analyzer = SA(args.Analyze_video) - analyzer.SimpleAnalyzer() - - -if __name__ == "__main__": - main() diff --git a/build/lib/filemac/formats.py b/build/lib/filemac/formats.py deleted file mode 100644 index 6490294..0000000 --- a/build/lib/filemac/formats.py +++ /dev/null @@ -1,121 +0,0 @@ -# multimedia_cli/formats.py -from .colors import CYAN, DBLUE, DMAGENTA, DYELLOW, RESET - -SUPPORTED_DOC_FORMATS = f""" -|--------------------------------------------------------------------------- -|{DBLUE}Input format{RESET} |{DBLUE}Output format{RESET} | -|________________________________|__________________________________________| -| xlsx {DYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) | -| | | -| doc/docx{DYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) | -| | | -| txt {DYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) | -| | | -| pdf {DYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) | -| | | -| pptx/ppt{DYELLOW}-------------------->{RESET}|doc/docx | -| | -|___________________________________________________________________________| -""" - - -def p(): - print(SUPPORTED_DOC_FORMATS) - - -# Add supported input and output formats for each media type 
-SUPPORTED_AUDIO_FORMATS = ["wav", # Waveform Audio File Format - "mp3", # MPEG Audio Layer III - "ogg", - "flv", - "ogv", - "webm", - "aac", # Advanced Audio Codec - "bpf", - "aiff", - "flac"] # Free Lossless Audio Codec) - -SUPPORTED_AUDIO_FORMATS_SHOW = f''' -|==============================| -| {DBLUE}Supported I/O formats {RESET} | -|==============================| -| {CYAN} wav {DYELLOW} | -| {CYAN} mp3 {DYELLOW} | -| {CYAN} ogg {DYELLOW} | -| {CYAN} flv {DYELLOW} | -| {CYAN} ogv {DYELLOW} | -| {CYAN} matroska {DYELLOW} | -| {CYAN} mov {DYELLOW} | -| {CYAN} webm {DYELLOW} | -| {CYAN} aac {DYELLOW} | -| {CYAN} bpf {DYELLOW} | --------------------------------- - -''' - -SUPPORTED_VIDEO_FORMATS = ["MP4", # MPEG-4 part 14 - "AVI", # Audio Video Interleave - "OGV", - "WEBM", - "MOV", # QuickTime Movie - "MKV", # Matroska Multimedia Container - MKV is known for its support of high-quality content. - "FLV", # - "WMV"] - -SUPPORTED_VIDEO_FORMATS_SHOW = f''' -,_______________________________________, -|x| {DBLUE}Supported I/O formats{RESET} |x| -|x|-----------------------------------{DYELLOW}|x| -|x| {DMAGENTA} MP4 {DYELLOW} |x| -|x| {DMAGENTA} AVI {DYELLOW} |x| -|x| {DMAGENTA} OGV {DYELLOW} |x| -|x| {DMAGENTA} WEBM{DYELLOW} |x| -|x| {DMAGENTA} MOV {DYELLOW} |x| -|x| {DMAGENTA} MKV {DYELLOW} |x| -|x| {DMAGENTA} FLV {DYELLOW} |x| -|x| {DMAGENTA} WMV {DYELLOW} |x| -|,|___________________________________|,|{DYELLOW} -''' - -SUPPORTED_IMAGE_FORMATS = { - "JPEG": ".jpg", # Joint Photographic Experts Group -Lossy compression - "PNG": ".png", # Joint Photographic Experts Group - not lossy - "GIF": ".gif", # Graphics Interchange Format - "BM": ".bmp", - "BMP": ".dib", - "DXF": ".dxf", # Autocad format 2D - "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression - "EXR": ".exr", - "pic": ".pic", - "pict": "pct", - "PDF": ".pdf", - "WebP": ".webp", - "ICNS": ".icns", - "PSD": ".psd", - "SVG": ".svg", # Scalable 
vector Graphics - "EPS": ".eps", - "PostSciript": ".ps", - "PS": ".ps"} - -SUPPORTED_IMAGE_FORMATS_SHOW = f''' -__________________________________________ -|x|{DBLUE}Supported I/O formats{RESET} |x| -|x|_____________________________________{DYELLOW}|x| -|x| {DMAGENTA} JPEG {DYELLOW} |x| -|x| {DMAGENTA} PNG {DYELLOW} |x| -|x| {DMAGENTA} GIF {DYELLOW} |x| -|x| {DMAGENTA} BM {DYELLOW} |x| -|x| {DMAGENTA} TIFF {DYELLOW} |x| -|x| {DMAGENTA} EXR {DYELLOW} |x| -|x| {DMAGENTA} PDF {DYELLOW} |x| -|x| {DMAGENTA} WebP{DYELLOW} |x| -|x| {DMAGENTA} ICNS {DYELLOW} |x| -|x| {DMAGENTA} PSD {DYELLOW} |x| -|x| {DMAGENTA} SVG {DYELLOW} |x| -|x| {DMAGENTA} EPS {DYELLOW} |x| -|x| {DMAGENTA} Postscript {DYELLOW} |x| -|_|_____________________________________|x| -''' - -SUPPORTED_DOCUMENT_FORMATS = ['pdf', 'doc', 'docx', 'csv', 'xlsx', 'xls', - 'ppt', 'pptx', 'txt', 'ogg', 'mp3', 'audio'] diff --git a/build/lib/filemac/handle_warnings.py b/build/lib/filemac/handle_warnings.py deleted file mode 100644 index 3e592d1..0000000 --- a/build/lib/filemac/handle_warnings.py +++ /dev/null @@ -1,6 +0,0 @@ -import warnings - -warnings.simplefilter("ignore", RuntimeWarning) -with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message="Your system is avx2 capable but pygame was not built with support for it.", category=RuntimeWarning) diff --git a/build/lib/filemac/image_op.py b/build/lib/filemac/image_op.py deleted file mode 100644 index 61cfe6d..0000000 --- a/build/lib/filemac/image_op.py +++ /dev/null @@ -1,64 +0,0 @@ -from PIL import Image -import os -import logging -import logging.handlers - -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class Compress_Size: - - def __init__(self, input_image_path): - self.input_image_path = input_image_path - - def resize_image(self, target_size): - ext = input_image_path[-3:] - output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}" - - 
original_image = Image.open(input_image_path) - original_size = original_image.size - size = os.path.getsize(input_image_path) - print(f"Original image size \033[93m{size/1000_000:.2f}MiB") - - # Calculate the aspect ratio of the original image - aspect_ratio = original_size[0] / original_size[1] - - # Convert the target sixze to bytes - tz = int(target_size[:-2]) - if target_size[-2:].lower() == 'mb': - target_size_bytes = tz * 1024 * 1024 - elif target_size[-2:].lower() == 'kb': - target_size_bytes = tz * 1024 - else: - logger.warning("Invalid units. Please use either \033[1;95m'MB'\033[0m\ - or \033[1;95m'KB'\033[0m") - - # Calculate the new dimensions based on the target size - new_width, new_height = Compress_Size.calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes) - print("\033[94mProcessing ..\033[0m") - resized_image = original_image.resize((new_width, new_height)) - resized_image.save(output_image_path) - t_size = os.path.getsize(output_image_path)/1000_000 - print("\033[1;92mOk\033[0m") - print(f"Image resized to \033[1;93m{t_size:.2f}\033[0m and saved to \033[1;93m{output_image_path}") - - def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes): - # Calculate the new dimensions based on the target size in bytes - original_size_bytes = original_size[0] * original_size[1] * 3 # Assuming 24-bit color depth - scale_factor = (target_size_bytes / original_size_bytes) ** 0.5 - - new_width = int(original_size[0] * scale_factor) - new_height = int(original_size[1] * scale_factor) - - return new_width, new_height - - -if __name__ == "__main__": - input_image_path = input("Enter the path to the input image: ") - target_size = input("Enter the target output size (MB or KB): ") - ext = input_image_path[-3:] - output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}" - - init = Compress_Size(input_image_path) - init.resize_image(target_size) diff --git a/docs/CLI_ENHANCEMENT_PLAN.md 
b/docs/CLI_ENHANCEMENT_PLAN.md new file mode 100644 index 0000000..5728251 --- /dev/null +++ b/docs/CLI_ENHANCEMENT_PLAN.md @@ -0,0 +1,342 @@ +# FileMAC CLI Enhancement Plan + +## Overview + +This document outlines the comprehensive plan to enhance FileMAC's command-line interface using Rich and pyperclip libraries to create a more robust, user-friendly experience. + +## Current State Analysis + +### Strengths +- ✅ Rich library already integrated for progress bars +- ✅ Pyperclip available in environment +- ✅ Existing color support via custom utilities +- ✅ Comprehensive functionality across 40+ commands +- ✅ Well-structured operation mapping system + +### Opportunities for Improvement +- ❌ Basic argparse interface could be more user-friendly +- ❌ Text-based help lacks visual appeal +- ❌ Limited interactive elements +- ❌ No clipboard integration +- ❌ Inconsistent progress feedback + +## Enhancement Strategy + +### Phase 1: Foundation (Week 1-2) + +**Objective**: Establish core utilities and infrastructure + +**Tasks**: +1. **Create Rich Console Wrapper** (`filewarp/utils/rich_utils.py`) + - Custom theme matching existing color scheme + - Standardized message formats (info, success, error, warning) + - Console initialization and configuration + +2. **Implement Clipboard Utilities** (`filewarp/utils/clipboard.py`) + - `copy_to_clipboard()` function + - `paste_from_clipboard()` function + - Error handling for clipboard operations + +3. **Basic Rich Integration** + - Replace `print()` statements with Rich console methods + - Add color consistency across modules + - Create standard message formats + +### Phase 2: Core Enhancements (Week 3-4) + +**Objective**: Enhance core CLI functionality with Rich features + +**Tasks**: +1. **Enhanced Help System** (`filewarp/cli/help.py`) + - Rich-formatted command tables + - Categorized command display + - Interactive help navigation + +2. 
**Progress Bars for All Operations** (`filewarp/utils/progress.py`) + - Standardized progress bar creation + - Consistent styling across modules + - Time estimates and completion percentages + +3. **Enhanced Error Handling** (Enhance `filewarp/core/exceptions.py`) + - Rich-formatted error panels + - Contextual error information + - Suggested solutions and troubleshooting + +### Phase 3: Advanced Features (Week 5-6) + +**Objective**: Add interactive elements and workflow improvements + +**Tasks**: +1. **Interactive File Selection** (`filewarp/cli/interactive.py`) + - Visual file listing with tables + - Multi-file selection interface + - File preview capabilities + +2. **Clipboard Workflow Integration** (`filewarp/cli/clipboard_workflows.py`) + - Clipboard-based input workflows + - Result copying to clipboard + - Batch operation support + +3. **Operation Summary Display** (`filewarp/cli/summary.py`) + - Visual operation summaries + - Success/error breakdowns + - Clipboard copy options + +### Phase 4: Integration (Week 7) + +**Objective**: Full integration with existing CLI + +**Tasks**: +1. **Enhanced CLI Entry Point** (Modify `filewarp/cli/cli.py`) + - Rich welcome message + - Clipboard support flag + - Enhanced argument parsing + +2. 
**Operation Mapper Enhancement** (Extend `OperationMapper`) + - Rich progress display + - Clipboard integration + - Enhanced completion messages + +## Implementation Details + +### Rich Utilities Implementation + +```python +# filewarp/utils/rich_utils.py +from rich.console import Console +from rich.theme import Theme + +custom_theme = Theme({ + "info": "cyan", + "warning": "yellow", + "error": "bold red", + "success": "bold green", + "debug": "magenta", + "prompt": "bold blue" +}) + +console = Console(theme=custom_theme) + +def print_info(message): + console.print(f"[info]ℹ {message}[/info]") + +def print_success(message): + console.print(f"[success]✓ {message}[/success]") + +def print_error(message): + console.print(f"[error]❌ {message}[/error]") + +def print_warning(message): + console.print(f"[warning]⚠ {message}[/warning]") +``` + +### Clipboard Utilities Implementation + +```python +# filewarp/utils/clipboard.py +import pyperclip +from .rich_utils import console, print_success, print_error + +def copy_to_clipboard(text): + """Copy text to system clipboard""" + try: + pyperclip.copy(text) + print_success("Copied to clipboard!") + return True + except Exception as e: + print_error(f"Failed to copy to clipboard: {str(e)}") + return False + +def paste_from_clipboard(): + """Get text from system clipboard""" + try: + content = pyperclip.paste() + return content if content else None + except Exception as e: + print_error(f"Failed to access clipboard: {str(e)}") + return None +``` + +### Enhanced Help System + +```python +# filewarp/cli/help.py +from rich.panel import Panel +from rich.table import Table +from rich.box import ROUNDED +from ..utils.rich_utils import console + +def show_main_help(): + """Display enhanced help with Rich formatting""" + table = Table( + title="📁 FileMAC Commands", + show_header=True, + header_style="bold magenta", + box=ROUNDED, + border_style="blue" + ) + + table.add_column("Command", style="cyan", no_wrap=True) + 
table.add_column("Description", style="white") + table.add_column("Example", style="green") + + commands = [ + ("--convert_doc", "Convert documents between formats", "filewarp --convert_doc file.docx -to pdf"), + ("--convert_audio", "Convert audio files", "filewarp --convert_audio file.mp3 -to wav"), + # ... more commands + ] + + for cmd, desc, example in commands: + table.add_row(cmd, desc, example) + + panel = Panel.fit( + table, + title="[bold]FileMAC Help System[/bold]", + border_style="blue", + subtitle="Advanced file conversion toolkit" + ) + + console.print(panel) +``` + +## Migration Strategy + +### Backward Compatibility +- ✅ Keep all existing command-line arguments +- ✅ Maintain current functionality +- ✅ Add new features as optional flags +- ✅ Preserve existing workflows + +### Gradual Rollout Plan +1. **Week 1-2**: Foundation utilities +2. **Week 3-4**: Core Rich enhancements +3. **Week 5-6**: Advanced interactive features +4. **Week 7**: Full integration and testing + +### Risk Assessment + +**Low Risk**: +- Rich already in dependencies +- Gradual migration approach +- Backward compatibility maintained + +**Medium Risk**: +- User adaptation to new UI +- Clipboard permissions on some systems +- Performance impact of Rich rendering + +**Mitigation**: +- Provide fallback to text mode +- Add configuration options +- Comprehensive error handling +- User education + +## Benefits Realization + +### Immediate Benefits +- ✅ Better visual feedback for users +- ✅ Professional, modern CLI appearance +- ✅ Consistent color scheme and formatting +- ✅ Enhanced error messages with context + +### Medium-Term Benefits +- ✅ Faster workflows with clipboard integration +- ✅ Better user experience with progress indicators +- ✅ Interactive file selection and processing +- ✅ Visual operation summaries + +### Long-Term Benefits +- ✅ Foundation for advanced CLI features +- ✅ Improved user adoption and satisfaction +- ✅ Competitive advantage in CLI tools +- ✅ Easier maintenance 
and extension + +## Testing Approach + +### Unit Testing +- Test Rich utilities in isolation +- Verify clipboard functionality +- Validate progress bar behavior + +### Integration Testing +- Test with existing CLI commands +- Verify backward compatibility +- Check error handling + +### User Testing +- Gather feedback on new UI +- Test interactive workflows +- Validate clipboard integration + +### Performance Testing +- Measure Rich rendering impact +- Test with large file operations +- Validate progress bar performance + +## Documentation Requirements + +### Updated Documentation +- ✅ README.md with Rich features +- ✅ Examples of new clipboard workflows +- ✅ Visual guides for enhanced UI +- ✅ Updated help system documentation + +### User Education +- ✅ Migration guide for existing users +- ✅ New feature tutorials +- ✅ Best practices for Rich CLI usage +- ✅ Troubleshooting guide + +## Implementation Timeline + +```mermaid +gantt + title FileMAC CLI Enhancement Timeline + dateFormat YYYY-MM-DD + section Phase 1: Foundation + Rich Utilities :a1, 2023-11-01, 5d + Clipboard Helpers :a2, 2023-11-06, 3d + Basic Integration :a3, 2023-11-09, 2d + + section Phase 2: Core Enhancements + Enhanced Help :b1, 2023-11-13, 4d + Progress Bars :b2, 2023-11-17, 3d + Error Handling :b3, 2023-11-20, 3d + + section Phase 3: Advanced Features + Interactive Selection :c1, 2023-11-24, 5d + Clipboard Workflows :c2, 2023-11-29, 4d + Operation Summaries :c3, 2023-12-03, 3d + + section Phase 4: Integration + CLI Enhancement :d1, 2023-12-06, 5d + Testing & Debugging :d2, 2023-12-11, 4d + Documentation :d3, 2023-12-15, 3d +``` + +## Success Metrics + +### Quantitative Metrics +- ✅ Reduction in user errors +- ✅ Increase in command usage +- ✅ Faster operation completion times +- ✅ Higher user satisfaction scores + +### Qualitative Metrics +- ✅ Positive user feedback +- ✅ Increased feature adoption +- ✅ Improved documentation clarity +- ✅ Enhanced professional appearance + +## Conclusion + +This 
enhancement plan provides a clear, low-risk path to transform FileMAC's CLI from functional to exceptional. By leveraging existing Rich integration and adding strategic pyperclip functionality, we can significantly improve user experience and productivity while maintaining all existing functionality. + +The gradual migration approach ensures minimal disruption and allows for continuous feedback and improvement throughout the process. + +**Next Steps**: +1. Implement Phase 1 foundation utilities +2. Begin gradual integration with existing modules +3. Test thoroughly and gather user feedback +4. Proceed through phases as planned +5. Document and communicate changes effectively diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..86d894e --- /dev/null +++ b/docs/index.html @@ -0,0 +1,137 @@ + + + + + + FileMAC - Multimedia File Operation Kit + + + + + +
+
+

FileMAC

+

+ A Comprehensive Multimedia File Operation Kit +

+
+
+ + +
+ +
+

Introduction

+

+ FileMAC is a Python-based command-line interface (CLI) utility + designed for efficient file conversion, manipulation, and analysis. It + supports various multimedia operations, including document conversion, + file analysis, and text-to-speech conversion using Google's + Text-to-Speech (gTTS) library. +

+
+ + +
+

Features

+ +
+ + +
+

Installation

+

Install FileMAC using pip:

+
pip install filewarp
+

+ Alternatively, install directly from the GitHub repository: +

+
pip install git+https://github.com/skye-cyber/FileMAC.git
+
+ + +
+

Usage

+

+ After installation, you can use FileMAC through the command line. For + help and available commands, run: +

+
filewarp -h
+

or

+
Filemac -h
+

or

+
FILEMAC -h
+

+ To run the CLI app for specific operations, use the following command + structure: +

+
FileMAC [options] input_file format
+

+ Replace [options] with the desired operation flags, input_file with the path to the input file, and format with the target format or operation. +

+
+ + +
+

License

+

+ FileMAC is licensed under the GPL-3.0 License. For more details, refer + to the LICENSE file in the repository. +

+
+ + +
+

Repository

+

+ For more information, visit the GitHub repository: +

+ https://github.com/skye-cyber/FileMAC +
+
+ + + + + diff --git a/filemac.egg-info/PKG-INFO b/filemac.egg-info/PKG-INFO deleted file mode 100644 index fc84dd6..0000000 --- a/filemac.egg-info/PKG-INFO +++ /dev/null @@ -1,156 +0,0 @@ -Metadata-Version: 2.1 -Name: filemac -Version: 1.0.2 -Summary: Open source Python CLI toolkit for conversion, manipulation, Analysis -Author: wambua -Author-email: wambuamwiky2001@gmail.com -License: GPL v3 -Keywords: file-conversion,file-analysis,file-manipulation,ocr,image-conversion -Classifier: Environment :: Console -Classifier: Natural Language :: English -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Classifier: Programming Language :: Python :: 3.9 -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: 3.11 -Requires-Python: >=3.6 -Description-Content-Type: text/markdown -License-File: LICENSE - -# fconverter -A python file `conversion`, `manipulation`, `Analysis` toolkit -`This is a Linux command-line interface (CLI) utility that coverts documents from one format to another, -analyzes files, manipulates files. -Your can also convert text file to mp3 formart using google Text to speech library (gTTS). - -## Installation -1. using pip - - ```shell - pip install filemac - ``` -2. Install from github - - ```shell - pip install git+https://github.com/skye-cyber/FileMAC.git - ``` -## Usage - -To run the CLI app, use the following command: - -```shell -FileMAC [options] stdin format -``` - -Replace `[options]` with the appropriate command-line options based on the functionality you want to execute. - -## Available Options - -- `1`: --convert_doc. -- `2`: --convert_audio. -- `3`: --convert_video. -- `4`: --convert_image. -- `5`: --extract_audio. 
-- `6`: --Analyze_video -- `7`: --OCR - -## Examples - -1. Example command 1: - - ```shell - filemac --convert_doc example.docx -t pdf - ``` - ``Supported formats For document conversion`` - `1`. PDF to DOCX - `2`. PDF to TXT - `3`. PDF to Audio - `4`. DOCX to PDF - `5`. DOCX to pptx - `6`. DOCX to TXT - `7`. DOCX to Audio - `8`. TXT to PDF - `9`. TXT to DOCX - `10`' TXT to Audio - `11`. PPTX to DOCX - `12`. XLSX to Sql - `13`. XLSX to CSV - `14`. XLSX to TXT - `15`. XLSX to DOCX - - This promt parses convert_doc signifying that the inteded operation id document conversion then parses ```example.docx``` as the input file(file path can also be provided) to be converted to format ```pdf```. -the output file assumes the base name of the input file but the extension conforms to the parsed format```pdf``` - -2. converting text mp3 to wav - ```shell - filemac --convert_audio example.mp3 -t wav - ``` - ``Supported formats For audio conversion`` - `1`. wav - `2`. mp3 - `3`. ogg - `4`. flv - `5`. avi - `6`. ogv - `7`. matroska - `8`. mov - `9`. webm - -3. Extract text from images - ```shell - filemac --OCR image.jpg - ``` - - 2. converting videos - ```shell - filemac --convert_video example.mp4 -t wav - ``` - ``Supported formats For video conversion`` - `1`. MP4 - `2`. AVI - `3`. OGV - `4`. WEBM - `5`. MOV - `6`. MKV - `7`. FLV - `8`. WMV - -2. converting images - ```shell - filemac --convert_image example.png -t jpg - ``` - ``Supported formats For audio conversion`` - `1`.JPEG: `.jpg` - `2`.PNG": `.png` - `3`.GIF": `.gif` - `4`.BM": `.bmp` - `5`.TIFF: `.tiff` - `6`.EXR `.exr` - `7`.PDF: `.pdf` - `8`.WebP: `.webp` - `9`.ICNS: `.icns` - `10`.PSD: `.psd` - `11`.SVG: `.svg` - `12`.EPS: `.eps` - -## Help -in any case you can pass the string help to an option to see its supported operations or inputs nd output formats. -```shell - filemac --convert_doc help -``` -The above command displays the surported input and output formats for document conversion. 
-## Contributing - -Contributions are welcome! If you encounter any issues or have suggestions for improvements, please open an issue or submit a pull request. - -## License - -This project is an open source software. Under GPL-3.0 license - - -Feel free to modify and customize this template according to your specific project requirements and add any additional sections or information that you think would be helpful for users. - diff --git a/filemac.egg-info/SOURCES.txt b/filemac.egg-info/SOURCES.txt deleted file mode 100644 index 5d0d298..0000000 --- a/filemac.egg-info/SOURCES.txt +++ /dev/null @@ -1,38 +0,0 @@ -LICENSE -MANIFEST.ini -README.md -setup.py -version.txt -.github/workflows/python-publish.yml -__pycache__/Analyzer.cpython-311.pyc -__pycache__/AudioExtractor.cpython-311.pyc -__pycache__/OCRTextExtractor.cpython-311.pyc -__pycache__/Simple_v_Analyzer.cpython-311.pyc -__pycache__/converter.cpython-311.pyc -__pycache__/formarts.cpython-311.pyc -__pycache__/formats.cpython-311.pyc -__pycache__/show_progress.cpython-311.pyc -filemac/AudioExtractor.py -filemac/OCRTextExtractor.py -filemac/Simple_v_Analyzer.py -filemac/__init__.py -filemac/colors.py -filemac/converter.py -filemac/dd.py -filemac/fmac.py -filemac/formats.py -filemac/handle_warnings.py -filemac/image_op.py -filemac.egg-info/PKG-INFO -filemac.egg-info/SOURCES.txt -filemac.egg-info/dependency_links.txt -filemac.egg-info/entry_points.txt -filemac.egg-info/not-zip-safe -filemac.egg-info/requires.txt -filemac.egg-info/top_level.txt -filemac/__pycache__/AudioExtractor.cpython-311.pyc -filemac/__pycache__/OCRTextExtractor.cpython-311.pyc -filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc -filemac/__pycache__/colors.cpython-311.pyc -filemac/__pycache__/converter.cpython-311.pyc -filemac/__pycache__/formats.cpython-311.pyc \ No newline at end of file diff --git a/filemac.egg-info/dependency_links.txt b/filemac.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- 
a/filemac.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/filemac.egg-info/entry_points.txt b/filemac.egg-info/entry_points.txt deleted file mode 100644 index eee36a2..0000000 --- a/filemac.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -filemac = filemac:main diff --git a/filemac.egg-info/not-zip-safe b/filemac.egg-info/not-zip-safe deleted file mode 100644 index 8b13789..0000000 --- a/filemac.egg-info/not-zip-safe +++ /dev/null @@ -1 +0,0 @@ - diff --git a/filemac.egg-info/requires.txt b/filemac.egg-info/requires.txt deleted file mode 100644 index 585f2a3..0000000 --- a/filemac.egg-info/requires.txt +++ /dev/null @@ -1,19 +0,0 @@ -Pillow -PyPDF2 -argparse -gTTS -moviepy -numpy -opencv-python -pandas -pdf2docx -pdf2image -pdfminer.six -pydub -pypandoc -pytesseract -python-docx -python-pptx -reportlab -requests -requests diff --git a/filemac.egg-info/top_level.txt b/filemac.egg-info/top_level.txt deleted file mode 100644 index 93e015a..0000000 --- a/filemac.egg-info/top_level.txt +++ /dev/null @@ -1,2 +0,0 @@ -build -filemac diff --git a/filemac/AudioExtractor.py b/filemac/AudioExtractor.py deleted file mode 100644 index 65172b1..0000000 --- a/filemac/AudioExtractor.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import sys -from moviepy.editor import VideoFileClip -import logging -import logging.handlers -############################################################################### -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class ExtractAudio: - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = 
os.path.join(self.input_file, file) - ls = ["mp4", "mkv"] - if os.path.isfile(file_path) and any(file_path.lower().endswith(ext) for ext in ls): - files_to_process.append(file_path) - - return files_to_process - except Exception as e: - print(e) - - def moviepyextract(self): - try: - video_list = self.preprocess() - for input_video in video_list: - print("\033[1;33mExtracting..\033[1;36m") - video = VideoFileClip(input_video) - audio = video.audio - basename, _ = os.path.splitext(input_video) - outfile = basename + ".wav" - audio.write_audiofile(outfile) - # print(f"\033[1;32mFile saved as \033[36m{outfile}\033[0m") - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - print(e) - - -if __name__ == "__main__": - vi = ExtractAudio( - "/home/skye/Music/Melody in My Mind.mp4") - vi.moviepyextract() diff --git a/filemac/OCRTextExtractor.py b/filemac/OCRTextExtractor.py deleted file mode 100644 index 392ff6d..0000000 --- a/filemac/OCRTextExtractor.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import sys -import cv2 -import pytesseract -from PIL import Image -import logging -import logging.handlers -############################################################################### -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) -############################################################################### -'''Do OCR text extraction from a given image file and display the extracted - text - to the screen finally save it to a text file assuming the name of the input - file''' - -############################################################################### - - -class ExtractText: - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - 
print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def OCR(self): - image_list = self.preprocess() - ls = ['png', 'jpg'] - image_list = [ - item for item in image_list if any(item.lower().endswith(ext) - for ext in ls)] - - def ocr_text_extraction(image_path): - '''Load image using OpenCV''' - img = cv2.imread(image_path) - - logger.info(f"\033[2;95mprocessing {image_path}...\033[0m") - - try: - '''Preprocess image for better OCR results''' - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - thresh = cv2.threshold( - gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] - img_pil = Image.fromarray(thresh) - - '''Perform OCR using pytesseract''' - config = ("-l eng --oem 3 --psm 6") - text = pytesseract.image_to_string((img_pil), config=config) - - '''Remove extra whitespaces and newlines - text = ' '.join(text.split()).strip()''' - logger.info("\033[36mFound:\n\033[0m") - print(text) - current_path = os.getcwd() - file_path = os.path.join(current_path, OCR_file) - ''' Save the extracted text to specified file ''' - logger.info("\033[1;92mGenerating text file for the extracted \ -text..\033[0m") - - with open(file_path, 'w') as file: - file.write(text) - logger.info( - f"File saved as \033[1;93m{OCR_file}\033[0m:") - '''If there are multiple candidate images for text extraction, - wait for key press before proceeding to the next - image otherwise don't wait - size = [i for i in enumerate(image_list)]''' - if len(image_list) >= 2: - input("\033[5;97mPress Enter to continue\033[0m") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(0) - except FileNotFoundError as e: - logger.error(f"Error: {str(e)}") - except IOError as e: - logger.error( - f"Could not write to output file '{OCR_file}'. 
\ -Reason: {str(e)}\033[0m") - except Exception as e: - logger.error(f"Error: {type(e).__name__}: {str(e)}") - except Exception as e: - logger.error(f"Error:>>\033[31m{e}\033[0m") - return text - - for image_path in image_list: - OCR_file = image_path[:-4] + ".txt" - ocr_text_extraction(image_path) diff --git a/filemac/Simple_v_Analyzer.py b/filemac/Simple_v_Analyzer.py deleted file mode 100644 index 3b492bc..0000000 --- a/filemac/Simple_v_Analyzer.py +++ /dev/null @@ -1,60 +0,0 @@ -import sys -import cv2 -import numpy as np - - -class SA: - - def __init__(self, video): - self.video = video - - def SimpleAnalyzer(self): - try: - # Read the video file - cap = cv2.VideoCapture(self.video) - print("\033[1;33mInitializing..\033[0m") - # Initialize variables - frame_count = 0 - total_area = 0 - duration = 0 - - print("\033[1;36mWorking on it") - while True: - ret, frame = cap.read() - - if not ret: - break - # Increase frame count and accumulate area - frame_count += 1 - total_area += np.prod(frame.shape[:2]) - - # Calculate current frame duration - fps = cap.get(cv2.CAP_PROP_FPS) - duration += 1 / fps - - # Display the resulting frame - cv2.imshow('Frame', frame) - - # Break the loop after pressing 'q' - if cv2.waitKey(1) == ord('q'): - break - - # Release the video capture object and close all windows - cap.release() - cv2.destroyAllWindows() - - # Print results - print(f"Total Frames: \033[1;32m{frame_count}\033[0m") - print(f"Average Frame Area: \033[1;32m{total_area / frame_count}\033[0m") - print(f"Duration: \033[1;32m{duration}\033[0m seconds") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print(e) - sys.exit(1) - - -if __name__ == "__main__": - vi = SA("/home/skye/Music/Melody in My Mind.mp4") - vi.SimpleAnalyzer() diff --git a/filemac/__init__.py b/filemac/__init__.py deleted file mode 100644 index e32c40a..0000000 --- a/filemac/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .fmac import main diff --git 
a/filemac/__pycache__/AudioExtractor.cpython-311.pyc b/filemac/__pycache__/AudioExtractor.cpython-311.pyc deleted file mode 100644 index 36b350c..0000000 Binary files a/filemac/__pycache__/AudioExtractor.cpython-311.pyc and /dev/null differ diff --git a/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc b/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc deleted file mode 100644 index 2e0efeb..0000000 Binary files a/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc and /dev/null differ diff --git a/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc b/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc deleted file mode 100644 index a29f114..0000000 Binary files a/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc and /dev/null differ diff --git a/filemac/__pycache__/colors.cpython-311.pyc b/filemac/__pycache__/colors.cpython-311.pyc deleted file mode 100644 index 995bc01..0000000 Binary files a/filemac/__pycache__/colors.cpython-311.pyc and /dev/null differ diff --git a/filemac/__pycache__/converter.cpython-311.pyc b/filemac/__pycache__/converter.cpython-311.pyc deleted file mode 100644 index cbc7e1f..0000000 Binary files a/filemac/__pycache__/converter.cpython-311.pyc and /dev/null differ diff --git a/filemac/__pycache__/formats.cpython-311.pyc b/filemac/__pycache__/formats.cpython-311.pyc deleted file mode 100644 index d2b6f26..0000000 Binary files a/filemac/__pycache__/formats.cpython-311.pyc and /dev/null differ diff --git a/filemac/colors.py b/filemac/colors.py deleted file mode 100644 index 7e03e49..0000000 --- a/filemac/colors.py +++ /dev/null @@ -1,40 +0,0 @@ -import os - -from colorama import Fore, Style, init - -init(autoreset=True) - -if os.name == "posix": - RESET = '\033[0m' - RED = '\033[91m' - DRED = '\033[1;91m' - GREEN = '\033[92m' - DGREEN = '\033[1;92m' - YELLOW = '\033[93m' - DYELLOW = '\033[1;93m' - BLUE = '\033[94m' - DBLUE = '\033[1;94m' - MAGENTA = '\033[95m' - DMAGENTA = '\033[1;95m' - CYAN = '\033[96m' - DCYAN = 
'\033[1;96m' - ICYAN = '\033[3;96m' - -elif os.name == "nt": - RESET = Style.RESET_ALL - RED = Fore.LIGHTRED_EX - DRED = Fore.RED - GREEN = Fore.LIGHTGREEN_EX - DGREEN = Fore.GREEN - YELLOW = Fore.LIGHTYELLOW_EX - DYELLOW = Fore.YELLOW - BLUE = Fore.LIGHTBLUE_EX - DBLUE = Fore.BLUE - MAGENTA = Fore.LIGHTMAGENTA_EX - DMAGENTA = Fore.MAGENTA - CYAN = Fore.LIGHTCYAN_EX - DCYAN = Fore.CYAN - ICYAN = Fore.WHITE - -#return RESET, RED, DRED, GREEN, DGREEN, YELLOW, DYELLOW, BLUE, DBLUE, -#MAGENTA, DMAGENTA, CYAN, DCYAN diff --git a/filemac/converter.py b/filemac/converter.py deleted file mode 100644 index a46a46f..0000000 --- a/filemac/converter.py +++ /dev/null @@ -1,1027 +0,0 @@ -############################################################################# -import logging -import logging.handlers -# import math -import os -import re -import sqlite3 -import subprocess -import sys -import time -import traceback -# import pdfminer.high_level -# from typing import Iterable -from pdf2image import convert_from_path -import cv2 -import pandas as pd -import pydub -import PyPDF2 -# import pytesseract -import requests -import speedtest -from docx import Document -# from pydub.playback import play -from gtts import gTTS -# from PyPDF2 import PdfFileReader -from moviepy.editor import VideoFileClip -from pdf2docx import parse -from PIL import Image -from pptx import Presentation -from pydub import AudioSegment -from .colors import (RESET, GREEN, DGREEN, YELLOW, DYELLOW, CYAN, BLUE, DBLUE, - MAGENTA, DMAGENTA, RED, DRED, ICYAN) -from reportlab.lib.pagesizes import letter -from reportlab.platypus import Paragraph, SimpleDocTemplate - -from .formats import (SUPPORTED_AUDIO_FORMATS, SUPPORTED_IMAGE_FORMATS, - SUPPORTED_VIDEO_FORMATS) - -# import pygame -# from aspose.words import Document as aspose_document -# from aspose.slides import Presentation as aspose_presentation -# from show_progress import progress_show -# from PIL import ImageDraw, ImageFont 
-############################################################################### - -PYGAME_DETECT_AVX2 = 1 -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class MakeConversion: - - '''Initialize the class''' - - def __init__(self, input_file): - self.input_file = input_file - - '''Check input object whether it's a file or a directory if a file append - the file to a set and return it otherwise append directory full path - content to the set and return the set file. The returned set will be - evaluated in the next step as required on the basis of requested operation - For every requested operation, the output file if any is automatically - generated on the basis of the input filename and saved in the sam - directory as the input file - ''' - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - except Exception as e: - print(e) - -############################################################################### -# Convert word file to pdf document (docx) -############################################################################### - def word_to_pdf(self): - word_list = self.preprocess() - ls = ["doc", "docx"] - word_list = [ - item for item in word_list if any(item.lower().endswith(ext) for ext in ls)] - for word_file in word_list: - if word_file.lower().endswith("doc"): - pdf_file = word_file[:-3] + "pdf" - elif word_file.lower().endswith("docx"): - pdf_file = word_file[:-4] + "pdf" - - try: - print( - f'{BLUE}Converting: {RESET}{word_file} {BLUE}to {RESET}{pdf_file}') - if os.name == 
'posix': # Check if running on Linux - # Use subprocess to run the dpkg and grep commands - result = subprocess.run( - ['dpkg', '-l', 'libreoffice'], stdout=subprocess.PIPE, text=True) - if result.returncode != 0: - print( - "Please install libreoffice to use this functionality !") - sys.exit(1) - subprocess.run(['soffice', '--convert-to', - 'pdf', word_file, pdf_file]) - # print(f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}") - elif os.name == "nt": - try: - from docx2pdf import convert - except ImportError: - print("Run pip install docx2pdf for this function to work") - sys.exit(1) - convert(word_file, pdf_file) - print( - f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}") - - except Exception as e: - print(f"Error converting {word_file} to {pdf_file}: {e}") - -############################################################################### -# Convert pdf file to word document (docx) -############################################################################### - def pdf_to_word(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - for pdf_file in pdf_list: - if pdf_file.lower().endswith("pdf"): - word_file = pdf_file[:-3] + "docx" - - try: - - parse(pdf_file, word_file, start=0, end=None) - - print(f'{GREEN}Converting to word..{RESET}', end='\r') - - logger.info(f"{DMAGENTA} Successfully converted{pdf_file} \ -to {word_file}{RESET}") - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - logger.info(f'{DRED}All conversion attempts have failed: \ -{e}{RESET}') - -############################################################################### -# Convert text file(s) to pdf document (docx) -############################################################################### - def txt_to_pdf(input_file, output_file): - """Convert a .txt file to a PDF.""" - - # Read the contents of the input .txt file - with open(input_file, 'r', 
encoding='utf-8') as file: - text_contents = file.readlines() - - # Initialize the PDF document - doc = SimpleDocTemplate(output_file, pagesize=letter) - - # Create a story to hold the elements of the PDF - story = [] - - # Iterate through each line in the input .txt file and add it to the PDF - for line in text_contents: - story.append(Paragraph(line.strip(), style="normalText")) - - # Build and write the PDF document - doc.build(story) - -############################################################################### -# Convert word file(s) to pptx document (pptx/ppt) -############################################################################### - def word_to_pptx(self): - word_list = self.preprocess() - word_list = [item for item in word_list if item.lower().endswith( - "docx") or item.lower().endswith("doc")] - - for word_file in word_list: - - if word_list is None: - print("Please provide appropriate file type") - sys.exit(1) - if word_file.lower().endswith("docx"): - pptx_file = word_file[:-4] + "pptx" - elif word_file.lower().endswith("doc"): - pptx_file = word_file[:-3] + "pptx" - try: - # Load the Word document - print(F"{DYELLOW}Load the Word document..{RESET}") - doc = Document(word_file) - - # Create a new PowerPoint presentation - print(F"{DYELLOW}Create a new PowerPoint presentation..{RESET}") - prs = Presentation() - - # Iterate through each paragraph in the Word document - print( - f"{DGREEN}Populating pptx slides with {DYELLOW}{len(doc.paragraphs)}{DGREEN} entries..{RESET}") - count = 0 - for paragraph in doc.paragraphs: - count += 1 - perc = (count/len(doc.paragraphs))*100 - print( - f"{DMAGENTA}Progress:: \033[1;36m{perc:.2f}%{RESET}", end="\r") - # Create a new slide in the PowerPoint presentation - slide = prs.slides.add_slide(prs.slide_layouts[1]) - - # Add the paragraph text to the slide - slide.shapes.title.text = paragraph.text - - # Save the PowerPoint presentation - prs.save(pptx_file) - print(f"\n{DGREEN}Done{RESET}") - except 
KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - logger.error(e) - -############################################################################### -# Convert word file to txt file''' -############################################################################### - - def word_to_txt(self): - word_list = self.preprocess() - word_list = [item for item in word_list if item.lower().endswith( - "docx") or item.lower().endswith("doc")] - for file_path in word_list: - if file_path.lower().endswith("docx"): - txt_file = file_path[:-4] + "txt" - elif file_path.lower().endswith("doc"): - txt_file = file_path[:-3] + "txt" - try: - doc = Document(file_path) - print("INFO Processing...") - - with open(txt_file, 'w', encoding='utf-8') as f: - Par = 0 - for paragraph in doc.paragraphs: - f.write(paragraph.text + '\n') - Par += 1 - - print(f"Par:{BLUE}{Par}/{len(doc.paragraphs)}{RESET}", end='\r') - logger.info(f"{DMAGENTA}Conversion of file to txt success{RESET}") - - except KeyboardInterrupt: - print("\nExit") - sys.exit() - except Exception as e: - logger.error( - f"Dear user something went amiss while attempting the conversion:\n {e}") - with open("conversion.log", "a") as log_file: - log_file.write(f"Couldn't convert {file_path} to {txt_file}:\ -REASON->{e}") - -############################################################################### -# Convert pdf file to text file -############################################################################### - def pdf_to_txt(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - for file_path in pdf_list: - txt_file = file_path[:-3] + "txt" - try: - with open(file_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - text = '' - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - with open(txt_file, 'w', 
encoding='utf-8') as f: - f.write(text) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ -{txt_file}{RESET}") - except Exception as e: - logger.error( - f"Oops somethin went astray while converting {file_path} \ -to {txt_file}: {e}") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Error converting {file_path} to {txt_file}: {e}\n") - -############################################################################### -# Convert ppt file to word document -############################################################################### - def ppt_to_word(self): - ppt_list = self.preprocess() - ppt_list = [item for item in ppt_list if item.lower().endswith( - "pptx") or item.lower().endswith("ppt")] - for file_path in ppt_list: - if file_path.lower().endswith("pptx"): - word_file = file_path[:-4] + "docx" - elif file_path.lower().endswith("ppt"): - word_file = file_path[:-3] + "docx" - try: - presentation = Presentation(file_path) - document = Document() - - for slide in presentation.slides: - for shape in slide.shapes: - if shape.has_text_frame: - text_frame = shape.text_frame - for paragraph in text_frame.paragraphs: - new_paragraph = document.add_paragraph() - for run in paragraph.runs: - new_run = new_paragraph.add_run(run.text) - # Preserve bold formatting - new_run.bold = run.font.bold - # Preserve italic formatting - new_run.italic = run.font.italic - # Preserve underline formatting - new_run.underline = run.font.underline - # Preserve font name - new_run.font.name = run.font.name - # Preserve font size - new_run.font.size = run.font.size - try: - # Preserve font color - new_run.font.color.rgb = run.font.color.rgb - except AttributeError: - # Ignore error and continue without - # setting the font color - pass - # Add a new paragraph after each slide - document.add_paragraph() - document.save(word_file) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ - {word_file}{RESET}") - except Exception as e: - 
logger.error( - f"Oops somethin gwent awry while attempting to convert \ - {file_path} to {word_file}:\n>>>{e}") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Oops something went astray while attempting \ - convert {file_path} to {word_file}:{e}\n") - -############################################################################### -# Convert text file to word -############################################################################### - def text_to_word(self): - flist = self.preprocess() - flist = [item for item in flist if item.lower().endswith("txt")] - for file_path in flist: - if file_path.lower().endswith("txt"): - word_file = file_path[:-3] + "docx" - - try: - # Read the text file - with open(file_path, 'r', encoding='utf-8', errors='ignore') as file: - text_content = file.read() - - # Filter out non-XML characters - filtered_content = re.sub( - r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]+', '', text_content) - - # Create a new Word document - doc = Document() - # Add the filtered text content to the document - doc.add_paragraph(filtered_content) - - # Save the document as a Word file - doc.save(word_file) - logger.info(f"{DMAGENTA}Successfully converted {file_path} to \ - {word_file}{RESET}") - except FileExistsError as e: - logger.error(f"{str(e)}") - except Exception as e: - logger.error( - f"Oops Unable to perfom requested conversion: {e}\n") - with open("conversion.log", "a") as log_file: - log_file.write( - f"Error converting {file_path} to {word_file}: \ -{e}\n") - -############################################################################### -# Convert xlsx file(s) to word file(s) -############################################################################### - def convert_xls_to_word(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [item for item in xls_list if any( - item.lower().endswith(ext) for ext in ls)] - print(F"{DGREEN}Initializing conversion sequence{RESET}") - for xls_file in xls_list: 
- if xls_file.lower().endswith("xlsx"): - word_file = xls_file[:-4] + "docx" - elif xls_file.lower().endswith("xls"): - word_file = xls_file[:-3] + "docx" - try: - '''Read the XLS file using pandas''' - - df = pd.read_excel(xls_file) - - '''Create a new Word document''' - doc = Document() - - '''Iterate over the rows of the dataframe and add them to the - Word document''' - logger.info(f"{ICYAN}Converting {xls_file}..{RESET}") - # time.sleep(2) - total_rows = df.shape[0] - for _, row in df.iterrows(): - current_row = _ + 1 - percentage = (current_row / total_rows)*100 - for value in row: - doc.add_paragraph(str(value)) - print(f"Row {DYELLOW}{current_row}/{total_rows} \ -{DBLUE}{percentage:.1f}%{RESET}", end="\r") - # print(f"\033[1;36m{row}{RESET}") - - # Save the Word document - doc.save(word_file) - print(F"{DGREEN}Conversion successful!{RESET}", end="\n") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print("Oops Conversion failed:", str(e)) - -############################################################################### - '''Convert xlsx/xls file/files to text file format''' -############################################################################### - - def convert_xls_to_text(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [ - item for item in xls_list if any(item.lower().endswith(ext) - for ext in ls)] - print(F"{DGREEN}Initializing conversion sequence{RESET}") - for xls_file in xls_list: - if xls_file .lower().endswith("xlsx"): - txt_file = xls_file[:-4] + "txt" - elif xls_file .lower().endswith("xls"): - txt_file = xls_file[:-3] + "txt" - try: - # Read the XLS file using pandas - logger.info(f"Converting {xls_file}..") - df = pd.read_excel(xls_file) - - # Convert the dataframe to plain text - text = df.to_string(index=False) - chars = len(text) - words = len(text.split()) - lines = len(text.splitlines()) - - print( - f"Preparing to write: {DYELLOW}{chars} \033[1;30m \ 
-characters{DYELLOW} {words}\033[1;30m words {DYELLOW}{lines}\033[1;30m \ -lines {RESET}", end="\n") - # Write the plain text to the output file - with open(txt_file, 'w') as file: - file.write(text) - - print(F"{DGREEN}Conversion successful!{RESET}", end="\n") - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - print("Oops Conversion failed:", str(e)) - -############################################################################### - '''Convert xlsx/xls file to csv(comma seperated values) format''' -############################################################################### - - def convert_xlsx_to_csv(self): - xls_list = self.preprocess() - ls = ["xlsx", "xls"] - xls_list = [ - item for item in xls_list if any(item.lower().endswith(ext) - for ext in ls)] - for xls_file in xls_list: - if xls_file.lower().endswith("xlsx"): - csv_file = xls_file[:-4] + "csv" - elif xls_file.lower().endswith("xls"): - csv_file = xls_file[:-3] + "csv" - try: - '''Load the Excel file''' - print(F"{DGREEN}Initializing conversion sequence{RESET}") - df = pd.read_excel(xls_file) - logger.info(f"Converting {xls_file}..") - total_rows = df.shape[0] - print(f"Writing {DYELLOW}{total_rows} rows {RESET}", end="\n") - for i in range(101): - print(f"Progress: {i}%", end="\r") - '''Save the DataFrame to CSV''' - df.to_csv(csv_file, index=False) - print(F"{DMAGENTA} Conversion successful{RESET}") - except KeyboardInterrupt: - print("Exiting") - sys.exit(1) - except Exception as e: - print(e) - -############################################################################### -# Convert xlsx file(s) to sqlite -############################################################################### - - def convert_xlsx_to_database(self): - xlsx_list = self.preprocess() - ls = ["xlsx", "xls"] - xlsx_list = [ - item for item in xlsx_list if any(item.lower().endswith(ext) - for ext in ls)] - for xlsx_file in xlsx_list: - if xlsx_file.lower().endswith("xlsx"): - sqlfile = 
xlsx_file[:-4] - elif xlsx_file.lower().endswith("xls"): - sqlfile = xlsx_file[:-3] - try: - db_file = input( - F"{DBLUE}Please enter desired sql filename: {RESET}") - table_name = input( - "Please enter desired table name: ") - # res = ["db_file", "table_name"] - if any(db_file) == "": - db_file = sqlfile + "sql" - table_name = sqlfile - if not db_file.endswith(".sql"): - db_file = db_file + ".sql" - column = 0 - for i in range(20): - column += 0 - # Read the Excel file into a pandas DataFrame - print(f"Reading {xlsx_file}...") - df = pd.read_excel(xlsx_file) - print(f"{DGREEN}Initializing conversion sequence{RESET}") - print(f"{DGREEN} Connected to sqlite3 database::{RESET}") - # Create a connection to the SQLite database - conn = sqlite3.connect(db_file) - print(F"{DYELLOW} Creating database table::{RESET}") - # Insert the DataFrame into a new table in the database - df.to_sql(table_name, column, conn, - if_exists='replace', index=False) - print( - f"Operation successful{RESET} file saved as \033[32{db_file}{RESET}") - # Close the database connection - conn.close() - except KeyboardInterrupt: - print("\nExiting") - sys.exit(1) - except Exception as e: - logger.error(f"{e}") - -############################################################################### -# Create image objects from given files -############################################################################### - def doc2image(self, outf="png"): - outf_list = ['png', 'jpg'] - if outf not in outf_list: - outf = "png" - path_list = self.preprocess() - ls = ["pdf", "doc", "docx"] - file_list = [ - item for item in path_list if any(item.lower().endswith(ext) - for ext in ls)] - imgs = [] - for file in file_list: - if file.lower().endswith("pdf"): - # Convert the PDF to a list of PIL image objects - print("Generate image objects ..") - images = convert_from_path(file) - - # Save each image to a file - fname = file[:-4] - print(f"{YELLOW}Target images{BLUE} {len(images)}{RESET}") - for i, image in 
enumerate(images): - print(f"{DBLUE}{i}{RESET}", end="\r") - yd = f"{fname}_{i+1}.{outf}" - image.save(yd) - imgs.append(yd) - print(f"{GREEN}Ok{RESET}") - - return imgs - - -class Scanner: - - def __init__(self, input_file): - self.input_file = input_file - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def scanPDF(self): - pdf_list = self.preprocess() - pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] - - for pdf in pdf_list: - out_f = pdf[:-3] + 'txt' - print(f"{YELLOW}Read pdf ..{RESET}") - - with open(pdf, 'rb') as f: - reader = PyPDF2.PdfReader(f) - text = '' - - pg = 0 - for page_num in range(len(reader.pages)): - pg += 1 - - print(f"{DYELLOW}Progress:{RESET}", end="") - print(f"{CYAN}{pg}/{len(reader.pages)}{RESET}", end="\r") - page = reader.pages[page_num] - text += page.extract_text() - - print(f"\n{text}") - print(F"\n{YELLOW}Write text to {GREEN}{out_f}{RESET}") - with open(out_f, 'w') as f: - f.write(text) - - print(F"{DGREEN}Ok{RESET}") - - def scanAsImgs(self): - file = self.input_file - mc = MakeConversion(file) - img_objs = mc.doc2image() - # print(img_objs) - from .OCRTextExtractor import ExtractText - text = '' - for i in img_objs: - extract = ExtractText(i) - tx = extract.OCR() - if tx is not None: - text += tx - print(text) - print(f"{GREEN}Ok{RESET}") - return text - - -class FileSynthesis: - - def __init__(self, input_file): - self.input_file = input_file - # self.CHUNK_SIZE = 20_000 - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - for file in os.listdir(self.input_file): - file_path = 
os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - @staticmethod - def join_audios(files, output_file): - masterfile = output_file + "_master.mp3" - print( - f"{DBLUE}Create a master file {DMAGENTA}{masterfile}{RESET}", end='\r') - # Create a list to store files - ogg_files = [] - # loop through the directory while adding the ogg files to the list - print(files) - for filename in files: - print(f"Join {DBLUE}{len(files)}{RESET} files") - # if filename.endswith('.ogg'): - # ogg_file = os.path.join(path, filename) - ogg_files.append(AudioSegment.from_file(filename)) - - # Concatenate the ogg files - combined_ogg = ogg_files[0] - for i in range(1, len(files)): - combined_ogg += ogg_files[i] - - # Export the combined ogg to new mp3 file or ogg file - combined_ogg.export(output_file + "_master.ogg", format='ogg') - print(F"{DGREEN}Master file:Ok {RESET}") - - def Synthesise(self, text: str, output_file: str, CHUNK_SIZE: int = 20_000, ogg_folder: str = 'tempfile', retries: int = 5) -> None: - """Converts given text to speech using Google Text-to-Speech API.""" - out_ls = [] - try: - if not os.path.exists(ogg_folder): - os.mkdir(ogg_folder) - print(f"{DYELLOW}Get initial net speed..{RESET}") - st = speedtest.Speedtest() # get initial network speed - st.get_best_server() - download_speed: float = st.download() # Keep units as bytes - logger.info( - - f"{GREEN} Conversion to mp3 sequence initialized start\ -speed {CYAN}{download_speed/1_000_000:.2f}Kbps{RESET}") - - for attempt in range(retries): - try: - '''Split input text into smaller parts and generate - individual gTTS objects''' - counter = 0 - for i in range(0, len(text), CHUNK_SIZE): - chunk = text[i:i+CHUNK_SIZE] - output_filename = f"{output_file}_{counter}.ogg" - counter += 1 - # print(output_filename) - if os.path.exists(output_filename): - output_filename = f"{output_file}_{counter+1}.ogg" - # print(output_filename) - tts = 
gTTS(text=chunk, lang='en', slow=False) - tts.save(output_filename) - out_ls.append(output_filename) - break - # print(out_ls) - '''Handle any network related issue gracefully''' - except Exception in (ConnectionError, ConnectionAbortedError, - ConnectionRefusedError, - ConnectionResetError) as e: - logger.error(f"Sorry boss connection problem encountered: {e} in {attempt+1}/{retries}:") - time.sleep(5) # Wait 5 seconds before retrying - - # Handle connectivity/network error - except requests.exceptions.RequestException as e: - logger.error(f"{e}") - except Exception as e: - logger.error(f'{DRED} Error during conversion attempt \ -{attempt+1}/{retries}:{e}{RESET}') - tb = traceback.extract_tb(sys.exc_info()[2]) - logger.info("\n".join([f" > {line}" - for line in map(str, tb)])) - time.sleep(3) # Wait 5 seconds before retrying - pass - - if attempt >= retries: - logger.error( - f"Conversion unsuccessful after {retries} attempts.") - sys.exit(2) - - finally: - # print(out_ls) - # Combine generated gTTS objects - if len(out_ls) >= 1: - FileSynthesis.join_audios(out_ls, output_file) - - st = speedtest.Speedtest() - logger.info("Done") - print("Get final speed ...") - logger.info( - - f"{YELLOW}Final Network Speed: {st.download()/(10**6):.2f} Kbps{RESET}") - - @staticmethod - def pdf_to_text(pdf_path): - logger.info('''Processing the file...\n''') - logger.info( - F'{GREEN} Initializing pdf to text conversion sequence...{RESET}') - try: - with open(pdf_path, 'rb') as file: - pdf_reader = PyPDF2.PdfReader(file) - text = '' - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - print(F"{DGREEN}Ok{RESET}") - return text - except Exception as e: - logger.error( - f"{DRED}Failed to extract text from '{YELLOW}{pdf_path}'{RESET}:\n {e}") - - @staticmethod - def text_file(input_file): - try: - with open(input_file, 'r', errors='ignore') as file: - text = file.read().replace('\n', ' ') - return text - except 
FileNotFoundError: - logger.error("File '{}' was not found.".format(input_file)) - except Exception as e: - logger.error( - F"{DRED}Error converting {input_file} to text: {str(e)}\ -{RESET}") - - @staticmethod - def docx_to_text(docx_path): - try: - logger.info(f"{BLUE} Converting {docx_path} to text...{RESET}") - doc = Document(docx_path) - paragraphs = [paragraph.text for paragraph in doc.paragraphs] - return '\n'.join(paragraphs) - except FileNotFoundError: - logger.error(f"File '{docx_path}' was not found.") - except Exception as e: - logger.error( - F"{DRED}Error converting {docx_path} to text: {e}\ -{RESET}") - - '''Handle input files based on type to initialize conversion sequence''' - - def audiofy(self): - input_list = self.preprocess() - extdoc = ["docx", "doc"] - ls = {"pdf", "docx", "doc", "txt"} - input_list = [item for item in input_list if item.lower().endswith(tuple(ls))] - for input_file in input_list: - if input_file.endswith('.pdf'): - text = FileSynthesis.pdf_to_text(input_file) - output_file = input_file[:-4] - - elif input_file.lower().endswith(tuple(extdoc)): - - text = FileSynthesis.docx_to_text(input_file) - output_file = input_file[:-5] - - elif input_file.endswith('.txt'): - text = FileSynthesis.text_file(input_file) - output_file = input_file[:-4] - - else: - logger.error('Unsupported file format. 
Please provide \ -a PDF, txt, or Word document.') - sys.exit(1) - try: - FileSynthesis.Synthesise(None, text, output_file) - except KeyboardInterrupt: - sys.exit(1) - - -############################################################################### -# Convert video file to from one format to another''' -############################################################################### - - -class VideoConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def CONVERT_VIDEO(self): - try: - input_list = self.preprocess() - out_f = self.out_format.upper() - input_list = [item for item in input_list if any( - item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)] - print(F"{DYELLOW}Initializing conversion..{RESET}") - - for file in input_list: - if out_f.upper() in SUPPORTED_VIDEO_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + '.' 
+ out_f.lower() - print(output_filename) - else: - print("Unsupported output format") - sys.exit(1) - format_codec = { - "MP4": "mpeg4", - "AVI": "rawvideo", - # "OGV": "avc", - "WEBM": "libvpx", - "MOV": "mpeg4", - "MKV": "MPEG4", - "FLV": "flv" - # "WMV": "WMV" - } - '''Load the video file''' - print(f"{DBLUE}oad file{RESET}") - video = VideoFileClip(file) - '''Export the video to a different format''' - print(f"{DMAGENTA}Converting file to {output_filename}{RESET}") - video.write_videofile( - output_filename, codec=format_codec[out_f]) - '''Close the video file''' - print(f"{DGREEN}Done{RESET}") - video.close() - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) - except Exception as e: - print(e) - - -############################################################################### -# Convert Audio file to from one format to another''' -############################################################################### - - -class AudioConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - - def pydub_conv(self): - input_list = self.preprocess() - out_f = self.out_format - input_list = [item for item in input_list if any( - item.lower().endswith(ext) for ext in SUPPORTED_AUDIO_FORMATS)] - print(F"{DYELLOW}Initializing conversion..{RESET}") - for file in input_list: - if out_f.lower() in SUPPORTED_AUDIO_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + '.' 
+ out_f - else: - print("Unsupported output format") - sys.exit(1) - fmt = ext[1:] - print(fmt, out_f) - audio = pydub.AudioSegment.from_file(file, fmt) - print(f"{DMAGENTA}Converting to {output_filename}{RESET}") - audio.export(output_filename, format=out_f) - # new_audio = pydub.AudioSegment.from_file('output_audio.') - print(f"{DGREEN}Done{RESET}") - # play(new_audio) - # new_audio.close() - - -############################################################################### -# Convert images file to from one format to another -############################################################################### - - -class ImageConverter: - - def __init__(self, input_file, out_format): - self.input_file = input_file - self.out_format = out_format - - def preprocess(self): - try: - files_to_process = [] - - if os.path.isfile(self.input_file): - files_to_process.append(self.input_file) - elif os.path.isdir(self.input_file): - if os.listdir(self.input_file) is None: - print("Cannot work with empty folder") - sys.exit(1) - for file in os.listdir(self.input_file): - file_path = os.path.join(self.input_file, file) - if os.path.isfile(file_path): - files_to_process.append(file_path) - - return files_to_process - except FileNotFoundError: - print("File not found") - sys.exit(1) - - def convert_image(self): - try: - input_list = self.preprocess() - out_f = self.out_format.upper() - - input_list = [item for item in input_list if any( - item.lower().endswith(ext) for ext in SUPPORTED_IMAGE_FORMATS[out_f])] - for file in input_list: - print(file) - if out_f.upper() in SUPPORTED_IMAGE_FORMATS: - _, ext = os.path.splitext(file) - output_filename = _ + \ - SUPPORTED_IMAGE_FORMATS[out_f].lower() - else: - print("Unsupported output format") - sys.exit(1) - '''Load the image using OpenCV: ''' - print(F"{DYELLOW}Reading input image..{RESET}") - img = cv2.imread(file) - '''Convert the OpenCV image to a PIL image: ''' - print(f"{DMAGENTA}Converting to PIL image{RESET}") - pil_img = 
Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - '''Save the PIL image to a different format: ''' - print(f"\033[1;36mSaving image as {output_filename}{RESET}") - pil_img.save(output_filename, out_f) - print(f"{DGREEN}Done{RESET}") - '''Load the image back into OpenCV: ''' - print(f"{DMAGENTA}Load and display image{RESET}") - opencv_img = cv2.imread(output_filename) - '''Display the images: ''' - cv2.imshow('OpenCV Image', opencv_img) - # pil_img.show() - '''Wait for the user to press a key and close the windows: ''' - cv2.waitKey(0) - cv2.destroyAllWindows() - except KeyboardInterrupt: - print("\nExiting..") - sys.exit(1) diff --git a/filemac/dd.py b/filemac/dd.py deleted file mode 100644 index 90fbe1f..0000000 --- a/filemac/dd.py +++ /dev/null @@ -1,10 +0,0 @@ -from OCRTextExtractor import ExtractText -img_objs = ['/home/skye/Software Engineering/Y2/SEM2/RV/SPE 2210 Client Side Programming Year II Semester II_1.png'] -text = '' -for i in img_objs: - extract = ExtractText(i) - tx = extract.OCR() - print(tx) - if tx is not None: - text += tx -print(text) diff --git a/filemac/fmac.py b/filemac/fmac.py deleted file mode 100644 index 91b28ba..0000000 --- a/filemac/fmac.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python3.11.7 -# multimedia_cli/main.py -import argparse -import logging -import logging.handlers -import sys - -from . 
import handle_warnings -from .AudioExtractor import ExtractAudio -from .colors import (RESET, DYELLOW) -from .converter import (AudioConverter, FileSynthesis, ImageConverter, - MakeConversion, Scanner, VideoConverter) -from .formats import (SUPPORTED_AUDIO_FORMATS_SHOW, SUPPORTED_DOC_FORMATS, - SUPPORTED_IMAGE_FORMATS_SHOW, - SUPPORTED_VIDEO_FORMATS_SHOW) -from .image_op import Compress_Size -from .OCRTextExtractor import ExtractText -from .Simple_v_Analyzer import SA - -# from .formats import SUPPORTED_INPUT_FORMATS, SUPPORTED_OUTPUT_FORMATS -handle_warnings -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class Eval: - - def __init__(self, file, outf): - self.file = file - self.outf = outf - - def document_eval(self): - ls = ["docx", "doc"] - sheetls = ["xlsx", "xls"] - try: - conv = MakeConversion(self.file) - if self.file.lower().endswith(tuple(sheetls)): - if self.outf.lower() == "csv": - conv.convert_xlsx_to_csv() - elif self.outf.lower() == "txt": - conv.convert_xls_to_text() - elif self.outf.lower() == "doc" or self.outf == "docx": - conv.convert_xls_to_word() - elif self.outf.lower() == "db": - conv.convert_xlsx_to_database() - - elif self.file.lower().endswith(tuple(ls)): - if self.outf.lower() == "txt": - conv.word_to_txt() - elif self.outf.lower() == "pdf": - conv.word_to_pdf() - elif self.outf.lower() == "pptx": - conv.word_to_pptx() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - elif self.file.endswith('txt'): - if self.outf.lower() == "pdf": - conv.txt_to_pdf() - elif self.outf.lower() == "doc" or self.outf == "docx" or self.outf == "word": - conv.text_to_word() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - elif self.file.lower().endswith('ppt') or self.file.lower().endswith('pptx'): - if self.outf.lower() == "doc" or 
self.outf.lower() == "docx" or self.outf == "word": - conv.ppt_to_word() - - elif self.file.lower().endswith('pdf'): - if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word": - conv.pdf_to_word() - elif self.outf.lower() == "txt": - conv.pdf_to_txt() - elif self.outf.lower() == "audio" or self.outf.lower() == "ogg": - conv = FileSynthesis(self.file) - conv.audiofy() - - else: - print(f"{DYELLOW}Unsupported Conversion type{RESET}") - except Exception as e: - logger.error(e) - - -def main(): - parser = argparse.ArgumentParser( - description="Multimedia Element Operations") - - parser.add_argument( - "--convert_doc", help=f"Converter document file(s) to different format ie pdf_to_docx.\ - example {DYELLOW}filemac --convert_doc example.docx -t pdf{RESET}") - - parser.add_argument( - "--convert_audio", help=f"Convert audio file(s) to and from different format ie mp3 to wav\ - example {DYELLOW}filemac --convert_audio example.mp3 -t wav{RESET}") - - parser.add_argument( - "--convert_video", help=f"Convert video file(s) to and from different format ie mp4 to mkv.\ - example {DYELLOW}filemac --convert_video example.mp4 -t mkv{RESET}") - - parser.add_argument( - "--convert_image", help=f"Convert image file(s) to and from different format ie png to jpg.\ - example {DYELLOW}filemac --convert_image example.jpg -t png{RESET}") - - parser.add_argument( - - "--convert_doc2image", help=f"Convert documents to images ie png to jpg.\ - example {DYELLOW}filemac --convert_doc2image example.pdf -t png{RESET}") - - parser.add_argument("-xA", "--extract_audio", - help=f"Extract audio from a video.\ - example {DYELLOW}filemac -xA example.mp4 {RESET}") - - parser.add_argument( - "-Av", "--Analyze_video", help=f"Analyze a given video.\ - example {DYELLOW}filemac --analyze_video example.mp4 {RESET}") - - parser.add_argument("-t", "--target_format", - help="Target format for conversion (optional)") - - parser.add_argument( - "--resize_image", help=f"change size of 
an image compress/decompress \ - example {DYELLOW}filemac --resize_image example.png -t png {RESET}") - - parser.add_argument("-t_size", help="used in combination with resize_image \ - to specify target image size") - - parser.add_argument( - "-S", "--scan", help=f"Scan pdf file and extract text\ - example {DYELLOW}filemac --scan example.pdf {RESET}") - - parser.add_argument( - "-SA", "--scanAsImg", help=f"Scan pdf file and extract text\ - example {DYELLOW}filemac --scanAsImg example.pdf {RESET}") - - parser.add_argument("--OCR", help=f"Extract text from an image.\ - example {DYELLOW}filemac --OCR image.png{RESET}") - - args = parser.parse_args() - - -# Call function to handle document conversion inputs before begining conversion - if args.convert_doc == 'help': - print(SUPPORTED_DOC_FORMATS) - sys.exit(1) - if args.convert_doc: - ev = Eval(args.convert_doc, args.target_format) - ev.document_eval() - - -# Call function to handle video conversion inputs before begining conversion - elif args.convert_video: - if args.convert_video == 'help' or args.convert_video is None: - print(SUPPORTED_VIDEO_FORMATS_SHOW) - sys.exit(1) - ev = VideoConverter(args.convert_video, args.target_format) - ev.CONVERT_VIDEO() -# Call function to handle image conversion inputs before begining conversion - - elif args.convert_image: - if args.convert_image == 'help' or args.convert_image is None: - print(SUPPORTED_IMAGE_FORMATS_SHOW) - sys.exit(1) - conv = ImageConverter(args.convert_image, args.target_format) - conv.convert_image() - -# Handle image resizing - elif args.resize_image: - res = Compress_Size(args.resize_image) - res.resize_image(args.t_size) - -# Handle documents to images conversion - elif args.convert_doc2image: - conv = MakeConversion(args.convert_doc2image) - conv.doc2image(args.target_format) - -# Call function to handle audio conversion inputs before begining conversion - elif args.convert_audio: - if args.convert_audio == 'help' or args.convert_audio is None: - 
print(SUPPORTED_AUDIO_FORMATS_SHOW) - sys.exit(1) - ev = AudioConverter(args.convert_audio, args.target_format) - ev.pydub_conv() - - -# Call module to evaluate audio files before making audio extraction from input video files conversion - elif args.extract_audio: - vi = ExtractAudio(args.extract_audio) - vi.moviepyextract() - -# Call module to scan the input and extract text - elif args.scan: - sc = Scanner(args.scan) - sc.scanPDF() - -# Call module to scan the input FILE as image object and extract text - elif args.scanAsImg: - sc = Scanner(args.scanAsImg) - tx = sc.scanAsImgs() -# Call module to handle Candidate images for text extraction inputs before begining conversion - elif args.OCR: - conv = ExtractText(args.OCR) - conv.OCR() - - elif args.Analyze_video: - analyzer = SA(args.Analyze_video) - analyzer.SimpleAnalyzer() - - -if __name__ == "__main__": - main() diff --git a/filemac/formats.py b/filemac/formats.py deleted file mode 100644 index 6490294..0000000 --- a/filemac/formats.py +++ /dev/null @@ -1,121 +0,0 @@ -# multimedia_cli/formats.py -from .colors import CYAN, DBLUE, DMAGENTA, DYELLOW, RESET - -SUPPORTED_DOC_FORMATS = f""" -|--------------------------------------------------------------------------- -|{DBLUE}Input format{RESET} |{DBLUE}Output format{RESET} | -|________________________________|__________________________________________| -| xlsx {DYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) | -| | | -| doc/docx{DYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) | -| | | -| txt {DYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) | -| | | -| pdf {DYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) | -| | | -| pptx/ppt{DYELLOW}-------------------->{RESET}|doc/docx | -| | -|___________________________________________________________________________| -""" - - -def p(): - print(SUPPORTED_DOC_FORMATS) - - -# Add supported input and output formats for each media type -SUPPORTED_AUDIO_FORMATS = ["wav", # 
Waveform Audio File Format - "mp3", # MPEG Audio Layer III - "ogg", - "flv", - "ogv", - "webm", - "aac", # Advanced Audio Codec - "bpf", - "aiff", - "flac"] # Free Lossless Audio Codec) - -SUPPORTED_AUDIO_FORMATS_SHOW = f''' -|==============================| -| {DBLUE}Supported I/O formats {RESET} | -|==============================| -| {CYAN} wav {DYELLOW} | -| {CYAN} mp3 {DYELLOW} | -| {CYAN} ogg {DYELLOW} | -| {CYAN} flv {DYELLOW} | -| {CYAN} ogv {DYELLOW} | -| {CYAN} matroska {DYELLOW} | -| {CYAN} mov {DYELLOW} | -| {CYAN} webm {DYELLOW} | -| {CYAN} aac {DYELLOW} | -| {CYAN} bpf {DYELLOW} | --------------------------------- - -''' - -SUPPORTED_VIDEO_FORMATS = ["MP4", # MPEG-4 part 14 - "AVI", # Audio Video Interleave - "OGV", - "WEBM", - "MOV", # QuickTime Movie - "MKV", # Matroska Multimedia Container - MKV is known for its support of high-quality content. - "FLV", # - "WMV"] - -SUPPORTED_VIDEO_FORMATS_SHOW = f''' -,_______________________________________, -|x| {DBLUE}Supported I/O formats{RESET} |x| -|x|-----------------------------------{DYELLOW}|x| -|x| {DMAGENTA} MP4 {DYELLOW} |x| -|x| {DMAGENTA} AVI {DYELLOW} |x| -|x| {DMAGENTA} OGV {DYELLOW} |x| -|x| {DMAGENTA} WEBM{DYELLOW} |x| -|x| {DMAGENTA} MOV {DYELLOW} |x| -|x| {DMAGENTA} MKV {DYELLOW} |x| -|x| {DMAGENTA} FLV {DYELLOW} |x| -|x| {DMAGENTA} WMV {DYELLOW} |x| -|,|___________________________________|,|{DYELLOW} -''' - -SUPPORTED_IMAGE_FORMATS = { - "JPEG": ".jpg", # Joint Photographic Experts Group -Lossy compression - "PNG": ".png", # Joint Photographic Experts Group - not lossy - "GIF": ".gif", # Graphics Interchange Format - "BM": ".bmp", - "BMP": ".dib", - "DXF": ".dxf", # Autocad format 2D - "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression - "EXR": ".exr", - "pic": ".pic", - "pict": "pct", - "PDF": ".pdf", - "WebP": ".webp", - "ICNS": ".icns", - "PSD": ".psd", - "SVG": ".svg", # Scalable vector Graphics - "EPS": ".eps", - 
"PostSciript": ".ps", - "PS": ".ps"} - -SUPPORTED_IMAGE_FORMATS_SHOW = f''' -__________________________________________ -|x|{DBLUE}Supported I/O formats{RESET} |x| -|x|_____________________________________{DYELLOW}|x| -|x| {DMAGENTA} JPEG {DYELLOW} |x| -|x| {DMAGENTA} PNG {DYELLOW} |x| -|x| {DMAGENTA} GIF {DYELLOW} |x| -|x| {DMAGENTA} BM {DYELLOW} |x| -|x| {DMAGENTA} TIFF {DYELLOW} |x| -|x| {DMAGENTA} EXR {DYELLOW} |x| -|x| {DMAGENTA} PDF {DYELLOW} |x| -|x| {DMAGENTA} WebP{DYELLOW} |x| -|x| {DMAGENTA} ICNS {DYELLOW} |x| -|x| {DMAGENTA} PSD {DYELLOW} |x| -|x| {DMAGENTA} SVG {DYELLOW} |x| -|x| {DMAGENTA} EPS {DYELLOW} |x| -|x| {DMAGENTA} Postscript {DYELLOW} |x| -|_|_____________________________________|x| -''' - -SUPPORTED_DOCUMENT_FORMATS = ['pdf', 'doc', 'docx', 'csv', 'xlsx', 'xls', - 'ppt', 'pptx', 'txt', 'ogg', 'mp3', 'audio'] diff --git a/filemac/handle_warnings.py b/filemac/handle_warnings.py deleted file mode 100644 index 3e592d1..0000000 --- a/filemac/handle_warnings.py +++ /dev/null @@ -1,6 +0,0 @@ -import warnings - -warnings.simplefilter("ignore", RuntimeWarning) -with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message="Your system is avx2 capable but pygame was not built with support for it.", category=RuntimeWarning) diff --git a/filemac/image_op.py b/filemac/image_op.py deleted file mode 100644 index 61cfe6d..0000000 --- a/filemac/image_op.py +++ /dev/null @@ -1,64 +0,0 @@ -from PIL import Image -import os -import logging -import logging.handlers - -logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s') -logger = logging.getLogger(__name__) - - -class Compress_Size: - - def __init__(self, input_image_path): - self.input_image_path = input_image_path - - def resize_image(self, target_size): - ext = input_image_path[-3:] - output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}" - - original_image = Image.open(input_image_path) - original_size = original_image.size - size = 
os.path.getsize(input_image_path) - print(f"Original image size \033[93m{size/1000_000:.2f}MiB") - - # Calculate the aspect ratio of the original image - aspect_ratio = original_size[0] / original_size[1] - - # Convert the target sixze to bytes - tz = int(target_size[:-2]) - if target_size[-2:].lower() == 'mb': - target_size_bytes = tz * 1024 * 1024 - elif target_size[-2:].lower() == 'kb': - target_size_bytes = tz * 1024 - else: - logger.warning("Invalid units. Please use either \033[1;95m'MB'\033[0m\ - or \033[1;95m'KB'\033[0m") - - # Calculate the new dimensions based on the target size - new_width, new_height = Compress_Size.calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes) - print("\033[94mProcessing ..\033[0m") - resized_image = original_image.resize((new_width, new_height)) - resized_image.save(output_image_path) - t_size = os.path.getsize(output_image_path)/1000_000 - print("\033[1;92mOk\033[0m") - print(f"Image resized to \033[1;93m{t_size:.2f}\033[0m and saved to \033[1;93m{output_image_path}") - - def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes): - # Calculate the new dimensions based on the target size in bytes - original_size_bytes = original_size[0] * original_size[1] * 3 # Assuming 24-bit color depth - scale_factor = (target_size_bytes / original_size_bytes) ** 0.5 - - new_width = int(original_size[0] * scale_factor) - new_height = int(original_size[1] * scale_factor) - - return new_width, new_height - - -if __name__ == "__main__": - input_image_path = input("Enter the path to the input image: ") - target_size = input("Enter the target output size (MB or KB): ") - ext = input_image_path[-3:] - output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}" - - init = Compress_Size(input_image_path) - init.resize_image(target_size) diff --git a/filewarp/__init__.py b/filewarp/__init__.py new file mode 100644 index 0000000..3445a81 --- /dev/null +++ b/filewarp/__init__.py @@ -0,0 +1,78 @@ 
+""" +....//////// /// /// ///////// /////// ////// ////// //////// + // /// /// // // // // // // // /// + ///////// /// /// //////// // /// // //----// // + // /// //////// //_____ // // // // ////////// + +Converter document file(s) to different format ie pdf_to_docx. + example filewarp --convert_doc example.docx -t pdf + +Convert audio file(s) to and from different format ie mp3 to wav + example filewarp --convert_audio example.mp3 -t wav + +Convert video file(s) to and from different format ie mp4 to mkv. + example filewarp --convert_video example.mp4 -t mkv + +Convert image file(s) to and from different format ie png to jpg. + example filewarp --convert_image example.jpg -t png + +Extract audio from a video. example filewarp -xA example.mp4 + +Analyze a given video. + example filewarp --analyze_video example.mp4 + +hange size of an image compress/decompress + example filewarp --resize_image example.png -t_size 2mb -t png + +Scan pdf file and extract text + example filewarp --scan example.pdf + +Convert pdf file to long image + example filewarp --doc_long_image example.pdf + +Scan [doc, docx, pdf] + file and extract text,-> very effective + example filewarp --scanAsImg example.pdf + +Extract text from an image. 
+ example filewarp --OCR image.png +""" + +from audiobot.cli import cli as audiobot + +from .core.image.core import ( + GrayscaleConverter, + ImageCompressor, + ImageConverter, + ImageDocxConverter, + ImagePdfConverter, +) +from .core.pdf.core import PageExtractor, PDF2LongImageConverter, PDFCombine +from .core.recorder import SoundRecorder +from .core.video.core import VideoConverter + +# from .cli.main import CliInit as main, OperationMapper +from .cli.cli import main +from .cli.converter import DocumentConverter +from voice.VoiceType import VoiceTypeEngine + + +__version__ = "2.1.2" + +__all__ = [ + "audiobot", + "GrayscaleConverter", + "ImageConverter", + "ImageCompressor", + "PDF2LongImageConverter", + "ImagePdfConverter", + "ImageDocxConverter", + "PDFCombine", + "PageExtractor", + "VideoConverter", + "SoundRecorder", + "DocumentConverter", + # "OperationMapper", + "VoiceTypeEngine", + "main", +] diff --git a/filewarp/cli/__init__.py b/filewarp/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/filewarp/cli/_entry_.py b/filewarp/cli/_entry_.py new file mode 100644 index 0000000..b290097 --- /dev/null +++ b/filewarp/cli/_entry_.py @@ -0,0 +1,18 @@ +from rich.console import Console +from rich.theme import Theme + +# Initialize rich console with custom theme +custom_theme = Theme( + { + "info": "cyan", + "warning": "yellow", + "error": "red bold", + "success": "green bold", + "file": "blue", + "dir": "magenta", + "command": "yellow bold", + "arg": "cyan", + } +) + +console = Console(theme=custom_theme) diff --git a/filewarp/cli/banners.py b/filewarp/cli/banners.py new file mode 100644 index 0000000..2737253 --- /dev/null +++ b/filewarp/cli/banners.py @@ -0,0 +1,145 @@ +from rich.console import Console +from rich.panel import Panel +from rich.align import Align + +import importlib.metadata + +try: + __version__ = importlib.metadata.version("filewarp") +except importlib.metadata.PackageNotFoundError: + __version__ = "unknown" + +# ASCII Art 
Banner +BANNER_old = f""" +╔══════════════════════════════════════════════════════════════╗ +║ ║ +║ ███████╗██╗██╗ ███████╗███╗ ███╗ █████╗ ██████╗ ║ +║ ██╔════╝██║██║ ██╔════╝████╗ ████║██╔══██╗██╔════╝ ║ +║ █████╗ ██║██║ █████╗ ██╔████╔██║███████║██║ ║ +║ ██╔══╝ ██║██║ ██╔══╝ ██║╚██╔╝██║██╔══██║██║ ║ +║ ██║ ██║███████╗███████╗██║ ╚═╝ ██║██║ ██║╚██████╗ ║ +║ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ║ +║ ║ +║ File Management & Conversion ║ +║ Version {__version__} • 2026 ║ +╚══════════════════════════════════════════════════════════════╝ +""" + + +BANNER = f""" +╔══════════════════════════════════════════════════════════════════════════════╗ +║ ║ +║ ███████╗██╗██╗ ███████╗██╗ ██╗ █████╗ ██████╗ ██████╗ ║ +║ ██╔════╝██║██║ ██╔════╝██║ ██║██╔══██╗██╔══██╗██╔══██╗ ║ +║ █████╗ ██║██║ █████╗ ██║ █╗ ██║███████║██████╔╝██████╔╝ ║ +║ ██╔══╝ ██║██║ ██╔══╝ ██║███╗██║██╔══██║██╔══██╗██╔═══╝ ║ +║ ██║ ██║███████╗███████╗╚███╔███╔╝██║ ██║██║ ██║██║ ║ +║ ╚═╝ ╚═╝╚══════╝╚══════╝ ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ║ +║ ║ +║ ║ +║ File Warpping & Conversion Tool ║ +║ Version {__version__} • 2026 ║ +║ ║ +║ ╔══════════════════════════════════════════════════════╗ ║ +║ ║ Warp • Convert • Compress • Encrypt • Transform ║ ║ +║ ╚══════════════════════════════════════════════════════╝ ║ +║ ║ +╚══════════════════════════════════════════════════════════════════════════════╝ +""" + +# Modern Minimalist Banner for FileWarp + +BANNER_v1 = f""" +╭──────────────────────────────────────────────────────────────────────────────╮ +│ │ +│ ┌─┐┬ ┬┌┐┌┬─┐┌┬┐┌─┐┬ ┬ ┬ ┬┌─┐┬ ┬┌─┐ │ +│ │ │ ││││├┬┘ ││├┤ └┬┘ │││├─┤└┬┘├┤ │ +│ └─┘└─┘┘└┘┴└──┴┘└─┘ ┴ └┴┘┴ ┴ ┴ └─┘ │ +│ │ +│ ╭──────────────────────────────────────────────────────────────────────╮ │ +│ │ FILE WRAP v{__version__} │ │ +│ │ The Ultimate File Warpping Solution │ │ +│ ╰──────────────────────────────────────────────────────────────────────╯ │ +│ │ +│ 📦 Wrap files in containers 🔒 Encrypt sensitive data │ +│ 🗜️ Compress with smart algorithms 🔄 Convert between 
formats │ +│ 📤 Batch process thousands of files 🎨 Preserve metadata │ +│ │ +│ ⚡ "Wrap it once, use it everywhere" ⚡ │ +│ │ +│ 🌐 github.com/skye-cyber filewarp | 📚 Documentation: filewarp.io │ +│ │ +╰──────────────────────────────────────────────────────────────────────────────╯ +""" + +# Cyberpunk Style Banner for FileWarp + +BANNER_v2 = """ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ │ +│ ░▒▓███████▓▒░ ░▒▓█▓▒░▒▓███████▓▒░░▒▓█▓▒░░▒▓█▓▒░▒▓███████▓▒░ ░▒▓██████▓▒░ │ +│ ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░ │ +│ ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░ │ +│ ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓███████▓▒░ ░▒▓██████▓▒░ ░▒▓███████▓▒░ ░▒▓█▓▒░ │ +│ ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░ ░▒▓█▓▒░ │ +│ ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░ ░▒▓█▓▒░ ░▒▓█▓▒░░▒▓█▓▒░ ░▒▓█▓▒░ │ +│ ░▒▓███████▓▒░░▒▓█▓▒░▒▓███████▓▒░ ░▒▓█▓▒░ ░▒▓███████▓▒░ ░▒▓██████▓▒░ │ +│ │ +│ ╔══════════════════════════════════════════════════════════════════════╗ │ +│ ║ FILE WARP - CYBER EDITION ║ │ +│ ║ Advanced File Warpping & Transformation System ║ │ +│ ╚══════════════════════════════════════════════════════════════════════╝ │ +│ │ +│ ⚡ INITIALIZING CORE SYSTEMS... 
⚡ │ +│ 🔐 ENCRYPTION: ACTIVE 📦 WRAPPING: READY 🗜️ COMPRESSION: ONLINE │ +│ 📊 MEMORY: 42GB ALLOCATED 🔋 POWER: 100% │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +""" + + +BANNER_v3 = f""" +╭──────────────────────────────────────────────────────────────────────────╮ +│ │ +│ ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲ │ +│ │ +│ F I L E W R A P │ +│ │ +│ ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱ │ +│ │ +│ 📦 Wrap files into containers 🔄 Convert between formats │ +│ 🗜️ Smart compression algorithms 🔒 AES-256 encryption │ +│ 📤 Batch processing 🏷️ Metadata preservation │ +│ │ +│ ╭────────────────────────────────────────────────────────────────╮ │ +│ │ "Your files, perfectly warpped, every time." │ │ +│ ╰────────────────────────────────────────────────────────────────╯ │ +│ │ +│ Version {__version__} | MIT License | github.com/skye-cyber/filewarp │ +│ │ +╰──────────────────────────────────────────────────────────────────────────╯ +""" + +console = Console() + + +def display_banner(): + """Display the FileWarp banner with Rich styling""" + console.print( + Align.left( + Panel( + BANNER, + border_style="cyan", + padding=(1, 2), + title="[bold cyan]FileWarp[/]", + subtitle=f"[dim]v{__version__}[/]", + ) + ) + ) + console.print() # Empty line for spacing + + +def display_banner_simple(): + """Display the animated banner""" + console.print(BANNER, style="bold cyan", justify="center") + console.print("─" * console.width, style="dim") diff --git a/filewarp/cli/cli.py b/filewarp/cli/cli.py new file mode 100644 index 0000000..bd55bce --- /dev/null +++ b/filewarp/cli/cli.py @@ -0,0 +1,639 @@ +#!/usr/bin/env python3 + +import os +import sys + +# from functools import lru_cache +from pathlib import Path + +# Rich imports for amazing UI +from rich.panel import Panel +from rich.progress import Progress + +# Click for better CLI handling +import click + +# Local imports +from ..core.document import 
DocumentConverter +from ..core.pdf.core import PageExtractor, PDFScanner +from ..core.exceptions import FileSystemError, FilemacError + +# from ..utils.colors import fg, bg, rs +from ..utils.simple import logger +from ._entry_ import console +from .utils import ( + animate_processing, + show_quick_commands, + display_version, + show_supported_formats, + FileWarpGroup, + with_format_table, +) + +try: + from audiobot.cli import cli as audiobot_cli +except ImportError: + pass + + +# RESET = rs + + +@click.group( + cls=FileWarpGroup, + invoke_without_command=True, + context_settings=dict(help_option_names=["-h", "--help"]), +) +@click.option("--version", "-V", is_flag=True, help="Show software version and exit.") +@click.option( + "--no-resume", + is_flag=True, + default=False, + help="Don't resume previous file operation", +) +@click.option( + "--threads", "-t", type=int, default=3, help="Number of threads for operations" +) +@click.pass_context +def cli(ctx, version, no_resume, threads): + """Filemac: Advanced File Management & Conversion Tool""" + + if version: + display_version() + ctx.exit() + + # Store context values + ctx.ensure_object(dict) + ctx.obj["no_resume"] = no_resume + ctx.obj["threads"] = threads + + if ctx.invoked_subcommand is None: + console.print( + "\n[bold]Welcome to Filemac![/] Use [cyan]filewarp --help[/] for commands.\n" + ) + show_quick_commands() + + +# Document Conversion Commands +@cli.command(name="convert-doc") +@with_format_table("document") +@click.argument("files", nargs=-1, required=True) +@click.option("--to", "-tf", required=True, help="Target format for conversion") +@click.option("--isolate", "-iso", help="Isolate specific file types") +@click.option( + "--use-extras", "-X", is_flag=True, help="Use alternative conversion method" +) +@click.pass_context +# @animate_processing("Document conversion") +def convert_document(ctx, files, to, isolate, use_extras): + """Convert documents between formats (PDF, DOCX, etc.)""" + + if 
files[0] == "help": + from ..utils.formats import create_doc_formats_table + + create_doc_formats_table() + # show_supported_formats("document") + return + + console.print( + Panel( + f"[bold]Converting[/] {len(files)} file(s) to [cyan]{to}[/]", + border_style="green", + ) + ) + + with Progress(console=console) as progress: + task = progress.add_task("[cyan]Converting...", total=len(files)) + + from .converter import MethodMappingEngine, DirectoryConverter, Batch_Audiofy + + if len(files) == 1 and os.path.isdir(files[0]): + converter = DirectoryConverter( + files[0], to, ctx.obj["no_resume"], ctx.obj["threads"], isolate + ) + converter._unbundle_dir_() + else: + for file in files: + if os.path.isfile(file): + ev = MethodMappingEngine(file, to) + ev.document_eval() + progress.update(task, advance=1) + + +# Audio Commands +@cli.command(name="convert-audio") +@with_format_table("audio") +@click.argument("file", required=True) +@click.option("--to", "-tf", required=True, help="Target format for conversion") +@animate_processing("Audio conversion") +def convert_audio(file, to): + """Convert audio files between formats (MP3, WAV, etc.)""" + + if file == "help": + show_supported_formats("audio") + return + + from ..core.audio.core import AudioConverter + + ev = AudioConverter(file, to) + ev.pydub_conv() + + +@cli.command(name="join-audio") +@click.argument("files", nargs=-1, required=True) +@click.option("--output", "-o", help="Output filename") +@animate_processing("Audio joining") +def join_audio(files, output): + """Join multiple audio files into one""" + + from ..core.audio.core import AudioJoiner + + joiner = AudioJoiner(list(files)) + joiner.worker() + + console.print(f"[bold green]✓[/] Joined {len(files)} files successfully") + + +@cli.command(name="extract-audio") +@click.argument("video_file", required=True) +@animate_processing("Audio extraction") +def extract_audio(video_file): + """Extract audio from video files""" + + from ..core.audio.core import 
AudioExtracter + + vi = AudioExtracter(video_file) + vi.moviepyextract() + + +# Video Commands +@cli.command(name="convert-video") +@with_format_table("video") +@click.argument("file", required=True) +@click.option("--to", "-tf", required=True, help="Target format for conversion") +@animate_processing("Video conversion") +def convert_video(file, to): + """Convert video files between formats (MP4, MKV, etc.)""" + + if file == "help": + show_supported_formats("video") + return + + from ..core.video.core import VideoConverter + + ev = VideoConverter(file, to) + ev.CONVERT_VIDEO() + + +@cli.command(name="analyze-video") +@click.argument("video_file", required=True) +@animate_processing("Analysing Video") +def analyze_video(video_file): + """Analyze video file properties""" + + from ..miscellaneous.video_analyzer import SimpleAnalyzer + + analyzer = SimpleAnalyzer(video_file) + analyzer.SimpleAnalyzer() + + +# Image Commands +@cli.command(name="convert-image") +@with_format_table("image") +@click.argument("file", required=True) +@click.option("--to", "-tf", required=True, help="Target format for conversion") +# @animate_processing("Image conversion") +def convert_image(file, to): + """Convert image files between formats (PNG, JPG, etc.)""" + + if file == "help": + show_supported_formats("image") + return + + from ..core.image.core import ImageConverter + + conv = ImageConverter(file, to) + conv.convert_image() + + +@cli.command(name="resize-image") +@click.argument("image_file", required=True) +@click.option("--size", "-s", required=True, help="Target size (e.g., 2mb, 800x600)") +@click.option("--to", "-tf", help="Target format") +@animate_processing("Image resizing") +def resize_image(image_file, size, to): + """Resize or compress images""" + + from ..core.image.core import ImageCompressor + + res = ImageCompressor(image_file) + res.resize_image(size) + + +@cli.command(name="images-to-pdf") +@click.argument("sources", nargs=-1, required=True) +@click.option("--sort", 
is_flag=True, help="Sort images") +@click.option("--base", is_flag=True, help="Base name for output") +@click.option("--walk", is_flag=True, help="Process subdirectories") +@click.option("--clean", is_flag=True, help="Clean up after processing") +@animate_processing("Image to PDF conversion") +def images_to_pdf(sources, sort, base, walk, clean): + """Convert images to PDF""" + + from ..core.image.core import ImagePdfConverter + + sources_list = list(sources) + + if len(sources_list) > 1 or os.path.isfile(os.path.abspath(sources_list[0])): + converter = ImagePdfConverter(image_list=sources_list) + else: + converter = ImagePdfConverter( + input_dir=sources_list[0], order=sort, base=base, walk=walk, clean=clean + ) + + converter.run() + + +@cli.command(name="images-to-word") +@click.argument("sources", nargs=-1, required=True) +@animate_processing("Image to Word conversion") +def images_to_word(sources): + """Convert images to Word document""" + + from ..core.image.core import ImageDocxConverter + + sources_list = list(sources) + + if len(sources_list) > 1: + converter = ImageDocxConverter(image_list=sources_list) + else: + converter = ImageDocxConverter(input_dir=sources_list[0]) + + converter.run() + + +@cli.command(name="grayscale") +@click.argument("sources", nargs=-1, required=True) +@animate_processing("Grayscale conversion") +def convert_grayscale(sources): + """Convert images to grayscale""" + + from ..core.image.core import GrayscaleConverter + + sources_list = list(sources) + converter = ( + GrayscaleConverter(sources_list) + if len(sources_list) > 1 + else GrayscaleConverter(sources_list[0]) + ) + converter.run() + + +# PDF Commands +@cli.command(name="pdf-join") +@click.argument("pdfs", nargs=-1, required=True) +@click.option("--order", "-o", default="AAB", help="Page order pattern") +@animate_processing("PDF joining") +def pdf_join(pdfs, order): + """Join multiple PDF files""" + + if pdfs[0].lower() == "help": + from ..utils.helpmaster import 
pdf_combine_help + + opts, helper, example = pdf_combine_help() + console.print(Panel(helper, title="PDF Join Help", border_style="blue")) + console.print(f"[yellow]Example:[/] {example}") + return + + from ..core.pdf.core import PDFCombine + + console.print(Panel(f"[bold]Joining[/] {len(pdfs)} PDF files", border_style="blue")) + + init = PDFCombine(list(pdfs), None, None, order) + init.controller() + + +@cli.command(name="extract-pages") +@click.argument("pdf_file", required=True) +@click.argument("pages", nargs=-1, type=str, required=True) +@animate_processing("Page extraction") +def extract_pages(pdf_file, pages): + """Extract specific pages from PDF""" + + args = [pdf_file] + [str(p) for p in pages] + PageExtractor.run(args) + + +@cli.command(name="extract-images") +@click.argument("pdf_file", required=True) +@click.option("--size", help="Image size (e.g., 256x82)") +@animate_processing("Image extraction") +def extract_images(pdf_file, size): + """Extract images from PDF""" + + from ..core.image.extractor import process_files + + if size: + size_tuple = tuple([int(x) for x in size.lower().split("x")]) + process_files([pdf_file], tsize=size_tuple) + else: + process_files([pdf_file]) + + +@cli.command(name="scan-pdf") +@click.argument("pdf_file", required=True) +def scan_pdf(pdf_file): + """Scan PDF and extract text""" + try: + sc = PDFScanner(pdf_file) + sc.scanPDF() + except Exception as e: + print(e) + + +@cli.command(name="scan-as-image") +@click.argument("pdf_file", required=True) +def scan_as_image(pdf_file): + """Scan PDF as images then extract text""" + try: + sc = PDFScanner(pdf_file) + sc.scanAsImgs() + except Exception as e: + print(e) + + +@cli.command(name="scan-long") +@click.argument("pdf_file", required=True) +@click.option("--separator", "-sep", default="\n", help="Text separator") +def scan_long(pdf_file, separator): + """Scan document as long image (effective for complex layouts)""" + try: + sc = PDFScanner(pdf_file, separator) + 
sc.scanAsLongImg() + except Exception as e: + print(e) + + +@cli.command(name="pdf2long-image") +@click.argument("pdf_file", required=True) +def pdf_to_long_image(pdf_file): + """Convert PDF to long image""" + try: + from ..core.pdf.core import PDF2LongImageConverter + + conv = PDF2LongImageConverter(pdf_file) + conv.preprocess() + except Exception as e: + print(e) + + +# SVG Commands +@cli.command(name="convert-svg") +@click.argument("svg_file", required=True) +@click.option("--to", "-tf", required=True, help="Target format (png, pdf, svg)") +@animate_processing("Converting SVG") +def convert_svg(svg_file, to): + """Convert SVG files to other formats""" + + from ..core.svg.core import SVGConverter + from ..utils.file_utils import generate_filename + + converter = SVGConverter() + + svg_path = Path(svg_file) + + if not svg_path.exists(): + console.print(f"[bold red]Error:[/] SVG File not found {to}") + return + + converters = { + "png": converter.to_png, + "pdf": converter.to_pdf, + "svg": converter.to_svg, + } + + convert_func = converters.get(to) + if not convert_func: + console.print(f"[bold red]Error:[/] Invalid target format: {to}") + console.print("Supported formats: [cyan]png[/], [cyan]pdf[/], [cyan]svg[/]") + return + + output = generate_filename(Path(svg_path.absolute().parent.as_posix()), to) + convert_func( + input_svg=svg_path.as_posix(), output_path=output.as_posix(), is_string=False + ) + + +# OCR Commands +@cli.command(name="ocr") +@click.argument("images", nargs=-1, required=True) +@click.option("--separator", "-sep", default="\n", help="Text separator") +@animate_processing("OCR processing") +def perform_ocr(images, separator): + """Extract text from images using OCR""" + + from ..core.ocr import ExtractText + + ocr = ExtractText(list(images), separator) + ocr.run() + + +# Document to Image +@cli.command(name="doc-to-image") +@click.argument("document", required=True) +@click.option("--to", "-tf", required=True, help="Target image format") 
+@animate_processing("Document to image conversion") +def doc_to_image(document, to): + """Convert documents to images""" + + conv = DocumentConverter(document) + conv.doc2image(to) + + +# HTML to Word +@cli.command(name="html2word") +@click.argument("html_files", nargs=-1, required=True) +@animate_processing("HTML to Word conversion") +def html_to_word(html_files): + """Convert HTML files to Word documents""" + + from ..core.html import HTML2Word + from ..utils.file_utils import generate_filename + + converter = HTML2Word() + + for html_file in html_files: + output = generate_filename(Path(html_file).absolute().parent, "docx") + converter.convert_file(html_file, output) + + +# Markdown to Word +@cli.command(name="markdown2word") +@click.argument("markdown_file", required=True) +@animate_processing("Markdown to Word conversion") +def markdown_to_word(markdown_file): + """Convert Markdown to Word with Mermaid rendering""" + pass + + +# Text to Word +@cli.command(name="text2word") +@click.argument("text_file", required=True) +@click.option("--font-size", default=12, help="Font size") +@click.option("--font-name", default="Times New Roman", help="Font name") +@animate_processing("Text to Word conversion") +def text_to_word(text_file, font_size, font_name): + """Convert styled text to Word document""" + + from ..core.text.core import StyledText + + init = StyledText(text_file, None, font_size, font_name) + init.text_to_word() + + +# Document Conversion Commands +@cli.command(name="edit-video") +@with_format_table("video") +@click.argument("files", nargs=-1, required=True) +@click.option("--range", "-r", type=str, help="Comma seperated ranges eg 0,30") +@click.option("--trim_start", type=int, help="Trim the first n seconds") +@click.option("--trim_end", type=int, help="Trim the last n seconds") +@click.option( + "--quality", + type=str, + default="medium", + help="Output video quality: ultrafast,fast,medium,slow,veryslow\n \ + Fast imply fast encoding hence large size", 
+) +@click.pass_context +def edit_video(ctx, files, range, trim_start, trim_end, quality): + """Convert documents between formats (PDF, DOCX, etc.)""" + + if files[0] == "help": + from ..utils.formats import create_video_formats_table + + return create_video_formats_table() + + console.print( + Panel( + f"[bold]Editing[/] {len(files)} file(s) by [cyan]triming[/]", + border_style="green", + expand=False, + ) + ) + + from ..core.video.Editor import VideoEditor + from ..utils.file_utils import generate_filename + from ..utils.decorators import for_loop + # from ..core.video.models import VideoQuality + + editor = VideoEditor() + + # with Progress(console=console) as progress: + # task = progress.add_task("[cyan]Editing...", total=len(files)) + + # if len(files) == 1 and not Path(files[0]).is_dir(): + # output_file = generate_filename( + # Path(files[0]).parent, Path(files[0]).suffix, "" + # ) + # if trim_start: + # editor.trim_start(files[0], output_file, quality=quality) + # elif trim_end: + # editor.trim_end(files[0], output_file, quality=quality) + # elif range: + # trange = range.split(",") + # if len(tuple(trange)) > 1: + # editor.trim_video( + # files[0], output_file, tuple(trange), quality=quality + # ) + # console.print(f"File: {output_file}") + # else: + @for_loop(files) + def process(self, file): + try: + file = Path(file) + if file.exists(): + output_file = generate_filename(file.parent, file.suffix.strip("."), "") + if trim_start: + editor.trim_start(file, output_file, trim_start, quality=quality) + elif trim_end: + editor.trim_end(file, output_file, trim_end, quality=quality) + elif range: + trange = [int(r) for r in range.split(",")] + if len(tuple(trange)) == 1: + trange = [0, trange[0]] + + editor.trim_video(file, output_file, tuple(trange), quality=quality) + except Exception as e: + print(e) + + process(None) + + +# Recording and Voice Commands +@cli.command(name="record") +def record_audio(): + """Record audio from microphone""" + + from 
..core.recorder import SoundRecorder + + console.print(Panel("[bold]Audio Recording[/]", border_style="red")) + console.print("[yellow]Press Ctrl+C to stop recording[/]") + + rec = SoundRecorder() + rec.run() + + +@cli.command(name="voice-type") +def voice_type(): + """Use voice to type text""" + + from voice.VoiceType import VoiceTypeEngine + + try: + console.print(Panel("[bold]Voice Typing Active[/]", border_style="green")) + console.print("[yellow]Speak clearly... Press Ctrl+C to stop[/]") + + engine = VoiceTypeEngine() + engine.start() + except KeyboardInterrupt: + console.print("\n[bold yellow]Voice typing stopped[/]") + except Exception as e: + console.print(f"[bold red]Error:[/] {str(e)}") + + +# Audio Effects (via audiobot) +@cli.command(name="audio-effects") +@click.argument("args", nargs=-1) +def audio_effects(args): + """Apply audio effects and voice changes""" + + try: + audiobot_cli(list(args)) + except NameError: + console.print("[bold red]Error:[/] audiobot module not available") + + +# Main entry point +def main(): + """Main entry point with error handling""" + try: + cli(obj={}) + except KeyboardInterrupt: + console.print("\n[yellow]Operation cancelled by user[/]") + sys.exit(0) + except FilemacError as e: + console.print(f"[bold red]Filemac Error:[/] {str(e)}") + sys.exit(1) + except FileSystemError as e: + console.print(f"[bold red]File System Error:[/] {str(e)}") + sys.exit(1) + except Exception as e: + raise + console.print(f"[bold red]Unexpected Error:[/] {str(e)}") + logger.critical(f"Critical failure: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/filewarp/cli/converter.py b/filewarp/cli/converter.py new file mode 100644 index 0000000..e6ef5c3 --- /dev/null +++ b/filewarp/cli/converter.py @@ -0,0 +1,206 @@ +import os +import sys +from typing import List, Union +from ..core.warning import default_supressor +from ..utils.simple import logger +from ..utils.colors import fg, rs +from ..core.tts.gtts import GoogleTTS 
+from ..utils.formats import ( + SUPPORTED_AUDIO_FORMATS_DIRECT, +) +from ..core.document import DocumentConverter + +RESET = rs +default_supressor() + + +class DirectoryConverter: + """ + If the input file in convert_doc argument is a directory, walk throught the directory and + converter all the surported files to the target format + """ + + def __init__(self, _dir_, _format_, no_resume, threads, _isolate_=None): + self._dir_ = _dir_ + self._format_ = _format_ + self._isolate_ = _isolate_ + self.no_resume = no_resume + self.threads = threads + # Handle isolation and non isolation modes distinctively + self._ls_ = ( + ["pdf", "docx", "doc", "xlsx", "ppt", "pptxxls", "txt"] + if _isolate_ is None + else [_isolate_] + ) + if self._isolate_: + print(f"INFO\t {fg.FMAGENTA}Isolate {fg.DCYAN}{self._isolate_}{RESET}") + + def _unbundle_dir_(self): + if self._format_ in SUPPORTED_AUDIO_FORMATS_DIRECT: + return Batch_Audiofy(self._dir_, self.no_resume, self.threads) + try: + for root, dirs, files in os.walk(self._dir_): + for file in files: + _ext_ = file.split(".")[-1] + + _path_ = os.path.join(root, file) + + if _ext_ in self._ls_ and os.path.exists(_path_): + print(f"INFO\t {fg.FYELLOW}Parse {fg.BLUE}{_path_}{RESET}") + init = MethodMappingEngine(_path_, self._format_) + init.document_eval() + + except FileNotFoundError as e: + print(e) + + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + + except Exception as e: + print(e) + pass + + +class Batch_Audiofy: + def __init__( + self, + obj: Union[ + os.PathLike, + str, + List[Union[os.PathLike, str]], + tuple[str], + ], + no_resume: bool, + threads: int = 3, + ): + self.folder = obj + self.no_resume = no_resume + self.threads = threads + self.worker() + + def worker(self): + conv = GoogleTTS(self.folder, resume=self.no_resume) + inst = conv.THAudio(conv) + inst.audiofy(num_threads=self.threads) + + +class MethodMappingEngine: + """ + Class to handle document conversions based on their extensions and the target + 
output document format + """ + + def __init__(self, file, outf): + self.file = file + self.outf = outf + + def spreedsheet(self, conv): + if self.outf.lower() == "csv": + conv.convert_xlsx_to_csv() + elif self.outf.lower() in ("txt", "text"): + conv.convert_xls_to_text() + elif self.outf.lower() in list(self.doc_ls): + conv.convert_xls_to_word() + elif self.outf.lower() == "db": + conv.convert_xlsx_to_database() + else: + print(f"{fg.RED}Unsupported output format❌{RESET}") + + def word(self, conv): + if self.outf.lower() in ("txt", "text"): + conv.word_to_txt() + elif self.outf.lower() == "pdf": + conv.word_to_pdf() + elif self.outf.lower() in ("pptx", "ppt"): + conv.word_to_pptx() + elif self.outf.lower() in ("audio", "ogg"): + conv = GoogleTTS(self.file) + conv.audiofy() + else: + print(f"{fg.RED}Unsupported output format❌{RESET}") + + def text(self, conv): + if self.outf.lower() == "pdf": + conv.txt_to_pdf() + elif self.outf.lower() in ("doc", "docx", "word"): + conv.text_to_word() + elif self.outf.lower() in ("audio", "ogg"): + conv = GoogleTTS(self.file) + conv.audiofy() + else: + print(f"{fg.RED}Unsupported output format❌{RESET}") + + def ppt(self, conv): + if self.outf.lower() in ("doc", "docx", "word"): + conv.ppt_to_word() + elif self.outf.lower() in ("text", "txt"): + word = conv.ppt_to_word() + conv = DocumentConverter(word) + conv.word_to_txt() + elif self.outf.lower() in ("pptx"): + conv.convert_ppt_to_pptx(self.file) + elif self.outf.lower() in ("audio", "ogg", "mp3", "wav"): + conv = GoogleTTS(self.file) + conv.audiofy() + else: + print(f"{fg.RED}Unsupported output format❌{RESET}") + + def pdf(self, conv): + if self.outf.lower() in ("doc", "docx", "word"): + conv.pdf_to_word() + elif self.outf.lower() in ("txt", "text"): + conv.pdf_to_txt() + elif self.outf.lower() in ("audio", "ogg", "mp3", "wav"): + conv = GoogleTTS(self.file) + conv.audiofy() + else: + print(f"{fg.RED}Unsupported output format❌{RESET}") + + def document_eval(self): + self.doc_ls = 
["docx", "doc"] + sheetls = ["xlsx", "xls"] + try: + conv = DocumentConverter(self.file) + if self.file.lower().endswith(tuple(sheetls)): + self.spreedsheet(conv=conv) + + elif self.file.lower().endswith(tuple(self.doc_ls)): + self.word(conv=conv) + + elif self.file.endswith("txt"): + self.text(conv=conv) + + elif self.file.split(".")[-1].lower() in ("ppt", "pptx"): + self.ppt(conv) + + elif self.file.lower().endswith("pdf"): + self.pdf(conv) + + elif self.file.lower().endswith("csv"): + if self.outf.lower() in ("xls", "xlsx", "excel"): + conv.convert_csv_to_xlsx() + + else: + print(f"{fg.BYELLOW}Unsupported Conversion type❌{RESET}") + pass + except Exception as e: + logger.error(e) + + +def _isolate_file(_dir_, target): + try: + isolated_files = [] + for root, dirs, files in os.walk(_dir_): + for file in files: + if file.lower().endswith(target): + _path_ = os.path.join(root, file) + isolated_files.append(_path_) + return isolated_files + except FileNotFoundError as e: + print(e) + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + print(e) diff --git a/filewarp/cli/utils.py b/filewarp/cli/utils.py new file mode 100644 index 0000000..bdaf2f9 --- /dev/null +++ b/filewarp/cli/utils.py @@ -0,0 +1,294 @@ +import time +import sys +import click +from rich.text import Text +from rich.table import Table +from rich.panel import Panel +from rich.align import Align + +from rich import box +from rich.progress import ( + Progress, + SpinnerColumn, + TextColumn, + # BarColumn, + # TaskProgressColumn, +) +from ._entry_ import console +from .banners import display_banner + + +def create_progress_spinner(message: str): + """Create a progress spinner""" + return Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console, + transient=True, + ) + + +def animate_processing(message: str): + """Decorator to animate processing""" + + def decorator(func): + def wrapper(*args, **kwargs): + with 
console.status(f"[bold cyan]{message}", spinner="dots"): + time.sleep(0.5) # Small delay for effect + result = func(*args, **kwargs) + console.print(f"[bold green]✓[/] {message} completed!") + return result + + return wrapper + + return decorator + + +class RichHelpFormatter(click.HelpFormatter): + """Custom help formatter with rich formatting""" + + def write_usage(self, prog, args="", prefix="Usage: "): + usage = Text() + usage.append(prefix, style="bold green") + usage.append(prog, style="bold cyan") + if args: + usage.append(f" {args}", style="yellow") + console.print(usage) + + def write_dl(self, rows, col_max=30, col_spacing=2): + table = Table(show_header=False, box=box.SIMPLE, padding=(0, 2)) + table.add_column("Command", style="bold cyan", no_wrap=True) + table.add_column("Description", style="white") + + for row in rows: + table.add_row(row[0], row[1]) + + console.print(table) + + +def show_quick_commands(): + """Show quick command reference""" + table = Table(title="Quick Commands", box=box.ROUNDED, border_style="blue") + table.add_column("Command", style="cyan", no_wrap=True) + table.add_column("Description", style="white") + table.add_column("Example", style="yellow") + + commands = [ + ("convert-doc", "Convert documents", "filewarp convert-doc file.docx --to pdf"), + ( + "convert-audio", + "Convert audio files", + "filewarp convert-audio song.mp3 --to wav", + ), + ( + "convert-video", + "Convert videos", + "filewarp convert-video video.mp4 --to mkv", + ), + ( + "convert-image", + "Convert images", + "filewarp convert-image photo.jpg --to png", + ), + ("ocr", "Extract text from images", "filewarp ocr image.png"), + ("pdf-join", "Join PDF files", "filewarp pdf-join file1.pdf file2.pdf"), + ("scan", "Scan PDF for text", "filewarp scan document.pdf"), + ] + + for cmd, desc, example in commands: + table.add_row(cmd, desc, example) + + console.print(table) + + +def display_version(): + """Display version with style""" + import importlib.metadata + + try: + 
__version__ = importlib.metadata.version("filewarp") + except importlib.metadata.PackageNotFoundError: + __version__ = "unknown" + + version_text = Text() + version_text.append("Filemac ", style="bold cyan") + version_text.append(__version__, style="bold green") + version_text.append(" • Advanced File Management", style="dim") + + panel = Panel(Align.center(version_text), border_style="blue", padding=(1, 2)) + console.print(panel) + + +# Utility Functions +def show_supported_formats(format_type: str): + """Show supported formats for a specific conversion type""" + try: + if format_type == "document": + from ..utils.formats import create_doc_formats_table + + # console.print("\n[bold]📄 Document Formats:[/]") + console.print(create_doc_formats_table()) + console.print("") + + elif format_type == "audio": + from ..utils.formats import create_audio_formats_table + + # console.print("\n[bold]🎵 Audio Formats:[/]") + console.print(create_audio_formats_table()) + console.print("") + + elif format_type == "video": + from ..utils.formats import create_video_formats_table + + # console.print("\n[bold]🎬 Video Formats:[/]") + # Call the function if it returns a table, otherwise assume it's a table object + table = create_video_formats_table() + console.print(table if callable(table) else table) + console.print("") + + elif format_type == "image": + from ..utils.formats import create_image_formats_table + + # console.print("\n[bold]🖼️ Image Formats:[/]") + console.print(create_image_formats_table()) + console.print("") + + except ImportError as e: + console.print(f"[yellow]Format tables not available: {e}[/]") + + +class FileWarpGroup(click.Group): + """Custom Click Group that displays banner with help""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Store console instance + self.console = console + + def get_command(self, ctx, cmd_name): + """Get command and store its metadata""" + cmd = super().get_command(ctx, cmd_name) + if cmd: + # Store 
the command's format table type in context for later use + if hasattr(cmd.callback, "_format_table_type"): + ctx.obj = ctx.obj or {} + ctx.obj["format_table_type"] = cmd.callback._format_table_type + return cmd + + def get_help(self, ctx): + """Override get_help to include banner""" + return super().get_help(ctx) + + def format_help(self, ctx, formatter): + """Override format_help to include banner and quick commands""" + + # Check if we have a format table type stored in context + if ctx.obj and ctx.obj.get("format_table_type"): + format_type = ctx.obj["format_table_type"] + show_supported_formats(format_type) + + # Show quick commands for main help (no subcommand) + else: + display_banner() + self.show_quick_commands() + + # Show main help + super().format_help(ctx, formatter) + + def format_commands(self, ctx, formatter): + """Override to add quick commands for main help""" + # Call parent first + super().format_commands(ctx, formatter) + + # Show quick commands for main help (no specific command) + if not ctx.obj or not ctx.obj.get("format_table_type"): + self.show_quick_commands() + + def main(self, *args, **kwargs): + """Override main to handle help display""" + try: + # Check if this is a help invocation before processing + if any(arg in sys.argv for arg in ["--help", "-h"]): + # Show banner immediately for help commands + # display_banner() + + # Check if it's a subcommand help + if len(sys.argv) > 1 and sys.argv[1] not in ["--help", "-h"]: + from click import Context + + cmd_name = sys.argv[1] + cmd = self.get_command(Context(self), cmd_name) + + # Show format table if command has metadata + if cmd and hasattr(cmd.callback, "_format_table_type"): + format_type = cmd.callback._format_table_type + show_supported_formats(format_type) + + return super().main(*args, **kwargs) + except SystemExit as e: + if e.code == 0: # Help exit + sys.exit(0) + raise + + def main_old(self, *args, **kwargs): + """Override main to handle help display""" + try: + return 
super().main(*args, **kwargs) + except SystemExit as e: + # If it's a help exit (code 0), we've already shown banner and tables + if e.code == 0: + sys.exit(0) + raise + + def show_quick_commands(self): + """Show quick command reference""" + table = Table(title="Quick Commands", box=box.ROUNDED, border_style="blue") + table.add_column("Command", style="cyan", no_wrap=True) + table.add_column("Description", style="white") + table.add_column("Example", style="yellow") + + commands = [ + ( + "convert-doc", + "Convert documents", + "filewrap convert-doc file.docx --to pdf", + ), + ( + "convert-audio", + "Convert audio files", + "filewrap convert-audio song.mp3 --to wav", + ), + ( + "convert-video", + "Convert videos", + "filewrap convert-video video.mp4 --to mkv", + ), + ( + "convert-image", + "Convert images", + "filewrap convert-image photo.jpg --to png", + ), + ("ocr", "Extract text from images", "filewrap ocr image.png"), + ("pdf-join", "Join PDF files", "filewrap pdf-join file1.pdf file2.pdf"), + ("--help", "Show help for any command", "filewrap convert-doc --help"), + ] + + for cmd, desc, example in commands: + table.add_row(cmd, desc, example) + + self.console.print(table) + self.console.print( + "\n[dim]Use 'filewrap COMMAND --help' for specific command options[/]\n" + ) + + +def with_format_table(format_type): + """Decorator to show format table for specific commands (lets default help run)""" + + def decorator(f): + # We don't override the command's behavior + # Just attach metadata for the group to use + f._format_table_type = format_type + return f + + return decorator diff --git a/filewarp/core/__init__.py b/filewarp/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/filewarp/core/audio/core.py b/filewarp/core/audio/core.py new file mode 100644 index 0000000..e94203f --- /dev/null +++ b/filewarp/core/audio/core.py @@ -0,0 +1,258 @@ +import os +import re +import sys +from typing import Union +from moviepy import VideoFileClip +from 
pydub import AudioSegment +from tqdm.auto import tqdm +from .m4a_converter import m4a +from rich.progress import Progress +from ...utils.colors import fg, rs +from ...utils.formats import SUPPORTED_AUDIO_FORMATS_DIRECT, SUPPORTED_AUDIO_FORMATS + +RESET = rs + + +class AudioConverter: + """Convert Audio file to from one format to another""" + + def __init__(self, input_file, out_format): + self.input_file = input_file + self.out_format = out_format + + def preprocess(self): + files_to_process = [] + + if os.path.isfile(self.input_file): + files_to_process.append(self.input_file) + elif os.path.isdir(self.input_file): + if os.listdir(self.input_file) is None: + print(f"{fg.RED}Cannot work with empty folder{RESET}") + sys.exit(1) + for file in os.listdir(self.input_file): + file_path = os.path.join(self.input_file, file) + if os.path.isfile(file_path): + files_to_process.append(file_path) + + return files_to_process + + def pydub_conv(self): + try: + input_list = self.preprocess() + out_f = self.out_format + input_list = [ + item + for item in input_list + if any(item.lower().endswith(ext) for ext in SUPPORTED_AUDIO_FORMATS) + ] + def wav_redudancy(): + # Load the mp3 file using Pydub + audio = AudioSegment.from_file(file, fmt) + # Export the audio to a temporary file in wav format (ffmpeg can convert from wav to m4a) + audio.export("temp.wav", format="wav") + + for file in tqdm(input_list): + if out_f.lower() in SUPPORTED_AUDIO_FORMATS_DIRECT: + _, ext = os.path.splitext(file) + output_filename = _ + "." 
+ out_f + fmt = ext[1:] + # print(fmt, out_f) + audio = AudioSegment.from_file(file, fmt) + # print(f"{fg.BMAGENTA}Converting to {output_filename}{RESET}") + audio.export(output_filename, format=out_f) + # new_audio = pydub.AudioSegment.from_file('output_audio.') + # print(f"{fg.BGREEN}Done{RESET}") + + elif file[-3:].lower() == "m4a" or out_f.lower() == "m4a": + m4a(file, out_f) + + elif ( + out_f.lower() in SUPPORTED_AUDIO_FORMATS + and not SUPPORTED_AUDIO_FORMATS_DIRECT + ): + print("Pending Implemantation For the format") + + else: + print(f"{fg.RED}Unsupported output format{RESET}") + sys.exit(1) + + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except Exception as e: + print(f"{fg.RED}{e}{RESET}") + + +class AudioJoiner: + def __init__(self, obj: Union[list, tuple[str]], masterfile=None): + self.obj = obj + self.masterfile = masterfile + self.files = [] + + if isinstance(self.obj, list): + self.isdir = False + for file in self.obj: + self.files.append(file) + + elif os.path.isdir(self.obj): + self.obj = self.obj + self.isdir = True + + for file in list(os.listdir(self.obj)): + path = os.path.join(self.obj, file) + if path.split(".")[-1] in SUPPORTED_AUDIO_FORMATS_DIRECT: + self.files.append(path) + + else: + print("Pass") + pass + + @staticmethod + # Function to extract the number from the filename + def extract_number(filename): + match = re.search(r"_(\d+(\.\d+)?)\.ogg", filename) + if match: + try: + return float(match.group(1)) + except TypeError: + return int(match.group(1)) + else: + return 0 + + def worker(self): + try: + if len(self.files) == 0: + print("No files to work on") + print("\nQuit!") + exit(0) + if self.masterfile is None: + masterfile = os.path.splitext(self.files[0])[0] + "_master.ogg" + print(os.path.splitext(self.files[0])) + else: + masterfile = self.masterfile + # print(f"{fg.YELLOW}Master file = {fg.BLUE}{masterfile}{RESET}") + + self.ext = os.path.splitext(masterfile)[-1] + _format = self.ext if self.ext in 
SUPPORTED_AUDIO_FORMATS_DIRECT else "ogg" + # print(f"{fg.BYELLOW}Format = {fg.BBLUE}{_format}{RESET}") + + # print(f"{fg.BBLUE}Create a master file{RESET}") + # Create a list to store files + ogg_files = [] + + # Sort the filenames based on the extracted number + _sorted_filenames = sorted(self.files, key=self.extract_number) + + print("." * 20, "Remove Empty files", "." * 20) + sorted_filenames = [] + with Progress() as progress: + task = progress.add_task("[magenta]Preparing..", total=None) + for i, fl in enumerate(_sorted_filenames): + if os.path.getsize(fl) == 0: + print("Empty file, skipping..") + continue + else: + sorted_filenames.append(fl) + progress.update(task, advance=None) + + # loop through the directory while adding the ogg files to the list + with Progress() as progress: + task2 = progress.add_task( + "[cyan]Create list", total=len(sorted_filenames) + ) + for i, filename in enumerate(sorted_filenames): + # print(f"{BWHITE}File {DCYAN}{filename}{RESET}") + ogg_files.append(AudioSegment.from_file(filename)) + progress.update(task2, advance=i) + + # Concatenate the ogg files + combined_ogg = ogg_files[0] + with Progress() as progress: + task3 = progress.add_task( + "[magenta]Joining... 
", total=len(sorted_filenames) + ) + for i in range(1, len(sorted_filenames)): + combined_ogg += ogg_files[i] + progress.update(task3, advance=i) + + # Export the combined ogg to new mp3 file or ogg file + combined_ogg.export(masterfile, format=_format) + print(f"{fg.BGREEN}Master file:Ok🤏") + """ + if self.isdir: + query = input(f"{BBLUE}Remove the directory ?(y/n)").lower() in ('y', 'yes') + if query: + shutil.rmtree(self.obj) + """ + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + raise + print(f"{fg.RED}{e}{RESET}") + + +class AudioExtracter: + def __init__(self, input_file): + self.input_file = input_file + + def preprocess(self): + try: + files_to_process = [] + + if os.path.isfile(self.input_file): + files_to_process.append(self.input_file) + elif os.path.isdir(self.input_file): + if os.listdir(self.input_file) is None: + print(f"{fg.RED}Cannot work with empty folder{RESET}") + sys.exit(1) + for file in os.listdir(self.input_file): + file_path = os.path.join(self.input_file, file) + ls = ["mp4", "mkv"] + if os.path.isfile(file_path) and any( + file_path.lower().endswith(ext) for ext in ls + ): + files_to_process.append(file_path) + + return files_to_process + except Exception as e: + print(e) + + def moviepyextract(self): + try: + video_list = self.preprocess() + for input_video in video_list: + video = VideoFileClip(input_video) + audio = video.audio + basename, _ = os.path.splitext(input_video) + outfile = basename + ".wav" + audio.write_audiofile(outfile) + # print(f"\033[1;32mFile saved as \033[36m{outfile}\033[0m") + except KeyboardInterrupt: + print("\nExiting..") + sys.exit(1) + except Exception as e: + print(e) + + def ffmpeg_extractor(self): + import subprocess + + video_list = self.preprocess() + for input_video in video_list: + # Extract audio + subprocess.run( + ["ffmpeg", "-i", f"{input_video}", f"{input_video.split('.')[0]}.mp3"] + ) + # Merge audio and video + # subprocess.run(["ffmpeg", "-i", 
f"{input_video}", "-i", "audio.mp3", "-c:v", "copy", "-c:a", "aac", f"{input_video}"]) + + def pydub_extractor(self): + import subprocess + + video_list = self.preprocess() + for input_video in video_list: + # Ensure FFmpeg is installed + subprocess.run(["ffmpeg", "-version"]) + # Extract audio + video = AudioSegment.from_file(f"{input_video}") + video.export(f"{input_video.split('.')[0]}.mp3", format="mp3") diff --git a/filewarp/core/audio/m4a_converter.py b/filewarp/core/audio/m4a_converter.py new file mode 100644 index 0000000..fd61f13 --- /dev/null +++ b/filewarp/core/audio/m4a_converter.py @@ -0,0 +1,54 @@ +import os +import subprocess +from ...utils.formats import SUPPORTED_AUDIO_FORMATS +from ...utils.security.vul_mitigate import SecurePython + + +def convert_m4a_(obj_file, _out_f: str): + try: + out_obj = obj_file.replace(obj_file.split(".")[-1], _out_f) + if obj_file[-3:].lower() == "m4a": + command = [ + "ffmpeg", + "-i", + f"{obj_file}", + "-c:a", + "libmp3lame", + "-b:a", + "320k", + f"{out_obj}", + ] + elif _out_f == "m4a": + command = [ + "ffmpeg", + "-i", + f"{obj_file}", + "-c:a", + "aac", + "-b:a", + "128k", + f"{out_obj}", + ] + subprocess.run(command, check=True, text=True) + return out_obj + except Exception as e: + print(f"\033[91m{e}\033[0m") + + +def m4a(obj, _out_f: str): + try: + secure = SecurePython() + if os.path.isdir(obj): + print("Detected directory input") + for root, dirs, files in os.walk(obj): + for file in files: + if file.endswith(list(SUPPORTED_AUDIO_FORMATS)): + print(f"\033[1;96m{file}\033[0m") + fpath = secure.safe_filepath(root, file) + convert_m4a_(fpath, _out_f) + elif os.path.isfile(obj): + convert_m4a_(obj, _out_f) + except Exception as e: + print(f"\033[91m{e}\033[0m") + finally: + print("\033[1;92mDone") diff --git a/filewarp/core/document.py b/filewarp/core/document.py new file mode 100644 index 0000000..98c89d6 --- /dev/null +++ b/filewarp/core/document.py @@ -0,0 +1,873 @@ +"""Handler for dcoument conversion 
operations requested by the cli entry""" + +import os +import re +import subprocess +import sys +from docx import Document +from pdf2image import convert_from_path +from rich.progress import Progress +from tqdm import tqdm +from ..utils.simple import logger +from ..utils.colors import fg, bg, rs + +RESET = rs + +DEFAULT_SEPARATOR = "\n" + +_ext_word = ["doc", "docx"] +_ext_ppt_ = ["ppt", "pptx"] +_ext_xls = ["xls", "xlsx"] + +PYGAME_DETECT_AVX2 = 1 + + +class DocumentConverter: + """Implementats all document conversion methods""" + + def __init__(self, input_file): + self.input_file = input_file + + def preprocess(self): + """Check input object whether it`s a file or a directory if a file append + the file to a set and return it otherwise append directory full path + content to the set and return the set file. The returned set will be + evaluated in the next step as required on the basis of requested operation + For every requested operation, the output file if any is automatically + generated on the basis of the input filename and saved in the same + directory as the input file. 
+ Exit if the folder is empty + """ + + try: + files_to_process = [] + + if os.path.isfile(self.input_file): + files_to_process.append(self.input_file) + elif os.path.isdir(self.input_file): + if os.listdir(self.input_file) is None: + print("Cannot work with empty folder") + sys.exit(1) + for file in os.listdir(self.input_file): + file_path = os.path.join(self.input_file, file) + if os.path.isfile(file_path): + files_to_process.append(file_path) + + return files_to_process + except Exception as e: + print(e) + + def word_to_pdf(self): + """Convert word file to pdf document (docx) + ->Check if running on Linux + ->Use subprocess to run the dpkg and grep commands""" + word_list = self.preprocess() + + word_list = [ + item for item in word_list if item.split(".")[-1].lower() in ("doc", "docx") + ] + for word_file in word_list: + pdf_file_dir = os.path.dirname(word_file) + pdf_file = os.path.splitext(word_file)[0] + ".pdf" + + try: + if os.name == "posix": # Check if running on Linux + print( + f"{fg.BLUE}Converting: {RESET}{word_file} {fg.BLUE}to {RESET}{pdf_file}" + ) + # Use subprocess to run the dpkg and grep commands + result = subprocess.run( + ["dpkg", "-l", "libreoffice"], stdout=subprocess.PIPE, text=True + ) + if result.returncode != 0: + logger.exception(f"{fg.RED}Libreoffice not found !{RESET}") + print( + f"{fg.CYAN}Initiating critical redundacy measure !{RESET}" + ) + self.word2pdf_extra(word_file) + subprocess.run( + [ + "soffice", + "--convert-to", + "pdf", + word_file, + "--outdir", + pdf_file_dir, + ] + ) + + print( + f"{fg.BMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}" + ) + return pdf_file + + elif os.name == "nt": + self.word2pdf_extra(word_file) + return pdf_file + + except Exception as e: + print(f"Error converting {word_file} to {pdf_file}: {e}") + + @staticmethod + def word2pdf_extra(obj, outf=None): + """For window users since it requires Microsoft word to be installed""" + for file in obj: + file = os.path.abspath(file) + 
if file.split(".")[-1] not in ("doc", "docx"): + logger.error(f"{fg.RED}File is not a word file{RESET}") + sys.exit(1) + pdf_file = os.path.splitext(file)[0] + ".pdf" if outf is None else outf + try: + if not os.path.isfile(file): + print(f"The file {obj} does not exist or is not a valid file.") + sys.exit("Exit!") + logger.info( + f"{fg.BLUE}Converting: {RESET}{file} {fg.BLUE}to {RESET}{pdf_file}" + ) + from docx2pdf import convert + + convert(file, pdf_file) + print(f"{fg.GREEN}Conversion ✅{RESET}") + sys.exit(0) + except ImportError: + logger.warning( + f"{fg.RED}docx2pdf Not found. {fg.CYAN}Run pip install docx2pdf{RESET}" + ) + except Exception as e: + raise + logger.error(e) + + def pdf_to_word(self): + """Convert pdf file to word document (docx)""" + pdf_list = self.preprocess() + pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] + for pdf_file in pdf_list: + word_file = ( + pdf_file[:-3] + "docx" if pdf_file.lower().endswith("pdf") else None + ) + + try: + command = [ + "lowriter", + "--headless", + '--infilter="writer_pdf_import"', + '--convert-todoc:"MS Word 97"', + pdf_file, + ] + print(f"{fg.BYELLOW}Parse the pdf document..{RESET}") + from pdf2docx import parse + + parse(pdf_file, word_file, start=0, end=None) + + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{word_file}{RESET}") + logger.info(f"{fg.BGREEN}Success👨‍💻✅{RESET}") + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except Exception as e: + logger.info(f"{bg.RED}All conversion attempts have failed: {e}{RESET}") + + def txt_to_pdf(self): + """Convert text file(s) to pdf document (docx) + ->Read the contents of the input .txt file + ->Initialize the PDF document + ->Create a story to hold the elements of the PDF + ->Iterate through each line in the input .txt file and add it to the PDF + ->Build and write the PDF document""" + txt_list = self.preprocess() + from reportlab.platypus import Paragraph, SimpleDocTemplate + from reportlab.lib.pagesizes import letter 
+ + _list_ = [item for item in txt_list if item.lower().endswith("txt")] + for _file_ in _list_: + _pdf_ = _file_[:-3] + "pdf" if _file_.lower().endswith("txt") else None + # Read the contents of the input .txt file + with open(_file_, "r", encoding="utf-8") as file: + text_contents = file.readlines() + + # Initialize the PDF document + logger.info(f"{fg.BYELLOW}Initialize the PDF document{RESET}") + doc = SimpleDocTemplate(_pdf_, pagesize=letter) + + # Create a story to hold the elements of the PDF + logger.info( + f"{fg.BYELLOW}Create a story to hold the elements of the PDF{RESET}" + ) + story = [] + + # Iterate through each line in the input .txt file and add it to the PDF + logger.info( + f"{fg.BYELLOW}Iterate through each line in the input .txt file and add it to the PDF{RESET}" + ) + _line_count_ = 0 + try: + for line in text_contents: + _line_count_ += 1 + logger.info( + f"Lines {fg.BBLUE}{_line_count_}{RESET}/{len(text_contents)}" + ) + story.append(Paragraph(line.strip(), style="normalText")) + + except KeyboardInterrupt: + print("\nQuit❕⌨️") + sys.exit(1) + except Exception as e: + logger.error(e) + pass + # Build and write the PDF document + logger.info(f"{fg.BYELLOW}Build and write the PDF document{RESET}") + doc.build(story) + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{_pdf_}{RESET}") + print(f"\n{fg.BGREEN}Success👨‍💻✅{RESET}") + + def word_to_pptx(self): + """Convert word file(s) to pptx document (pptx/ppt) + -> Load the Word document + ->Create a new PowerPoint presentation + ->Iterate through each paragraph in the Word document + ->Create a new slide in the PowerPoint presentation + ->Add the paragraph text to the slide + """ + from pptx import Presentation + + word_list = self.preprocess() + word_list = [ + item for item in word_list if item.split(".")[-1].lower() in ("doc", "docx") + ] + + for word_file in word_list: + if word_list is None: + print("Please provide appropriate file type") + sys.exit(1) + ext = 
os.path.splitext(word_file)[-1][1:] + + pptx_file = ( + (os.path.splitext(word_file)[0] + ".pptx") + if ext in list(_ext_word) + else None + ) + + try: + # Load the Word document + print(f"{fg.BYELLOW}Load the Word document..{RESET}") + doc = Document(word_file) + + # Create a new PowerPoint presentation + print(f"{fg.BYELLOW}Create a new PowerPoint presentation..{RESET}") + prs = Presentation() + + # Iterate through each paragraph in the Word document + print( + f"{fg.BGREEN}Populating pptx slides with {fg.BYELLOW}{len(doc.paragraphs)}{fg.BGREEN} entries..{RESET}" + ) + count = 0 + for paragraph in doc.paragraphs: + count += 1 + perc = (count / len(doc.paragraphs)) * 100 + print( + f"{fg.BMAGENTA}Progress:: {fg.BCYAN}{perc:.2f}%{RESET}", + end="\r", + ) + # Create a new slide in the PowerPoint presentation + slide = prs.slides.add_slide(prs.slide_layouts[1]) + + # Add the paragraph text to the slide + slide.shapes.title.text = paragraph.text + + # Save the PowerPoint presentation + prs.save(pptx_file) + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{pptx_file}{RESET}") + print(f"\n{fg.BGREEN}Success👨‍💻✅{RESET}") + except KeyboardInterrupt: + print("\nQuit❕⌨️") + sys.exit(1) + except Exception as e: + logger.error(e) + + def word_to_txt(self): + """Convert word file to txt file""" + word_list = self.preprocess() + word_list = [ + item for item in word_list if item.split(".")[-1].lower() in ("dox", "docx") + ] + + for file_path in word_list: + ext = os.path.splitext(file_path)[-1][1:] + txt_file = ( + (os.path.splitext(file_path)[0] + ".txt") + if ext in list(_ext_word) + else "output.txt" + ) + + try: + logger.info(f"{fg.BLUE}Create Doument Tablet{RESET}") + doc = Document(file_path) + + with open(txt_file, "w", encoding="utf-8") as f: + Par = 0 + for paragraph in doc.paragraphs: + f.write(paragraph.text + "\n") + Par += 1 + + print( + f"Par:{fg.BLUE}{Par}/{len(doc.paragraphs)}{RESET}", + end="\r", + ) + logger.info(f"{fg.MAGENTA}Conversion of file to txt 
success{RESET}") + + logger.info(f"File: {fg.GREEN}{txt_file}{RESET}") + return txt_file + except KeyboardInterrupt: + print("\nQuit❕⌨️") + sys.exit() + except Exception as e: + logger.error(f"{fg.RED}{e}{RESET}") + with open("conversion.log", "a") as log_file: + log_file.write( + f"Couldn't convert {file_path} to {txt_file}:REASON->{e}" + ) + + def pdf_to_txt(self): + """Convert pdf file to text file""" + import PyPDF2 + + pdf_list = self.preprocess() + pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] + for file_path in pdf_list: + txt_file = file_path[:-3] + "txt" + try: + print(f"{fg.BYELLOW}Open and read the pdf document..{RESET}") + with open(file_path, "rb") as file: + pdf_reader = PyPDF2.PdfReader(file) + text = "" + _pg_ = 0 + print(f"{fg.YELLOW}Convert pages..{RESET}") + for page_num in range(len(pdf_reader.pages)): + _pg_ += 1 + logger.info( + f"Page {fg.BBLUE}{_pg_}{RESET}/{len(pdf_reader.pages)}" + ) + page = pdf_reader.pages[page_num] + text += page.extract_text() + with open(txt_file, "w", encoding="utf-8") as f: + f.write(text) + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{txt_file}{RESET}") + logger.info(f"{fg.BGREEN}Success👨‍💻✅{RESET}") + except Exception as e: + logger.error(f"{fg.RED}{e}{RESET}") + with open("conversion.log", "a") as log_file: + log_file.write(f"Error converting {file_path} to {txt_file}: {e}\n") + + def pptx_to_txt(self, dest=None): + """Convert ppt file to tetx document""" + ppt_list = self.preprocess() + from pptx import Presentation + + ppt_list = [ + item for item in ppt_list if item.split(".")[-1].lower() in ("ppt", "pptx") + ] + try: + for file_path in ppt_list: + ext = os.path.splitext(file_path)[-1][1:] + + txt_file = (os.path.splitext(file_path)[0]) + ".txt" + + file_path = os.path.abspath(file_path) + + if ext == "ppt": + file_path = self.convert_ppt_to_pptx( + file_path + ) # First convert the ppt to pptx + + presentation = Presentation(file_path) + + logger.info( + f"Slide count 
={fg.BMAGENTA} {len(presentation.slides)}{RESET}" + ) + + _slide_count_ = 0 + + with Progress() as progress: + task = progress.add_task( + "[magenta]Preparing..", total=len(presentation.slides) + ) + + for slide in presentation.slides: + _slide_count_ += 1 + # progress.console.print(F"Slide {_slide_count_}/{len(presentation.slides)}", end='\n') + + for shape in slide.shapes: + if shape.has_text_frame: + text_frame = shape.text_frame + + for paragraph in text_frame.paragraphs: + # Create a paragraph in the Word document if it contains text + # Ensure text exists + if any(run.text.strip() for run in paragraph.runs): + for run in paragraph.runs: + text = run.text.strip() + if text and text != " ": + with open(txt_file, "a") as fl: + fl.write(text) + # return txt_file + + progress.update(task, advance=1) + + if dest == "text": + with open(txt_file, "r") as fl: + text_buffer = fl.read() + print(text_buffer) + return text_buffer + + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{txt_file}{RESET}") + logger.info(f"{fg.BGREEN}Success👨‍💻✅{RESET}") + except Exception as e: + logger.error(f"\n❌Oops! 
{bg.RED}{e}{RESET}") + + @staticmethod + def convert_ppt_to_pptx(obj: os.PathLike): + import platform + from pptx import Presentation + + try: + if obj.endswith("ppt"): + if platform.system() in ("Linux", "MacOS") or os.name == "posix": + subprocess.run( + ["soffice", "--headless", "--convert-to", "pptx", obj] + ) + return os.path.splitext(obj)[0] + ".pptx" + elif platform.system() in ("Windows") or os.name == "nt": + import win32com.client + + powerpoint = win32com.client.Dispatch("PowerPoint.Application") + powerpoint.Visible = 1 + ppt = powerpoint.Presentations.Open(obj) + pptx_file = os.path.splitext(obj)[0] + ".pptx" + ppt.SaveAs(pptx_file, 24) # 24 is the format for pptx + ppt.Close() + powerpoint.Quit() + return pptx_file + else: + print(f"{fg.RED}Unable to identify the system{RESET}") + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + logger.error(f"{fg.RED}{e}{RESET}") + + def ppt_to_word(self): + from docx.enum.text import WD_PARAGRAPH_ALIGNMENT + from docx.shared import Pt + from docx.shared import RGBColor as docxRGBColor + from pptx.dml.color import RGBColor as pptxRGBColor + from pptx import Presentation + + """Convert ppt file to word document\n + ->Preserves bold formatting + """ + ppt_list = self.preprocess() + ppt_list = [ + item for item in ppt_list if item.split(".")[-1].lower() in ("ppt", "pptx") + ] + for file_path in ppt_list: + ext = os.path.splitext(file_path)[-1][1:] + word_file = ( + (os.path.splitext(file_path)[0] + ".docx") + if ext in list(_ext_ppt_) + else None + ) + try: + logger.info(f"{fg.BYELLOW}Create Doument Tablet{RESET}") + file_path = os.path.abspath(file_path) + if ext == "ppt": + file_path = self.convert_ppt_to_pptx( + file_path + ) # First convert the ppt to pptx + presentation = Presentation(file_path) + document = Document() + logger.info( + f"Slide count ={fg.BMAGENTA} {len(presentation.slides)}{RESET}" + ) + _slide_count_ = 0 + with Progress() as progress: + task = 
progress.add_task( + "[magenta]Preparing..", total=len(presentation.slides) + ) + for slide in presentation.slides: + _slide_count_ += 1 + # progress.console.print(F"Slide {_slide_count_}/{len(presentation.slides)}", end='\n') + slide_text = "" + for shape in slide.shapes: + if shape.has_text_frame: + text_frame = shape.text_frame + + for paragraph in text_frame.paragraphs: + # Create a paragraph in the Word document if it contains text + # Ensure text exists + if any(run.text.strip() for run in paragraph.runs): + # print("Has text") + new_paragraph = document.add_paragraph() + + # Set general paragraph properties + new_paragraph.alignment = ( + WD_PARAGRAPH_ALIGNMENT.JUSTIFY + ) # Justify text + new_paragraph.space_after = Pt(6) + new_paragraph.space_before = Pt(6) + new_paragraph.line_spacing = 1.15 + + for run in paragraph.runs: + if run.text.strip(): + slide_text += ( + run.text + ) # Only add non-empty text runs + # print(run.text.strip(), end='\n') + new_run = new_paragraph.add_run( + run.text + ) + + # Preserve bold, italic, underline, font name, and size + new_run.bold = run.font.bold + new_run.italic = run.font.italic + new_run.underline = run.font.underline + new_run.font.name = run.font.name + new_run.font.size = run.font.size + + # Preserve font color + try: + if ( + run.font.color + and run.font.color.rgb + ): + pptx_color = run.font.color.rgb + # If the color is white (255, 255, 255), change it to black (0, 0, 0) + if pptx_color == pptxRGBColor( + 255, 255, 255 + ): + new_run.font.color.rgb = ( + docxRGBColor(0, 0, 0) + ) # Black + else: + # Assign color properly to the Word run + new_run.font.color.rgb = ( + docxRGBColor( + pptx_color[0], + pptx_color[1], + pptx_color[2], + ) + ) + except AttributeError: + pass + + progress.update(task, advance=1) + document.save(word_file) + logger.info(f"{fg.MAGENTA}New file is {fg.CYAN}{word_file}{RESET}") + logger.info(f"{fg.BGREEN}Success👨‍💻✅{RESET}") + return word_file + except Exception as e: + 
logger.error(f"\n❌Oops! {bg.RED}{e}{RESET}") + with open("conversion.log", "a") as log_file: + log_file.write(f"\n❌Oops! {e}") + + def text_to_word(self): + """Convert text file to word\n + ->Read the text file\n + ->Filter out non-XML characters\n + ->Create a new Word document\n + ->Add the filtered text content to the document""" + flist = self.preprocess() + flist = [item for item in flist if item.lower().endswith("txt")] + for file_path in flist: + if file_path.lower().endswith("txt"): + word_file = file_path[:-3] + "docx" + + try: + # Read the text file + logger.info(f"{fg.BCYAN}Open and read the text file{RESET}") + with open(file_path, "r", encoding="utf-8", errors="ignore") as file: + text_content = file.read() + + # Filter out non-XML characters + filtered_content = re.sub( + r"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]+", "", text_content + ) + + # Create a new Word document + logger.info(f"{fg.BYELLOW}Create Doument Tablet{RESET}") + doc = Document() + # Add the filtered text content to the document + doc.add_paragraph(filtered_content) + + # Save the document as a Word file + doc.save(word_file) + logger.info(f"{fg.MAGENTA}New file is {fg.BCYAN}{word_file}{RESET}") + logger.info(f"{fg.BGREEN}Success👨‍💻✅{RESET}") + except FileExistsError as e: + logger.error(f"{str(e)}📁") + except Exception as e: + logger.error(f"\n❌Oops something went awry {fg.RED}{e}{RESET}") + with open("conversion.log", "a") as log_file: + log_file.write(f"\n❌Oops something went astray{fg.RED}{e}{RESET}") + + def convert_xls_to_word(self): + """Convert xlsx file(s) to word file(s)\n + ->Read the XLS file using pandas\n + ->Create a new Word document\n + ->Iterate over the rows of the dataframe and add them to the Word document""" + xls_list = self.preprocess() + import pandas as pd + + xls_list = [ + item for item in xls_list if item.split(".")[-1].lower() in ("xls", "xlsx") + ] + + print(f"{fg.BGREEN}Initializing conversion sequence{RESET}") + + for xls_file in xls_list: + ext = 
os.path.splitext(xls_file)[-1][1:] + word_file = ( + (os.path.splitext(xls_file)[0] + ".docx") + if ext in list(_ext_xls) + else None + ) + try: + """Read the XLS file using pandas""" + + df = pd.read_excel(xls_file) + + """Create a new Word document""" + doc = Document() + + """Iterate over the rows of the dataframe and add them to the + Word document""" + logger.info(f"{fg.ICYAN}Converting {xls_file}..{RESET}") + # time.sleep(2) + total_rows = df.shape[0] + for _, row in df.iterrows(): + current_row = _ + 1 + percentage = (current_row / total_rows) * 100 + for value in row: + doc.add_paragraph(str(value)) + print( + f"Row {fg.BYELLOW}{current_row}/{total_rows} {fg.BBLUE}{percentage:.1f}%{RESET}", + end="\r", + ) + # print(f"\033[1;36m{row}{RESET}") + + # Save the Word document + doc.save(word_file) + print(f"{fg.BGREEN}Conversion successful!{RESET}", end="\n") + except KeyboardInterrupt: + print("\nQuit⌨️") + sys.exit(1) + except Exception as e: + print(f"{bg.RED}Oops Conversion failed:❕{RESET}", str(e)) + + def convert_xls_to_text(self): + """Convert xlsx/xls file/files to text file format + ->Read the XLS file using pandas + ->Convert the dataframe to plain text + ->Write the plain text to the output file""" + xls_list = self.preprocess() + import pandas as pd + + xls_list = [ + item + for item in xls_list + if any(item.lower().endswith(ext) for ext in _ext_xls) + ] + print(f"{fg.BGREEN}Initializing conversion sequence{RESET}") + for xls_file in tqdm(xls_list): + ext = os.path.splitext(xls_file)[-1][1:] + txt_file = ( + (os.path.splitext(xls_file)[0] + ".txt") + if ext in list(_ext_xls) + else None + ) + try: + # Read the XLS file using pandas + logger.info(f"Converting {xls_file}..") + df = pd.read_excel(xls_file) + + # Convert the dataframe to plain text + text = df.to_string(index=False) + chars = len(text) + words = len(text.split()) + lines = len(text.splitlines()) + + print( + f"Preparing to write: {fg.BYELLOW}{chars} \033[1;30m characters{fg.BYELLOW} 
{words}\033[1;30m words {fg.BYELLOW}{lines}\033[1;30m lines {RESET}", + end="\n", + ) + # Write the plain text to the output file + with open(txt_file, "w") as file: + file.write(text) + + print(f"{fg.BGREEN}Conversion successful!{RESET}", end="\n") + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except Exception as e: + print("Oops Conversion failed:", str(e)) + + def convert_xlsx_to_csv(self): + """Convert xlsx/xls file to csv(comma seperated values) format + ->Load the Excel file + ->Save the DataFrame to CSV""" + xls_list = self.preprocess() + import pandas as pd + + xls_list = [ + item for item in xls_list if item.split(".")[-1].lower() in ("xls", "xlsx") + ] + for xls_file in tqdm(xls_list): + ext = os.path.splitext(xls_file)[-1][1:] + csv_file = ( + (os.path.splitext(xls_file)[0] + ".csv") + if ext in list(_ext_xls) + else None + ) + try: + """Load the Excel file""" + print(f"{fg.BGREEN}Initializing conversion sequence{RESET}") + df = pd.read_excel(xls_file) + logger.info(f"Converting {xls_file}..") + total_rows = df.shape[0] + print(f"Writing {fg.BYELLOW}{total_rows} rows {RESET}", end="\n") + for i in range(101): + print(f"Progress: {i}%", end="\r") + """Save the DataFrame to CSV""" + df.to_csv(csv_file, index=False) + print(f"{fg.BMAGENTA} Conversion successful{RESET}") + + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except Exception as e: + print(e) + + def convert_csv_to_xlsx(self): + csv_list = self.preprocess() + csv_list = [item for item in csv_list if item.split(".")[-1].lower() in ("csv")] + import pandas as pd + from openpyxl import load_workbook + + with Progress() as progress: + task = progress.add_task("[cyan]Coverting", total=len(csv_list)) + for file in csv_list: + file_name = file[:-3] + "xlsx" + df = pd.read_csv(file) + # excel engines ('openpyxl' or 'xlsxwriter') + df.to_excel(file_name, engine="openpyxl", index=False) + + # Load the workbook and the sheet + workbook = load_workbook(file_name) + sheet = 
workbook.active + + # print("Adjust Columns") + for column in sheet.columns: + max_length = 0 + column_letter = column[0].column_letter + + max_length = max(len(str(cell.value)) for cell in column) + """for cell in column: + try: + if len(str(cell.value)) > max_length: + max_length = len(cell.value) + + except Exception: + pass + """ + + adjusted_width = max_length + 2 + sheet.column_dimensions[column_letter].width = adjusted_width + + # Save the workbook + workbook.save(file_name) + progress.update(task, advance=1) + + def convert_xlsx_to_database(self): + """Convert xlsx file(s) to sqlite + ->Read the Excel file into a pandas DataFrame + ->Create a connection to the SQLite database + ->Insert the DataFrame into a new table in the database + ->Close the database connection""" + xlsx_list = self.preprocess() + import pandas as pd + import sqlite3 + + xlsx_list = [ + item for item in xlsx_list if item.split(".")[-1].lower() in ("xls", "xlsx") + ] + for xlsx_file in tqdm(xlsx_list): + sqlfile = ( + (os.path.splitext(xlsx_file)[0] + ".sql") + if (xlsx_file.split(".")[0]) in ("xls", "xlsx") + else None + ) + try: + db_file = input(f"{fg.BBLUE}Please enter desired sql filename: {RESET}") + table_name = input("Please enter desired table name: ") + # res = ["db_file", "table_name"] + if any(db_file) == "": + db_file = sqlfile + table_name = sqlfile[:-4] + if not db_file.endswith(".sql"): + db_file = sqlfile + column = 0 + for i in range(20): + column += 0 + # Read the Excel file into a pandas DataFrame + print(f"Reading {xlsx_file}...") + df = pd.read_excel(xlsx_file) + print(f"{fg.BGREEN}Initializing conversion sequence{RESET}") + print(f"{fg.BGREEN} Connected to sqlite3 database::{RESET}") + # Create a connection to the SQLite database + conn = sqlite3.connect(db_file) + print(f"{fg.BYELLOW} Creating database table::{RESET}") + # Insert the DataFrame into a new table in the database + df.to_sql(table_name, column, conn, if_exists="replace", index=False) + print( + 
f"Operation successful{RESET} file saved as \033[32{db_file}{RESET}" + ) + # Close the database connection + conn.close() + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except Exception as e: + logger.error(f"{e}") + + def doc2image(self, outf="png"): + """Create image objects from given files""" + outf = "png" if outf not in ("png", "jpg") else outf + path_list = self.preprocess() + file_list = [ + item + for item in path_list + if item.split(".")[-1].lower() in ("pdf", "doc", "docx") + ] + imgs = [] + for file in file_list: + if file.lower().endswith("pdf"): + # Convert the PDF to a list of PIL image objects + print(f"{fg.BLUE}Generate image objects ..{RESET}") + images = convert_from_path(file) + + # Save each image to a file + fname = file[:-4] + print(f"{fg.YELLOW}Target images{fg.BLUE} {len(images)}{RESET}") + + with Progress() as progress: + task = progress.add_task( + "[magenta]Generating images ", total=len(images) + ) + for i, image in enumerate(images): + # print(f"{Bfg.BLUE}{i}{RESET}", end="\r") + yd = f"{fname}_{i + 1}.{outf}" + image.save(yd) + imgs.append(yd) + progress.update(task, advance=1) + # print(f"\n{fg.GREEN}Ok{RESET}") + + return imgs diff --git a/filewarp/core/exceptions.py b/filewarp/core/exceptions.py new file mode 100644 index 0000000..e10bf82 --- /dev/null +++ b/filewarp/core/exceptions.py @@ -0,0 +1,43 @@ +class FilemacError(Exception): + """Custom filewarp exception handler""" + + pass + + +class ValidationError(FilemacError): + """Raised when validation fails.""" + + pass + + +class SystemPermissionError(FilemacError): + """ + Raised when user cannot acess to system reasource due to insuficient permissions. + Eg command execusion + """ + + pass + + +class FileSystemError(FilemacError): + """ + Raises when there is file/folder ie FileSystem acess error not related to permissions. 
+ ie write error + """ + + pass + + +class AuthorizationError(FilemacError): + """ + Raised when there is an *Explicit* file/dir/resource access denial. + When priviledge elevelation is required. + """ + + pass + + +class ConfigurationError(FilemacError): + """Raised when invalid configuration.""" + + pass diff --git a/filewarp/core/html/__init__.py b/filewarp/core/html/__init__.py new file mode 100644 index 0000000..0064a2e --- /dev/null +++ b/filewarp/core/html/__init__.py @@ -0,0 +1,18 @@ +""" +Custom HTML to DOCX Converter for CVs and Professional Documents +""" + +from .core.converter import HTML2Word +from .core.html_parser import HTMLParser +from .core.style_manager import StyleManager +from .styles.css_parser import CSSParser +from .styles.style_applier import StyleApplier + +__version__ = "1.0.0" +__all__ = [ + "HTML2Word", + "HTMLParser", + "StyleManager", + "CSSParser", + "StyleApplier", +] diff --git a/filewarp/core/html/core.py b/filewarp/core/html/core.py new file mode 100644 index 0000000..411ee00 --- /dev/null +++ b/filewarp/core/html/core.py @@ -0,0 +1,365 @@ +""" +custom_html_to_docx.py +A reliable HTML to DOCX converter specifically designed for CVs and professional documents +""" + +from docx import Document +from docx.shared import Pt, Inches, RGBColor +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK +from docx.oxml.ns import qn +from docx.oxml import parse_xml +import re +from typing import List, Dict, Any +import html as html_parser + + +class CVHTMLConverter: + """A specialized HTML to DOCX converter for CVs and professional documents""" + + def __init__(self): + self.doc = None + self.current_paragraph = None + self.styles = { + "h1": {"size": 16, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.CENTER}, + "h2": {"size": 14, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "h3": {"size": 12, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "normal": {"size": 11, "bold": False, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + 
"bold": {"size": 11, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "italic": { + "size": 11, + "italic": True, + "alignment": WD_ALIGN_PARAGRAPH.LEFT, + }, + } + + def convert(self, html_content: str, output_path: str) -> Document: + """ + Convert HTML content to DOCX document + + Args: + html_content: HTML string to convert + output_path: Path for output DOCX file + + Returns: + Document: The created Word document + """ + self.doc = Document() + self._setup_document_styles() + + # Clean and parse HTML + cleaned_html = self._clean_html(html_content) + self._parse_html(cleaned_html) + + self.doc.save(output_path) + return self.doc + + def _setup_document_styles(self): + """Setup document styles and formatting""" + # Set normal style + style = self.doc.styles["Normal"] + font = style.font + font.name = "Calibri" + font.size = Pt(11) + + # Create custom styles for CV + self._create_style("CV Title", 16, True, WD_ALIGN_PARAGRAPH.CENTER) + self._create_style("CV Heading", 14, True, WD_ALIGN_PARAGRAPH.LEFT) + self._create_style("CV Subheading", 12, True, WD_ALIGN_PARAGRAPH.LEFT) + + def _create_style(self, style_name: str, font_size: int, bold: bool, alignment): + """Create a custom style""" + try: + style = self.doc.styles.add_style( + style_name, 1 + ) # 1 = WD_STYLE_TYPE.PARAGRAPH + font = style.font + font.name = "Calibri" + font.size = Pt(font_size) + font.bold = bold + style.paragraph_format.alignment = alignment + except: + # Style might already exist + pass + + def _clean_html(self, html: str) -> str: + """Clean and normalize HTML content""" + # Remove multiple spaces and newlines + html = re.sub(r"\s+", " ", html) + + # Ensure proper tag formatting + html = html.replace("
", "
").replace("
", "
") + + # Decode HTML entities + html = html_parser.unescape(html) + + return html.strip() + + def _parse_html(self, html: str): + """Parse HTML content and convert to DOCX""" + # Split by tags while preserving content + tokens = self._tokenize_html(html) + self._process_tokens(tokens) + + def _tokenize_html(self, html: str) -> List[Dict[str, Any]]: + """Tokenize HTML into manageable chunks""" + tokens = [] + pos = 0 + + while pos < len(html): + # Find next tag + tag_match = re.search(r"]*>", html[pos:]) + + if not tag_match: + # Add remaining text + if pos < len(html): + tokens.append({"type": "text", "content": html[pos:]}) + break + + tag_start = tag_match.start() + pos + tag_end = tag_match.end() + pos + + # Add text before tag + if tag_start > pos: + tokens.append({"type": "text", "content": html[pos:tag_start]}) + + # Add tag + tag_content = html[tag_start:tag_end] + is_closing = tag_content.startswith(""), + } + ) + + pos = tag_end + + return tokens + + def _process_tokens(self, tokens: List[Dict[str, Any]]): + """Process tokens and build document""" + stack = [] # Track open tags + + for token in tokens: + if token["type"] == "text": + self._add_text(token["content"], stack) + elif token["type"] == "tag": + if token["is_closing"]: + # Close tag + if stack and stack[-1]["name"] == token["name"]: + stack.pop() + self._handle_closing_tag(token["name"]) + else: + # Open tag + stack.append(token) + self._handle_opening_tag(token) + + def _handle_opening_tag(self, tag: Dict[str, Any]): + """Handle opening tag""" + tag_name = tag["name"] + + if tag_name in ["h1", "h2", "h3"]: + self._add_heading(tag_name) + elif tag_name == "br": + self._add_line_break() + elif tag_name == "p": + self._start_paragraph() + elif tag_name == "div": + if self.current_paragraph: + self.current_paragraph = None + elif tag_name == "ul": + self.in_list = True + elif tag_name == "li": + self._start_list_item() + + def _handle_closing_tag(self, tag_name: str): + """Handle closing tag""" + if 
tag_name in ["h1", "h2", "h3", "p"]: + self.current_paragraph = None + elif tag_name == "ul": + self.in_list = False + self.current_paragraph = None + + def _add_heading(self, level: str): + """Add heading based on level""" + self.current_paragraph = self.doc.add_paragraph() + + if level == "h1": + self.current_paragraph.style = "CV Title" + elif level == "h2": + self.current_paragraph.style = "CV Heading" + else: + self.current_paragraph.style = "CV Subheading" + + def _start_paragraph(self): + """Start a new paragraph""" + self.current_paragraph = self.doc.add_paragraph() + + def _start_list_item(self): + """Start a new list item""" + self.current_paragraph = self.doc.add_paragraph(style="List Bullet") + + def _add_line_break(self): + """Add line break""" + if self.current_paragraph: + self.current_paragraph.add_run().add_break(WD_BREAK.LINE) + else: + self.doc.add_paragraph() + + def _add_text(self, text: str, stack: List[Dict[str, Any]]): + """Add text with current formatting""" + if not text.strip(): + return + + # Create paragraph if none exists + if not self.current_paragraph: + self.current_paragraph = self.doc.add_paragraph() + + run = self.current_paragraph.add_run(text) + + # Apply formatting based on stack + self._apply_formatting(run, stack) + + def _apply_formatting(self, run, stack: List[Dict[str, Any]]): + """Apply formatting based on tag stack""" + font = run.font + font.name = "Calibri" + font.size = Pt(11) + + # Check for bold (strong, b) + bold_tags = ["strong", "b", "h1", "h2", "h3"] + if any(tag["name"] in bold_tags for tag in stack): + font.bold = True + + # Check for italic (em, i) + italic_tags = ["em", "i"] + if any(tag["name"] in italic_tags for tag in stack): + font.italic = True + + # Check for underline (u) + underline_tags = ["u"] + if any(tag["name"] in underline_tags for tag in stack): + font.underline = True + + +class AdvancedCVConverter(CVHTMLConverter): + """Enhanced converter with better CSS support and layout management""" + + 
def __init__(self): + super().__init__() + self.section_spacing = Pt(12) + self.current_style = {} + + def convert_cv_html(self, html_file_path: str, output_path: str) -> Document: + """ + Convert CV HTML file to DOCX with enhanced formatting + + Args: + html_file_path: Path to HTML file + output_path: Output DOCX path + """ + with open(html_file_path, "r", encoding="utf-8") as f: + html_content = f.read() + + return self.convert(html_content, output_path) + + def _extract_css_styles(self, html: str) -> Dict[str, Dict]: + """Extract CSS styles from style tags""" + styles = {} + style_matches = re.findall(r"]*>(.*?)", html, re.DOTALL) + + for style_content in style_matches: + # Parse CSS rules (simplified) + rules = re.findall(r"\.(\w+)\s*\{([^}]+)\}", style_content) + for class_name, properties in rules: + styles[class_name] = self._parse_css_properties(properties) + + return styles + + def _parse_css_properties(self, css: str) -> Dict[str, str]: + """Parse CSS properties into dictionary""" + properties = {} + declarations = css.split(";") + + for declaration in declarations: + if ":" in declaration: + prop, value = declaration.split(":", 1) + properties[prop.strip().lower()] = value.strip() + + return properties + + def _apply_css_style(self, run, style: Dict): + """Apply CSS-style formatting""" + if "font-weight" in style and "bold" in style["font-weight"]: + run.font.bold = True + + if "font-style" in style and "italic" in style["font-style"]: + run.font.italic = True + + if "text-align" in style: + if "center" in style["text-align"]: + run.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif "right" in style["text-align"]: + run.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.RIGHT + + if "color" in style: + color = self._parse_css_color(style["color"]) + if color: + run.font.color.rgb = color + + def _parse_css_color(self, color_str: str) -> RGBColor: + """Parse CSS color string to RGBColor""" + # Handle hex colors + hex_match = re.match( + 
r"#([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})", color_str + ) + if hex_match: + r, g, b = [int(x, 16) for x in hex_match.groups()] + return RGBColor(r, g, b) + + # Handle rgb() colors + rgb_match = re.match(r"rgb\((\d+),\s*(\d+),\s*(\d+)\)", color_str) + if rgb_match: + r, g, b = [int(x) for x in rgb_match.groups()] + return RGBColor(r, g, b) + + return None + + +# Usage examples and helper functions +def create_cv_from_html_template(): + """Create a CV using our HTML template""" + converter = AdvancedCVConverter() + + # Example usage + with open("/home/skye/Downloads/MWG-CV.html", "r") as f: + html_content = f.read() + + return converter.convert(html_content, "professional_cv.docx") + + +def main(): + """Main demonstration function""" + print("Custom HTML to DOCX Converter for CVs") + print("=====================================") + + # Example 2: Create from our CV template + create_cv_from_html_template() + print("✓ Professional CV created successfully!") + + print("\nLibrary features:") + print("- Custom HTML parsing optimized for CVs") + print("- Professional styling and formatting") + print("- List and bullet point support") + print("- Heading hierarchy") + print("- Basic CSS style support") + print("- Robust error handling") + + +if __name__ == "__main__": + main() diff --git a/filewarp/core/html/core/__init__.py b/filewarp/core/html/core/__init__.py new file mode 100644 index 0000000..29adda4 --- /dev/null +++ b/filewarp/core/html/core/__init__.py @@ -0,0 +1,5 @@ +from .converter import HTML2Word +from .html_parser import HTMLParser +from .style_manager import StyleManager + +__all__ = ["HTML2Word", "HTMLParser", "StyleManager"] diff --git a/filewarp/core/html/core/converter.py b/filewarp/core/html/core/converter.py new file mode 100644 index 0000000..9da0745 --- /dev/null +++ b/filewarp/core/html/core/converter.py @@ -0,0 +1,692 @@ +""" +Main converter class that orchestrates the HTML to DOCX conversion +""" + +import re +from pathlib import Path 
+from docx import Document +from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.oxml import parse_xml +from docx.oxml.ns import qn +from docx.shared import Inches +from typing import Dict, List +from docx.oxml import OxmlElement +from .html_parser import HTMLParser +from .style_manager import StyleManager +from ..utils.validation import validate_html, validate_file_path + + +class HTML2Word: + """Main converter class that coordinates HTML parsing and DOCX generation""" + + def __init__(self, default_font: str = "Calibri", default_size: int = 11): + self.doc = None + self.default_font = default_font + self.default_size = default_size + self.html_parser = HTMLParser() + self.style_manager = StyleManager(default_font, default_size) + + # Conversion state + self.current_paragraph = None + self.current_style = {} + self.tag_stack = [] + self.block_element = {} + + def __enter__(self): + self.block_element = self.html_parser.block_elements.copy().add("ol").add("ul") + + def convert(self, html_content: str, output_path: str) -> Document: + """ + Convert HTML content to DOCX document + + Args: + html_content: HTML string to convert + output_path: Path for output DOCX file + + Returns: + Document: The created Word document + """ + # Validate inputs + validate_html(html_content) + + output_path = ( + output_path.as_posix() if isinstance(output_path, Path) else output_path + ) + + validate_file_path(output_path, "output") + + # Initialize document + self.doc = Document() + self.style_manager.setup_document_styles(self.doc) + + # Parse HTML and extract styles + parsed_data = self.html_parser.parse(html_content) + + # Convert to DOCX + self._convert_elements(parsed_data["elements"], parsed_data["styles"]) + + # Save document + self.doc.save(output_path) + return self.doc + + def convert_file(self, html_file_path: str, output_path: str) -> Document: + """ + Convert HTML file to DOCX document + + Args: + html_file_path: Path to HTML file + output_path: Path for output DOCX 
file + + Returns: + Document: The created Word document + """ + validate_file_path(html_file_path, "input") + + with open(html_file_path, "r", encoding="utf-8") as f: + html_content = f.read() + + return self.convert(html_content, output_path) + + def _convert_elements(self, elements: List[Dict], styles: Dict): + """Convert parsed HTML elements to DOCX format""" + for element in elements: + self._convert_element(element, styles) + + def _convert_element(self, element: Dict, styles: Dict): + """Convert a single HTML element to DOCX""" + element_type = element["type"] + + if element_type == "text": + self._add_text_element(element, styles) + elif element_type == "element": + self._handle_html_element(element, styles) + + def _handle_html_element(self, element: Dict, styles: Dict): + """Handle HTML element based on tag type""" + tag_name = element["tag"].lower() + + # Push to stack for styling + self.tag_stack.append(element) + + is_block_element = tag_name in self.block_element + + try: + # For block elements, ensure we start a new paragraph context + if is_block_element and self.current_paragraph: + # Only start new paragraph if the current one has content + if self.current_paragraph.text.strip(): + self.current_paragraph = None + + # Check for grid/flex containers first + if tag_name in ("div", "section", "container"): + attributes = element.get("attributes", {}) + style_attr = attributes.get("style", "") + is_grid = "display:grid" in style_attr.replace(" ", "") + is_flex = "display:flex" in style_attr.replace(" ", "") + + if is_grid or is_flex: + self._handle_grid_container(element, styles) + else: + self._add_div(element, styles) + + elif tag_name in ["h1", "h2", "h3", "h4", "h5", "h6"]: + self._add_heading(element, styles) + elif tag_name == "p": + self._add_paragraph(element, styles) + elif tag_name == "table": + self._add_table(element, styles) + elif tag_name == "span": + self._add_span(element, styles) + elif tag_name == "br": + self._add_line_break() + elif 
tag_name == "hr": + self._add_horizontal_rule() + elif tag_name == "ul": + self._start_list(element, styles) + elif tag_name == "ol": + self._start_numbered_list(element, styles) + elif tag_name == "li": + self._add_list_item(element, styles) + elif tag_name == "strong" or tag_name == "b": + self._add_bold_text(element, styles) + elif tag_name == "em" or tag_name == "i": + self._add_italic_text(element, styles) + elif tag_name == "u": + self._add_underline_text(element, styles) + elif tag_name == "pre": + self._add_preformatted_text(element, styles) + elif tag_name == "tr": + self._add_table_row(element, styles) + elif tag_name == "td" or tag_name == "th": + self._add_table_cell(element, styles) + else: + # Process children for unknown tags + self._convert_elements(element.get("children", []), styles) + + finally: + # Always pop from stack + if self.tag_stack and self.tag_stack[-1] == element: + self.tag_stack.pop() + + # For block elements, ensure we clear the paragraph context after processing + if is_block_element: + self.current_paragraph = None + + def _add_preformatted_text(self, element: Dict, styles: Dict): + """Add preformatted text with preserved whitespace""" + if not self.current_paragraph: + self.current_paragraph = self.doc.add_paragraph() + + # Process all text content in pre tag + self._process_preformatted_content(element.get("children", []), styles) + self.current_paragraph = None + + def _process_preformatted_content(self, elements: List[Dict], styles: Dict): + """Process content for pre tags with preserved formatting""" + for element in elements: + if element["type"] == "text": + text = element.get("content", "") + if text: + # Preserve all whitespace in pre tags + run = self.current_paragraph.add_run(text) + self.style_manager.apply_styles_to_run(run, self.tag_stack, styles) + elif element["type"] == "element": + self._handle_html_element(element, styles) + + def _add_text_element(self, element: Dict, styles: Dict): + """Add text element with 
proper styling and line breaks""" + if not self.current_paragraph: + self.current_paragraph = self.doc.add_paragraph() + + text = element.get("content", "") + + # Handle text with line breaks + if "\n" in text: + lines = text.split("\n") + for i, line in enumerate(lines): + if line.strip(): # Only add non-empty lines + run = self.current_paragraph.add_run(line.strip()) + self.style_manager.apply_styles_to_run(run, self.tag_stack, styles) + + # Add line break except for the last line + if i < len(lines) - 1 and line.strip(): + self._add_line_break() + else: + # Single line of text + if text.strip(): + run = self.current_paragraph.add_run(text.strip()) + self.style_manager.apply_styles_to_run(run, self.tag_stack, styles) + + def _add_heading(self, element: Dict, styles: Dict): + """Add heading with appropriate level""" + level = element["tag"][1] # Extract number from h1, h2, etc. + self.current_paragraph = self.doc.add_paragraph() + + # Apply heading style + self.style_manager.apply_heading_style( + self.current_paragraph, level, element, styles + ) + + # Process children + self._convert_elements(element.get("children", []), styles) + + self.current_paragraph = None + + def _add_paragraph(self, element: Dict, styles: Dict): + """Add paragraph""" + self.current_paragraph = self.doc.add_paragraph() + self.style_manager.apply_paragraph_style( + self.current_paragraph, element, styles + ) + + # Process children + self._convert_elements(element.get("children", []), styles) + + self.current_paragraph = None + + def _add_div(self, element: Dict, styles: Dict): + """Add div element - ensure new paragraph for block-level elements""" + # For divs that contain block-level content, start a new paragraph + attributes = element.get("attributes", {}) + display_style = attributes.get("style", "") + try: + # Check for display: inline in styles + is_inline = "display:inline" in display_style + except AttributeError: + pass + + has_block_content = ( + any( + child.get("tag") in 
self.block_element + for child in element.get("children", []) + if child["type"] == "element" + ) + or is_inline + ) + + if has_block_content and self.current_paragraph: + self.current_paragraph = None + + self._convert_elements(element.get("children", []), styles) + + def _add_span(self, element: Dict, styles: Dict): + """Add span with inline styling""" + self._convert_elements(element.get("children", []), styles) + + def _add_line_break(self): + """Add a proper line break in Word document""" + if self.current_paragraph: + # Only add break if the paragraph has content + if self.current_paragraph.text.strip(): + self.current_paragraph.add_run().add_break() + else: + # If empty, add a space to maintain the break + self.current_paragraph.add_run(" ") + else: + # Create a new paragraph for the line break + self.current_paragraph = self.doc.add_paragraph() + self.current_paragraph.add_run(" ") + + # def _add_horizontal_rule(self): + """Add horizontal rule""" + # self.doc.add_paragraph().add_run("_" * 50) + + def _add_horizontal_rule(self): + """Add a proper horizontal rule/line""" + try: + # Create a new paragraph for the horizontal rule + hr_paragraph = self.doc.add_paragraph() + + # Add border to the paragraph to create the horizontal line + p_pr = hr_paragraph._p.get_or_add_pPr() + + # Create paragraph borders + p_borders = OxmlElement("w:pBdr") + + # Create bottom border for the horizontal line + bottom_border = OxmlElement("w:bottom") + bottom_border.set(qn("w:val"), "single") + bottom_border.set(qn("w:sz"), "6") # Line thickness (6 = 0.75 pt) + bottom_border.set(qn("w:space"), "1") # Spacing above the line + bottom_border.set(qn("w:color"), "auto") # Automatic color + + # Add the bottom border to the borders element + p_borders.append(bottom_border) + + # Add borders to paragraph properties + p_pr.append(p_borders) + + # Add some spacing after the horizontal rule + p_spacing = OxmlElement("w:spacing") + p_spacing.set(qn("w:after"), "120") # 120 twips = 6 points 
spacing after + p_pr.append(p_spacing) + + # Clear current paragraph context + self.current_paragraph = None + + except Exception as e: + # Fallback: create a simple horizontal line with underscores + fallback_paragraph = self.doc.add_paragraph() + fallback_paragraph.add_run("_" * 50) # Simple underscore line + self.current_paragraph = None + print(f"Horizontal rule fallback used: {e}") + + def _start_list(self, element: Dict, styles: Dict): + """Start unordered list""" + self._convert_elements(element.get("children", []), styles) + + def _start_numbered_list(self, element: Dict, styles: Dict): + """Start ordered list""" + self._convert_elements(element.get("children", []), styles) + + def _add_list_item(self, element: Dict, styles: Dict): + """Add list item""" + self.current_paragraph = self.doc.add_paragraph(style="List Bullet") + self._convert_elements(element.get("children", []), styles) + self.current_paragraph = None + + def _add_bold_text(self, element: Dict, styles: Dict): + """Add bold text""" + self._convert_elements(element.get("children", []), styles) + + def _add_italic_text(self, element: Dict, styles: Dict): + """Add italic text""" + self._convert_elements(element.get("children", []), styles) + + def _add_underline_text(self, element: Dict, styles: Dict): + """Add underline text""" + self._convert_elements(element.get("children", []), styles) + + # Table handling methods + def _add_table(self, element: Dict, styles: Dict): + """Create a new table""" + # Save current state + saved_paragraph = self.current_paragraph + + attributes = element.get("attributes", {}) + + # Calculate table dimensions + rows = self._count_table_rows(element) + cols = self._count_table_columns(element) + + try: + # Create table with calculated dimensions + if rows > 0 and cols > 0: + # Create table at current position + self.current_table = self.doc.add_table(rows=rows, cols=cols) + self.current_table.autofit = True + self.current_table.allow_autofit = True + + # Apply table 
styles + self._apply_table_styles(self.current_table, attributes, styles) + + # Reset row and cell counters + self.current_row_index = 0 + self.current_cell_index = 0 + + # Process table content + self._convert_elements(element.get("children", []), styles) + + # Clean up empty rows/cells if needed + self._cleanup_table() + + # Add a paragraph after the table for proper flow + self.current_paragraph = self.doc.add_paragraph() + + except Exception as e: + print(f"Table creation error: {e}") + # Fallback: add a simple paragraph + self.current_paragraph = self.doc.add_paragraph() + self.current_paragraph.add_run("[Table content]") + + finally: + # Restore or clear table context + self.current_table = None + # Restore paragraph context if it was saved + if saved_paragraph: + self.current_paragraph = saved_paragraph + + def _count_table_rows(self, table_element: Dict) -> int: + """Count the number of rows in the table""" + rows = 0 + for child in table_element.get("children", []): + if child.get("type") == "element" and child.get("tag", "").lower() == "tr": + rows += 1 + return max(rows, 1) # At least 1 row + + def _count_table_columns(self, table_element: Dict) -> int: + """Count the maximum number of columns in the table""" + max_cols = 0 + for child in table_element.get("children", []): + if child.get("type") == "element" and child.get("tag", "").lower() == "tr": + col_count = 0 + for cell in child.get("children", []): + if cell.get("type") == "element" and cell.get( + "tag", "" + ).lower() in [ + "td", + "th", + ]: + # Handle colspan + colspan = int(cell.get("attributes", {}).get("colspan", 1)) + col_count += colspan + max_cols = max(max_cols, col_count) + return max(max_cols, 1) # At least 1 column + + def _add_table_row(self, element: Dict, styles: Dict): + """Add a table row""" + if not self.current_table or self.current_row_index >= len( + self.current_table.rows + ): + return + + self.current_row = self.current_table.rows[self.current_row_index] + 
self.current_cell_index = 0 + + # Process row content + self._convert_elements(element.get("children", []), styles) + + self.current_row_index += 1 + self.current_row = None + + def _add_table_cell(self, element: Dict, styles: Dict): + """Add content to a table cell""" + if not self.current_row or self.current_cell_index >= len( + self.current_row.cells + ): + return + + cell = self.current_row.cells[self.current_cell_index] + tag_name = element.get("tag", "").lower() + attributes = element.get("attributes", {}) + + # Handle colspan and rowspan + colspan = int(attributes.get("colspan", 1)) + # rowspan = int(attributes.get("rowspan", 1)) + + # Apply cell styles + self._apply_cell_styles(cell, attributes, styles, tag_name == "th") + + # Save current paragraph context and switch to cell context + saved_paragraph = self.current_paragraph + self.current_paragraph = ( + cell.paragraphs[0] if cell.paragraphs else cell.add_paragraph() + ) + + # Process cell content + self._convert_elements(element.get("children", []), styles) + + # Restore paragraph context + self.current_paragraph = saved_paragraph + self.current_cell_index += colspan + + def _apply_table_styles(self, table, attributes: Dict, styles: Dict): + """Apply styles to the table""" + # Apply auto-fit by default + table.autofit = True + + # Apply specific width if provided + width_attr = attributes.get("width") or attributes.get("style", "") + if "width" in width_attr: + width_match = re.search(r"width:\s*(\d+)(px|%)", width_attr) + if width_match: + width_value = int(width_match.group(1)) + if width_match.group(2) == "%": + # Convert percentage to approximate width (Word doesn't support % directly) + table_width = Inches(6) # Approximate page width + width_value = int(table_width * width_value / 100) + # table.width = WidthDocx(width_value) # Would need proper width handling + + # Apply border styles + if "border" in attributes.get("style", ""): + self._apply_table_borders(table) + + def 
_apply_table_borders(self, table): + """Apply borders to table""" + try: + tbl = table._tbl + tblPr = tbl.tblPr + + # Add table borders + tblBorders = OxmlElement("w:tblBorders") + + for border_name in ["top", "left", "bottom", "right", "insideH", "insideV"]: + border = OxmlElement(f"w:{border_name}") + border.set(qn("w:val"), "single") + border.set(qn("w:sz"), "4") + border.set(qn("w:space"), "0") + border.set(qn("w:color"), "auto") + tblBorders.append(border) + + tblPr.append(tblBorders) + except Exception as e: + print(f"Table border styling failed: {e}") + + def _apply_cell_styles( + self, cell, attributes: Dict, styles: Dict, is_header: bool = False + ): + """Apply styles to table cell""" + # Set header styling + if is_header: + for paragraph in cell.paragraphs: + for run in paragraph.runs: + run.font.bold = True + + # Apply background color + bg_color = self._extract_background_color(attributes, styles) + if bg_color: + try: + shading_elm = parse_xml(f'') + cell._tc.get_or_add_tcPr().append(shading_elm) + except Exception: + pass + + # Apply text alignment + align = attributes.get("align") or self._extract_text_align(attributes, styles) + if align: + for paragraph in cell.paragraphs: + if align == "center": + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif align == "right": + paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT + elif align == "justify": + paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY + + def _extract_background_color(self, attributes: Dict, styles: Dict) -> str: + """Extract background color from attributes and styles""" + # Check inline style + style_attr = attributes.get("style", "") + bg_match = re.search(r"background-color:\s*(#[0-9a-fA-F]+|\w+)", style_attr) + if bg_match: + return bg_match.group(1) + + # Check class styles + class_attr = attributes.get("class", "") + if class_attr: + for class_name in class_attr.split(): + css_selector = f".{class_name}" + if ( + css_selector in styles + and "background-color" in 
styles[css_selector] + ): + return styles[css_selector]["background-color"] + + return None + + def _extract_text_align(self, attributes: Dict, styles: Dict) -> str: + """Extract text alignment from attributes and styles""" + # Check inline style + style_attr = attributes.get("style", "") + align_match = re.search(r"text-align:\s*(\w+)", style_attr) + if align_match: + return align_match.group(1) + + # Check class styles + class_attr = attributes.get("class", "") + if class_attr: + for class_name in class_attr.split(): + css_selector = f".{class_name}" + if css_selector in styles and "text-align" in styles[css_selector]: + return styles[css_selector]["text-align"] + + return None + + def _cleanup_table(self): + """Clean up empty table rows or cells""" + if not self.current_table: + return + + # Remove completely empty rows + rows_to_remove = [] + for i, row in enumerate(self.current_table.rows): + if all(cell.text.strip() == "" for cell in row.cells): + rows_to_remove.append(i) + + # Remove rows in reverse order to avoid index issues + for i in sorted(rows_to_remove, reverse=True): + try: + self.current_table._tbl.remove(self.current_table.rows[i]._tr) + except Exception: + pass + + def _handle_grid_container(self, element: Dict, styles: Dict): + """Handle grid container (simulate with table)""" + attributes = element.get("attributes", {}) + style_attr = attributes.get("style", "") + + # Check if this is a grid container + is_grid = "display:grid" in style_attr or "display: grid" in style_attr + is_flex = "display:flex" in style_attr or "display: flex" in style_attr + + if is_grid: + self._handle_css_grid(element, styles) + elif is_flex: + self._handle_flex_container(element, styles) + else: + self._convert_elements(element.get("children", []), styles) + + def _handle_css_grid(self, element: Dict, styles: Dict): + """Simulate CSS Grid with a table""" + attributes = element.get("attributes", {}) + style_attr = attributes.get("style", "") + + # Extract grid template 
columns + grid_cols = 1 + grid_template_match = re.search( + r"grid-template-columns:\s*(repeat\((\d+),\s*1fr\)|[\w\s\(\)]+)", style_attr + ) + if grid_template_match: + if "repeat" in grid_template_match.group(1): + grid_cols = int(grid_template_match.group(2)) + else: + # Count columns by splitting + cols = grid_template_match.group(1).split() + grid_cols = len(cols) + + # Count rows needed + children = [ + child + for child in element.get("children", []) + if child.get("type") == "element" + ] + grid_rows = (len(children) + grid_cols - 1) // grid_cols + + if grid_rows > 0 and grid_cols > 0: + # Create table to simulate grid + table = self.doc.add_table(rows=grid_rows, cols=grid_cols) + table.autofit = True + + # Fill table with grid items + child_index = 0 + for row in table.rows: + for cell in row.cells: + if child_index < len(children): + child = children[child_index] + # Process child element in cell + saved_paragraph = self.current_paragraph + self.current_paragraph = cell.paragraphs[0] + self._convert_element(child, styles) + self.current_paragraph = saved_paragraph + child_index += 1 + + def _handle_flex_container(self, element: Dict, styles: Dict): + """Handle flex container (simulate with table row)""" + attributes = element.get("attributes", {}) + + # Create a single-row table to simulate flex container + children = [ + child + for child in element.get("children", []) + if child.get("type") == "element" + ] + + if children: + table = self.doc.add_table(rows=1, cols=len(children)) + table.autofit = True + + for i, child in enumerate(children): + cell = table.rows[0].cells[i] + saved_paragraph = self.current_paragraph + self.current_paragraph = cell.paragraphs[0] + self._convert_element(child, styles) + self.current_paragraph = saved_paragraph diff --git a/filewarp/core/html/core/html_parser.py b/filewarp/core/html/core/html_parser.py new file mode 100644 index 0000000..58a6bfb --- /dev/null +++ b/filewarp/core/html/core/html_parser.py @@ -0,0 +1,390 @@ 
+""" +HTML parsing functionality with CSS style extraction +""" + +import re +import html as html_parser +from typing import Dict, List, Any, Tuple +from ..utils.validation import validate_html + + +class HTMLParser: + """Advanced HTML parser with CSS style extraction""" + + def __init__(self): + self.styles = {} + self.prev_element = {} # Store previous element for line breaks + + # Block-level elements that should have automatic line breaks + self.block_elements = { + "div", + "p", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + # "ul", + # "ol", + # "li", + "section", + "article", + "header", + "footer", + "nav", + "aside", + "main", + "figure", + "figcaption", + } + + def parse(self, html_content: str) -> Dict[str, Any]: + """ + Parse HTML content and extract structure and styles + + Returns: + Dict with 'elements' and 'styles' keys + """ + validate_html(html_content) + + # Clean HTML + cleaned_html = self._clean_html(html_content) + + # Extract CSS styles + self.styles = self._extract_styles(cleaned_html) + + # Remove style tags from content + content_html = self._remove_style_tags(cleaned_html) + + # Parse HTML structure + elements = self._parse_structure(content_html) + + return {"elements": elements, "styles": self.styles} + + def strip_comments(self, html): + # Remove HTML comments + html = re.sub(r"", "", html, flags=re.DOTALL) + + # Remove JS comments within blocks + html = re.sub( + r"(]*>)(.*?)()", + lambda m: m.group(1) + + re.sub(r"(?m)//.*?$|/\*.*?\*/", "", m.group(2), flags=re.DOTALL) + + m.group(3), + html, + flags=re.DOTALL | re.IGNORECASE, + ) + return html + + def _clean_html(self, html: str) -> str: + """Clean and normalize HTML content""" + # Remove + html = html.replace("", "") + html = html.replace("", "") + + # Remove comments + # html = re.sub(r"", "", html, flags=re.DOTALL) + # Remove js comments + # html = re.sub(r"(?m)//.*?$|/\*.*?\*/", "", html, flags=re.DOTALL) + html = self.strip_comments(html) + + # Preserve line breaks by 
replacing them with markers + html = re.sub(r"\n+", "", html) + # html = html.replace("-", "—") # Replace - with — + + # Remove title + html = re.sub( + r"]*>.*?", + "", + html, + flags=re.DOTALL | re.IGNORECASE, + ) + + # Remove multiple spaces but preserve single spaces + # html = re.sub(r"[ \t]+", " ", html) + + # Remove multiple spaces and newlines + html = re.sub(r"\s+", " ", html) + + # Ensure proper tag formatting + html = html.replace("
", "
").replace("
", "
") + + # Handle self-closing tags + html = re.sub(r"<(img|br|hr|input)([^>]*)(?", r"<\1\2/>", html) + + # Decode HTML entities + html = html_parser.unescape(html) + return html.strip() + + def _extract_styles(self, html: str) -> Dict[str, Dict]: + """Extract CSS styles from style tags and inline styles""" + styles = {} + + # Extract from style tags + style_matches = re.findall( + r"]*>(.*?)", html, re.DOTALL | re.IGNORECASE + ) + for style_content in style_matches: + styles.update(self._parse_css_rules(style_content)) + + return styles + + def _parse_css_rules(self, css_content: str) -> Dict[str, Dict]: + """Parse CSS rules into a dictionary""" + styles = {} + + # Remove comments + css_content = re.sub(r"/\*.*?\*/", "", css_content, flags=re.DOTALL) + + # Parse rules + rules = re.findall(r"([^{]+)\{([^}]+)\}", css_content) + + for selector, properties in rules: + selector = selector.strip() + style_dict = self._parse_css_properties(properties) + + if selector: + styles[selector] = style_dict + + return styles + + def _parse_css_properties(self, css_properties: str) -> Dict[str, str]: + """Parse CSS properties string into dictionary""" + properties = {} + declarations = [d.strip() for d in css_properties.split(";") if d.strip()] + + for declaration in declarations: + if ":" in declaration: + prop, value = declaration.split(":", 1) + prop = prop.strip().lower() + value = value.strip() + properties[prop] = value + + return properties + + def _remove_style_tags(self, html: str) -> str: + """Remove style tags from HTML""" + return re.sub( + r"]*>.*?", "", html, flags=re.DOTALL | re.IGNORECASE + ) + + def _parse_structure(self, html: str) -> List[Dict]: + """Parse HTML structure into a tree of elements and with automatic line breaks for block elements""" + tokens = self._tokenize_html(html) + elements, _ = self._build_element_tree(tokens) + return elements + + def _tokenize_html(self, html: str) -> List[Dict]: + """Tokenize HTML into tags and text while preserving line 
breaks""" + tokens = [] + pos = 0 + + # First, normalize line breaks and preserve them with markers + html = self._preserve_line_breaks(html) + + while pos < len(html): + # Find next tag + tag_match = re.search(r"]*)>", html[pos:]) + + if not tag_match: + # Add remaining text + if pos < len(html): + text_content = html[pos:] + text_content = self._restore_line_breaks(text_content) + if text_content.strip(): + tokens.append({"type": "text", "content": text_content}) + break + + tag_start = tag_match.start() + pos + tag_end = tag_match.end() + pos + + # Add text before tag + if tag_start > pos: + text_content = html[pos:tag_start] + text_content = self._restore_line_breaks(text_content) + if text_content.strip(): + tokens.append({"type": "text", "content": text_content}) + + # Extract tag information + full_tag = html[tag_start:tag_end] + tag_name = tag_match.group(1).lower() + attributes = self._parse_attributes(tag_match.group(2)) + is_closing = full_tag.startswith("") + + current_element = { + "type": "tag", + "name": tag_name, + "full_tag": full_tag, + "attributes": attributes, + "is_closing": is_closing, + "is_self_closing": is_self_closing, + } + + # Add automatic line break logic + self._add_auto_line_break(tokens, current_element) + + tokens.append(current_element) + self.prev_element = current_element + pos = tag_end + + return tokens + + def _add_auto_line_break(self, tokens: List[Dict], current_element: Dict): + """Automatically add line breaks between block elements when needed""" + if not self.prev_element: + return + + prev_name = self.prev_element.get("name", "") + current_name = current_element.get("name", "") + prev_is_closing = self.prev_element.get("is_closing", False) + current_is_closing = current_element.get("is_closing", False) + + # Case 1: Closing block element followed by another block element + if ( + prev_is_closing + and prev_name in self.block_elements + and not current_is_closing + and current_name in self.block_elements + ): + # Add 
line break between block elements + line_break = { + "type": "tag", + "name": "br", + "full_tag": "
", + "attributes": {}, + "is_closing": False, + "is_self_closing": True, + } + tokens.append(line_break) + self.prev_element = line_break + + # Case 2: Closing block element followed by text (content within same block) + elif ( + prev_is_closing + and prev_name in self.block_elements + and current_element["type"] == "text" + and current_element.get("content", "").strip() + ): + # This handles content that should be on new lines within the same block + line_break = { + "type": "tag", + "name": "br", + "full_tag": "
", + "attributes": {}, + "is_closing": False, + "is_self_closing": True, + } + tokens.append(line_break) + self.prev_element = line_break + + # Case 3: Text followed by opening block element + elif ( + self.prev_element["type"] == "text" + and not current_is_closing + and current_name in self.block_elements + ): + # Add line break before new block element + line_break = { + "type": "tag", + "name": "br", + "full_tag": "
", + "attributes": {}, + "is_closing": False, + "is_self_closing": True, + } + tokens.append(line_break) + self.prev_element = line_break + + def _preserve_line_breaks(self, html: str) -> str: + """Preserve line breaks by converting them to markers""" + # Replace line breaks with a unique marker that won't interfere with HTML parsing + html = html.replace("\r\n", "\n") # Normalize Windows line endings + html = html.replace("\r", "\n") # Normalize Mac line endings + + # Use a unique marker that won't appear in normal text + html = html.replace("\n", "⏎") # Using a special character as marker + return html + + def _restore_line_breaks(self, text: str) -> str: + """Restore line breaks from markers""" + return text.replace("⏎", "\n") + + def _parse_attributes(self, attribute_string: str) -> Dict[str, str]: + """Parse HTML attributes string into dictionary""" + attributes = {} + + # Find all attribute=value pairs + pattern = r'(\w+)\s*=\s*["\']([^"\']*)["\']' + matches = re.findall(pattern, attribute_string) + + for key, value in matches: + attributes[key.lower()] = value + + # Also look for boolean attributes + boolean_attrs = re.findall(r"(\w+)(?=\s+|>)", attribute_string) + for attr in boolean_attrs: + if attr.lower() not in attributes: + attributes[attr.lower()] = "true" + + return attributes + + def _add_multiple_line_breaks(self, count: int): + """Add multiple line breaks""" + if count <= 0: + return + + for i in range(count): + self._add_line_break() + + def _build_element_tree( + self, tokens: List[Dict], start_index: int = 0 + ) -> Tuple[List[Dict], int]: + """Build a tree structure from tokens""" + elements = [] + i = start_index + + while i < len(tokens): + token = tokens[i] + + if token["type"] == "text": + elements.append({"type": "text", "content": token["content"]}) + i += 1 + + elif token["type"] == "tag": + if token["is_closing"]: + # Return when we hit a closing tag + return elements, i + 1 + + elif token["is_self_closing"]: + # Self-closing tag - add 
as element with no children + elements.append( + { + "type": "element", + "tag": token["name"], + "attributes": token["attributes"], + "children": [], + } + ) + i += 1 + + else: + # Opening tag - recursively process children + child_elements, next_index = self._build_element_tree(tokens, i + 1) + + elements.append( + { + "type": "element", + "tag": token["name"], + "attributes": token["attributes"], + "children": child_elements, + } + ) + + i = next_index + + else: + i += 1 + + return elements, i diff --git a/filewarp/core/html/core/style_manager.py b/filewarp/core/html/core/style_manager.py new file mode 100644 index 0000000..3c476dc --- /dev/null +++ b/filewarp/core/html/core/style_manager.py @@ -0,0 +1,311 @@ +""" +Style management and application for DOCX elements +""" + +from docx import Document +from docx.shared import Pt, Inches, RGBColor +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING +from docx.oxml.ns import qn +from typing import Dict, List, Any +import re + +from ..utils.color_utils import ColorConverter + + +class StyleManager: + """Manages styles and applies them to DOCX elements""" + + def __init__(self, default_font: str = "Calibri", default_size: int = 11): + self.default_font = default_font + self.default_size = default_size + self.color_converter = ColorConverter() + + # Style mappings + self.heading_styles = { + "1": {"size": 16, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.CENTER}, + "2": {"size": 14, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "3": {"size": 12, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "4": {"size": 11, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "5": {"size": 11, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + "6": {"size": 11, "bold": True, "alignment": WD_ALIGN_PARAGRAPH.LEFT}, + } + + def setup_document_styles(self, doc: Document): + """Setup default document styles""" + # Set normal style + style = doc.styles["Normal"] + font = style.font + font.name = 
self.default_font + font.size = Pt(self.default_size) + + # Create custom styles + self._create_cv_styles(doc) + + def _create_cv_styles(self, doc: Document): + """Create custom styles for CV""" + styles_config = { + "Title": { + "size": 16, + "bold": True, + "alignment": WD_ALIGN_PARAGRAPH.CENTER, + }, + "Heading": { + "size": 14, + "bold": True, + "alignment": WD_ALIGN_PARAGRAPH.LEFT, + }, + "Subheading": { + "size": 12, + "bold": True, + "alignment": WD_ALIGN_PARAGRAPH.LEFT, + }, + "Contact": { + "size": 10, + "bold": False, + "alignment": WD_ALIGN_PARAGRAPH.CENTER, + }, + } + + for style_name, config in styles_config.items(): + try: + style = doc.styles.add_style(style_name, 1) # WD_STYLE_TYPE.PARAGRAPH + font = style.font + font.name = self.default_font + font.size = Pt(config["size"]) + font.bold = config["bold"] + style.paragraph_format.alignment = config["alignment"] + except ValueError: + # Style might already exist + pass + + def apply_styles_to_run(self, run, tag_stack: List[Dict], styles: Dict): + """Apply styles to a text run based on tag stack and CSS styles""" + # Apply basic font + run.font.name = self.default_font + run.font.size = Pt(self.default_size) + + # Apply styles from tag stack and CSS + self._apply_inline_styles(run, tag_stack, styles) + self._apply_css_styles(run, tag_stack, styles) + + def _apply_inline_styles(self, run, tag_stack: List[Dict], styles: Dict): + """Apply inline styles from HTML attributes""" + for element in tag_stack: + if element.get("type") == "element": + attributes = element.get("attributes", {}) + style_attr = attributes.get("style", "") + + if style_attr: + self._apply_style_attribute(run, style_attr) + + def _apply_css_styles(self, run, tag_stack: List[Dict], styles: Dict): + """Apply CSS styles from style definitions""" + for element in tag_stack: + if element.get("type") == "element": + tag_name = element.get("tag", "") + attributes = element.get("attributes", {}) + + # Check for class-based styles + class_attr 
= attributes.get("class", "") + if class_attr: + for class_name in class_attr.split(): + css_selector = f".{class_name}" + if css_selector in styles: + self._apply_css_properties(run, styles[css_selector]) + + # Check for tag-based styles + tag_selector = tag_name + if tag_selector in styles: + self._apply_css_properties(run, styles[tag_selector]) + + def _apply_style_attribute(self, run, style_attr: str): + """Apply style attribute to run""" + properties = self._parse_style_attribute(style_attr) + self._apply_css_properties(run, properties) + + def _parse_style_attribute(self, style_attr: str) -> Dict[str, str]: + """Parse style attribute string into properties dictionary""" + properties = {} + declarations = [d.strip() for d in style_attr.split(";") if d.strip()] + + for declaration in declarations: + if ":" in declaration: + prop, value = declaration.split(":", 1) + properties[prop.strip().lower()] = value.strip() + + return properties + + def _apply_css_properties(self, run, properties: Dict[str, str]): + """Apply CSS properties to a run""" + for prop, value in properties.items(): + try: + if prop == "font-weight": + if value in ["bold", "bolder", "700", "800", "900"]: + run.font.bold = True + + elif prop == "font-style": + if value == "italic": + run.font.italic = True + + elif prop == "text-decoration": + if "underline" in value: + run.font.underline = True + + elif prop == "color": + color = self.color_converter.parse_color(value) + if color: + run.font.color.rgb = color + + elif prop == "font-size": + size = self._parse_font_size(value) + if size: + run.font.size = Pt(size) + + elif prop == "font-family": + run.font.name = value.split(",")[0].strip().strip("\"'") + + elif prop == "background-color": + # Word doesn't directly support background color for text runs + # This would require more complex handling with shading + pass + + except Exception: + # Continue with other properties if one fails + continue + + def _parse_font_size(self, size_str: str) -> 
float: + """Parse font size string to points""" + try: + # Handle pixel values (approximate conversion: 1px ≈ 0.75pt) + if "px" in size_str: + return float(size_str.replace("px", "").strip()) * 0.75 + + # Handle point values + elif "pt" in size_str: + return float(size_str.replace("pt", "").strip()) + + # Handle em values (approximate) + elif "em" in size_str: + return float(size_str.replace("em", "").strip()) * self.default_size + + # Handle percentage + elif "%" in size_str: + return ( + float(size_str.replace("%", "").strip()) / 100 + ) * self.default_size + + # Handle named sizes + elif size_str in ["xx-small", "x-small", "small", "medium"]: + return self.default_size + elif size_str == "large": + return self.default_size * 1.2 + elif size_str == "x-large": + return self.default_size * 1.5 + elif size_str == "xx-large": + return self.default_size * 2 + + # Assume points if no unit + else: + return float(size_str) + + except (ValueError, TypeError): + return None + + def apply_heading_style(self, paragraph, level: str, element: Dict, styles: Dict): + """Apply heading style to paragraph""" + # Apply basic heading style + if level in self.heading_styles: + config = self.heading_styles[level] + paragraph.style = self._get_heading_style_name(level) + + # Apply additional CSS styles + self._apply_paragraph_css_styles(paragraph, element, styles) + + def apply_paragraph_style(self, paragraph, element: Dict, styles: Dict): + """Apply styles to paragraph""" + self._apply_paragraph_css_styles(paragraph, element, styles) + + def _apply_paragraph_css_styles(self, paragraph, element: Dict, styles: Dict): + """Apply CSS styles to paragraph""" + attributes = element.get("attributes", {}) + + # Check for inline styles + style_attr = attributes.get("style", "") + if style_attr: + properties = self._parse_style_attribute(style_attr) + self._apply_paragraph_css_properties(paragraph, properties) + + # Check for class-based styles + class_attr = attributes.get("class", "") + if 
class_attr: + for class_name in class_attr.split(): + css_selector = f".{class_name}" + if css_selector in styles: + self._apply_paragraph_css_properties( + paragraph, styles[css_selector] + ) + + def _apply_paragraph_css_properties(self, paragraph, properties: Dict[str, str]): + """Apply CSS properties to paragraph""" + for prop, value in properties.items(): + try: + if prop == "text-align": + if value == "center": + paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER + elif value == "right": + paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT + elif value == "justify": + paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY + else: + paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT + + elif prop == "margin" or prop == "margin-top": + # Convert margin to spacing + margin = self._parse_size_value(value) + if margin: + paragraph.paragraph_format.space_after = Pt(margin) + + elif prop == "margin-bottom": + margin = self._parse_size_value(value) + if margin: + paragraph.paragraph_format.space_after = Pt(margin) + + elif prop == "line-height": + if value == "normal": + paragraph.paragraph_format.line_spacing_rule = ( + WD_LINE_SPACING.SINGLE + ) + else: + try: + line_height = float(value) + paragraph.paragraph_format.line_spacing = line_height + except ValueError: + pass + + except Exception: + continue + + def _parse_size_value(self, size_str: str) -> float: + """Parse size value to points""" + try: + if "px" in size_str: + return float(size_str.replace("px", "").strip()) * 0.75 + elif "pt" in size_str: + return float(size_str.replace("pt", "").strip()) + elif "em" in size_str: + return float(size_str.replace("em", "").strip()) * self.default_size + else: + return float(size_str) + except (ValueError, TypeError): + return None + + def _get_heading_style_name(self, level: str) -> str: + """Get appropriate style name for heading level""" + if level == "1": + return "Title" + elif level == "2": + return "Heading" + elif level == "3": + return "Subheading" + else: + return "Normal" diff 
--git a/filewarp/core/html/examples/__init__.py b/filewarp/core/html/examples/__init__.py new file mode 100644 index 0000000..5c612b4 --- /dev/null +++ b/filewarp/core/html/examples/__init__.py @@ -0,0 +1,3 @@ +from .cv_templates import CVTemplates + +__all__ = ["CVTemplates"] diff --git a/filewarp/core/html/examples/templates.py b/filewarp/core/html/examples/templates.py new file mode 100644 index 0000000..d4e3261 --- /dev/null +++ b/filewarp/core/html/examples/templates.py @@ -0,0 +1,228 @@ +""" +Example CV templates for testing and demonstration +""" + + +class Templates: + """Collection of CV HTML templates""" + + @staticmethod + def get_basic_cv(): + """Get basic CV template""" + return """ + + + + + + +
+

MWANGANGI KALOVWE

+
+ Phone: 0769330481 | Email: kalovwemwangangi18@gmail.com
+ Address: Kabati, Mutonguni Ward, Kitui County | Postal: 9-90203, Tulia +
+
+ +
+
PROFESSIONAL SUMMARY
+

Detail-oriented Electrical and Electronics Technician with specialized training in power systems and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance, troubleshooting, and circuit analysis.

+
+ +
+
EDUCATION
+

+ 2021 - 2024
+ Ikutha Technical and Vocational College
+ Diploma in Electrical and Electronics (Power Option)
+ Completed: April 3, 2024 +

+
+ +
+
PROFESSIONAL EXPERIENCE
+

+ May 2023 - July 2023
+ KenGen - Olkaria Geothermal Power Plants
+ Electrical Maintenance Intern +

+
    +
  • Performed maintenance of electrical systems and power distribution equipment
  • +
  • Maintained turbine generators and auxiliary systems
  • +
  • Conducted battery maintenance and testing
  • +
+
+ + + """ + + @staticmethod + def get_advanced_template(): + """Get advanced template with more styling""" + return """ + + + + + + +
+
MWANGANGI KALOVWE
+
+ 📞 0769330481 | ✉️ kalovwemwangangi18@gmail.com
+ 📍 Kabati, Mutonguni Ward, Kitui County | 📮 9-90203, Tulia +
+
+ +
+
Professional Summary
+

+ Detail-oriented Electrical and Electronics Technician with specialized training in power systems + and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance, + troubleshooting, and circuit analysis. Seeking to leverage technical expertise and problem-solving + abilities in a challenging electrical engineering role. +

+
+ +
+
Education
+ +
+
2021 - 2024
+
Ikutha Technical and Vocational College
+
Diploma in Electrical and Electronics (Power Option)
+
Completed: April 3, 2024
+
+ +
+
January 2016 - November 2019
+
Kea Secondary School
+
Kenya Certificate of Secondary Education (KCSE)
+
Mean Grade: C- (Minus)
+
+
+ +
+
Technical Skills
+
+
Electrical System Maintenance
+
Power System Operations
+
Circuit Analysis
+
PLC Programming
+
Solar Installation
+
Transformer Maintenance
+
Battery Systems
+
Technical Reporting
+
+
+ + + """ diff --git a/filewarp/core/html/styles/__init__.py b/filewarp/core/html/styles/__init__.py new file mode 100644 index 0000000..208878b --- /dev/null +++ b/filewarp/core/html/styles/__init__.py @@ -0,0 +1,4 @@ +from .css_parser import CSSParser +from .style_applier import StyleApplier + +__all__ = ["CSSParser", "StyleApplier"] diff --git a/filewarp/core/html/styles/css_parser.py b/filewarp/core/html/styles/css_parser.py new file mode 100644 index 0000000..cbbffda --- /dev/null +++ b/filewarp/core/html/styles/css_parser.py @@ -0,0 +1,69 @@ +""" +Advanced CSS parsing functionality +""" + +import re +from typing import Dict, List + + +class CSSParser: + """Advanced CSS parser with support for various CSS features""" + + def __init__(self): + self.styles = {} + + def parse_css(self, css_content: str) -> Dict[str, Dict]: + """Parse CSS content into style dictionary""" + # Remove comments + css_content = re.sub(r"/\*.*?\*/", "", css_content, flags=re.DOTALL) + + # Parse rules + rules = re.findall(r"([^{]+)\{([^}]+)\}", css_content) + + for selector, properties in rules: + selector = selector.strip() + style_dict = self._parse_properties(properties) + + if selector: + self.styles[selector] = style_dict + + return self.styles + + def _parse_properties(self, properties: str) -> Dict[str, str]: + """Parse CSS properties string""" + style_dict = {} + declarations = [d.strip() for d in properties.split(";") if d.strip()] + + for declaration in declarations: + if ":" in declaration: + prop, value = declaration.split(":", 1) + prop = prop.strip().lower() + value = value.strip() + style_dict[prop] = value + + return style_dict + + def get_styles_for_element( + self, tag: str, classes: List[str] = None, element_id: str = None + ) -> Dict[str, str]: + """Get combined styles for an element based on tag, classes, and ID""" + combined_styles = {} + + # Tag styles + if tag in self.styles: + combined_styles.update(self.styles[tag]) + + # Class styles + if classes: + for 
class_name in classes: + class_selector = f".{class_name}" + if class_selector in self.styles: + combined_styles.update(self.styles[class_selector]) + + # ID styles + if element_id: + id_selector = f"#{element_id}" + if id_selector in self.styles: + combined_styles.update(self.styles[id_selector]) + + return combined_styles diff --git a/filewarp/core/html/styles/style_applier.py b/filewarp/core/html/styles/style_applier.py new file mode 100644 index 0000000..feb7bb6 --- /dev/null +++ b/filewarp/core/html/styles/style_applier.py @@ -0,0 +1,83 @@ +""" +Style application logic for different CSS properties +""" + +from docx.shared import Pt, RGBColor +from docx.enum.text import WD_ALIGN_PARAGRAPH +from typing import Dict +import re + +from ..utils.color_utils import ColorConverter + + +class StyleApplier: + """Applies CSS styles to DOCX elements""" + + def __init__(self): + self.color_converter = ColorConverter() + + def apply_text_styles(self, run, styles: Dict[str, str]): + """Apply text-related styles to a run""" + for prop, value in styles.items(): + self._apply_text_style(run, prop, value) + + def _apply_text_style(self, run, prop: str, value: str): + """Apply a single text style property""" + try: + if prop == "color": + color = self.color_converter.parse_color(value) + if color: + run.font.color.rgb = color + + elif prop == "font-size": + size = self._parse_font_size(value) + if size: + run.font.size = Pt(size) + + elif prop == "font-family": + run.font.name = value.split(",")[0].strip().strip("\"'") + + elif prop == "font-weight": + if value in ["bold", "bolder", "700", "800", "900"]: + run.font.bold = True + elif value in ["normal", "lighter", "400"]: + run.font.bold = False + + elif prop == "font-style": + if value == "italic": + run.font.italic = True + elif value == "normal": + run.font.italic = False + + elif prop == "text-decoration": + if "underline" in value: + run.font.underline = True + if "line-through" in value: + run.font.strike = True + + elif 
prop == "text-transform": + if value == "uppercase": + run.text = run.text.upper() + elif value == "lowercase": + run.text = run.text.lower() + elif value == "capitalize": + run.text = run.text.title() + + except Exception: + pass + + def _parse_font_size(self, size_str: str) -> float: + """Parse font size to points""" + try: + if "px" in size_str: + return float(size_str.replace("px", "").strip()) * 0.75 + elif "pt" in size_str: + return float(size_str.replace("pt", "").strip()) + elif "em" in size_str: + return float(size_str.replace("em", "").strip()) * 11 # Default size + elif "%" in size_str: + return (float(size_str.replace("%", "").strip()) / 100) * 11 + else: + return float(size_str) + except (ValueError, TypeError): + return None diff --git a/filewarp/core/html/tests.py b/filewarp/core/html/tests.py new file mode 100644 index 0000000..c4dda59 --- /dev/null +++ b/filewarp/core/html/tests.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +""" +Test script for the CV Converter library +""" + +import os +import sys + +# Add the library to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "cv_converter")) + +from filewarp.core.html import HTML2Word +from filewarp.core.html.examples.templates import Templates + + +def test_basic_conversion(): + """Test basic conversion""" + print("Testing basic CV conversion...") + + converter = HTML2Word() + html_content = Templates.get_basic_template() + + converter.convert(html_content, "test_basic_cv.docx") + print("✓ Basic CV created: test_basic_cv.docx") + + +def test_advanced_conversion(): + """Test advanced conversion with styling""" + print("Testing advanced CV conversion...") + + converter = HTML2Word() + html_content = Templates.get_advanced_cv() + + converter.convert(html_content, "test_advanced_cv.docx") + print("✓ Advanced CV created: test_advanced_cv.docx") + + +def test_file_conversion(): + """Test conversion from HTML file""" + print("Testing file-based conversion...") + + # Create test HTML file + with 
open("test_cv.html", "w", encoding="utf-8") as f: + f.write(Templates.get_basic_template()) + + converter = HTML2Word() + converter.convert_file("test_cv.html", "test_file_cv.docx") + print("✓ File-based CV created: test_file_cv.docx") + + +def main(): + """Run all tests""" + print("CV Converter Library Test Suite") + print("=" * 40) + + try: + test_basic_conversion() + test_advanced_conversion() + test_file_conversion() + + print("\n" + "=" * 40) + print("All tests completed successfully! 🎉") + print("\nGenerated files:") + for file in [ + "test_basic_cv.docx", + "test_advanced_cv.docx", + "test_file_cv.docx", + ]: + if os.path.exists(file): + print(f" - {file}") + + except Exception as e: + print(f"\n❌ Error during testing: {e}") + import traceback + + traceback.print_exc() + + +if __name__ == "__main__": + # main() + converter = HTML2Word() + converter.convert_file("/home/skye/Downloads/MWG-CV.html", "test.docx") diff --git a/filewarp/core/html/utils/__init__.py b/filewarp/core/html/utils/__init__.py new file mode 100644 index 0000000..d779482 --- /dev/null +++ b/filewarp/core/html/utils/__init__.py @@ -0,0 +1,9 @@ +from .color_utils import ColorConverter +from .validation import validate_css, validate_html, validate_file_path + +__all__ = [ + "ColorConverter", + "validate_css", + "validate_html", + "validate_file_path", +] diff --git a/filewarp/core/html/utils/color_utils.py b/filewarp/core/html/utils/color_utils.py new file mode 100644 index 0000000..a13d09c --- /dev/null +++ b/filewarp/core/html/utils/color_utils.py @@ -0,0 +1,121 @@ +""" +Color conversion and parsing utilities +""" + +import re +from docx.shared import RGBColor +from typing import Optional + + +class ColorConverter: + """Converts various color formats to RGBColor""" + + def __init__(self): + self.named_colors = { + "black": RGBColor(0, 0, 0), + "white": RGBColor(255, 255, 255), + "red": RGBColor(255, 0, 0), + "green": RGBColor(0, 128, 0), + "blue": RGBColor(0, 0, 255), + "yellow": 
RGBColor(255, 255, 0), + "cyan": RGBColor(0, 255, 255), + "magenta": RGBColor(255, 0, 255), + "gray": RGBColor(128, 128, 128), + "grey": RGBColor(128, 128, 128), + "orange": RGBColor(255, 165, 0), + "purple": RGBColor(128, 0, 128), + "brown": RGBColor(165, 42, 42), + "pink": RGBColor(255, 192, 203), + "navy": RGBColor(0, 0, 128), + "teal": RGBColor(0, 128, 128), + "olive": RGBColor(128, 128, 0), + "maroon": RGBColor(128, 0, 0), + "silver": RGBColor(192, 192, 192), + "lime": RGBColor(0, 255, 0), + "aqua": RGBColor(0, 255, 255), + "fuchsia": RGBColor(255, 0, 255), + } + + def parse_color(self, color_str: str) -> Optional[RGBColor]: + """ + Parse color string and return RGBColor + + Supports: + - Hex: #RRGGBB, #RGB + - RGB: rgb(r, g, b) + - RGBA: rgba(r, g, b, a) - alpha ignored + - Named colors: red, blue, etc. + """ + if not color_str: + return None + + color_str = color_str.strip().lower() + + # Named colors + if color_str in self.named_colors: + return self.named_colors[color_str] + + # Hex colors + hex_match = re.match(r"#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})", color_str) + if hex_match: + r, g, b = [int(x, 16) for x in hex_match.groups()] + return RGBColor(r, g, b) + + # Short hex colors + short_hex_match = re.match(r"#([0-9a-f])([0-9a-f])([0-9a-f])", color_str) + if short_hex_match: + r, g, b = [int(x * 2, 16) for x in short_hex_match.groups()] + return RGBColor(r, g, b) + + # RGB colors + rgb_match = re.match(r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)", color_str) + if rgb_match: + r, g, b = [int(x) for x in rgb_match.groups()] + return RGBColor(r, g, b) + + # RGBA colors (ignore alpha) + rgba_match = re.match( + r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)", color_str + ) + if rgba_match: + r, g, b = [int(x) for x in rgba_match.groups()[:3]] + return RGBColor(r, g, b) + + # HSL colors (basic conversion) + hsl_match = re.match(r"hsl\(\s*(\d+)\s*,\s*(\d+)%\s*,\s*(\d+)%\s*\)", color_str) + if hsl_match: + h, s, l = [int(x) for x in 
hsl_match.groups()] + return self._hsl_to_rgb(h, s, l) + + return None + + def _hsl_to_rgb(self, h: int, s: int, l: int) -> RGBColor: + """Convert HSL color to RGB (simplified)""" + # Normalize values + h = h % 360 + s = max(0, min(100, s)) / 100 + l = max(0, min(100, l)) / 100 + + # Simplified conversion + c = (1 - abs(2 * l - 1)) * s + x = c * (1 - abs((h / 60) % 2 - 1)) + m = l - c / 2 + + if 0 <= h < 60: + r, g, b = c, x, 0 + elif 60 <= h < 120: + r, g, b = x, c, 0 + elif 120 <= h < 180: + r, g, b = 0, c, x + elif 180 <= h < 240: + r, g, b = 0, x, c + elif 240 <= h < 300: + r, g, b = x, 0, c + else: + r, g, b = c, 0, x + + r = int((r + m) * 255) + g = int((g + m) * 255) + b = int((b + m) * 255) + + return RGBColor(r, g, b) diff --git a/filewarp/core/html/utils/validation.py b/filewarp/core/html/utils/validation.py new file mode 100644 index 0000000..4a0d2d6 --- /dev/null +++ b/filewarp/core/html/utils/validation.py @@ -0,0 +1,93 @@ +""" +Validation utilities for the converter +""" + +import os +import re +from pathlib import Path + + +def validate_html(html_content: str) -> bool: + """ + Validate HTML content + + Args: + html_content: HTML string to validate + + Returns: + bool: True if valid + + Raises: + ValueError: If HTML content is invalid + """ + if not html_content or not isinstance(html_content, str): + raise ValueError("HTML content must be a non-empty string") + + if len(html_content.strip()) == 0: + raise ValueError("HTML content cannot be empty or whitespace only") + + # Basic check for HTML tags + if not re.search(r"<[^>]+>", html_content): + raise ValueError("HTML content must contain valid HTML tags") + + return True + + +def validate_file_path(file_path: str, file_type: str = "input") -> bool: + """ + Validate file path + + Args: + file_path: Path to validate + file_type: Type of file ('input' or 'output') + + Returns: + bool: True if valid + + Raises: + ValueError: If file path is invalid + FileNotFoundError: If input file doesn't exist + """ + 
if ( + not file_path + or not isinstance(file_path, str) + and not isinstance(file_path, Path) + ): + raise ValueError(f"{file_type} file path must be a non-empty string") + + if file_type == "input": + if not os.path.exists(file_path): + raise FileNotFoundError(f"Input file not found: {file_path}") + + if not os.path.isfile(file_path): + raise ValueError(f"Input path is not a file: {file_path}") + + elif file_type == "output": + output_dir = os.path.dirname(file_path) + if output_dir and not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except OSError as e: + raise ValueError(f"Cannot create output directory: {e}") + + # Check file extension + if not file_path.lower().endswith((".html", ".htm", ".docx")): + raise ValueError(f"File must have .html, .htm, or .docx extension: {file_path}") + + return True + + +def validate_css(css_content: str) -> bool: + """ + Validate CSS content + + Args: + css_content: CSS string to validate + + Returns: + bool: True if valid + """ + if not css_content or not isinstance(css_content, str): + raise ValueError("CSS content must be a non-empty string") + + return True diff --git a/filewarp/core/image/core.py b/filewarp/core/image/core.py new file mode 100644 index 0000000..1b2ebb1 --- /dev/null +++ b/filewarp/core/image/core.py @@ -0,0 +1,770 @@ +import shutil +from reportlab.pdfgen import canvas +from reportlab.lib.pagesizes import letter +import re +from pathlib import Path +from docx.shared import Inches, Mm +from docx import Document +import os +import sys +from tqdm import tqdm +from PIL import Image +import cv2 +from typing import List, Tuple, Union, Optional +from ...utils.simple import logger +from ...utils.decorators import Decorators +from ...utils.formats import SUPPORTED_IMAGE_FORMATS +from ...utils.file_utils import modify_filename_if_exists, DirectoryScanner +from ...utils.colors import fg, rs + +RESET = rs + + +class ImageCompressor: + def __init__(self, input_image_path): + self.input_image_path = 
input_image_path + + def resize_image(self, target_size): + try: + input_image_path = self.input_image_path + ext = input_image_path[-3:] + output_image_path = ( + os.path.splitext(input_image_path)[0] + f"_resized.{ext}" + ) + + original_image = Image.open(input_image_path) + original_size = original_image.size + size = os.path.getsize(input_image_path) + print(f"Original image size {fg.YELLOW}{size / 1000_000:.2f}MiB{RESET}") + + # Calculate the aspect ratio of the original image + aspect_ratio = original_size[0] / original_size[1] + + # Convert the target sixze to bytes + tz = int(target_size[:-2]) + if target_size[-2:].lower() == "mb": + target_size_bytes = tz * 1024 * 1024 + elif target_size[-2:].lower() == "kb": + target_size_bytes = tz * 1024 + else: + logger.warning( + f"Invalid units. Please use either {fg.BMAGENTA}'MB'{RESET}\ + or {fg.BMAGENTA}'KB'{RESET}" + ) + + # Calculate the new dimensions based on the target size + new_width, new_height = ImageCompressor.calculate_new_dimensions( + original_size, aspect_ratio, target_size_bytes + ) + resized_image = original_image.resize((new_width, new_height)) + resized_image.save(output_image_path, optimize=True, format="png") + t_size = os.path.getsize(output_image_path) / 1000_000 + + except KeyboardInterrupt: + print("\nQuit⏹️") + sys.exit(1) + except KeyError: + print("KeyError") + except Exception as e: + print(f"{fg.RED}{e}{RESET}") + + def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes): + try: + # Calculate the new dimensions based on the target size in bytes + original_size_bytes = ( + original_size[0] * original_size[1] * 3 + ) # Assuming 24-bit color depth + scale_factor = (target_size_bytes / original_size_bytes) ** 0.5 + + new_width = int(original_size[0] * scale_factor) + new_height = int(original_size[1] * scale_factor) + + return new_width, new_height + except KeyboardInterrupt: + print("\nQuit⏹️") + sys.exit(1) + except KeyError: + print("KeyError") + except Exception as 
e: + print(f"{fg.RED}{e}{RESET}") + + +class ImageConverter: + """Convert images file to from one format to another""" + + def __init__(self, input_file, out_format): + self.input_file = input_file + self.out_format = out_format + + def preprocess(self) -> list: + try: + files_to_process = [] + + if os.path.isfile(self.input_file): + files_to_process.append(self.input_file) + elif os.path.isdir(self.input_file): + if os.listdir(self.input_file) is None: + print("Cannot work with empty folder") + sys.exit(1) + for file in os.listdir(self.input_file): + file_path = os.path.join(self.input_file, file) + if os.path.isfile(file_path): + files_to_process.append(file_path) + + return files_to_process + except FileNotFoundError: + print("File not found❕") + sys.exit(1) + + def convert_image(self) -> os.PathLike: + try: + input_list = self.preprocess() + out_f = self.out_format.upper() + out_f = "JPEG" if out_f == "JPG" else out_f + input_list = [ + item + for item in input_list + if any( + item.lower().endswith(ext) + for ext in SUPPORTED_IMAGE_FORMATS.values() + ) + ] + + for file in tqdm(input_list, unit="files"): + if out_f.upper() in SUPPORTED_IMAGE_FORMATS: + _ = os.path.splitext(file)[0] + output_filename = _ + SUPPORTED_IMAGE_FORMATS[out_f].lower() + else: + print("Unsupported output format") + sys.exit(1) + """Load the image using OpenCV: """ + img = cv2.imread(file) + """Convert the OpenCV image to a PIL image: """ + pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) + + pil_img.save(output_filename, out_f) + + return output_filename + except KeyboardInterrupt: + print("\nQuit❕") + sys.exit(1) + except AssertionError: + print("Assertion failed.") + except KeyError: + print( + f"{fg.RED}ERROR:\tPending Implementation for{fg.ICYAN} {out_f} {fg.BWHITE}format{RESET}" + ) + except Exception as e: + print(f"{fg.RED}{e}{RESET}") + + +class GrayscaleConverter: + """ + Class for converting images to grayscale and saving the processed output. 
+ + Attributes: + input_obj (Optional[Union[list[str], str, os.PathLike]]): Input file(s) or directory. + output_file (Optional[Union[list[str], str, os.PathLike]]): Output file path or directory. + """ + + def __init__( + self, + input_obj: Union[List[str], Tuple[str], str, os.PathLike], + output_file: Optional[Union[list[str], str, os.PathLike]] = None, + ): + """ + Initializes the GrayscaleConverter object. + + Args: + input_obj: Input file(s) or directory. + output_file: Output file path or directory. + """ + self.input_obj = input_obj + self.output_file = output_file + + def get_output_file( + self, image_path: Optional[Union[str, os.PathLike]] = None + ) -> Union[str, os.PathLike]: + """ + Computes the correct output file path for a given input file. + + Args: + image_path: Path to the input file. + + Returns: + The computed output file path. + """ + if self.output_file and self.output_file.endswith( + tuple(SUPPORTED_IMAGE_FORMATS.values()) + ): + return os.path.abspath(self.output_file) + if self.output_file: + return os.path.abspath(os.path.splitext(self.output_file)[0] + ".png") + if image_path: + return os.path.abspath( + os.path.splitext(os.path.basename(image_path))[0] + ".png" + ) + return "default_output.txt" + + def run(self): + """ + Runs the image to grayscale conversion operation on the input files. + + Applies the for_loop_decorator to process each image in the input list. 
+ """ + file_list = DirectoryScanner(self.input_obj).run() + + @Decorators().for_loop_decorator(file_list) + def process_image(self, image_path): + """Processes a single image, converting it to grayscale and saving.""" + try: + img = cv2.imread(image_path) + if img is None: + raise FileNotFoundError(f"Could not read image: {image_path}") + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold( + gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU + ) + self.save_pil_image(thresh, image_path) + except FileNotFoundError as e: + logger.error(f"{fg.RED}{e}{RESET}") + except Exception as e: + raise + logger.error(f"An unexpected error occurred: {fg.RED}{e}{RESET}") + + process_image(self) + + def save_pil_image(self, thresh, image_path): + """ + Saves a NumPy array representing a grayscale image as a PIL Image. + + Args: + thresh: The NumPy array representing the grayscale image. + image_path: The path of the original image, used to derive the output filename. + """ + try: + img_pil = Image.fromarray(thresh) + filename = self.get_output_file(image_path) + filename = modify_filename_if_exists(filename) + img_pil.save(filename) + except Exception as e: + raise + logger.error(f"Unable to save the image: {fg.RED}{e}{RESET}") + + +class ImageDocxConverter: + """ + A class for converting images to DOCX documents. + """ + + def __init__( + self, + image_list: Union[Tuple[str], List[str]] = None, + input_dir: Union[str, os.PathLike] = None, + output_path: Union[str, os.PathLike] = None, + image_size: Tuple[float, float] = (6, 8), # Default to 6x8 inches + margin_mm: float = 25, # Default margin of 25mm (approx 1 inch) + ) -> None: + """ + Initializes the ImageToDocxConverter object. + + Args: + output_path: Path to save the output DOCX file + the file name e.g ~/Document/output.docx. + filename: Name of the output DOCX file. + image_size: Tuple (width, height) in inches. + margin_mm: Margin in millimeters. 
+ """ + self.image_list = image_list + self.input_dir = input_dir + self.output_path = output_path if output_path else self.ensure_output_file() + self.image_size = image_size + self.margin_mm = margin_mm + self.document = Document() # Create a new document object filename + + # Set document margins in the constructor + sections = self.document.sections + for section in sections: + section.top_margin = Mm(self.margin_mm) + section.bottom_margin = Mm(self.margin_mm) + section.left_margin = Mm(self.margin_mm) + section.right_margin = Mm(self.margin_mm) + self.create_output_directory() # Create output directory in constructor + + def ensure_output_file(self) -> os.PathLike: + file_name = "filewarp_image2docx.docx" + if self.input_dir: + base_dir = self.input_dir + else: + base_dir = Path(self.image_list[0]).parent + + file_path = os.path.join(base_dir, file_name) + + return file_path + + def create_output_directory(self) -> None: + """ + Creates the output directory if it does not exist. + """ + Path(self.output_path).parent.mkdir(parents=True, exist_ok=True) + + def get_valid_images(self, image_paths: List[str]) -> List[str]: + """ + Filters the list of image paths, returning only those with supported formats. + + Args: + image_paths: A list of file paths to images. + + Returns: + A list of file paths to valid images. + """ + valid_images = [] + for image_path in image_paths: + try: + if Image.open(image_path).format.lower() in [ + _formats[1:] for _formats in SUPPORTED_IMAGE_FORMATS.values() + ]: + valid_images.append(image_path) + else: + print( + f"{fg.MAGENTA}Skipping unsupported image format: {fg.CYAN}{image_path}{RESET}" + ) + except Exception as e: + print( + f"{fg.RED}Error processing image {fg.YELLOW}{image_path} - {fg.RED} {e}{RESET}" + ) + return valid_images + + def convert_images_to_docx(self, image_paths: List[str]) -> os.PathLike: + """ + Converts a list of images to a single DOCX document. + + Args: + image_paths: List of image file paths. 
+ """ + + valid_images = self.get_valid_images(image_paths) + if not valid_images: + print("No valid images to convert.") + return + + for image_path in valid_images: + try: + # Add a paragraph for each image + paragraph = self.document.add_paragraph() + run = paragraph.add_run() + run.add_picture( + image_path, + width=Inches(self.image_size[0]), + height=Inches(self.image_size[1]), + ) + # Add a page break after each image, except the last one + if image_path != valid_images[-1]: + self.document.add_page_break() + except Exception as e: + print( + f"{fg.RED}Error processing image {fg.YELLOW}{image_path}:{fg.RED} {e}{RESET}" + ) + + docx_file_path = ( + self.output_path + if self.output_path.endswith(("docx", "doc")) + else f"{self.output_path}.docx" + ) + self.document.save(docx_file_path) + return docx_file_path + + def convert_images_in_directory(self, input_dir, output_path) -> os.PathLike: + """ + Converts all images in a directory to a PDF. + + Args: + input_dir (str): The directory containing the images. + output_path (str): The path to save the generated Word File. + file_extensions (tuple, optional): Tuple of image file extensions to include. + """ + + if not os.path.exists(input_dir): + raise FileNotFoundError(f"Directory not found: {input_dir}") + + image_paths = sorted( + [os.path.join(input_dir, f) for f in os.listdir(input_dir)] + ) + + image_paths = self.get_valid_images(image_paths) + + if not image_paths: + raise ValueError(f"No images found in directory: {input_dir}") + + self.create_pdf_from_images(image_paths, output_path) + return output_path + + def run(self) -> os.PathLike: + """ + Runs the conversion process. + + Args: + image_paths: List of image file paths to convert. 
+ """ + if not any((self.image_list, self.input_dir)): + print("No image paths provided.") + sys.exit() + + if self.image_list and self.output_path: + if all(os.path.exists(img) for img in self.image_list): + docx_file_path = self.convert_images_to_docx(self.image_list) + elif self.input_dir and self.output_path: + if os.path.exists(self.input_dir): + docx_file_path = self.convert_images_in_directory( + self.input_dir, self.output_path + ) + + if docx_file_path: + # print(f"{fg.GREEN_RG}Successfully created DOCX: {fg.BLUE}{docx_file_path}{RESET}") + pass + + return docx_file_path + + def cli(self, args: List[str]) -> None: + """ + Main function to parse command line arguments and perform the conversion. + + Args: + args: List of command line arguments. + """ + if not args or "-h" in args or "--help" in args: + print( + """ + Usage: python image_to_docx.py [options] image1 image2 ... imageN + + Options: + -h, --help show this help message and exit + -o, --output PATH path to save the output DOCX file (default: current directory) + -n, --name FILENAME name of the output DOCX file (default: output_document) + -s, --size WIDTHxHEIGHT size of images in inches (e.g., 6x8) (default: 6x8) + -m, --margin MARGIN_MM margin in millimeters (default: 25) + """ + ) + sys.exit() + + image_paths = [] + output_path = "." # Current directory + filename = "output_document" + image_size = (6, 8) # Default 6x8 inches + margin_mm = 25 + + i = 1 + while i < len(args): + if args[i] in ("-o", "--output"): + output_path = args[i + 1] + i += 2 + elif args[i] in ("-n", "--name"): + filename = args[i + 1] + i += 2 + elif args[i] in ("-s", "--size"): + try: + size_str = args[i + 1] + width, height = map(float, size_str.split("x")) + image_size = (width, height) + except ValueError: + print("Invalid size format. 
Please use WIDTHxHEIGHT (e.g., 6x8).") + sys.exit(1) + i += 2 + elif args[i] in ("-m", "--margin"): + try: + margin_mm = float(args[i + 1]) + except ValueError: + print("Invalid margin format. Please provide a numeric value.") + sys.exit(1) + i += 2 + else: + if not args[i].startswith("-"): + image_paths.append(args[i]) + i += 1 + else: + print(f"Unknown argument: {args[i]}") + sys.exit(1) + + converter = ImageDocxConverter(output_path, filename, image_size, margin_mm) + converter.run(image_paths) + + +class ImagePdfConverter: + """ + A class for converting images to PDF. + """ + + def __init__( + self, + image_list: Union[List[str], Tuple[str]] = None, + input_dir=None, + output_pdf_path=None, + page_size=letter, + order: bool = False, + base: bool = False, + walk: bool = False, + clean: bool = False, + ): + self.image_list = image_list + self.input_dir = input_dir + self.page_size = page_size + self.order = order + self.base = base + self.walk = walk + self.clean = clean + self.output_pdf_path = ( + output_pdf_path if output_pdf_path else self.ensure_output_file() + ) + + def ensure_output_file(self) -> os.PathLike: + file_name = "filewarp_image2pdf.pdf" + if self.input_dir: + base_dir = self.input_dir + if self.base: + one_file = os.listdir(self.input_dir)[0] + base_name, ext = os.path.splitext(one_file) + if "_img_" in base_name: + base_name = base_name.split("_img_")[0] + file_name = base_name + ".pdf" + else: + file_name = self.input_dir.split("_imgs")[0] + ".pdf" + else: + base_dir = Path(self.image_list[0]).parent + + file_path = os.path.join(base_dir, file_name) + + return file_path + + def _clean(self, dirs: list): + for d in dirs: + abspath = os.path.abspath(d) + # print(f"{fg.BWHITE}Nuke: {fg.BYELLOW}{abspath}{fg.RESET}") + if ( + os.path.exists(d) and os.path.isdir(d) + # and Path(d).is_relative_to(os.path.expanduser("~")) + ): + shutil.rmtree(abspath) + + def create_pdf_from_images( + self, image_paths, output_pdf_path, resize_to_fit=True + ) -> 
os.PathLike: + """ + Creates a PDF from a list of image paths. + + Args: + image_paths (list): A list of image file paths. + output_pdf_path (str): The path to save the generated PDF. + resize_to_fit (bool, optional): Whether to resize images to fit the page. Defaults to True. + + Raises: + FileNotFoundError: If any image path is invalid. + ValueError: If image_paths is empty or contains non-image files. + Exception: for pillow image opening errors, or reportlab canvas errors. + """ + + if not image_paths: + raise ValueError("Image paths list is empty.") + + for image_path in image_paths: + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + try: + Image.open(image_path) + except Exception as e: + raise ValueError(f"Error opening image {image_path}: {e}") + + try: + c = canvas.Canvas(output_pdf_path, pagesize=self.page_size) + width, height = self.page_size + + for image_path in image_paths: + img = Image.open(image_path) + img_width, img_height = img.size + + if resize_to_fit: + ratio = min(width / img_width, height / img_height) + new_width = img_width * ratio + new_height = img_height * ratio + x = (width - new_width) / 2 + y = (height - new_height) / 2 + else: + x = (width - img_width) / 2 + y = (height - img_height) / 2 + new_width = img_width + new_height = img_height + + c.drawImage( + image_path, + x, + y, + width=new_width, + height=new_height, + preserveAspectRatio=True, + ) + c.showPage() + + c.save() + + return output_pdf_path + except Exception as e: + raise Exception(f"Error creating PDF: {e}") + + @staticmethod + def ensure_format(input_image) -> os.PathLike: + from ..imagepy.converter import ImageConverter + + converter = ImageConverter(input_image, "png") + output_image = converter.convert_image() + return output_image + + def extract_img_number(self, filename): + match = re.search(r"_img_(\d+)", filename) + return int(match.group(1)) if match else float("inf") + + def _sort(self, obj, ext): + if 
self.order: + if isinstance(obj, list): + return sorted( + obj, + key=lambda f: self.extract_img_number(f), + ) + return sorted( + [ + os.path.join(obj, f) + for f in os.listdir(obj) + if f.lower().endswith(ext) + ], + key=lambda f: self.extract_img_number(f), + ) + else: + return sorted( + [ + os.path.join(obj, f) + for f in os.listdir(obj) + if f.lower().endswith(ext) + ] + ) + + def convert_images_in_directory_recursive( + self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png") + ): + """ + Recursively walks through a directory and its subdirectories, + converting images in each folder into a separate PDF. + + Args: + input_dir (str): Root directory containing images. + output_root (str): Directory to save the generated PDFs. + file_extensions (tuple): Supported image extensions. + """ + try: + if not os.path.exists(input_dir): + raise FileNotFoundError(f"Directory not found: {input_dir}") + + # if not os.path.exists(output_root): + # os.makedirs(output_root) + dclean = [] + for root, _, files in os.walk(input_dir): + image_paths = [ + os.path.join(root, f) + for f in files + if f.lower().endswith(file_extensions) + ] + + if not image_paths: + continue # No valid images in this directory + + # Optional: sort images with your custom logic + image_paths = self._sort(image_paths, file_extensions) + + # Ensure formats are valid + for index, image in enumerate(image_paths): + if not image.lower().endswith(file_extensions): + image_paths[index] = self.ensure_format(image) + + # Create a relative PDF name based on the subdir structure + fname = os.path.split(root)[-1].split("_imgs")[0] + ".pdf" + relative_path = os.path.join( + os.path.dirname((os.path.relpath(root, input_dir))), fname + ) + # Host dir for images to be cleaned is clean is on + dname = os.path.relpath(root, input_dir) + dclean.append(dname) + + # pdf_name = relative_path.replace(os.sep, "_") + ".pdf" + # pdf_output_path = os.path.join(output_root, pdf_name) + + # Create the PDF for 
this folder + self.create_pdf_from_images(image_paths, relative_path) + print(f"{fg.BWHITE}Created PDF{RESET}: {relative_path}") + if self.clean: + self._clean(dclean) + except Exception as e: + print(f"\033[31m{e}\033[0m") + sys.exit(1) + + def convert_images_in_directory( + self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png") + ) -> os.PathLike: + try: + """ + Converts all images in a directory to a PDF. + + Args: + input_dir (str): The directory containing the images. + output_pdf_path (str): The path to save the generated PDF. + file_extensions (tuple, optional): Tuple of image file extensions to include. + """ + + if not os.path.exists(input_dir): + raise FileNotFoundError(f"Directory not found: {input_dir}") + + image_paths = self._sort(input_dir, ext=file_extensions) + + for index, image in enumerate(image_paths): + if not image.endswith(file_extensions): + image_paths[index] = self.ensure_format(image) + + if not image_paths: + raise ValueError( + f"\033[31mNo images found in directory:\033[1m {input_dir}\033[0m" + ) + + self.create_pdf_from_images(image_paths, output_pdf_path) + return output_pdf_path + except ValueError as e: + print(e) + sys.exit(1) + + def run(self) -> os.PathLike: + """ + Runs the PDF creation based on the object's initialization parameters. 
+ """ + if self.image_list and self.output_pdf_path: + if all(os.path.exists(img) for img in self.image_list): + output_pdf_path = self.create_pdf_from_images( + self.image_list, self.output_pdf_path + ) + # print(f"{fg.GREEN}PDF created successfully from directory!{RESET}") + # print(f"{fg.GREEN}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}") + else: + print(f"{fg.RED}One or more images in the list do not exist.{RESET}") + elif self.input_dir and self.output_pdf_path: + if os.path.exists(self.input_dir): + if self.walk: + output_pdf_path = self.convert_images_in_directory_recursive( + self.input_dir, self.output_pdf_path + ) + else: + output_pdf_path = self.convert_images_in_directory( + self.input_dir, self.output_pdf_path + ) + # print(f"{fg.GREEN}PDF created successfully from directory!{RESET}") + # print(f"{fg.BWHITE}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}") + else: + print(f"Directory {fg.YELLOW}{self.input_dir}{RESET} does not exist.") + else: + print( + "Please provide either image_list and output_pdf_path or input_dir and output_pdf_path during object instantiation." + ) + return + return output_pdf_path diff --git a/filewarp/core/image/extractor.py b/filewarp/core/image/extractor.py new file mode 100644 index 0000000..c12872c --- /dev/null +++ b/filewarp/core/image/extractor.py @@ -0,0 +1,269 @@ +import sys +import fitz # PyMuPDF for PDF +from docx import Document +from PIL import Image +from io import BytesIO +from typing import List, Union, Tuple +from pathlib import Path +import os +from ...utils.colors import fg, rs +from ...utils.file_utils import dirbuster + +RESET = rs + + +class ImageExtractor: + """ + Base class for extracting images from document files. + """ + + def __init__(self, output_path: str = None, tsize: tuple = (20, 20)) -> None: + """ + Initializes the ImageExtractor object. + + Args: + output_path: Path to save the extracted images. 
+ """ + base_path = ( + os.path.join(output_path, "FilemacExctracts") + if output_path + else os.path.join(os.path.abspath(os.getcwd()), "FilemacExctracts") + ) + self.output_path = base_path + self.tsize = tsize + self.output_base = None + + def _extract_images(self, file_path: str) -> List[Image.Image]: + """ + Extracts images from the given file. This is a placeholder + for the actual extraction logic, to be implemented by + subclasses. + + Args: + file_path: Path to the document file. + + Returns: + A list of PIL Image objects. Returns an empty list if no images + are found or if there is an error. + """ + raise NotImplementedError("Subclasses must implement this method") + + def extract_and_save_images(self, file_path: str) -> None: + """ + Extracts and saves images from the given file. + + Args: + file_path: Path to the document file. + """ + images = self._extract_images(file_path) + self.output_base = os.path.split(file_path)[0] + if not images: + print(f"No images found in {file_path}") + return + + base_filename = Path(file_path).stem + self._save_images(images, base_filename) + + def is_page_sized_image(self, img, target_size=(595, 842), tolerance=1): + """Check if image is approximately page-sized (default: A4 at 72 DPI).""" + img_width, img_height = img.size + target_width, target_height = self.tsize if self.tsize else target_size + + within_width = ( + img_width > target_width + ) # abs(img_width - target_width) >= target_width * tolerance + within_height = ( + img_height > target_height + # abs(img_height - target_height) >= target_height * tolerance + ) + + return within_width and within_height + + def _save_images(self, images: List[Image.Image], base_filename: str) -> None: + """ + Saves the extracted images to the output directory. + + Args: + images: A list of PIL Image objects. + base_filename: The base filename to use when saving images (e.g., 'page_1'). 
+ """ + self.output_path = os.path.join(self.output_base, f"{base_filename}_imgs") + os.makedirs(self.output_path, exist_ok=True) # Ensure directory exists + + for i, img in enumerate(images): + try: + if self.tsize and not self.is_page_sized_image(img): + print( + f"Skipping image {i + 1}: ({fg.CYAN}{img.size}{RESET}) <= {fg.BLUE}{self.tsize}{RESET}" + ) + continue + + # Generate a unique filename for each image + img_format = img.format or "PNG" # Default to PNG if format is None + safe_filename = f"{base_filename}_img_{i + 1}.{img_format.lower()}" + + img_path = Path(self.output_path) / safe_filename + img.save(img_path) + print(f"Saved image: {fg.GREEN}{img_path}{RESET}") + except Exception as e: + raise + print(f"Error saving image {i + 1} from {base_filename}: {e}") + + +class PdfImageExtractor(ImageExtractor): + """ + Extracts images from PDF files. + """ + + def __init__(self, output_path, size): + super().__init__( + output_path, size or (20, 20) + ) # Call Parent.__init__ with value + + def _extract_images(self, file_path: str) -> List[Image.Image]: + """ + Extracts images from a PDF file using PyMuPDF. + + Args: + file_path: Path to the PDF file. + + Returns: + A list of PIL Image objects. 
+ """ + print(f"{fg.BWHITE}File: {fg.BLUE}{file_path}{RESET}") + images: List[Image.Image] = [] + try: + pdf_document = fitz.open(file_path) + for page_index in range(len(pdf_document)): + page = pdf_document.load_page(page_index) + image_list = page.get_images(full=True) # Get detailed image info + for img_index, img_info in enumerate(image_list): + xref = img_info[0] # Get the XREF of the image + base_image = pdf_document.extract_image(xref) + image_bytes = base_image["image"] + try: + pil_image = Image.open(BytesIO(image_bytes)) + images.append(pil_image) + except Exception as e: + print( + f"Error processing image {img_index + 1} from PDF page {page_index + 1}: {e}" + ) + pdf_document.close() + except Exception as e: + print(f"Error processing PDF file: {file_path} - {e}") + return images + + +class DocxImageExtractor(ImageExtractor): + """ + Extracts images from DOCX files. + """ + + def __init__(self, output_path, size): + super().__init__( + output_path, size or (20, 20) + ) # Call Parent.__init__ with value + + def _extract_images(self, file_path: str) -> List[Image.Image]: + """ + Extracts images from a DOCX file. + + Args: + file_path: Path to the DOCX file. + + Returns: + A list of PIL Image objects. + """ + images: List[Image.Image] = [] + try: + docx_document = Document(file_path) + for part in docx_document.part.rels.values(): + if "image" in part.target_ref: + image_bytes = part.target_part.blob + try: + pil_image = Image.open(BytesIO(image_bytes)) + images.append(pil_image) + except Exception as e: + print(f"Error processing image from DOCX: {e}") + except Exception as e: + print(f"Error processing DOCX file: {file_path} - {e}") + return images + + +def process_files( + file_paths: Union[Tuple[str], List[str]], + output_path: str = os.getcwd(), + tsize: tuple = None, +) -> None: + """ + Processes the given files and extracts images from them. + + Args: + file_paths: List of paths to the files to process. 
+ output_path: Path to save the extracted images. + """ + try: + for file_path in file_paths: + if os.path.isdir(file_path): + files = dirbuster(file_path) + process_files(files, tsize=tsize) + if file_path.lower().endswith(".pdf"): + extractor = PdfImageExtractor(output_path, tsize) + extractor.extract_and_save_images(file_path) + elif file_path.lower().endswith((".docx")): + extractor = DocxImageExtractor(output_path, tsize) + extractor.extract_and_save_images(file_path) + else: + print(f"Skipping unsupported file format: {file_path}") + except KeyboardInterrupt: + print("\nQuit") + sys.exit() + + +def main(args: List[str]) -> None: + """ + Main function to parse command line arguments and perform image extraction. + + Args: + args: List of command line arguments. + """ + if not args or "-h" in args or "--help" in args: + print( + """ + Usage: python extract_images.py [options] file1 file2 ... fileN + + Options: + -h, --help show this help message and exit + -o, --output PATH path to save the extracted images (default: extracted_images) + """ + ) + sys.exit() + + file_paths = [] + output_path = "extracted_images" # Default output path + + i = 1 + while i < len(args): + if args[i] in ("-o", "--output"): + output_path = args[i + 1] + i += 2 + else: + if not args[i].startswith("-"): + file_paths.append(args[i]) + i += 1 + else: + print(f"Unknown argument: {args[i]}") + sys.exit(1) + + file_paths.append( + "/home/skye/Downloads/KDEConnect/SPE 2304 Server Side Programming Year III Semester II.pdf" + ) + if not file_paths: + print("No files provided for image extraction.") + sys.exit(1) + + process_files(file_paths, output_path) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/filewarp/core/ocr.py b/filewarp/core/ocr.py new file mode 100644 index 0000000..1ee6d70 --- /dev/null +++ b/filewarp/core/ocr.py @@ -0,0 +1,199 @@ +import logging +import os +import sys +from typing import Union, List, Optional + +import cv2 +import pytesseract +from PIL import 
Image +from rich.progress import Progress +from ..utils.colors import fg, bg, rs +from ..utils.file_utils import modify_filename_if_exists, DirectoryScanner + + +RESET = rs + +# Define constants for better readability and maintainability +SUPPORTED_IMAGE_FORMATS = {"png", "jpg", "jpeg"} +DEFAULT_CONFIG = "-l eng --oem 3 --psm 6" +DEFAULT_SEPARATOR = "\n" + +# Configure logging at the module level +logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s") +logger = logging.getLogger(__name__) + + +class ExtractText: + """ + Extracts text from images using OCR, with options for file/directory input, + output file naming, and text separation. + """ + + def __init__( + self, + input_obj: Optional[Union[list[str], tuple[str], str, os.PathLike]], + sep: str = DEFAULT_SEPARATOR, + ): + """ + Initializes the ExtractText object. + + Args: + input_obj: Path to the image file or directory containing images. + sep: Separator to use when joining extracted text. Defaults to newline. + """ + if not isinstance(input_obj, (str, list, os.PathLike)): + raise TypeError( + f"input_obj must be a string or os.PathLike, not {type(input_obj)}" + ) + self.input_obj = input_obj + self.sep = sep + self.sep = ( + "\n" + if self.sep == "newline" + else ( + "\t" + if self.sep == "tab" + else ( + " " + if self.sep == "space" + else ("" if self.sep == "none" else self.sep) + ) + ) + ) + + """ + separator_map = { + "newline": "\n", + "tab": "\t", + "space": " ", + "none": "", + } + + self.sep = separator_map.get(self.sep, self.sep) + """ + + def _process_image(self, image_path: str, output_file: str) -> str: + """ + Extracts text from a single image and saves it to a file. + + Args: + image_path: Path to the image file. + output_file: Path to the output text file. + + Returns: + The extracted text. Returns an empty string on error. 
+ """ + try: + # Load image using OpenCV + img = cv2.imread(image_path) + if img is None: + raise ValueError(f"Could not read image: {image_path}") + + # Preprocess image for better OCR results + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + img_pil = Image.fromarray(thresh) + + # Perform OCR using pytesseract + self.sep = ( + self.sep.replace("\r\n", "\n") + .replace("\\n", "\n") + .replace("\r", "\n") + .replace("\r\t", "\t") + .replace("\\t", "\t") + ) + + text = pytesseract.image_to_string(img_pil, config=DEFAULT_CONFIG) + text = self.sep.join(text.splitlines()) # handle empty lines + logger.info("") + logger.info(f"Extracted text from {image_path}") + print(f"{fg.YELLOW}{text}{RESET}") + + # Save text to file + with open(output_file, "w", encoding="utf-8") as file: # Specify encoding + file.write(text) + return text + + except FileNotFoundError as e: + logger.error(f"File not found: {e}") + except IOError as e: + logger.error(f"IOError: {e}") + except pytesseract.TesseractError as e: + logger.error(f"Tesseract error: {e}") + except cv2.error as e: + logger.error(f"OpenCV error processing {image_path}: {e}") + except Exception as e: + logger.error( + f"An unexpected error occurred while processing {image_path}: {e}" + ) + + return "" # Return empty string on error + + def run( + self, output_file: Optional[Union[list[str], str, os.PathLike]] = None + ) -> Optional[List[str]]: + """ + Runs the OCR extraction process on the input file(s) or directory. + + Args: + output_file: Optional path to a single output file. If provided, all + extracted text will be written to this file. If None, output + files will be generated based on input image names. + + Returns: + A list of extracted texts, or None if no images were processed. + If output_file is provided, returns a list with a single string. 
+ """ + + image_list = DirectoryScanner(self.input_obj).run() + num_images = len(image_list) + extracted_texts = [] + + if num_images == 0: + logger.warning("No images found to process.") + return None + + try: + if output_file: + # Process all images and concatenate text into one output file + all_text = "" + # with Progress() as progress: + # task = progress.add_task( + # "[yellow]Extracting text...", total=num_images + # ) + for image_path in image_list: + all_text += ( + self._process_image( + image_path, os.path.splitext(output_file)[0] + ".txt" + ) + + self.sep + ) + # progress.update(task, advance=1) + with open(output_file, "w", encoding="utf-8") as f: + f.write(all_text) + return [all_text] # Return a list containing the combined text + + else: + # Process each image individually, creating separate output files + with Progress() as progress: + task = progress.add_task( + "[yellow]Extracting text...", total=num_images + ) + for image_path in image_list: + _output_file = ( + os.path.splitext(os.path.basename(image_path))[0] + ".txt" + ) + _output_file = modify_filename_if_exists(_output_file) + text = self._process_image(image_path, _output_file) + extracted_texts.append(text) + progress.update(task, advance=1) + return extracted_texts + + except KeyboardInterrupt: + print( + f"\n[{bg.YELLOW}X{RESET}]Operation interrupted by {fg.UBLUE}user{RESET}.[/]" + ) + sys.exit(0) + except Exception as e: + logger.error(f"An unexpected error occurred: {bg.RED}{e}{RESET}") + return None # Ensure None is returned on error diff --git a/filewarp/core/pdf/core.py b/filewarp/core/pdf/core.py new file mode 100644 index 0000000..790cb7b --- /dev/null +++ b/filewarp/core/pdf/core.py @@ -0,0 +1,403 @@ +import os +import subprocess +import sys + +import PyPDF2 +from pdf2image import convert_from_path +from PIL import Image # ImageSequence +from tqdm.auto import tqdm +from ...utils.simple import logger +from ..document import DocumentConverter +from ..exceptions import 
FilemacError, FileSystemError +from ...utils.colors import fg, bg, rs +from ..ocr import ExtractText + + +RESET = rs +DEFAULT_SEPARATOR = "\n" + + +class PDF2LongImageConverter: + def __init__(self, pdf_file): + self.pdf_file = pdf_file + + def preprocess(self): + ext = self.pdf_file.split(".")[-1].lower() + if ext == "pdf": + long_image = self.convert(self.pdf_file) + return long_image + if ext == "doc" or ext == "docx": + conv = DocumentConverter(self.pdf_file) + + path = conv.word_to_pdf() + long_image = self.convert(path) + return long_image + elif ext == "odt": + return self.subprocess_executor() + + def subprocess_executor(self): + # pdf_file = ext = doc.split('.')[0] + 'docx' + # logger.info(f"{fg.DCYAN}Invoked soffice ..{RESET}") + subprocess.call( + [ + "soffice", + "--convert-to", + "pdf", + self.pdf_file, + "--outdir", + os.path.dirname(self.pdf_file), + ] + ) + pdf_file = os.path.abspath( + os.path.dirname(self.pdf_file) + + "/" + + (self.pdf_file.split("/")[-1].split(".")[0]) + + ".pdf" + ) + long_image = self.convert(pdf_file) + return long_image + + @staticmethod + def convert(pdf_file): + try: + # logger.info(f"{fg.BYELLOW}Read pdf{RESET}") + images = convert_from_path(pdf_file) + out_img = pdf_file[:-4] + ".png" + heights = [img.size[1] for img in images] + total_height = sum(heights) + max_width = max([img.size[0] for img in images]) + + # logger.info(f"{fg.DCYAN}Draw image ..{RESET}") + new_im = Image.new("RGB", (max_width, total_height)) + + y_offset = 0 + for i, img in enumerate(images): + # print(f"{fg.BBLUE}{i}{RESET}", end="\r") + new_im.paste(img, (0, y_offset)) + y_offset += img.size[1] + # logger.info(f"{fg.BYELLOW}Save dest: {fg.BMAGENTA}{out_img}{RESET}") + new_im.save(out_img) + # logger.info(f"{fg.BGREEN}Success😇✅{RESET}") + return out_img + except FileNotFoundError: + raise FileSystemError(f"{fg.RED}File not found!{RESET}") + except KeyboardInterrupt: + logger.DEBUG("\nQuit❕") + sys.exit() + except Exception as e: + raise 
FilemacError(f"{fg.RED}{e}{RESET}") + + +class PageExtractor: + """ + Extract pages specified by pange range from a pdf file and save them as a new file + Args: + Pdf -> pdf file to be operated on. + start -> Page in from which to start extraction (default 1) + stop -> Stop page for extraction default is last page (-1) + Range of pages to be extracted is given by Llimit and Ulimit inclusive + Returns: + outf-> the output file contsining the extracted pages + """ + + def __init__( + self, + pdf, + start: int = 1, + stop: int = -1, + ): + self.pdf = pdf + self.start = start # max(start - 1, 1) + self.stop = stop + + # Normalize indexing + if self.start != 0: + self.start = self.start - 1 + + # Due to 0 indexing we wont subtract + # if self.stop: + # self.stop = self.stop - 1 + + if self.stop is None: + # Do not add due to 0 indexing + self.stop = int(self.start) + 1 + + self.outf = f"{pdf.split('.')[0]}_{start}_{self.stop}_extract.pdf" + + def getPages(self): + """ + Extract the the page range. Write the pages to new pdf file + if self.stop (Ulimit) == -1 all pages are extracted from the Llimit to the last Page + """ + try: + reader = PyPDF2.PdfReader(self.pdf) + + if self.stop == -1: + self.stop = len(reader.pages) + + pdf_writer = PyPDF2.PdfWriter() + for page_num in range(self.start, self.stop): + # print(f"{fg.BBLUE}[📄]{RESET}{fg.DCYAN}Page {page_num + 1}{RESET}") + page = reader.pages[page_num] + pdf_writer.add_page(page) + + # Write the merged PDF to the output file + with open(self.outf, "wb") as out_file: + pdf_writer.write(out_file) + # print(f"{fg.BBLUE}[+]{RESET} {fg.BWHITE}File {fg.BMAGENTA}{self.outf}{RESET}") + return self.outf + except KeyboardInterrupt: + print("\n [!] 
Quit") + exit(2) + except FileNotFoundError as e: + print(f"[{bg.BRED}-{RESET}] {fg.RED}{e}{RESET}") + except Exception as e: + print(e) + # raise + + @staticmethod + def run(kwargs): + """ + Args: + kwargs type: list - Contains Upper and lower limit (first and last page) + Returns: + None + """ + if len(kwargs) > 2: + arg1, arg2, arg3 = kwargs + init = PageExtractor(arg1, int(arg2), int(arg3)) + init.getPages() + elif len(kwargs) == 2: + ( + arg1, + arg2, + ) = kwargs + arg2 = int(arg2) + # arg3 = arg2 + init = PageExtractor(arg1, arg2) + init.getPages() + else: + pass + + +class PDFCombine: + def __init__(self, obj1, obj2=None, outf=None, order="AA"): + self.obj1 = obj1 + self.obj2 = obj2 + self.outf = outf + self.order = order + + if self.outf is None: + try: + self.outf = os.path.join( + os.path.join( + os.path.split(self.obj1[0])[0], + f"{os.path.split(self.obj1[0])[1].split('.')[0]}_{os.path.split(self.obj1[1])[1].split('.')[0]}_filewarp.pdf", + ) + ) + except Exception: + self.outf = "Filemac_pdfjoin.pdf" + + def controller(self): + if self.order in {"AB", "BA", "ABA", "BAB"}: + self.combine_pdfs_ABA_interleave() + elif self.order in {"AA", "BB", "AAB", "BBA"}: + if type(self.obj1) is list: + self.merge_All_AAB() + else: + self.combine_pdfs_AAB_order() + + def combine_pdfs_ABA_interleave(self): + try: + pdf_writer = PyPDF2.PdfWriter() + # Create PdfReader objects for each input PDF file + pdf_readers = [PyPDF2.PdfReader(file) for file in self.obj1] + + max_pages = max(len(reader.pages) for reader in pdf_readers) + # pdf_readers = [PyPDF2.PdfReader(pdf) for pdf in pdf_files] + + for page_num in range(max_pages): + for reader in pdf_readers: + if page_num < len(reader.pages): + # print(f"{fg.CYAN}Page {fg.BBLUE}{page_num + 1}/{len(reader.pages)}{RESET}", end="\r") + # Order pages in terms of page1-pd1, page2-pd2 + page = reader.pages[page_num] + pdf_writer.add_page(page) + + with open(self.outf, "wb") as self.outf: + pdf_writer.write(self.outf) + # 
print(f"\n{fg.FCYAN}PDFs combined with specified page order into{RESET}{fg.BBLUE} {self.outf.name}{RESET}") + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + print(f"{fg.RED}{e}{RESET}") + + def combine_pdfs_AAB_order(self): + try: + pdf_writer = PyPDF2.PdfWriter() + reader1 = PyPDF2.PdfReader(self.obj1) + reader2 = PyPDF2.PdfReader(self.obj2) + # pdf_readers = [PyPDF2.PdfReader(pdf) for pdf in pdf_files] + + # print(f"{fg.CYAN}File A{RESET}") + for p1_num in range(len(reader1.pages)): + # print(f"Page {p1_num + 1}/{len(reader1.pages)}", end="\r") + p1 = reader1.pages[p1_num] + # Order pages in terms of page1-pd1, page2-pd2 + pdf_writer.add_page(p1) + + # print(f"\n{fg.CYAN}File B{RESET}") + for p2_num in range(len(reader2.pages)): + # print(f"Page {p2_num + 1}/{len(reader2.pages)}", end="\r") + p2 = reader2.pages[p2_num] + pdf_writer.add_page(p2) + + with open(self.outf, "wb") as self.outf: + pdf_writer.write(self.outf) + # print(f"\n{fg.FCYAN}PDFs combined with specified page order into{RESET}{fg.BBLUE} {self.outf.name}{RESET}") + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + print(f"{fg.RED}{e}{RESET}") + + def merge_All_AAB(self): + try: + pdf_writer = PyPDF2.PdfWriter() + + # List to store the reader objects + pdf_readers = [PyPDF2.PdfReader(file) for file in self.obj1] + + # max_pages = max(len(reader.pages) for reader in pdf_readers) + + for reader in pdf_readers: + for page_num in range(len(reader.pages)): + # print(f"{fg.BWHITE}Page {fg.CYAN}{page_num + 1}/{len(reader.pages)}{RESET}",end="\r") + page = reader.pages[page_num] + pdf_writer.add_page(page) + + # Write the merged PDF to the output file + with open(self.outf, "wb") as out_file: + pdf_writer.write(out_file) + # print(f"\n{fg.FCYAN}PDFs combined with specified page order into{RESET}{fg.BBLUE} {self.outf}{RESET}") + except KeyboardInterrupt: + print("\nQuit!") + sys.exit(1) + except Exception as e: + 
print(f"{fg.RED}{e}{RESET}") + + +class PDFScanner: + """Implementation of scanning to extract data from pdf files and images + input_file -> file to be scanned pdf,image + Args: + input_file->file to be scanned + no_strip-> Preserves text formating once set to True, default: False + Returns: + None""" + + def __init__(self, input_file, sep: str = DEFAULT_SEPARATOR): + self.input_file = input_file + self.sep = sep + + def preprocess(self): + files_to_process = [] + + if os.path.isfile(self.input_file): + files_to_process.append(self.input_file) + elif os.path.isdir(self.input_file): + for file in os.listdir(self.input_file): + file_path = os.path.join(self.input_file, file) + if os.path.isfile(file_path): + files_to_process.append(file_path) + + return files_to_process + + def scanPDF(self, obj=None): + """Obj - object for scanning where the object is not a list""" + pdf_list = self.preprocess() + pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] + if obj: + pdf_list = [obj] + + for pdf in pdf_list: + out_f = pdf[:-3] + "txt" + # print(f"{fg.YELLOW}Read pdf ..{RESET}") + + with open(pdf, "rb") as f: + reader = PyPDF2.PdfReader(f) + text = "" + + pg = 0 + for page_num in range(len(reader.pages)): + pg += 1 + + # print(f"{fg.BYELLOW}Progress:{RESET}", end="") + # print(f"{fg.CYAN}{pg}/{len(reader.pages)}{RESET}", end="\r") + page = reader.pages[page_num] + text += page.extract_text() + + # print(f"\n{text}") + # print(f"\n{fg.YELLOW}Write text to {fg.GREEN}{out_f}{RESET}") + with open(out_f, "w") as f: + f.write(text) + + # print(f"\n{fg.BGREEN}Ok{RESET}") + + def scanAsImgs(self): + file = self.input_file + mc = DocumentConverter(file) + img_objs = mc.doc2image() + + text = "" + + for i in tqdm(img_objs, desc="Extracting", leave=False): + extract = ExtractText(i, self.sep) + _text = extract.run() + if _text is not None: + text += "".join(_text) + with open(f"{self.input_file[:-4]}_filewarp.txt", "a") as _writer: + _writer.write(text) + + def 
_cleaner_(): + # print(f"{fg.FMAGENTA}Clean") + for obj in img_objs: + if os.path.exists(obj): + # print(obj, end="\r") + os.remove(obj) + txt_file = f"{obj[:-4]}.txt" + if os.path.exists(txt_file): + # print(f"{bg.CYAN}{txt_file}{RESET}", end="\r") + os.remove(txt_file) + + _cleaner_() + # from ...utils.screen import clear_screen + + # clear_screen() + # print(f"{bg.GREEN}Full Text{RESET}") + # print(text) + # print(f"{fg.BWHITE}Text File ={fg.IGREEN}{self.input_file[:-4]}_filewarp.txt{RESET}") + # print(f"{fg.GREEN}Ok✅{RESET}") + return text + + def scanAsLongImg(self) -> bool: + """Convert the pdf to long image for scanning - text extraction""" + + try: + pdf_list = self.preprocess() + pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")] + from ..pdf.core import PDF2LongImageConverter + + for file in pdf_list: + converter = PDF2LongImageConverter(file) + file = converter.preprocess() + + tx = ExtractText(file, self.sep) + text = "".join(tx.run()) + if text is not None: + print(text) + # print(f"{fg.GREEN}Ok{RESET}") + return True + except Exception as e: + print(e) diff --git a/filewarp/core/recorder.py b/filewarp/core/recorder.py new file mode 100644 index 0000000..a8843db --- /dev/null +++ b/filewarp/core/recorder.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 +import numpy as np +import sounddevice as sd +import wavio +import time +from pynput import keyboard +import sys + + +class SoundRecorder: + def __init__(self, frequency=44100, channels=2, dtype=np.int16): + self.fs = frequency # Sample rate (samples per second) + self.channels = 2 # Number of audio channels + self.dtype = dtype # Data type for the recording + + self.paused = False # Global flag for pause + self.recording = [] # Buffer for recorded chunks + self.start_time = 0 # Start time for elapsed time tracking + self.elapsed_time = 0 # Track elapsed time + self.running = True # Track recording status + self.filename = self.filename_prober() + + def format_time(self, seconds): + hours = 
int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + sec = int(seconds % 60) + return f"\033[34m{hours:02d}\033[35m:{minutes:02d}\033[32m:{sec:02d} \033[0m" + + def on_press(self, key): + # global paused, running + try: + if key == keyboard.Key.space: + self.paused = not self.paused # Toggle pause/resume + if self.paused: + print("\nPaused... Press SPACE to resume.") + else: + print("\nRecording resumed... Press SPACE to pause.") + elif key == keyboard.Key.enter: + self.running = False # Stop recording + print("\nRecording finished.") + return False # Stop listener + except Exception as e: + print(f"Error: {e}") + + def record_audio(self): + # global paused, recording, start_time, elapsed_time, running + print("Press SPACE to pause/resume, ENTER to stop and save.") + start_time = time.time() + + def callback(indata, frames, callback_time, status): + if not self.paused: + self.recording.append(indata.copy()) + self.elapsed_time = time.time() - start_time + print(f"Elapsed Time: {self.format_time(self.elapsed_time)}", end="\r") + + with sd.InputStream( + samplerate=self.fs, + channels=self.channels, + dtype=self.dtype, + callback=callback, + ): + with keyboard.Listener(on_press=self.on_press) as listener: + while self.running: + time.sleep(0.1) # Prevents high CPU usage + listener.stop() + + return ( + np.concatenate(self.recording, axis=0) + if self.recording + else np.array([], dtype=self.dtype) + ) + + def run(self): + try: + r_data = self.record_audio() + self.save_audio(r_data) + return self.filename + except KeyboardInterrupt: + sys.exit() + + def save_audio(self, recording): + if recording.size == 0: + print("No audio recorded.") + else: + wavio.write(self.filename, recording, self.fs, sampwidth=2) + print(f"Recording saved as {self.filename}") + + @staticmethod + def filename_prober(): + _filename = None + + while not _filename: + _filename = input("\033[94mEnter Desired File Name\033[0;1;89m:") + + filename = f"{_filename}.wav" if 
len(_filename.split(".")) < 2 else _filename + return filename + + +if __name__ == "__main__": + try: + filename = input("\033[94mEnter Desired File Name\033[0;1;89m:") + ".wav" + recorder = SoundRecorder() + file = recorder.run() + except KeyboardInterrupt: + print("\nQuit!") + exit(1) diff --git a/filewarp/core/svg/core.py b/filewarp/core/svg/core.py new file mode 100644 index 0000000..fcbede0 --- /dev/null +++ b/filewarp/core/svg/core.py @@ -0,0 +1,47 @@ +import cairosvg + + +class SVGConverter: + """ + A utility class for converting SVG files to various formats using CairoSVG. + Supported formats: PNG, PDF, SVG (optimized). + """ + + @staticmethod + def to_png(input_svg: str, output_path: str, is_string: bool = False): + """ + Convert SVG to PNG. + :param input_svg: Path to SVG file or raw SVG string. + :param output_path: Output PNG file path. + :param is_string: Set True if input_svg is raw SVG data. + """ + if is_string: + cairosvg.svg2png(bytestring=input_svg.encode(), write_to=output_path) + else: + cairosvg.svg2png(url=input_svg, write_to=output_path) + + @staticmethod + def to_pdf(input_svg: str, output_path: str, is_string: bool = False): + """ + Convert SVG to PDF. + :param input_svg: Path to SVG file or raw SVG string. + :param output_path: Output PDF file path. + :param is_string: Set True if input_svg is raw SVG data. + """ + if is_string: + cairosvg.svg2pdf(bytestring=input_svg.encode(), write_to=output_path) + else: + cairosvg.svg2pdf(url=input_svg, write_to=output_path) + + @staticmethod + def to_svg(input_svg: str, output_path: str, is_string: bool = False): + """ + Convert/Optimize SVG to SVG. + :param input_svg: Path to SVG file or raw SVG string. + :param output_path: Output SVG file path. + :param is_string: Set True if input_svg is raw SVG data. 
+ """ + if is_string: + cairosvg.svg2svg(bytestring=input_svg.encode(), write_to=output_path) + else: + cairosvg.svg2svg(url=input_svg, write_to=output_path) diff --git a/filewarp/core/text/core.py b/filewarp/core/text/core.py new file mode 100644 index 0000000..167b9b9 --- /dev/null +++ b/filewarp/core/text/core.py @@ -0,0 +1,111 @@ +"""Create a word document directly from a text file.""" + +from docx import Document +from docx.shared import Pt, RGBColor + +from ...utils.colors import fg, rs + +RESET = rs + + +class StyledText: + """ + Args: + obj-> input object (normally a formated text file) + fsize ->font-size default = 12: int + fstyle -> font-name default = Times New Roman: str + out_obj -> output object(file) name: str + Returns: + None + + Given obj -> Text file where: + '#' is used to specify formarting + Only three heading leavels are supported. + '#' Heading1, + '##' -> Heading2, + '###' -> Heading3 + """ + + def __init__( + self, obj, out_obj=None, fsize: int = 12, fstyle: str = "Times New Roman" + ): + self.obj = obj + self.out_obj = out_obj + self.fsize = fsize + self.fstyle = fstyle + if self.out_obj is None: + self.out_obj = f"{self.obj.split('.')[0]}_filewarp.docx" + + def text_to_word(self): + """ + Create new document, + heading_styles -> define formating + Open the text file and read it line by line. + For every line check whether it starts with '#' format specify , ommit the specifier and formart the line. + Strip empty spaces from every line. + Set body font to fstyle and font size to fsize. 
+ """ + + print(f"{fg.BWHITE}Set Font: {fg.CYAN}{self.fsize}{RESET}") + print(f"{fg.BWHITE}Set Style: {fg.CYAN}{self.fstyle}{RESET}") + # Create a new Document + doc = Document() + + # Define formatting for headings and body text + head_font_name = self.fstyle + heading_styles = { + # Heading 1 + 1: {"font_size": Pt(18), "font_color": RGBColor(126, 153, 184)}, + # Heading 2 + 2: {"font_size": Pt(16), "font_color": RGBColor(0, 120, 212)}, + # Heading 3 + 3: {"font_size": Pt(14), "font_color": RGBColor(0, 120, 212)}, + # Heading 4 + 4: {"font_size": Pt(13), "font_color": RGBColor(0, 120, 212)}, + } + + body_font_name = "Times New Roman" + body_font_size = Pt(self.fsize) + body_font_color = RGBColor(0, 0, 0) # Black color + + # Open the text file and read content + with open(self.obj, "r") as file: + lines = file.readlines() + + for i, line in enumerate(lines): + print( + f"{fg.BWHITE}Line: {fg.DCYAN}{i}{fg.YELLOW} of {fg.BLUE}{len(lines)}{RESET}", + end="\r", + ) + # Determine heading level or body text + if line.startswith("#"): + level = line.count("#") + level = min(level, 3) # Support up to 3 levels of headings + style = heading_styles.get(level, heading_styles[1]) + p = doc.add_paragraph() + # Remove '#' and extra space + run = p.add_run(line[level + 1 :].strip()) + run.font.size = style["font_size"] + run.font.name = head_font_name + run.font.color.rgb = style["font_color"] + p.style = f"Heading{level}" + else: + p = doc.add_paragraph() + run = p.add_run(line.strip()) + run.font.name = body_font_name + run.font.size = body_font_size + run.font.color.rgb = body_font_color + + # Save the document + print("\n") + doc.save(self.out_obj) + print( + f"{fg.BWHITE}Text file converted to Word document: {fg.MAGENTA}{self.out_obj}{RESET}" + ) + + +if __name__ == "__main__": + init = StyledText("/home/skye/Documents/FMAC/file2.txt") + + # Call the function + init.text_to_word() diff --git a/filewarp/core/tts/core.py b/filewarp/core/tts/core.py new file mode 100644 index 
0000000..e69de29 diff --git a/filewarp/core/tts/gtts.py b/filewarp/core/tts/gtts.py new file mode 100644 index 0000000..1f1918b --- /dev/null +++ b/filewarp/core/tts/gtts.py @@ -0,0 +1,562 @@ +import json +import math +import os +import PyPDF2 +import shutil +import sys +from docx import Document +from threading import Lock, Thread +from typing import List, Union +import requests +from gtts import gTTS +from pydub import AudioSegment +from rich.errors import MarkupError +from ..document import DocumentConverter +from ...utils.colors import fg, rs +from ...utils.simple import logger + +RESET = rs + +_ext_word = ["doc", "docx"] + + +class GoogleTTS: + """Definition of audiofying class""" + + def __init__( + self, + obj: Union[os.PathLike, str, List[Union[os.PathLike, str]]], + resume: bool = True, + ): + self.obj = obj + self.resume = resume + + @staticmethod + def join_audios(files, output_file): + masterfile = output_file + "_master.mp3" + print( + f"{fg.BBLUE}Create a master file {fg.BMAGENTA}{masterfile}{RESET}", + end="\r", + ) + # Create a list to store files + ogg_files = [] + # loop through the directory while adding the ogg files to the list + for filename in files: + print(f"Join {fg.BBLUE}{len(files)}{RESET} files") + # if filename.endswith('.ogg'): + # ogg_file = os.path.join(path, filename) + ogg_files.append(AudioSegment.from_file(filename)) + + # Concatenate the ogg files + combined_ogg = ogg_files[0] + for i in range(1, len(files)): + combined_ogg += ogg_files[i] + + # Export the combined ogg to new mp3 file or ogg file + combined_ogg.export(output_file + "_master.ogg", format="ogg") + print( + f"{fg.BGREEN}Master file:Ok {RESET}" + ) + + def Synthesise( + self, + text: str, + output_file: str, + CHUNK_SIZE: int = 1_000, + _tmp_folder_: str = "tmp_dir", + thread_name: str = None, + max_retries: int = 30, + ) -> None: + """Converts given text to speech using Google Text-to-Speech API.""" + # from rich.progress import (BarColumn, Progress, 
SpinnerColumn,TextColumn) + + config = ConfigManager() + # Define directories and other useful variables for genrating output_file and checkpoint_file + out_dir = os.path.split(output_file)[0] + + thread_name = f"thread_{os.path.split(output_file.split('.')[0])[-1]}" + _file_ = os.path.split(output_file)[1] + + _tmp_folder_ = os.path.join(out_dir, _tmp_folder_) + + # Remove temporary dir if it exists, rare-cases since file names are mostly unique + if os.path.exists(_tmp_folder_) and self.resume is False: + # query = input(f"{fg.BBLUE}Remove the {os.path.join(out_dir, _tmp_folder_)} directory (y/n)?{RESET} ").lower() in ('y', 'yes') + shutil.rmtree(_tmp_folder_) + + # Create temporary folder to house chunks + if not os.path.exists(_tmp_folder_): + logger.info( + f"{fg.BYELLOW}Create temporary directory = {fg.BBLUE}{_tmp_folder_}{RESET}" + ) + os.mkdir(_tmp_folder_) + + _full_output_path_ = os.path.join(_tmp_folder_, _file_) + + # Read reume chunk from the configuration file + start_chunk = int(config.read_config_file(thread_name)) * 1_000 + start_chunk = 0 if start_chunk is None else start_chunk + + """ If chunk is not 0 multiply the chunk by the highest decimal value of the chunk size + else set it to 0 meaning file is being operated on for the first time + """ + resume_chunk_pos = start_chunk * 1_000 if start_chunk != 0 else start_chunk + + try: + print(f"{fg.BYELLOW}Start thread:: {thread_name}{RESET}") + + total_chunks = math.ceil(len(text) / CHUNK_SIZE) + + counter = ( + math.ceil(resume_chunk_pos / CHUNK_SIZE) if resume_chunk_pos != 0 else 0 + ) + + attempt = 0 + + while attempt <= max_retries: + try: + # Initialize progress bar for the overall process + + for i in range(resume_chunk_pos, len(text), CHUNK_SIZE): + print( + f"Processing: chunk {fg.BMAGENTA}{counter}/{total_chunks} {fg.DCYAN}{counter / total_chunks * 100:.2f}%{RESET}\n", + end="\r", + ) + chunk = text[i : i + CHUNK_SIZE] + # print(chunk) + if 
os.path.exists(f"{_full_output_path_}_{counter}.ogg"): + if counter == start_chunk: + print( + f"{fg.CYAN}Chunk vs file confict: {fg.BLUE}Resolving{RESET}" + ) + os.remove(f"{_full_output_path_}_{counter}.ogg") + output_filename = f"{_full_output_path_}_{counter}.ogg" + + # Remove empty file + elif ( + os.path.getsize(f"{_full_output_path_}_{counter}.ogg") + != 0 + ): + os.remove(f"{_full_output_path_}_{counter}.ogg") + output_filename = f"{_full_output_path_}_{counter}.ogg" + + else: + output_filename = ( + f"{_full_output_path_}_{counter + 1}.ogg" + ) + + else: + output_filename = f"{_full_output_path_}_{counter}.ogg" + + tts = gTTS(text=chunk, lang="en", slow=False) + + tts.save(output_filename) + + # Update current_chunk in the configuration + config.update_config_entry(thread_name, current_chunk=counter) + + counter += 1 + + except FileNotFoundError as e: + logger.error(f"{fg.RED}{e}{RESET}") + + except ( + requests.exceptions.ConnectionError + ): # Handle connectivity/network error + logger.error(f"{fg.RED}ConnectionError{RESET}") + + # Exponential backoff for retries + for _sec_ in range(2**attempt, 0, -1): + print( + # Increament the attempts + f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}", + end="\r", + ) + + attempt += 1 + + # Read chunk from configuration + resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000 + + except ( + requests.exceptions.HTTPError + ) as e: # Exponential backoff for retries + logger.error(f"HTTP error: {e.status_code} - {e.reason}") + for _sec_ in range(2**attempt, 0, -1): + print( + f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}", + end="\r", + ) + + attempt += 1 + + resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000 + + except requests.exceptions.RequestException as e: + logger.error(f"{fg.RED}{e}{RESET}") + + for _sec_ in range(2**attempt, 0, -1): + print( + f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}", + end="\r", + ) + attempt += 1 + + resume_chunk_pos = 
int(config.read_config_file(thread_name)) * 1_000 + + except ( + ConnectionError, + ConnectionAbortedError, + ConnectionRefusedError, + ConnectionResetError, + ): + logger.error(f"{fg.RED}Connection at attempt{RESET}") + + for _sec_ in range(2**attempt, 0, -1): + print( + f"{fg.BWHITE}Resume in {fg.BLUE}{_sec_}{RESET}", + end="\r", + ) + + attempt += 1 + + resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000 + + except MarkupError as e: + logger.error(f"{fg.RED}{e}{RESET}") + except Exception as e: # Handle all other types of exceptions + logger.error( + f"{fg.BMAGENTA}{attempt + 1}/{max_retries}:{fg.RED}{e}{RESET}" + ) + + for _sec_ in range(2**attempt, 0, -1): + pass + + attempt += 1 + + resume_chunk_pos = int(config.read_config_file(thread_name)) * 1_000 + + else: + print( + f"{fg.BMAGENTA}Conversion success✅. \n {fg.CYAN}INFO\t Create masterfile{RESET}" + ) + + if ( + len(os.listdir(_tmp_folder_)) > 2 + ): # Combine generated gTTS objects + from .JoinAudios import JoinAudios + + joiner = JoinAudios(_tmp_folder_, masterfile=output_file) + joiner.worker() + # Remove temporary files + shutil.rmtree(_tmp_folder_) + + break # Exit the retry loop if successfull + + else: + print( + f"{fg.RED}Maximum retries reached. 
def audiofy(self, num_threads=3):
    """Convert every supported document in ``self.instance.obj`` to audio.

    ``self.instance.obj`` may be a list of paths, a single file path or a
    directory path.  Each supported file is handed to ``self.worker`` on its
    own thread; threads are started and joined in batches so that at most
    ``num_threads`` files are processed concurrently.

    Args:
        num_threads (int): Maximum number of worker threads per batch.
            Values below 1 are treated as 1.
    """
    supported = (".pdf", ".docx", ".doc", ".txt", ".ppt", ".pptx")
    batch_size = max(1, num_threads)

    def stem_of(path):
        # Base name up to the first dot; directory components are ignored so
        # a dotted directory name cannot truncate the stem.
        return os.path.basename(path).split(".")[0]

    def create_thread(item):
        stem = stem_of(item)
        thread_name = f"thread_{stem}"
        # Unique temp dir for each file
        temp_dir = f"tmp_dir_{stem}"

        # The config file is shared, so registration is serialised.
        with self.lock:
            self.config.add_config_entry(
                thread_name, os.path.join(os.path.dirname(item), stem), temp_dir, 0
            )

        return Thread(
            target=self.worker,
            args=(item, temp_dir, thread_name),
            name=thread_name,
        )

    target = self.instance.obj
    if isinstance(target, list):
        candidates = [os.path.abspath(path) for path in target]
    elif os.path.isfile(target):
        candidates = [os.path.abspath(target)]
    elif os.path.isdir(target):
        # listdir() yields bare names: join with the directory before
        # resolving, otherwise they are looked up in the current directory.
        candidates = [
            os.path.abspath(os.path.join(target, name))
            for name in sorted(os.listdir(target))
        ]
    else:
        candidates = []

    items = [
        path
        for path in candidates
        if os.path.isfile(path) and path.endswith(supported)
    ]

    # Run the files in batches of at most ``batch_size`` threads.
    for start in range(0, len(items), batch_size):
        threads = [create_thread(path) for path in items[start:start + batch_size]]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
class ConfigManager:
    """Persist per-thread progress records in a JSON file.

    The file holds a list of dictionaries with the keys ``thread_name``,
    ``associated_file``, ``tmp_dir`` and ``current_chunk``.
    """

    def __init__(self, config_path="filewarp_config.json"):
        self.config_path = config_path

    def create_config_file(self, config_data):
        """Create or overwrite the configuration file.

        Args:
            config_data (list): A list of dictionaries containing thread name,
                associated file name, temp dir, and current chunk.
        """
        try:
            # Ensure the output directory exists
            output_dir = os.path.dirname(self.config_path)
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)

            with open(self.config_path, "w", encoding="utf-8") as config_file:
                json.dump(config_data, config_file, indent=4)

            print(f"Configuration file '{self.config_path}' created successfully.")
        except Exception as e:
            print(f"Error creating configuration file: {e}")

    def read_config_file(self, thread=None):
        """Read the configuration file.

        Args:
            thread (str): Thread name to look up. If None, the full
                configuration is returned.

        Returns:
            The whole configuration list when ``thread`` is None, otherwise
            the ``current_chunk`` value recorded for that thread. None if the
            file does not exist, the thread is not found, or reading fails.
        """
        try:
            if not os.path.exists(self.config_path):
                print(f"Configuration file '{self.config_path}' not found.")
                return None

            with open(self.config_path, "r", encoding="utf-8") as config_file:
                config = json.load(config_file)

            if thread is None:
                return config

            for entry in config:
                if entry["thread_name"] == thread:
                    return entry.get("current_chunk", None)

            print(f"Entry for thread '{thread}' not found.")
            return None

        except Exception as e:
            print(f"Error reading configuration file: {e}")
            return None

    def add_config_entry(self, thread_name, associated_file, tmp_dir, current_chunk):
        """Add a new entry to the configuration file.

        An entry whose ``thread_name`` already exists is left untouched.

        Args:
            thread_name (str): The name of the thread to be added.
            associated_file (str): The associated file name for the thread.
            tmp_dir (str): Temporary directory for the thread.
            current_chunk (int): The current chunk number for the thread.
        """
        try:
            # Start from an empty list if the file doesn't exist yet
            config_data = self.read_config_file() or []

            if any(entry["thread_name"] == thread_name for entry in config_data):
                print(
                    f"Thread '{thread_name}' already exists. Use 'update_config_entry' to update it."
                )
                return

            config_data.append(
                {
                    "thread_name": thread_name,
                    "associated_file": associated_file,
                    "tmp_dir": tmp_dir,
                    "current_chunk": current_chunk,
                }
            )

            self.create_config_file(config_data)

        except Exception as e:
            print(f"Error adding config entry: {e}")

    def update_config_entry(
        self, thread_name, associated_file=None, tmp_dir=None, current_chunk=None
    ):
        """Update an existing entry in the configuration file.

        Args:
            thread_name (str): The name of the thread to update.
            associated_file (str, optional): The updated associated file name.
            tmp_dir (str, optional): The updated temporary directory.
            current_chunk (int, optional): The updated current chunk number.

        Returns:
            bool: True if the entry was found and saved, False otherwise
            (entry missing or an error occurred).
        """
        try:
            config_data = self.read_config_file() or []

            for entry in config_data:
                if entry["thread_name"] != thread_name:
                    continue
                if associated_file:
                    entry["associated_file"] = associated_file
                if tmp_dir:
                    entry["tmp_dir"] = tmp_dir
                # 0 is a valid chunk index, so compare against None explicitly
                if current_chunk is not None:
                    entry["current_chunk"] = current_chunk

                self.create_config_file(config_data)
                print(f"Thread '{thread_name}' updated successfully.")
                return True

            print(f"Thread '{thread_name}' not found in the configuration.")
            return False

        except Exception as e:
            print(f"Error updating config entry: {e}")
            return False
VideoEditor: + """ + Optimized video editor with progress tracking and frame-accurate seeking. + """ + + def __init__(self, ffmpeg_path: str = "ffmpeg", ffprobe_path: str = "ffprobe"): + self.ffmpeg_path = ffmpeg_path + self.ffprobe_path = ffprobe_path + self._temp_dir = Path(tempfile.mkdtemp(prefix="video_editor_")) + self._check_ffmpeg() + + # Thread lock for progress bar safety if using ThreadPoolExecutor + self._progress_lock = Lock() + + def _check_ffmpeg(self): + """Verify FFmpeg and FFprobe are accessible""" + try: + subprocess.run( + [self.ffmpeg_path, "-version"], capture_output=True, check=True + ) + subprocess.run( + [self.ffprobe_path, "-version"], capture_output=True, check=True + ) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + raise RuntimeError( + "FFmpeg/FFprobe not found. Please install FFmpeg and ensure it's in PATH." + ) from e + + def get_video_info(self, video_path: Union[str, Path]) -> VideoInfo: + """Extract comprehensive video information using ffprobe.""" + video_path = Path(video_path) + if not video_path.exists(): + raise FileNotFoundError(f"Video file not found: {video_path}") + + cmd = [ + self.ffprobe_path, + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + str(video_path), + ] + + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + data = json.loads(result.stdout) + + duration = float(data.get("format", {}).get("duration", 0)) + file_size = int(data.get("format", {}).get("size", 0)) + + video_stream = None + audio_stream = None + + for stream in data.get("streams", []): + if stream.get("codec_type") == "video" and not video_stream: + video_stream = stream + elif stream.get("codec_type") == "audio" and not audio_stream: + audio_stream = stream + + width = height = 0 + fps = 0 + video_codec = "" + bitrate = 0 + + if video_stream: + width = int(video_stream.get("width", 0)) + height = int(video_stream.get("height", 0)) + + avg_frame_rate = 
video_stream.get("avg_frame_rate", "0/0").split("/") + if len(avg_frame_rate) == 2 and float(avg_frame_rate[1]) != 0: + fps = float(avg_frame_rate[0]) / float(avg_frame_rate[1]) + + video_codec = video_stream.get("codec_name", "") + bitrate = int(video_stream.get("bit_rate", 0)) + + audio_codec = None + audio_channels = None + if audio_stream: + audio_codec = audio_stream.get("codec_name", "") + audio_channels = int(audio_stream.get("channels", 0)) + + return VideoInfo( + path=video_path, + duration=duration, + width=width, + height=height, + fps=fps, + codec=video_codec, + bitrate=bitrate, + audio_codec=audio_codec, + audio_channels=audio_channels, + file_size=file_size, + has_video=video_stream is not None, + has_audio=audio_stream is not None, + ) + + def trim_video( + self, + input_path: Union[str, Path], + output_path: Union[str, Path], + trim_ranges: Union[ + TrimRange, List[TrimRange], Tuple[float, float], List[Tuple[float, float]] + ], + video_codec: VideoCodec = VideoCodec.H264, + audio_codec: AudioCodec = AudioCodec.AAC, + quality: VideoQuality = VideoQuality.MEDIUM, + crf: Optional[int] = None, + preserve_audio: bool = True, + copy_streams: bool = False, + show_progress: bool = True, + seek_buffer: float = 5.0, # Seconds before target for accurate seeking + ) -> Path: + """ + Trim video with progress tracking and frame-accurate seeking. 
def _build_ffmpeg_command(
    self,
    input_path: Path,
    output_path: Path,
    trim_range: Optional[TrimRange] = None,
    video_codec: VideoCodec = VideoCodec.H264,
    audio_codec: AudioCodec = AudioCodec.AAC,
    quality: VideoQuality = VideoQuality.MEDIUM,
    crf: Optional[int] = None,
    preserve_audio: bool = True,
    copy_streams: bool = False,
    seek_buffer: float = 5.0,
) -> List[str]:
    """Build the FFmpeg argument list for one (optionally trimmed) encode.

    Uses the double ``-ss`` technique:
      1. Fast input seek to a point ``seek_buffer`` seconds before the target.
      2. Accurate output seek over the remaining distance to the exact start.
      3. Timestamp correction flags when streams are copied.

    Args:
        input_path: Source video.
        output_path: Destination file.
        trim_range: Range to extract; None processes the whole input.
        video_codec: Encoder used when ``copy_streams`` is False.
        audio_codec: Audio encoder; ``AudioCodec.NONE`` drops audio.
        quality: Encoder preset (enum member or raw preset string).
        crf: Constant rate factor; None selects 23 for H.264 only.
        preserve_audio: Keep the audio track when True.
        copy_streams: Copy streams instead of re-encoding.
        seek_buffer: Seconds to seek before the target start.

    Returns:
        The complete command as a list of arguments.
    """
    cmd = [self.ffmpeg_path, "-hide_banner", "-y"]

    # Split the seek into a fast input part and an accurate output part.
    # The output part must equal the distance actually left after the input
    # seek: when start < seek_buffer the input seek is clamped to 0, so
    # skipping a full seek_buffer would overshoot the requested start.
    output_seek = 0.0
    if trim_range and trim_range.start > 0:
        output_seek = min(trim_range.start, max(0.0, seek_buffer))
        input_seek = trim_range.start - output_seek
        cmd.extend(["-ss", str(input_seek)])

    cmd.extend(["-i", str(input_path)])

    if trim_range:
        if output_seek > 0:
            cmd.extend(["-ss", str(output_seek)])

        # Duration of actual content to extract
        cmd.extend(["-t", str(trim_range.duration)])

    # Video encoding options
    if copy_streams:
        cmd.extend(["-c:v", "copy"])
        # Fix timestamp gaps when copying streams
        cmd.extend(
            ["-avoid_negative_ts", "make_zero", "-fflags", "+genpts", "-async", "1"]
        )
    else:
        quality_value = quality if isinstance(quality, str) else quality.value
        cmd.extend(
            [
                "-c:v",
                video_codec.value,
                "-preset",
                quality_value,
                "-pix_fmt",
                "yuv420p",  # Ensure compatibility
            ]
        )

        # Compare against None: crf=0 is a valid setting and must be passed on.
        if crf is not None:
            cmd.extend(["-crf", str(crf)])
        elif video_codec == VideoCodec.H264:
            cmd.extend(["-crf", "23"])

    # Audio encoding options
    if preserve_audio and audio_codec != AudioCodec.NONE:
        if copy_streams:
            cmd.extend(["-c:a", "copy"])
        else:
            cmd.extend(["-c:a", audio_codec.value])
    else:
        cmd.extend(["-an"])

    # Additional flag to prevent the "long video with short content" bug
    if copy_streams:
        cmd.extend(["-vsync", "cfr"])

    cmd.extend([str(output_path)])
    return cmd
def _trim_multiple_ranges(
    self,
    input_path: Path,
    output_path: Path,
    ranges: List[TrimRange],
    video_codec: VideoCodec,
    audio_codec: AudioCodec,
    quality: VideoQuality,
    crf: Optional[int],
    preserve_audio: bool,
    copy_streams: bool,
    show_progress: bool,
    seek_buffer: float,
) -> Path:
    """Extract several ranges and concatenate them into ``output_path``.

    Each range is trimmed into a temporary segment under ``self._temp_dir``
    and the segments are joined with FFmpeg's concat demuxer using stream
    copy.  Temporary files are always removed; a partial output file is
    removed on failure.

    Returns:
        ``output_path`` on success.

    Raises:
        RuntimeError: If a segment cannot be trimmed or concatenation fails.
    """
    temp_files = []
    concat_file = None
    # Segments use the output container so the final stream copy stays valid.
    segment_suffix = output_path.suffix or ".mp4"

    try:
        # Segments are processed sequentially
        total_duration = sum(r.duration for r in ranges)

        with tqdm(
            total=int(total_duration),
            desc="Total Progress",
            unit="s",
            disable=not show_progress,  # honour the caller's choice
        ) as main_pbar:
            for i, trim_range in enumerate(ranges):
                temp_file = self._temp_dir / f"segment_{i:03d}{segment_suffix}"
                temp_files.append(temp_file)

                self._trim_single_range(
                    input_path,
                    temp_file,
                    trim_range,
                    video_codec,
                    audio_codec,
                    quality,
                    crf,
                    preserve_audio,
                    copy_streams,
                    False,  # per-segment progress bar disabled
                    seek_buffer,
                )

                main_pbar.update(int(trim_range.duration))

        # Create concat list
        concat_file = self._temp_dir / "concat.txt"
        with open(concat_file, "w", encoding="utf-8") as f:
            for temp_file in temp_files:
                # Escape single quotes in path for FFmpeg concat demuxer
                path_str = str(temp_file.absolute()).replace("'", "'\\''")
                f.write(f"file '{path_str}'\n")

        concat_cmd = [
            self.ffmpeg_path,
            "-hide_banner",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(concat_file),
            "-c",
            "copy",
            "-avoid_negative_ts",
            "make_zero",
            str(output_path),
        ]

        logger.debug(f"Concat command: {' '.join(concat_cmd)}")

        subprocess.run(concat_cmd, capture_output=True, text=True, check=True)

    except subprocess.CalledProcessError as e:
        if output_path.exists():
            output_path.unlink()
        # CalledProcessError's text omits stderr; include it for diagnosis.
        raise RuntimeError(
            f"Failed to concatenate segments: {e}\n{e.stderr or ''}"
        ) from e

    except Exception as e:
        if output_path.exists():
            output_path.unlink()
        raise RuntimeError(f"Failed to trim segments: {e}") from e

    finally:
        # Cleanup temp files
        for temp_file in temp_files:
            if temp_file.exists():
                temp_file.unlink()
        if concat_file and concat_file.exists():
            concat_file.unlink()

    return output_path
def batch_trim(
    self,
    video_paths: List[Union[str, Path]],
    output_dir: Union[str, Path],
    trim_specs: Union[TrimRange, List[TrimRange], dict],
    **kwargs,
) -> List[Path]:
    """Trim several videos, writing ``<stem>_trimmed<suffix>`` into ``output_dir``.

    Args:
        video_paths: Videos to process.
        output_dir: Directory for the trimmed files (created if missing).
        trim_specs: One specification applied to every video, or a dict
            mapping each video path (as given, as ``str`` or as ``Path``) to
            its own specification.
        **kwargs: Forwarded to ``trim_video``. ``show_progress`` defaults to
            True but may be overridden by the caller.

    Returns:
        Paths of the trimmed files, in input order.

    Raises:
        ValueError: If ``trim_specs`` is a dict with no entry for a video.
        Exception: Any failure from ``trim_video`` is logged and re-raised.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Passing show_progress=True next to **kwargs would raise TypeError when
    # the caller also supplies it, so fold the default into kwargs instead.
    kwargs.setdefault("show_progress", True)

    output_paths = []

    for raw_path in tqdm(video_paths, desc="Processing videos", unit="file"):
        video_path = Path(raw_path)

        if isinstance(trim_specs, dict):
            # Path() may normalise the text (e.g. "./a.mp4" -> "a.mp4"), so
            # try the caller's original key before the derived ones.
            trim_spec = None
            for key in (raw_path, str(video_path), video_path):
                trim_spec = trim_specs.get(key)
                if trim_spec is not None:
                    break
            if trim_spec is None:
                raise ValueError(f"No trim specification for video: {video_path}")
        else:
            trim_spec = trim_specs

        output_path = output_dir / f"{video_path.stem}_trimmed{video_path.suffix}"

        try:
            result = self.trim_video(video_path, output_path, trim_spec, **kwargs)
            output_paths.append(result)
        except Exception as e:
            logger.error(f"Failed to process {video_path.name}: {e}")
            raise

    return output_paths
class VideoConverter:
    """Convert video files between formats and merge audio into videos.

    ``input_file`` may be a single file or a directory; ``out_format`` is the
    target format name (e.g. ``"mp4"``).
    """

    def __init__(self, input_file, out_format=None):
        self.input_file = input_file
        self.out_format = out_format

    @staticmethod
    def _derive_output_name(input_video):
        """Return ``<root>_new_<ext>`` next to the input file.

        splitext only splits the final extension, so dots in directory names
        do not corrupt the result.
        """
        root, ext = os.path.splitext(input_video)
        return f"{root}_new_{ext}"

    def preprocess(self):
        """Collect the files to work on.

        Returns:
            None when no output format is set, otherwise a list with the
            input file itself or every regular file in the input directory.
            Exits with status 1 when the input directory is empty.
        """
        if self.out_format is None:
            return None
        files_to_process = []

        if os.path.isfile(self.input_file):
            files_to_process.append(self.input_file)
        elif os.path.isdir(self.input_file):
            entries = os.listdir(self.input_file)
            # listdir() returns a list (never None); an empty list means an
            # empty folder.
            if not entries:
                print(f"{bg.RED}Cannot work with empty folder{RESET}")
                sys.exit(1)
            for name in entries:
                file_path = os.path.join(self.input_file, name)
                if os.path.isfile(file_path):
                    files_to_process.append(file_path)

        return files_to_process

    def ffmpeg_merger(self, obj: list = None):
        """Mux ``audio.mp3`` into each video, copying the video stream.

        Args:
            obj: Fallback list of videos used when ``preprocess`` yields none.
        """
        video_list = self.preprocess() or obj or []
        for input_video in video_list:
            output_file = self._derive_output_name(input_video)

            # -c:v copy keeps the original video quality (no re-encoding).
            subprocess.run(
                [
                    "ffmpeg",
                    "-i",
                    input_video,
                    "-i",
                    "audio.mp3",
                    "-c:v",
                    "copy",
                    "-c:a",
                    "aac",
                    "-strict",
                    "experimental",
                    output_file,
                ]
            )

    def pydub_merger(self, obj: list = None):
        """Boost ``audio.mp3`` by 6 dB with pydub, then mux it into each video.

        Args:
            obj: Fallback list of videos used when ``preprocess`` yields none.
        """
        video_list = self.preprocess() or obj or []
        for input_video in video_list:
            output_file = self._derive_output_name(input_video)

            audio = AudioSegment.from_file("audio.mp3")
            audio = audio + 6  # Increase volume by 6 dB
            audio.export("processed_audio.mp3", format="mp3")

            # Merge processed audio with video using FFmpeg
            subprocess.run(
                [
                    "ffmpeg",
                    "-i",
                    input_video,
                    "-i",
                    "processed_audio.mp3",
                    "-c:v",
                    "copy",
                    "-c:a",
                    "aac",
                    output_file,
                ]
            )

    def cv2_merger(self, obj: list = None):
        """Re-write each video's frames with OpenCV, then mux in ``audio.mp3``.

        The frames go to the intermediate ``video_no_audio.mp4``, which FFmpeg
        then combines with the audio into the final output file.

        Args:
            obj: Fallback list of videos used when ``preprocess`` yields none.
        """
        silent_video = "video_no_audio.mp4"
        video_list = self.preprocess() or obj or []
        for input_video in video_list:
            output_file = self._derive_output_name(input_video)

            cap = cv2.VideoCapture(input_video)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = int(cap.get(cv2.CAP_PROP_FPS))

            # The silent copy must be the file FFmpeg reads below, not the
            # final output path.
            out = cv2.VideoWriter(
                silent_video, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
            )
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    out.write(frame)
            finally:
                cap.release()
                out.release()

            # Merge with audio using FFmpeg
            subprocess.run(
                [
                    "ffmpeg",
                    "-i",
                    silent_video,
                    "-i",
                    "audio.mp3",
                    "-c:v",
                    "copy",
                    "-c:a",
                    "aac",
                    output_file,
                ]
            )

    def CONVERT_VIDEO(self):
        """Convert every supported input video to ``self.out_format``.

        Exits with status 1 on an unsupported output format or Ctrl-C; other
        errors are printed.
        """
        try:
            input_list = self.preprocess()
            out_f = self.out_format.upper()
            input_list = [
                item
                for item in input_list
                if any(item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)
            ]

            for file in tqdm(input_list):
                if out_f in Video_codecs:
                    root, _ = os.path.splitext(file)
                    output_filename = root + "." + out_f.lower()
                elif out_f in SUPPORTED_VIDEO_FORMATS:
                    print(
                        f"{fg.RED}Unsupported output format --> Pending Implementation{RESET}"
                    )
                    sys.exit(1)
                else:
                    print(f"{fg.RED}Unsupported output format{RESET}")
                    sys.exit(1)

                # Load the video, export it in the new format and always
                # release the clip, even if encoding fails.
                video = VideoFileClip(file)
                try:
                    video.write_videofile(output_filename, codec=Video_codecs[out_f])
                finally:
                    video.close()
        except KeyboardInterrupt:
            print("\nQuit❕")
            sys.exit(1)
        except Exception as e:
            print(e)
def default_supressor():
    """Install the process-wide warning filters used by filewarp.

    All ``RuntimeWarning``s are ignored, and the pygame AVX2 notice is
    filtered explicitly.  The filters are registered directly on the global
    filter list: registering them inside ``warnings.catch_warnings()`` would
    discard them as soon as the ``with`` block exits.

    Returns:
        bool: Always True.
    """
    warnings.simplefilter("ignore", RuntimeWarning)
    warnings.filterwarnings(
        "ignore",
        message="Your system is avx2 capable but pygame was not built with support for it.",
        category=RuntimeWarning,
    )
    return True
def Repair(self, batch: int = 2):
    """Read every frame of the opened video into ``self.frames``.

    Frames that cannot be read are stored as ``None`` so their positions can
    be identified later.  The capture is released when reading finishes.

    Args:
        batch: Currently unused.
    """
    logger.info("Find missing frames and index them")

    # The frame list is not created anywhere else; start fresh on each call.
    self.frames = []

    capture = getattr(self, "cap", None)
    # __init__ returns early, without setting frame_count, when the file
    # could not be opened.
    if capture is None or not capture.isOpened():
        logger.error("Could not open video file.")
        return

    try:
        for _ in tqdm(range(getattr(self, "frame_count", 0)), desc="Progress"):
            ret, frame = capture.read()
            self.frames.append(frame if ret else None)
    finally:
        capture.release()
@@ -0,0 +1,138 @@ +#!/usr/bin/python3 +import logging +import cv2 +from colorama import Fore, Style, init + +# import numpy as np +from tqdm import tqdm + +# Initialize colorama +init(autoreset=True) + +# Custom formatter class to add colors + + +class CustomFormatter(logging.Formatter): + COLORS = { + logging.DEBUG: Fore.BLUE, + logging.INFO: Fore.GREEN, + logging.WARNING: Fore.YELLOW, + logging.ERROR: Fore.RED, + logging.CRITICAL: Fore.MAGENTA, + } + + def format(self, record): + log_color = self.COLORS.get(record.levelno, Fore.WHITE) + log_message = super().format(record) + return f"{log_color}{log_message}{Style.RESET_ALL}" + + +# Set up logging +logger = logging.getLogger("colored_logger") +handler = logging.StreamHandler() +handler.setFormatter(CustomFormatter("- %(levelname)s - %(message)s")) + +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def detect_missing_frames(frames): + """Implementation for missing frame detection and index them, append index + of missing frames to a list""" + missing_frames = [] + logger.info("Index missing frames") + for i in tqdm(range(1, len(frames) - 1), desc="Progress"): + if frames[i] is None: + missing_frames.append(i) + + # Exit when no missing frames are found + if not missing_frames: + exit(0) + return missing_frames + + +def interpolate_frame(prev_frame, next_frame): + """Based on number and size of missing frames use this logic to create a + dummy frame by interpolating. 
+ combine the frame before and after the missing frame and find the missing + frame by calculating middle weight.""" + logger.info("Interpolating") + return cv2.addWeighted(prev_frame, 0.5, next_frame, 0.5, 0) + + +def repair_video(input_path, output_path): + logger.info("Open the file") + cap = cv2.VideoCapture(input_path) + if not cap.isOpened(): + logger.error("Could not open video file.") + return + + # Collect file metadata + frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) + + logger.info( + "File info:\n" + f"\tFrames: \033[95m{frame_count}\033[0;32m\n" + f"\tFrame Width: \033[0;95m{width}\033[0;32m\n" + f"\tFPS: \033[0;95m{fps}\033[0m" + ) + + frames = [] + # Remove missing frames + logger.info("Find missing frames and index them") + for _ in tqdm(range(frame_count), desc="Progress"): + ret, frame = cap.read() + if not ret: + frames.append(None) + else: + frames.append(frame) + + cap.release() + + """ Call function to detect missing frames and decide on the method to apply + depending on number of missing frames. If number is larger than frame_count * 0.1 +remove the missing frames else interpolate.""" + + missing_frames = detect_missing_frames(frames) + if ( + len(missing_frames) > frame_count * 0.1 + ): # Arbitrary threshold for many missing frames + frames = [f for f in frames if f is not None] + else: + for i in missing_frames: + """ Based on missing frame `i` find previous frame `frames[i-1]` and preceeding frame `frames[i+1]` wher both previous and preceeding are not missing. 
Use them to create the middle frame.""" + if ( + i > 0 + and i < frame_count - 1 + and frames[i - 1] is not None + and frames[i + 1] is not None + ): + frames[i] = interpolate_frame(frames[i - 1], frames[i + 1]) + else: + """Where ...""" + frames[i] = ( + frames[i - 1] if frames[i - 1] is not None else frames[i + 1] + ) + + # Create writer objectfor the frames + out = cv2.VideoWriter( + output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height) + ) + + # Write the new video to file + for frame in frames: + "Don't write empty frames" + if frame is not None: + out.write(frame) + + out.release() + print("Video repair complete and saved to:", output_path) + + +# Usage +input_video_path = "/home/skye/Videos/supercar.mp4" +output_video_path = "output_video.mp4" +repair_video(input_video_path, output_video_path) diff --git a/filewarp/miscellaneous/video_analyzer.py b/filewarp/miscellaneous/video_analyzer.py new file mode 100644 index 0000000..6394c8d --- /dev/null +++ b/filewarp/miscellaneous/video_analyzer.py @@ -0,0 +1,121 @@ +"""A basic/simple file analyzer""" + +import sys +import cv2 +import numpy as np +from ..utils.colors import fg, rs +import ffmpeg + +RESET = rs + + +class SimpleAnalyzer: + """Video - video object subject for analysis + return video`s: duration, total_area and frame_count""" + + def __init__(self, video): + self.video = video + + @staticmethod + def get_metadata(input_file): + """Fetch the original bitrate of the video file using ffmpeg.""" + try: + probe = ffmpeg.probe(input_file) + # print(probe.get("streams")[1]) + bitrate = None + # Iterate over the streams and find the video stream + for stream in probe["streams"]: + bitrate = ( + stream.get("bit_rate", None) + if stream["codec_type"] == "video" + else None + ) + aspect_ratio = ( + stream.get("sample_aspect_ratio") + if stream["sample_aspect_ratio"] + else None + ) + codec_name = stream.get("codec_name") if stream["codec_name"] else None + channels = stream.get("channels") + + 
encoder = stream.get("encoder") if stream.get("encoder") else None + break + return bitrate, aspect_ratio, codec_name, channels, encoder + except ffmpeg.Error as e: + raise + print(f"Error: {e}") + except Exception as e: + raise + print(f"Error: {e}") + + def analyze(self): + """Read the video file/obj + Increase frame count and accumulate area + Calculate current frame duration + Display the resulting frame""" + + try: + # Read the video file + cap = cv2.VideoCapture(self.video) + # print(f"{fg.BYELLOW}Initializing..{RESET}") + # Initialize variables + # Frame rate (fps) + bitrate, aspect_ratio, codec_name, channels, encoder = self.get_metadata( + self.video + ) + frame_count = 0 + total_area = 0 + duration = 0 + + # print(f"{fg.DCYAN}Working on it{RESET}") + while True: + ret, frame = cap.read() + + if not ret: + break + # Increase frame count and accumulate area + frame_count += 1 + total_area += np.prod(frame.shape[:2]) + + # Calculate current frame duration + fps = cap.get(cv2.CAP_PROP_FPS) + duration += 1 / fps + + # Display the resulting frame + cv2.imshow("Frame", frame) + + # Break the loop after pressing 'q' + if cv2.waitKey(1) == ord("q"): + break + + # Release the video capture object and close all windows + cap.release() + cv2.destroyAllWindows() + + # Print results + # print(f"Size {fg.BGREEN}{size}{RESET}Kb") + print(f"Channels: {fg.BGREEN}{channels}{RESET}") + print(f"Encoder {fg.BGREEN}{encoder}{RESET}") + print(f"Bitrate {fg.BGREEN}{bitrate}{RESET}") + print(f"Aspect ratio{fg.BGREEN}{aspect_ratio}{RESET}") + print(f"Codec name {fg.BGREEN}{codec_name}{RESET}") + print(f"Total Frames: {fg.BGREEN}{frame_count}{RESET}") + print( + f"Average Frame Area: {fg.BGREEN}{total_area / frame_count}{RESET}" + ) + print(f"Duration: {fg.BGREEN}{duration:.2f}{RESET} seconds") + return frame_count, total_area, duration + except KeyboardInterrupt: + print("\nExiting") + sys.exit(1) + except TypeError: + pass + except Exception as e: + print(e) + sys.exit(1) + + +if 
__name__ == "__main__": + vi = SimpleAnalyzer("/home/skye/Videos/demo.mkv") + # SimpleAnalyzer.get_metadata("/home/skye/Videos/demo.mkv") + vi.analyze() diff --git a/filewarp/utils/__init__.py b/filewarp/utils/__init__.py new file mode 100644 index 0000000..b814662 --- /dev/null +++ b/filewarp/utils/__init__.py @@ -0,0 +1,3 @@ +from . import formats + +__all__ = ["formats"] diff --git a/filewarp/utils/colors.py b/filewarp/utils/colors.py new file mode 100644 index 0000000..def6fc7 --- /dev/null +++ b/filewarp/utils/colors.py @@ -0,0 +1,271 @@ +import os + +from colorama import Fore, Style, init + +init(autoreset=True) + + +class ForegroundColor: + if os.name == "posix": + RESET = "\033[0m" # Reset to default text color + + # Red Variants + RED = "\033[91m" # Normal RED + BRED = "\033[1;91m" # Deep RED + FRED = "\033[2;91m" # Faint red + IRED = "\033[3;91m" # Indented RED + LRED = "\033[4;91m" # Underlined RED + URED = "\033[5;91m" # Blinking RED + + # Green Variants + GREEN = "\033[92m" # Normal green + BGREEN = "\033[1;92m" # Deep green + FGREEN = "\033[2;92m" # Faint green + IGREEN = "\033[3;92m" # Indented GREEN + LGREEN = "\033[4;92m" # Underlined GREEN + UGREEN = "\033[5;92m" # Blinking GREEN + + # Yellow Variants + YELLOW = "\033[93m" # Normal yellow + BYELLOW = "\033[1;93m" # Deep YELLOW + FYELLOW = "\033[2;93m" # Faint YELLOW + IYELLOW = "\033[3;93m" # Indented YELLOW + LYELLOW = "\033[4;93m" # Underlined YELLOW + UYELLOW = "\033[5;93m" # Blinking YELLOW + + # Blue Variants + BLUE = "\033[94m" # Normal BLUE + BBLUE = "\033[1;94m" # Deep BLUE + FBLUE = "\033[2;94m" # Faint Blue + IBLUE = "\033[3;94m" # Indented BLUE + LBLUE = "\033[4;94m" # Underlined BLUE + UBLUE = "\033[5;94m" # Blinking BLUE + + # Magenta Variants + MAGENTA = "\033[95m" # Normal MAGENTA + BMAGENTA = "\033[1;95m" # Deep MAGENTA + FMAGENTA = "\033[2;95m" # Faint MAGENTA + IMAGENTA = "\033[3;95m" # Indented MAGENTA + LMAGENTA = "\033[4;95m" # Underlined MAGENTA + UMAGENTA = "\033[5;95m" # 
Blinking MAGENTA + + # Cyan Variants + CYAN = "\033[96m" # Normal cyan + DCYAN = "\033[1;96m" # Deep CYAN + FCYAN = "\033[2;96m" # Faint cyan + ICYAN = "\033[3;96m" # Indented CYAN + LCYAN = "\033[4;96m" # Underlined CYAN + UCYAN = "\033[5;96m" # Blinking CYAN + + # White Variants + BWHITE = "\033[1m" # Bold white + BBWHITE = "\033[5;97;1m" # Bold Blinking white + WHITE = "\033[97m" # Normal white + DWHITE = "\033[1;97m" # Deep white + FWHITE = "\033[2;97m" # Faint white + IWHITE = "\033[3;97m" # Indented white + LWHITE = "\033[4;97m" # Underlined white + UWHITE = "\033[5;97m" # Blinking white + + if os.name == "nt": + RESET = Style.RESET_ALL + + # Red Variants + RED = Fore.LIGHTRED_EX + BRED = Fore.RED + FRED = Fore.RED + IRED = Fore.RED + LRED = Fore.LIGHTRED_EX # Underlined RED + URED = Fore.RED # Blinking not directly supported, using RED + + # Green Variants + GREEN = Fore.LIGHTGREEN_EX + BGREEN = Fore.GREEN + FGREEN = Fore.GREEN + IGREEN = Fore.GREEN + LGREEN = Fore.LIGHTGREEN_EX # Underlined GREEN + UGREEN = Fore.GREEN # Blinking not directly supported, using GREEN + + # Yellow Variants + YELLOW = Fore.LIGHTYELLOW_EX + BYELLOW = Fore.YELLOW + FYELLOW = Fore.YELLOW + IYELLOW = Fore.YELLOW + LYELLOW = Fore.LIGHTYELLOW_EX # Underlined YELLOW + UYELLOW = Fore.YELLOW # Blinking not directly supported, using YELLOW + + # Blue Variants + BLUE = Fore.LIGHTBLUE_EX + BBLUE = Fore.BLUE + FBLUE = Fore.BLUE + IBLUE = Fore.BLUE + LBLUE = Fore.LIGHTBLUE_EX # Underlined BLUE + UBLUE = Fore.BLUE # Blinking not directly supported, using BLUE + + # Magenta Variants + MAGENTA = Fore.LIGHTMAGENTA_EX + BMAGENTA = Fore.MAGENTA + FMAGENTA = Fore.MAGENTA + IMAGENTA = Fore.LIGHTMAGENTA_EX + LMAGENTA = Fore.LIGHTMAGENTA_EX # Underlined MAGENTA + UMAGENTA = Fore.MAGENTA # Blinking not directly supported, using MAGENTA + + # Cyan Variants + CYAN = Fore.LIGHTCYAN_EX + DCYAN = Fore.CYAN + ICYAN = Fore.WHITE # Indented CYAN + FCYAN = Fore.CYAN + LCYAN = Fore.LIGHTCYAN_EX # Underlined CYAN 
+ UCYAN = Fore.CYAN # Blinking not directly supported, using CYAN + + # White Variants + BWHITE = Fore.WHITE + BBWHITE = Fore.WHITE # Blinking not directly supported, using WHITE + WHITE = Fore.WHITE + DWHITE = Fore.WHITE # Deep white (not distinct in colorama) + FWHITE = Fore.WHITE # Faint white (not distinct in colorama) + IWHITE = Fore.WHITE # Indented white (not distinct in colorama) + LWHITE = Fore.WHITE # Underlined white (not distinct in colorama) + UWHITE = Fore.WHITE # Blinking not directly supported, using WHITE + + +class BackgroundColor: + if os.name == "posix": + RESET = "\033[0m" # Reset to default text color + + # Red Variants + RED = "\033[91m" # Normal RED + BRED = "\033[1;41m" # Deep RED + FRED = "\033[2;41m" # Faint red + IRED = "\033[3;41m" # Indented RED + LRED = "\033[4;41m" # Underlined RED + URED = "\033[5;41m" # Blinking RED + + # Green Variants + GREEN = "\033[42m" # Normal green + BGREEN = "\033[1;42m" # Deep green + FGREEN = "\033[2;42m" # Faint green + IGREEN = "\033[3;42m" # Indented GREEN + LGREEN = "\033[4;42m" # Underlined GREEN + UGREEN = "\033[5;42m" # Blinking GREEN + + # Yellow Variants + YELLOW = "\033[43m" # Normal yellow + BYELLOW = "\033[1;43m" # Deep YELLOW + FYELLOW = "\033[2;43m" # Faint YELLOW + IYELLOW = "\033[3;43m" # Indented YELLOW + LYELLOW = "\033[4;43m" # Underlined YELLOW + UYELLOW = "\033[5;43m" # Blinking YELLOW + + # Blue Variants + BLUE = "\033[44m" # Normal BLUE + BBLUE = "\033[1;44m" # Deep BLUE + FBLUE = "\033[2;44m" # Faint Blue + IBLUE = "\033[3;44m" # Indented BLUE + LBLUE = "\033[4;44m" # Underlined BLUE + UBLUE = "\033[5;44m" # Blinking BLUE + + # Magenta Variants + MAGENTA = "\033[45m" # Normal MAGENTA + BMAGENTA = "\033[1;45m" # Deep MAGENTA + FMAGENTA = "\033[2;45m" # Faint MAGENTA + IMAGENTA = "\033[3;45m" # Indented MAGENTA + LMAGENTA = "\033[4;45m" # Underlined MAGENTA + UMAGENTA = "\033[5;45m" # Blinking MAGENTA + + # Cyan Variants + CYAN = "\033[46m" # Normal cyan + DCYAN = "\033[1;46m" # Deep 
CYAN + FCYAN = "\033[2;46m" # Faint cyan + ICYAN = "\033[3;46m" # Indented CYAN + LCYAN = "\033[4;46m" # Underlined CYAN + UCYAN = "\033[5;46m" # Blinking CYAN + + # White Variants + BWHITE = "\033[1m" # Bold white + BBWHITE = "\033[5;47;1m" # Bold Blinking white + WHITE = "\033[47m" # Normal white + DWHITE = "\033[1;47m" # Deep white + FWHITE = "\033[2;47m" # Faint white + IWHITE = "\033[3;47m" # Indented white + LWHITE = "\033[4;47m" # Underlined white + UWHITE = "\033[5;47m" # Blinking white + + BLACK = "\033[40m" # Black Background + + if os.name == "nt": + RESET = Style.RESET_ALL + + # Red Variants + RED = Fore.LIGHTRED_EX + BRED = Fore.RED + FRED = Fore.RED + IRED = Fore.RED + LRED = Fore.LIGHTRED_EX # Underlined RED + URED = Fore.RED # Blinking not directly supported, using RED + + # Green Variants + GREEN = Fore.LIGHTGREEN_EX + BGREEN = Fore.GREEN + FGREEN = Fore.GREEN + IGREEN = Fore.GREEN + LGREEN = Fore.LIGHTGREEN_EX # Underlined GREEN + UGREEN = Fore.GREEN # Blinking not directly supported, using GREEN + + # Yellow Variants + YELLOW = Fore.LIGHTYELLOW_EX + BYELLOW = Fore.YELLOW + FYELLOW = Fore.YELLOW + IYELLOW = Fore.YELLOW + LYELLOW = Fore.LIGHTYELLOW_EX # Underlined YELLOW + UYELLOW = Fore.YELLOW # Blinking not directly supported, using YELLOW + + # Blue Variants + BLUE = Fore.LIGHTBLUE_EX + BBLUE = Fore.BLUE + FBLUE = Fore.BLUE + IBLUE = Fore.BLUE + LBLUE = Fore.LIGHTBLUE_EX # Underlined BLUE + UBLUE = Fore.BLUE # Blinking not directly supported, using BLUE + + # Magenta Variants + MAGENTA = Fore.LIGHTMAGENTA_EX + BMAGENTA = Fore.MAGENTA + FMAGENTA = Fore.MAGENTA + IMAGENTA = Fore.LIGHTMAGENTA_EX + LMAGENTA = Fore.LIGHTMAGENTA_EX # Underlined MAGENTA + UMAGENTA = Fore.MAGENTA # Blinking not directly supported, using MAGE + + # Cyan Variants + CYAN = Fore.LIGHTCYAN_EX + DCYAN = Fore.CYAN + ICYAN = Fore.WHITE # Indented CYAN + FCYAN = Fore.CYAN + LCYAN = Fore.LIGHTCYAN_EX # Underlined CYAN + UCYAN = Fore.CYAN # Blinking not directly supported, using 
CYAN + + # White Variants + BWHITE = Fore.WHITE + BBWHITE = Fore.WHITE # Blinking not directly supported, using WHITE + WHITE = Fore.WHITE + DWHITE = Fore.WHITE # Deep white (not distinct in colorama) + FWHITE = Fore.WHITE # Faint white (not distinct in colorama) + IWHITE = Fore.WHITE # Indented white (not distinct in colorama) + LWHITE = Fore.WHITE # Underlined white (not distinct in colorama) + UWHITE = Fore.WHITE # Blinking not directly supported, using WHITE + + +fg = ForegroundColor() +bg = BackgroundColor() +rs = fg.RESET + + +class OutputFormater: + """ANSI styles for output display""" + + INFO = f"{fg.BLUE}[i]{rs}" + WARN = f"{fg.YELLOW}[!]{rs}" + ERR = f"{fg.RED}[x]{rs}" + EXP = f"{fg.MAGENTA}[⁉️]{rs}" # For exceptios + OK = f"{fg.GREEN}[✓]{rs}" + RESET = rs diff --git a/filewarp/utils/config.py b/filewarp/utils/config.py new file mode 100644 index 0000000..b923bbb --- /dev/null +++ b/filewarp/utils/config.py @@ -0,0 +1,11 @@ +from pathlib import Path +import os + +BASE_DIR = Path(__file__).resolve().home() + +OUTPUT_DIR = BASE_DIR / "Documents" + +CACHE_DIR = BASE_DIR / "tmp/filewarp" + +# Ensure cache dir exists +os.makedirs(CACHE_DIR, exist_ok=True) diff --git a/filewarp/utils/decorators.py b/filewarp/utils/decorators.py new file mode 100644 index 0000000..65c4652 --- /dev/null +++ b/filewarp/utils/decorators.py @@ -0,0 +1,25 @@ +class Decorators: + def __init__(self): + pass + + @staticmethod + def for_loop_decorator(data_list): + """ + A decorator that calls the decorated function with each element + from the provided list or tuple. + + Args: + data_list: A list or tuple of data to iterate over. 
+ """ + + def decorator(func): + def wrapper(self, *args, **kwargs): + for item in data_list: + func(self, item, *args, **kwargs) + + return wrapper + + return decorator + + +for_loop = Decorators.for_loop_decorator diff --git a/filewarp/utils/file_utils.py b/filewarp/utils/file_utils.py new file mode 100644 index 0000000..a0e741b --- /dev/null +++ b/filewarp/utils/file_utils.py @@ -0,0 +1,352 @@ +""" +File utility functions for filewarp. +""" + +import fnmatch +import os +import shutil +import tempfile +import uuid +from pathlib import Path +from typing import Iterator, List, Optional, Union + +from tqdm.auto import tqdm + +# from .colors import fg, rs +from ..core.exceptions import FileSystemError +from .colors import OutputFormater as OF +from .config import OUTPUT_DIR +from .formats import SUPPORTED_IMAGE_FORMATS +from .simple import logger + + +def dirbuster(_dir_): + try: + target = [] + for root, dirs, files in os.walk(_dir_): + for file in files: + ext = file.split(".")[-1] + + _path_ = os.path.join(root, file) + if os.path.exists(_path_) and ext.lower() in ("pdf", "doc", "docx"): + target.append(_path_) + return target + except FileNotFoundError as e: + print(e) + + except KeyboardInterrupt: + print("\nQuit!") + return + + +def generate_filename( + basedir: Path, suffix: str, postfix: Optional[str] = "filewarp" +) -> Path: + """ + Generate Filename given its extension + Args: + suffix-> str file extension + basedir-> Path + postfix = str string preceding name + prefix - string before name + Returns: + path + """ + + filename = basedir / f"{uuid.uuid4().hex}-{postfix}.{suffix}" + + return filename + + +class FileSystemHandler: + """ + Encapsulates file handling utilities required by cleaner + """ + + def __init__(self, ignore: list | tuple = None): + self.ignore = ignore + + def find_files(self, paths, patterns, recursive=True) -> list: + try: + candidates = [] + for path in paths: + path_obj = Path(path).expanduser().resolve() + if not path_obj.exists(): + 
continue + if recursive: + for file in tqdm( + path_obj.rglob("*"), desc="Searching", leave=False + ): + if file.is_file() and any( + fnmatch.fnmatch(file.name, pat) for pat in patterns + ): + candidates.append(file) + else: + for file in tqdm(path_obj.glob("*"), desc="Searching", leave=False): + if file.is_file() and any( + fnmatch.fnmatch(file.name, pat) for pat in patterns + ): + candidates.append(file) + return self.ignore_pattern(candidates) + except Exception as e: + raise FileSystemError(e) + + def find_directories(self, paths, patterns, recursive=True, empty=True) -> list: + try: + candidates = [] + for path in paths: + path_obj = Path(path).expanduser().resolve() + if not path_obj.exists(): + continue + if recursive: + for root, dirs, files in tqdm( + os.walk(path_obj, followlinks=True), + desc="Searching", + leave=False, + ): + for dir in dirs: + if len(os.listdir(os.path.join(root, dir))) == 0: + candidates.append(Path(root) / dir) + + else: + for item in tqdm( + os.listdir(path_obj), desc="Searching", leave=False + ): + if os.path.isdir(item) and len(os.listdir(item)) == 0: + candidates.append(path_obj / item) + + return self.ignore_pattern(candidates) + except Exception as e: + raise FileSystemError(e) + + def ignore_pattern(self, items: list | tuple, ignore: list | tuple = None) -> list: + ignore = self.ignore if not ignore else ignore + candidates = [] + for item in items: + for ig in ignore: + _ig = ig.lower() + if _ig in item.as_uri().lower().split( + "/" + ) + item.as_uri().lower().split("\\"): + continue + + candidates.append(item) + + return candidates + + @staticmethod + def _find_files(pattern: str, recursive: bool = True) -> Iterator[Path]: + """Find files matching pattern.""" + path = Path(pattern) + + if path.exists() and path.is_file(): + yield path + return + + # Handle glob patterns + if recursive: + yield from Path(".").rglob(pattern) + else: + yield from Path(".").glob(pattern) + + @staticmethod + def delete_files(files) -> bool: + 
try: + for f in files: + if f.exists(): + f.unlink() + print(f"{OF.OK} Deleted: {f}") + return True + except (PermissionError, OSError) as e: + raise FileSystemError(e) + except Exception as e: + print(f"{OF.ERR} Failed to delete {f}: {e}") + return False + + @staticmethod + def delete_folders(files) -> bool: + try: + for f in files: + if f.exists(): + f.rmdir() + print(f"{OF.OK} Deleted: {f}") + return True + except (PermissionError, OSError) as e: + raise FileSystemError(e) + except Exception as e: + print(f"{OF.ERR} Failed to delete {f}: {e}") + return False + + @staticmethod + def ensure_directory(path: Path) -> Path: + """Ensure directory exists, create if necessary.""" + try: + path.mkdir(parents=True, exist_ok=True) + return path + except OSError as e: + raise FileSystemError(f"Failed to create directory {path}: {str(e)}") + + @staticmethod + def safe_filename(name: str, max_length: int = 255) -> str: + """Convert string to safe filename.""" + # Replace unsafe characters + safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in name) + + # Remove extra spaces and underscores + safe_name = "_".join(filter(None, safe_name.split())) + + # Trim to max length + if len(safe_name) > max_length: + name_hash = str(hash(safe_name))[-8:] + safe_name = safe_name[: max_length - 9] + "_" + name_hash + + return safe_name + + +class TemporaryFileManager: + """Manages temporary files with proper cleanup.""" + + def __init__(self, prefix: str = "kcleaner_"): + self.temp_files = [] + self.temp_dirs = [] + self.prefix = prefix + + def create_temp_file(self, suffix: str, content: str = "") -> Path: + """Create a temporary file with the given suffix and content.""" + try: + with tempfile.NamedTemporaryFile( + mode="w", + suffix=suffix, + prefix=self.prefix, + encoding="utf-8", + delete=False, + ) as f: + if content: + f.write(content) + temp_path = Path(f.name) + + self.temp_files.append(temp_path) + return temp_path + + except (OSError, IOError) as e: + raise 
FileSystemError(f"Failed to create temporary file: {str(e)}") + + def create_temp_dir(self) -> Path: + """Create a temporary directory.""" + try: + temp_dir = Path(tempfile.mkdtemp(prefix=self.prefix)) + self.temp_dirs.append(temp_dir) + return temp_dir + except OSError as e: + raise FileSystemError(f"Failed to create temporary directory: {str(e)}") + + def cleanup(self): + """Clean up all temporary files and directories.""" + for temp_file in self.temp_files: + try: + if temp_file.exists(): + temp_file.unlink() + except OSError as e: + logger.warning(f"Failed to delete temporary file {temp_file}: {e}") + + for temp_dir in self.temp_dirs: + try: + if temp_dir.exists(): + shutil.rmtree(temp_dir) + except OSError as e: + logger.warning(f"Failed to delete temporary directory {temp_dir}: {e}") + + self.temp_files.clear() + self.temp_dirs.clear() + + +class DirectoryScanner: + def __init__(self, input_obj: Optional[Union[str, list[str], os.PathLike]]): + self.input_obj = input_obj + + def get_dir_files(self): + """ + Get file path list given dir/folder + + ------- + Args: + path: path to the directory/folder + Returns: + ------- + list + """ + files = [ + os.path.join(self.input_obj, f) + for f in os.listdir(self.input_obj) + if os.path.isfile(os.path.join(self.input_obj, f)) + and self._is_supported_image(f) + ] + if not files: # Check for empty directory *after* filtering + raise FileNotFoundError( + f"No supported image files found in: {self.input_obj}" + ) + return files + + def _is_supported_image(self, filename: str) -> bool: + """Checks if a file has a supported image extension.""" + return filename.lower().endswith(tuple(SUPPORTED_IMAGE_FORMATS.values())) + + def _get_image_files(self, files: list = None) -> List[str]: + """ + Identifies image files to process, handling both single files and directories. + + Returns: + A list of paths to image files. Raises FileNotFoundError if no + valid image files are found. 
+ """ + files = self.input_obj if not files else files + + if isinstance(files, (str, os.PathLike)): + if os.path.isfile(files): + return [files] + else: + return self.get_dir_files(files) + + files_to_process = [] + for obj in files: + if os.path.isfile(obj): + if self._is_supported_image(obj): + files_to_process.append(obj) + else: + logger.warning(f"Skipping unsupported file: {obj}") + + elif os.path.isdir(obj): + files = self.get_dir_files(obj) + if not files: # Check for empty directory *after* filtering + raise FileNotFoundError(f"No supported image files found in: {obj}") + files_to_process.extend(files) + else: + raise FileNotFoundError( + f"Input is not a valid file or directory: {obj}" + ) + return files_to_process + + def run(self): + supported_files = self._get_image_files(self.input_obj) + return supported_files + + +def modify_filename_if_exists(filename): + """ + Modifies the filename by adding "_filewarp" before the extension if the original filename exists. + + Args: + filename (str): The filename to modify. + + Returns: + str: The modified filename, or the original filename if it doesn't exist or has no extension. + """ + if os.path.exists(filename): + parts = filename.rsplit(".", 1) # Split from the right, at most once + if len(parts) == 2: + base, ext = parts + return f"{base}_filewarp.{ext}" + else: + return f"{filename}_filewarp" # handle files with no extension. 
+ else: + return filename diff --git a/filewarp/utils/formats.py b/filewarp/utils/formats.py new file mode 100644 index 0000000..c8ea2e9 --- /dev/null +++ b/filewarp/utils/formats.py @@ -0,0 +1,368 @@ +# multimedia_cli/formats.py +from rich.table import Table +from rich.panel import Panel +from rich.text import Text +from rich import box +from rich.columns import Columns + +try: + from cli._entry_ import console +except ImportError: + from rich.console import Console + + console = Console() + +# Color mappings for consistent styling +STYLES = { + "input": "bold cyan", + "output": "bold green", + "arrow": "yellow", + "pending": "dim italic red", + "header": "bold white on blue", + "format": "magenta", +} + + +def create_doc_formats_table(): + """Create an elegant table for document formats""" + table = Table( + title="[bold]Document Format Conversions[/]", + title_style="bold cyan", + box=box.ROUNDED, + border_style="blue", + header_style="bold white on blue", + show_lines=True, + padding=(0, 2), + ) + + table.add_column("Input Format", style="bold cyan", justify="center") + table.add_column("→", style="yellow", justify="center", width=3) + table.add_column("Output Formats", style="green", justify="left") + + conversions = [ + ("xlsx", "→", "csv, txt, doc/docx, db(sql)"), + ("doc/docx", "→", "txt, pdf, ppt/pptx, audio(ogg)"), + ("txt", "→", "pdf, docx/doc, audio(ogg)"), + ("pdf", "→", "doc/docx, txt, audio(ogg)"), + ("pptx/ppt", "→", "doc/docx"), + ] + + for in_fmt, arrow, out_fmt in conversions: + table.add_row(in_fmt, arrow, out_fmt) + + return table + + +def create_audio_formats_table(): + """Create an elegant table for audio formats""" + table = Table( + title="[bold]Supported Audio Formats[/]", + title_style="bold cyan", + box=box.ROUNDED, + border_style="magenta", + header_style="bold white on magenta", + show_header=False, + padding=(0, 3), + ) + + table.add_column("Format", style="bold magenta", justify="center") + table.add_column("Status", style="white", 
justify="center") + + audio_formats = [ + ("WAV", "✅ Supported"), + ("MP3", "✅ Supported"), + ("OGG", "✅ Supported"), + ("FLV", "✅ Supported"), + ("OGV", "✅ Supported"), + ("MOV", "✅ Supported"), + ("WEBM", "✅ Supported"), + ("AAC", "⏳ Pending Implementation"), + ("BPF", "⏳ Pending Implementation"), + ("M4A", "✅ Supported"), + ("RAW", "✅ Supported"), + ("AIFF", "✅ Supported"), + ("FLAC", "✅ Supported"), + ] + + for fmt, status in audio_formats: + table.add_row(fmt, status) + + return table + + +def create_video_formats_table(): + """Create an elegant table for video formats""" + table = Table( + title="[bold]Supported Video Formats[/]", + title_style="bold cyan", + box=box.ROUNDED, + border_style="green", + header_style="bold white on green", + show_lines=True, + ) + + table.add_column("Format", style="bold green", justify="center") + table.add_column("Codec", style="cyan", justify="center") + table.add_column("Status", style="white", justify="center") + + video_formats = [ + ("MP4", "mpeg4", "✅ Supported"), + ("AVI", "rawvideo", "✅ Supported"), + ("OGV", "avc", "⏳ Pending Implementation"), + ("WEBM", "libvpx", "✅ Supported"), + ("MOV", "mpeg4", "✅ Supported"), + ("MKV", "mpeg4", "✅ Supported"), + ("FLV", "flv", "✅ Supported"), + ("WMV", "WMV", "⏳ Pending Implementation"), + ] + + for fmt, codec, status in video_formats: + table.add_row(fmt, codec, status) + + return table + + +def create_image_formats_table(): + """Create an elegant table for image formats""" + table = Table( + title="[bold]Supported Image Formats[/]", + title_style="bold cyan", + box=box.ROUNDED, + border_style="yellow", + header_style="bold white on yellow", + show_lines=True, + ) + + table.add_column("Format", style="bold yellow", justify="center") + table.add_column("Extension", style="cyan", justify="center") + table.add_column("Status", style="white", justify="center") + + image_formats = [ + ("JPEG", ".jpeg", "✅ Supported"), + ("JPG", ".jpg", "✅ Supported"), + ("PNG", ".png", "✅ 
Supported"), + ("GIF", ".gif", "✅ Supported"), + ("BMP", ".bmp", "✅ Supported"), + ("DIB", ".dib", "✅ Supported"), + ("TIFF", ".tiff", "✅ Supported"), + ("PIC", ".pic", "✅ Supported"), + ("PDF", ".pdf", "✅ Supported"), + ("WEBP", ".webp", "✅ Supported"), + ("ICNS", ".icns", "✅ Supported"), + ("EPS", ".eps", "✅ Supported"), + ("PSD", ".psd", "⏳ Pending Implementation"), + ("SVG", ".svg", "⏳ Pending Implementation"), + ("EXR", ".exr", "⏳ Pending Implementation"), + ("DXF", ".dxf", "⏳ Pending Implementation"), + ("PICT", ".pct", "⏳ Pending Implementation"), + ("PS", ".ps", "⏳ Pending Implementation"), + ("POSTSCRIPT", ".ps", "⏳ Pending Implementation"), + ] + + for fmt, ext, status in image_formats: + table.add_row(fmt, ext, status) + + return table + + +def create_quick_reference(): + """Create a quick reference panel with all formats""" + doc_table = create_doc_formats_table() + audio_table = create_audio_formats_table() + video_table = create_video_formats_table() + image_table = create_image_formats_table() + + # Create panels for each category + doc_panel = Panel( + doc_table, title="📄 Documents", border_style="blue", padding=(1, 2) + ) + + audio_panel = Panel( + audio_table, title="🎵 Audio", border_style="magenta", padding=(1, 2) + ) + + video_panel = Panel( + video_table, title="🎬 Video", border_style="green", padding=(1, 2) + ) + + image_panel = Panel( + image_table, title="🖼️ Images", border_style="yellow", padding=(1, 2) + ) + + # Arrange in columns for compact display + top_row = Columns([doc_panel, audio_panel], equal=True, expand=True) + bottom_row = Columns([video_panel, image_panel], equal=True, expand=True) + + return Panel( + Columns([top_row, bottom_row], equal=False), + title="[bold cyan]File Format Support Matrix[/]", + border_style="bright_white", + padding=(1, 2), + ) + + +def create_formats_help(): + """Create a comprehensive help display for formats""" + help_text = Text() + help_text.append("\n📋 ", style="bold blue") + help_text.append("Format 
Conversion Guide\n\n", style="bold white") + + help_text.append(" ✅ ", style="green") + help_text.append("Fully implemented and tested\n", style="white") + + help_text.append(" ⏳ ", style="yellow") + help_text.append("Pending implementation (coming soon)\n\n", style="dim") + + help_text.append(" 🔄 ", style="cyan") + help_text.append("Batch conversions supported\n", style="white") + + help_text.append(" 🎯 ", style="magenta") + help_text.append("Preserves metadata where applicable\n\n", style="white") + + help_text.append(" 💡 ", style="bright_yellow") + help_text.append("Tip: Use ", style="white") + help_text.append("--help ", style="bold cyan") + help_text.append("with any command for specific format options", style="white") + + return Panel( + help_text, title="[bold]Format Help[/]", border_style="cyan", padding=(1, 2) + ) + + +# Export the table creation functions +__all__ = [ + "create_doc_formats_table", + "create_audio_formats_table", + "create_video_formats_table", + "create_image_formats_table", + "create_quick_reference", + "create_formats_help", +] + +# For backward compatibility, also provide the original constants +# But now as formatted strings for legacy code +SUPPORTED_DOC_FORMATS_SHOW = """ +Document Format Conversions: + xlsx → csv, txt, doc/docx, db(sql) + doc/docx → txt, pdf, ppt/pptx, audio(ogg) + txt → pdf, docx/doc, audio(ogg) + pdf → doc/docx, txt, audio(ogg) + pptx/ppt → doc/docx +""" + +SUPPORTED_AUDIO_FORMATS = [ + "wav", + "mp3", + "ogg", + "flv", + "ogv", + "webm", + "aiff", + "flac", + "m4a", + "raw", + "bpf", + "aac", +] + +SUPPORTED_AUDIO_FORMATS_DIRECT = [ + "mp3", + "wav", + "raw", + "ogg", + "aiff", + "flac", + "flv", + "webm", + "ogv", +] + +SUPPORTED_AUDIO_FORMATS_SHOW = """ +Supported Audio Formats: + • WAV • MP3 • OGG • FLV • OGV + • MOV • WEBM • AAC* • BPF* • M4A + • RAW • AIFF • FLAC + + * Pending Implementation +""" + +SUPPORTED_VIDEO_FORMATS = ["MP4", "AVI", "OGV", "WEBM", "MOV", "MKV", "FLV", "WMV"] + +Video_codecs = { + 
"MP4": "mpeg4", + "AVI": "rawvideo", + "WEBM": "libvpx", + "MOV": "mpeg4", + "MKV": "mpeg4", + "FLV": "flv", +} + +SUPPORTED_VIDEO_FORMATS_SHOW = """ +Supported Video Formats: + • MP4 (mpeg4) • AVI (rawvideo) + • OGV* (avc) • WEBM (libvpx) + • MOV (mpeg4) • MKV (mpeg4) + • FLV (flv) • WMV* (WMV) + + * Pending Implementation +""" + +SUPPORTED_IMAGE_FORMATS = { + "JPEG": ".jpeg", + "JPG": ".jpg", + "PNG": ".png", + "GIF": ".gif", + "BMP": ".bmp", + "DIB": ".dib", + "TIFF": ".tiff", + "PIC": ".pic", + "PDF": ".pdf", + "WEBP": ".webp", + "EPS": ".eps", + "ICNS": ".icns", + "PSD": ".psd", + "SVG": ".svg", + "EXR": ".exr", + "DXF": ".dxf", + "PICT": ".pct", + "PS": ".ps", + "POSTSCRIPT": ".ps", +} + +SUPPORTED_IMAGE_FORMATS_SHOW = """ +Supported Image Formats: + • JPEG (.jpeg) • JPG (.jpg) • PNG (.png) + • GIF (.gif) • BMP (.bmp) • DIB (.dib) + • TIFF (.tiff) • PIC (.pic) • PDF (.pdf) + • WEBP (.webp) • ICNS (.icns) • EPS (.eps) + • PSD* (.psd) • SVG* (.svg) • EXR* (.exr) + • DXF* (.dxf) • PICT*(.pct) • PS* (.ps) + + * Pending Implementation +""" + +SUPPORTED_DOCUMENT_FORMATS = [ + "pdf", + "doc", + "docx", + "csv", + "xlsx", + "xls", + "ppt", + "pptx", + "txt", + "ogg", + "mp3", + "audio", +] + + +# Function to display all formats in a beautiful layout +def display_all_formats(): + """Display all format tables in a beautiful layout""" + + console.print("\n") + console.print(create_quick_reference()) + console.print("\n") + console.print(create_formats_help()) + console.print("\n") diff --git a/filewarp/utils/formats_old.py b/filewarp/utils/formats_old.py new file mode 100644 index 0000000..9ab7aac --- /dev/null +++ b/filewarp/utils/formats_old.py @@ -0,0 +1,168 @@ +# multimedia_cli/formats +from .colors import fg, bg, rs + + +RESET = rs + +SUPPORTED_DOC_FORMATS = f""" +|--------------------------------------------------------------------------- +|{bg.BBLUE}Input format{RESET} |{bg.BBLUE}Output format{RESET} | 
+|________________________________|__________________________________________| +| xlsx {fg.BYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) | +| | | +| doc/docx{fg.BYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) | +| | | +| txt {fg.BYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) | +| | | +| pdf {fg.BYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) | +| | | +| pptx/ppt{fg.BYELLOW}-------------------->{RESET}|doc/docx | +| | +|___________________________________________________________________________| +""" + + +# Add supported input and output formats for each media type +SUPPORTED_AUDIO_FORMATS = [ + "wav", # Waveform Audio File Format + "mp3", # MPEG Audio Layer III + "ogg", + "flv", + "ogv", + "webm", + "aiff", + "flac", + "m4a", + "raw", + "bpf", + "aac", +] # Advanced Audio Codec] (Free Lossless Audio Codec) + +SUPPORTED_AUDIO_FORMATS_DIRECT = [ + "mp3", + "wav", + "raw", + "ogg", + "aiff", + "flac", + "flv", # Flash Video + "webm", + "ogv", +] # Video +SUPPORTED_AUDIO_FORMATS_SHOW = f""" +|==============================| +| {bg.BBLUE}Supported I/O formats {RESET} | +|==============================| +| {fg.CYAN} wav {fg.BYELLOW} | +| {fg.CYAN} mp3 {fg.BYELLOW} | +| {fg.CYAN} ogg {fg.BYELLOW} | +| {fg.CYAN} flv {fg.BYELLOW} | +| {fg.CYAN} ogv {fg.BYELLOW} | +| {fg.CYAN} mov {fg.BYELLOW} | +| {fg.CYAN} webm {fg.BYELLOW} | +| {fg.CYAN} aac {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW} +| {fg.CYAN} bpf {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW} +| {fg.CYAN} m4a {fg.BYELLOW} | +| {fg.CYAN} raw {fg.BYELLOW} | +| {fg.CYAN} aiff {fg.BYELLOW} | +-------------------------------- + +""" + +SUPPORTED_VIDEO_FORMATS = [ + "MP4", # MPEG-4 part 14 Bitrate - 860kb/s + "AVI", # Audio Video Interleave + "OGV", + "WEBM", + "MOV", # QuickTime video Bitrate - 1.01mb/s + "MKV", # Matroska video - MKV is known for its support of high-quality content. 
Bitrate-1.01mb/s + "FLV", # Flash video Bitrate + "WMV", +] + + +Video_codecs = { + "MP4": "mpeg4", + "AVI": "rawvideo", + # "OGV": "avc", + "WEBM": "libvpx", + "MOV": "mpeg4", # QuickTime video + "MKV": "mpeg4", # Matroska video + "FLV": "flv", + # "WMV": "WMV" +} +SUPPORTED_VIDEO_FORMATS_SHOW = f""" +,_______________________________________, +|x| {bg.BBLUE}Supported I/O formats{RESET} |x| +|x|-----------------------------------{fg.BYELLOW}|x| +|x| {fg.BMAGENTA} MP4 {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} AVI {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} OGV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW} +|x| {fg.BMAGENTA} WEBM{fg.BYELLOW} |x| +|x| {fg.BMAGENTA} MOV {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} MKV {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} FLV {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} WMV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW} +|,|___________________.BMAGENTA________________|,|{fg.BYELLOW} +""" + +SUPPORTED_IMAGE_FORMATS = { + "JPEG": ".jpeg", # Joint Photographic Experts Group -Lossy compression + "JPG": ".jpg", # Joint Photographic Experts Group - not lossy + "PNG": ".png", + "GIF": ".gif", # Graphics Interchange Format + "BMP": ".bmp", # Windows BMP image + "DIB": ".dib", # Windows BMP image + "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression + "PIC": ".pic", + "PDF": ".pdf", + "WEBP": ".webp", + "EPS": ".eps", + "ICNS": ".icns", # MacOS X icon + # Waiting Implementation 👇 + "PSD": ".psd", + "SVG": ".svg", # Scalable vector Graphics + "EXR": ".exr", + "DXF": ".dxf", # Autocad format 2D + "PICT": ".pct", + "PS": ".ps", # PostSciript + "POSTSCRIPT": ".ps", +} + +SUPPORTED_IMAGE_FORMATS_SHOW = f""" +__________________________________________ +|x|{bg.BBLUE}Supported I/O formats{RESET} |x| +|x|_____________________________________{fg.BYELLOW}|x| +|x| {fg.BMAGENTA} JPEG {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} PNG {fg.BYELLOW} |x| +|x| 
{fg.BMAGENTA} GIF {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} BMP {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} DIB {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} TIFF {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} PIC {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} EXR {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW} +|x| {fg.BMAGENTA} PDF {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} WebP {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} ICNS {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} PSD {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW} +|x| {fg.BMAGENTA} SVG {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW} +|x| {fg.BMAGENTA} EPS {fg.BYELLOW} |x| +|x| {fg.BMAGENTA} Postscript {fg.FMAGENTA}---------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW} +|x| {fg.BMAGENTA} PICT {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW} +|_|_____________________________________|x| +""" + +SUPPORTED_DOCUMENT_FORMATS = [ + "pdf", + "doc", + "docx", + "csv", + "xlsx", + "xls", + "ppt", + "pptx", + "txt", + "ogg", + "mp3", + "audio", +] diff --git a/filewarp/utils/helpmaster.py b/filewarp/utils/helpmaster.py new file mode 100644 index 0000000..6971426 --- /dev/null +++ b/filewarp/utils/helpmaster.py @@ -0,0 +1,25 @@ +from .utils.colors import fg, rs + + +RESET = rs + + +def pdf_combine_help(): + options = f""" + _________________________ + {fg.BWHITE}|Linear: {fg.YELLOW}AA/BB/AAB/BBA{RESET} | + {fg.BWHITE}|Shifted: {fg.YELLOW}AB/BA/ABA/BAB{RESET} | + _________________________""" + + helper = f"""\n\t--------------------------------------------------------------------------------------------- + {fg.BWHITE}|Currently There are 2 supported methods: {fg.FCYAN}Linear and Alternating/shifting.{RESET}\t\t | + |-------------------------------------------------------------------------------------------| + {fg.BWHITE}|->Linear pages are ordered in form of: {fg.CYAN}File1Page1,...Fil1Pagen{RESET} then 
{fg.CYAN}File2Page1,...Fil2Pagen{RESET}|\n\t{fg.BWHITE}|File2 is joined at the end of the file1.\t\t\t\t\t\t | + |-------------------------------------------------------------------------------------------| + {fg.BWHITE}|->Shifting method Picks: {fg.CYAN}File1Page1, File2Page1...File1pagen,File2Pagen{RESET}\t\t | + |--------------------------------------------------------------------------------------------""" + + ex = f"""\t_____________________________________________________ + \t|->{fg.BBLUE}filewarp --pdfjoin file1.pdf file2.pdf --order AAB{RESET}| + \t-----------------------------------------------------""" + return options, helper, ex diff --git a/filewarp/utils/logging_utils.py b/filewarp/utils/logging_utils.py new file mode 100644 index 0000000..4851bce --- /dev/null +++ b/filewarp/utils/logging_utils.py @@ -0,0 +1,76 @@ +""" +Logging configuration for Filemac. +""" + +import logging +import sys +from typing import Optional + + +def setup_logging( + level: int = logging.INFO, + format_string: Optional[str] = None, + log_file: Optional[str] = None, +) -> logging.Logger: + """ + Setup logging configuration for kcleaner. 
+ + Args: + level: Logging level + format_string: Custom format string + log_file: Optional log file path + + Returns: + Configured logger + """ + if format_string is None: + format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + formatter = logging.Formatter(format_string) + + # Root logger + logger = logging.getLogger("filewarp") + logger.setLevel(level) + + # Clear existing handlers + for handler in logger.handlers[:]: + logger.removeHandler(handler) + + # Console handler + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # File handler if specified + if log_file: + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # Prevent propagation to root logger + logger.propagate = False + + return logger + + +class LoggingContext: + """Context manager for temporary logging configuration.""" + + def __init__(self, level: int = logging.INFO, log_file: Optional[str] = None): + self.level = level + self.log_file = log_file + self.original_level = None + self.file_handler = None + + def __enter__(self): + self.original_level = logging.getLogger("filewarp").level + setup_logging(level=self.level, log_file=self.log_file) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + setup_logging(level=self.original_level) + if self.file_handler: + logging.getLogger("filewarp").removeHandler(self.file_handler) + + +logger = setup_logging() diff --git a/filewarp/utils/screen.py b/filewarp/utils/screen.py new file mode 100644 index 0000000..3c6d4de --- /dev/null +++ b/filewarp/utils/screen.py @@ -0,0 +1,19 @@ +"""Provides screen actions like clearing screen etc.""" + +import os +import ctypes + + +def clear_screen(): + """ + Clear the screen using ctypes in windows and os.system('clear') in unix systems + """ + if os.name == "nt": # Windows system + 
ctypes.windll.kernel32.SetConsoleCursorPosition( + ctypes.windll.kernel32.GetStdHandle(-11), (0, 0) + ) + ctypes.windll.kernel32.FillConsoleOutputCharacter( + ctypes.windll.kernel32.GetStdHandle(-11), b"\x00", 80 * 10, (0, 0) + ) + else: # Unix/Linux/MacOS systems + os.system("clear") diff --git a/filewarp/utils/security/vul_mitigate.py b/filewarp/utils/security/vul_mitigate.py new file mode 100644 index 0000000..398fb6e --- /dev/null +++ b/filewarp/utils/security/vul_mitigate.py @@ -0,0 +1,177 @@ +import os +import subprocess +import sqlite3 + +# import shlex +import json +import tempfile +import logging +import html +import requests +from dotenv import load_dotenv +from ...core.exceptions import ValidationError + +# from importlib import resources +from ..colors import fg, rs + +RESET = rs + +pkg_resources = [] + + +class SecurePython: + def __init__(self): + """Initialize security mitigations.""" + load_dotenv() # Load environment variables for secret management + logging.basicConfig(level=logging.INFO) + + # ✅ 1. Prevent Command Injection + def secure_subprocess(self, command_list): + """Runs a secure subprocess command using a list format to prevent command injection.""" + if not isinstance(command_list, list): + raise ValidationError("Command must be a list") + try: + result = subprocess.run( + command_list, check=True, capture_output=True, text=True + ) + return result.stdout + except subprocess.CalledProcessError as e: + logging.error(f"Command failed: {e}") + return None + + # ✅ 2. Prevent Path Traversal + def safe_filepath(self, base_dir, user_input_path): + """Prevents path traversal by restricting access to a safe base directory.""" + full_path = os.path.abspath(os.path.join(base_dir, user_input_path)) + + if not full_path.startswith(os.path.abspath(base_dir)): + raise ValueError("Invalid file path: Path traversal attempt detected") + print(f"{fg.BBLUE}Return safe path: {fg.BGREEN}{full_path}{RESET}") + return full_path + + # ✅ 3. 
Prevent SQL Injection + def safe_sql_query(self, db_path, query, params): + """Executes a parameterized SQL query to prevent SQL injection.""" + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + try: + cursor.execute(query, params) + result = cursor.fetchall() + conn.commit() + return result + except sqlite3.Error as e: + logging.error(f"SQL error: {e}") + return None + finally: + conn.close() + + # ✅ 4. Secure File Handling + def secure_temp_file(self, content): + """Creates a secure temporary file to prevent race conditions.""" + with tempfile.NamedTemporaryFile(delete=True) as temp_file: + temp_file.write(content.encode()) + temp_file.flush() + return temp_file.name # Return temp file path for safe use + + # ✅ 5. Secure Secret Management + def get_secret(self, key): + """Fetches secrets from environment variables.""" + secret = os.getenv(key) + if not secret: + logging.warning(f"Secret {key} is missing!") + return secret + + # ✅ 6. Prevent Insecure Deserialization + def safe_json_load(self, json_string): + """Safely loads JSON instead of using pickle to avoid remote code execution.""" + try: + return json.loads(json_string) + except json.JSONDecodeError as e: + logging.error(f"Invalid JSON: {e}") + return None + + # ✅ 7. Prevent XSS Attacks + def sanitize_html(self, user_input): + """Escapes HTML to prevent XSS attacks.""" + return html.escape(user_input) + + # ✅ 8. 
Check Dependency Vulnerabilities + def check_dependencies(self): + """Checks installed dependencies for known vulnerabilities.""" + try: + installed_packages = { + pkg.key: pkg.version for pkg in pkg_resources.working_set + } + response = requests.get("https://pyup.io/api/v1/safety/") + if response.status_code == 200: + vulnerable_packages = [] + for package, version in installed_packages.items(): + if package in response.json(): + vulnerable_packages.append(package) + if vulnerable_packages: + logging.warning( + f"Vulnerable dependencies found: {vulnerable_packages}" + ) + else: + logging.info("No known vulnerable dependencies detected.") + else: + logging.warning("Failed to fetch vulnerability database.") + except Exception as e: + logging.error(f"Error checking dependencies: {e}") + + # ✅ 9. Secure Logging + def secure_logging(self, message): + """Logs messages securely without sensitive data exposure.""" + sanitized_message = message.replace("password", "*****").replace( + "API_KEY", "*****" + ) + logging.info(sanitized_message) + + # ✅ 10. 
Run All Security Mitigations + def entry_run(self): + """Runs all security mitigations where applicable.""" + logging.info("🔒 Running security mitigations...") + + # Example secure execution + self.secure_subprocess(["echo", "Secure Execution"]) + + # Example secure file path usage + try: + safe_path = self.safe_filepath("/safe/directory", "../etc/passwd") + logging.info(f"Safe path resolved: {safe_path}") + except ValueError as e: + logging.error(e) + + # Example secure SQL execution + self.safe_sql_query(":memory:", "CREATE TABLE test (id INTEGER, name TEXT)", ()) + self.safe_sql_query( + ":memory:", "INSERT INTO test (id, name) VALUES (?, ?)", (1, "John Doe") + ) + + # Example secure file handling + temp_file = self.secure_temp_file("Secure data") + logging.info(f"Created secure temp file at {temp_file}") + + # Example secret fetching + self.get_secret("API_KEY") + + # Example safe JSON parsing + self.safe_json_load('{"key": "value"}') + + # Example HTML sanitization + sanitized_html = self.sanitize_html("") + logging.info(f"Sanitized HTML: {sanitized_html}") + + # Example dependency check + self.check_dependencies() + + # Example secure logging + self.secure_logging("User attempted login with password: mypassword") + + logging.info("✅ All security mitigations executed successfully!") + + +# === Run SecurePython Class === +if __name__ == "__main__": + sp = SecurePython() + sp.entry_run() diff --git a/filewarp/utils/simple.py b/filewarp/utils/simple.py new file mode 100644 index 0000000..08ffceb --- /dev/null +++ b/filewarp/utils/simple.py @@ -0,0 +1,8 @@ +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(levelname)s - %(message)s", +) +logger = logging.getLogger("filewarp") diff --git a/index.html b/index.html new file mode 100644 index 0000000..5422189 --- /dev/null +++ b/index.html @@ -0,0 +1,137 @@ + + + + + + FileMAC - Multimedia File Operation Kit + + + + + +
+
+

FileMAC

+

+ A Comprehensive Multimedia File Operation Kit +

+
+
+ + +
+ +
+

Introduction

+

+ FileMAC is a Python-based command-line interface (CLI) utility + designed for efficient file conversion, manipulation, and analysis. It + supports various multimedia operations, including document conversion, + file analysis, and text-to-speech conversion using Google's + Text-to-Speech (gTTS) library. +

+
+ + +
+

Features

+
    +
  • Convert documents between various formats.
  • +
  • Analyze and manipulate multimedia files.
  • +
  • Generate audio files from text using gTTS.
  • +
  • + Command-line interface for easy integration into scripts and + workflows. +
  • +
  • Supports Linux operating systems.
  • +
  • + Builds on well-established multimedia libraries for robust performance. +
  • +
+
+ + +
+

Installation

+

Install FileMAC using pip:

+
pip install filewarp
+

+ Alternatively, install directly from the GitHub repository: +

+
pip install git+https://github.com/skye-cyber/FileMAC.git
+
+ + +
+

Usage

+

+ After installation, you can use FileMAC through the command line. For + help and available commands, run: +

+
filewarp -h
+

or

+
Filemac -h
+

or

+
FILEMAC -h
+

+ To run the CLI app for specific operations, use the following command + structure: +

+
FileMAC [options] stdin format
+

+ Replace [options] with the desired + operation flags, stdin with the + input file, and format with the + target format or operation. +

+
+ + +
+

License

+

+ FileMAC is licensed under the GPL-3.0 License. For more details, refer + to the LICENSE file in the repository. +

+
+ + +
+

Repository

+

+ For more information, visit the GitHub repository: +

+ https://github.com/skye-cyber/FileMAC +
+
+ + +
+
+

© 2025 FileMAC. All rights reserved.

+
+
+ + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6f1fff4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,40 @@ +click +rich +argparse +pdfminer.six +python-docx +python-pptx +# gTTS +pypandoc +pydub +Pillow +pandas +opencv-python +pytesseract +PyPDF2 +pdf2docx +pymupdf +requests +moviepy +reportlab +numpy +pdf2image +poppler +openpyxl +rich +tqdm +ffmpeg-python +librosa +python-magic +matplotlib +numpy +soundfile +SpeechRecognition +colorama +scipy +PyMuPDF +pyautogui +imageio +pynput +pyaudio +cairosvg diff --git a/res/audio_effects/chipmunk_demo_v.mp4 b/res/audio_effects/chipmunk_demo_v.mp4 new file mode 100644 index 0000000..8e44250 Binary files /dev/null and b/res/audio_effects/chipmunk_demo_v.mp4 differ diff --git a/res/audio_effects/demo.mp3 b/res/audio_effects/demo.mp3 new file mode 100644 index 0000000..b23cec6 Binary files /dev/null and b/res/audio_effects/demo.mp3 differ diff --git a/res/audio_effects/demo_v.mp4 b/res/audio_effects/demo_v.mp4 new file mode 100644 index 0000000..6ea564d Binary files /dev/null and b/res/audio_effects/demo_v.mp4 differ diff --git a/res/audio_effects/high_demo.mp3 b/res/audio_effects/high_demo.mp3 new file mode 100644 index 0000000..7150d0c Binary files /dev/null and b/res/audio_effects/high_demo.mp3 differ diff --git a/res/demo.html b/res/demo.html new file mode 100644 index 0000000..05c5460 --- /dev/null +++ b/res/demo.html @@ -0,0 +1,96 @@ + + + + + + Audio and Video Examples + + + +

Audio

+
filewarp --audio_effect 'demo.mp3' --effect high
+

Original:

+ + +

Result:

+ + +
+ +

Video

+
filewarp --audio_effect 'demo_v.mp4' --effect high
+

Original:

+ + +

Result:

+ + + diff --git a/res/init.css b/res/init.css new file mode 100644 index 0000000..8a196d1 --- /dev/null +++ b/res/init.css @@ -0,0 +1,316 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; +.scrollbar-hide { + /* Hide scrollbar for Chrome, Safari, and Edge */ + -ms-overflow-style: none; /* Internet Explorer 10+ */ + scrollbar-width: none; /* Firefox */ + overflow: -moz-scrollbars-none; /* Older Firefox */ + overflow-y: scroll; /* Add this to ensure the content is scrollable */ + &::-webkit-scrollbar { + display: none; /* Hide scrollbar for Chrome, Safari, and Edge */ + } +} +/* global.css or within a Tailwind plugin */ +@layer utilities { + h1, + h2, + h3, + h4, + h5, + h6 { + margin: 0; /* Reset margin for consistency */ + } + + h1 { + font-size: 2.5rem; /* 40px */ + font-weight: 700; /* bold */ + } + + h2 { + font-size: 2rem; /* 32px */ + font-weight: 600; /* semi-bold */ + } + + h3 { + font-size: 1.75rem; /* 28px */ + font-weight: 500; /* medium */ + } + + h4 { + font-size: 1.5rem; /* 24px */ + font-weight: 400; /* normal */ + } + + h5 { + font-size: 1.25rem; /* 20px */ + font-weight: 300; /* light */ + } + + h6 { + font-size: 1rem; /* 16px */ + font-weight: 200; /* extra light */ + } +} +.pulse { + display: inline-block; + transition: transform 0.3s ease-in-out; +} + +.pulse:hover { + transform: scale(1.1); +} + +@keyframes pulse { + 0% { + transform: scale(1); + } + 50% { + transform: scale(1.5); + } + 100% { + transform: scale(1); + } +} + +.pulse-hover { + display: inline-block; +} + +.pulse-hover:hover { + animation: pulse 1s infinite; +} +/* Reset default scrollbar */ +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +/* Light theme scrollbar */ +::-webkit-scrollbar-track { + background: #2c2c2c; + border-radius: 4px; + opacity: 0.5; +} + +.dark ::-webkit-scrollbar-track { + background: #24486b; + border-radius: 4px; + opacity: 0.5; +} + +::-webkit-scrollbar-thumb { + background: linear-gradient(145deg, #00aa7f, #aaffff, #00aaff); + 
border-radius: 4px; + transition: background-color 0.3s ease; +} + +::-webkit-scrollbar-thumb:hover { + background: #555500; +} + +.dark ::-webkit-scrollbar-thumb { + background: #ffffff; +} + +::-webkit-scrollbar-thumb:active { + background: linear-gradient(135deg, #aa55ff, #aaaaff, #ff55ff); +} + +/* Optional: Add transitions for more natural feel */ +ython.assistant-unused .note:-webkit-scrollbar { + scroll-behavior: smooth; +} + +/* Simulate a placeholder on the contenteditable div */ +#userInput:empty:before { + content: attr(data-placeholder); + color: #9ca3af; +} +/* Always ensure an extra empty row at the bottom */ +#userInput::after { + content: "\A"; /* Inserts a newline */ + white-space: pre; + display: block; + visibility: hidden; + height: 2.4em; /* Adjust this value to match the height of an empty row */ +} + +@keyframes modalEnter { + from { + transform: scale(0); + opacity: 0; + } + to { + transform: scale(1); + opacity: 1; + } +} + +@keyframes modalExit { + from { + transform: scale(1); + opacity: 1; + } + to { + transform: scale(0); + opacity: 0; + } +} + +.animate-enter { + animation: modalEnter 0.4s ease-out forwards; +} + +.animate-exit { + animation: modalExit 0.3s ease-in forwards; +} + +@keyframes singleRipple { + 0% { + transform: scale(0.8); + opacity: 1; + } + 100% { + transform: scale(2.5); + opacity: 0; + } +} +.ripple-single-1 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-2 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} +.ripple-single-3 { + position: absolute; + border: 3px solid; + border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-radius: 50%; + width: 80%; + height: 80%; + animation: 
singleRipple 1.8s infinite; + pointer-events: none; +} +.ripple-single-1 { + animation-delay: 0s; +} +.ripple-single-2 { + animation-delay: 0.6s; +} +.ripple-single-3 { + animation-delay: 1.2s; +} + +/* Light code theme*/ + +/* Dark code theme*/ +.hljs { + background-color: #282c34; + color: #abb2bf; + padding: 15px; + border-radius: 8px; + line-height: 1.5; + font-family: "Fira Code", monospace; +} + +.hljs-keyword { + color: #c678dd; + font-weight: bold; +} + +.hljs-built_in { + color: #e06c74; +} + +.hljs-string { + color: #98c379; +} + +.hljs-number { + color: #d19a66; +} + +.hljs-comment { + color: #5c6370; + font-style: italic; +} + +.hljs-function { + color: #61afef; +} + +.hljs-params { + color: #abb2bf; +} + +.hljs-variable { + color: #d19a66; +} + +.hljs-class { + color: #e5c07b; +} + +.hljs-title { + color: #61afef; +} + +.hljs-attribute { + color: #d19a66; +} + +.hljs-symbol { + color: #61afef; +} + +.hljs-bullet { + color: #abb2bf; +} + +.hljs-meta { + color: #5c6370; +} + +.hljs-link { + color: #61afef; + text-decoration: underline; +} + +.hljs-doctag { + color: #c678dd; + font-weight: bold; +} + +.hljs-tag { + color: #e06c74; +} + +.hljs-name { + color: #61afef; +} + +.hljs-attr { + color: #d19a66; +} + +.hljs-attr { + color: #00aaff; +} + +.hljs-literal { + color: #d19a66; +} diff --git a/res/styles.css b/res/styles.css new file mode 100644 index 0000000..51d299d --- /dev/null +++ b/res/styles.css @@ -0,0 +1,1094 @@ +*, ::before, ::after { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + --tw-numeric-spacing: ; + --tw-numeric-fraction: ; + 
--tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + --tw-contain-size: ; + --tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +::backdrop { + --tw-border-spacing-x: 0; + --tw-border-spacing-y: 0; + --tw-translate-x: 0; + --tw-translate-y: 0; + --tw-rotate: 0; + --tw-skew-x: 0; + --tw-skew-y: 0; + --tw-scale-x: 1; + --tw-scale-y: 1; + --tw-pan-x: ; + --tw-pan-y: ; + --tw-pinch-zoom: ; + --tw-scroll-snap-strictness: proximity; + --tw-gradient-from-position: ; + --tw-gradient-via-position: ; + --tw-gradient-to-position: ; + --tw-ordinal: ; + --tw-slashed-zero: ; + --tw-numeric-figure: ; + --tw-numeric-spacing: ; + --tw-numeric-fraction: ; + --tw-ring-inset: ; + --tw-ring-offset-width: 0px; + --tw-ring-offset-color: #fff; + --tw-ring-color: rgb(59 130 246 / 0.5); + --tw-ring-offset-shadow: 0 0 #0000; + --tw-ring-shadow: 0 0 #0000; + --tw-shadow: 0 0 #0000; + --tw-shadow-colored: 0 0 #0000; + --tw-blur: ; + --tw-brightness: ; + --tw-contrast: ; + --tw-grayscale: ; + --tw-hue-rotate: ; + --tw-invert: ; + --tw-saturate: ; + --tw-sepia: ; + --tw-drop-shadow: ; + --tw-backdrop-blur: ; + --tw-backdrop-brightness: ; + --tw-backdrop-contrast: ; + --tw-backdrop-grayscale: ; + --tw-backdrop-hue-rotate: ; + --tw-backdrop-invert: ; + --tw-backdrop-opacity: ; + --tw-backdrop-saturate: ; + --tw-backdrop-sepia: ; + --tw-contain-size: ; + 
--tw-contain-layout: ; + --tw-contain-paint: ; + --tw-contain-style: ; +} + +/* +! tailwindcss v3.4.17 | MIT License | https://tailwindcss.com +*/ + +/* +1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4) +2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116) +*/ + +*, +::before, +::after { + box-sizing: border-box; + /* 1 */ + border-width: 0; + /* 2 */ + border-style: solid; + /* 2 */ + border-color: #e5e7eb; + /* 2 */ +} + +::before, +::after { + --tw-content: ''; +} + +/* +1. Use a consistent sensible line-height in all browsers. +2. Prevent adjustments of font size after orientation changes in iOS. +3. Use a more readable tab size. +4. Use the user's configured `sans` font-family by default. +5. Use the user's configured `sans` font-feature-settings by default. +6. Use the user's configured `sans` font-variation-settings by default. +7. Disable tap highlights on iOS +*/ + +html, +:host { + line-height: 1.5; + /* 1 */ + -webkit-text-size-adjust: 100%; + /* 2 */ + -moz-tab-size: 4; + /* 3 */ + -o-tab-size: 4; + tab-size: 4; + /* 3 */ + font-family: ui-sans-serif, system-ui, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; + /* 4 */ + font-feature-settings: normal; + /* 5 */ + font-variation-settings: normal; + /* 6 */ + -webkit-tap-highlight-color: transparent; + /* 7 */ +} + +/* +1. Remove the margin in all browsers. +2. Inherit line-height from `html` so users can set them as a class directly on the `html` element. +*/ + +body { + margin: 0; + /* 1 */ + line-height: inherit; + /* 2 */ +} + +/* +1. Add the correct height in Firefox. +2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655) +3. Ensure horizontal rules are visible by default. 
+*/ + +hr { + height: 0; + /* 1 */ + color: inherit; + /* 2 */ + border-top-width: 1px; + /* 3 */ +} + +/* +Add the correct text decoration in Chrome, Edge, and Safari. +*/ + +abbr:where([title]) { + -webkit-text-decoration: underline dotted; + text-decoration: underline dotted; +} + +/* +Remove the default font size and weight for headings. +*/ + +h1, +h2, +h3, +h4, +h5, +h6 { + font-size: inherit; + font-weight: inherit; +} + +/* +Reset links to optimize for opt-in styling instead of opt-out. +*/ + +a { + color: inherit; + text-decoration: inherit; +} + +/* +Add the correct font weight in Edge and Safari. +*/ + +b, +strong { + font-weight: bolder; +} + +/* +1. Use the user's configured `mono` font-family by default. +2. Use the user's configured `mono` font-feature-settings by default. +3. Use the user's configured `mono` font-variation-settings by default. +4. Correct the odd `em` font sizing in all browsers. +*/ + +code, +kbd, +samp, +pre { + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; + /* 1 */ + font-feature-settings: normal; + /* 2 */ + font-variation-settings: normal; + /* 3 */ + font-size: 1em; + /* 4 */ +} + +/* +Add the correct font size in all browsers. +*/ + +small { + font-size: 80%; +} + +/* +Prevent `sub` and `sup` elements from affecting the line height in all browsers. +*/ + +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} + +sub { + bottom: -0.25em; +} + +sup { + top: -0.5em; +} + +/* +1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297) +2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016) +3. Remove gaps between table borders by default. 
+*/ + +table { + text-indent: 0; + /* 1 */ + border-color: inherit; + /* 2 */ + border-collapse: collapse; + /* 3 */ +} + +/* +1. Change the font styles in all browsers. +2. Remove the margin in Firefox and Safari. +3. Remove default padding in all browsers. +*/ + +button, +input, +optgroup, +select, +textarea { + font-family: inherit; + /* 1 */ + font-feature-settings: inherit; + /* 1 */ + font-variation-settings: inherit; + /* 1 */ + font-size: 100%; + /* 1 */ + font-weight: inherit; + /* 1 */ + line-height: inherit; + /* 1 */ + letter-spacing: inherit; + /* 1 */ + color: inherit; + /* 1 */ + margin: 0; + /* 2 */ + padding: 0; + /* 3 */ +} + +/* +Remove the inheritance of text transform in Edge and Firefox. +*/ + +button, +select { + text-transform: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. Remove default button styles. +*/ + +button, +input:where([type='button']), +input:where([type='reset']), +input:where([type='submit']) { + -webkit-appearance: button; + /* 1 */ + background-color: transparent; + /* 2 */ + background-image: none; + /* 2 */ +} + +/* +Use the modern Firefox focus style for all focusable elements. +*/ + +:-moz-focusring { + outline: auto; +} + +/* +Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737) +*/ + +:-moz-ui-invalid { + box-shadow: none; +} + +/* +Add the correct vertical alignment in Chrome and Firefox. +*/ + +progress { + vertical-align: baseline; +} + +/* +Correct the cursor style of increment and decrement buttons in Safari. +*/ + +::-webkit-inner-spin-button, +::-webkit-outer-spin-button { + height: auto; +} + +/* +1. Correct the odd appearance in Chrome and Safari. +2. Correct the outline style in Safari. 
+*/ + +[type='search'] { + -webkit-appearance: textfield; + /* 1 */ + outline-offset: -2px; + /* 2 */ +} + +/* +Remove the inner padding in Chrome and Safari on macOS. +*/ + +::-webkit-search-decoration { + -webkit-appearance: none; +} + +/* +1. Correct the inability to style clickable types in iOS and Safari. +2. Change font properties to `inherit` in Safari. +*/ + +::-webkit-file-upload-button { + -webkit-appearance: button; + /* 1 */ + font: inherit; + /* 2 */ +} + +/* +Add the correct display in Chrome and Safari. +*/ + +summary { + display: list-item; +} + +/* +Removes the default spacing and border for appropriate elements. +*/ + +blockquote, +dl, +dd, +h1, +h2, +h3, +h4, +h5, +h6, +hr, +figure, +p, +pre { + margin: 0; +} + +fieldset { + margin: 0; + padding: 0; +} + +legend { + padding: 0; +} + +ol, +ul, +menu { + list-style: none; + margin: 0; + padding: 0; +} + +/* +Reset default styling for dialogs. +*/ + +dialog { + padding: 0; +} + +/* +Prevent resizing textareas horizontally by default. +*/ + +textarea { + resize: vertical; +} + +/* +1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300) +2. Set the default placeholder color to the user's configured gray 400 color. +*/ + +input::-moz-placeholder, textarea::-moz-placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +input::placeholder, +textarea::placeholder { + opacity: 1; + /* 1 */ + color: #9ca3af; + /* 2 */ +} + +/* +Set the default cursor for buttons. +*/ + +button, +[role="button"] { + cursor: pointer; +} + +/* +Make sure disabled buttons don't get the pointer cursor. +*/ + +:disabled { + cursor: default; +} + +/* +1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14) +2. Add `vertical-align: middle` to align replaced elements more sensibly by default. 
(https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210) + This can trigger a poorly considered lint error in some tools but is included by design. +*/ + +img, +svg, +video, +canvas, +audio, +iframe, +embed, +object { + display: block; + /* 1 */ + vertical-align: middle; + /* 2 */ +} + +/* +Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14) +*/ + +img, +video { + max-width: 100%; + height: auto; +} + +/* Make elements with the HTML hidden attribute stay hidden by default */ + +[hidden]:where(:not([hidden="until-found"])) { + display: none; +} + +.container { + width: 100%; +} + +@media (min-width: 640px) { + .container { + max-width: 640px; + } +} + +@media (min-width: 768px) { + .container { + max-width: 768px; + } +} + +@media (min-width: 1024px) { + .container { + max-width: 1024px; + } +} + +@media (min-width: 1280px) { + .container { + max-width: 1280px; + } +} + +@media (min-width: 1536px) { + .container { + max-width: 1536px; + } +} + +.mx-auto { + margin-left: auto; + margin-right: auto; +} + +.my-8 { + margin-top: 2rem; + margin-bottom: 2rem; +} + +.mb-4 { + margin-bottom: 1rem; +} + +.mb-6 { + margin-bottom: 1.5rem; +} + +.mt-2 { + margin-top: 0.5rem; +} + +.mt-8 { + margin-top: 2rem; +} + +.list-inside { + list-style-position: inside; +} + +.list-disc { + list-style-type: disc; +} + +.rounded-lg { + border-radius: 0.5rem; +} + +.bg-gray-200 { + --tw-bg-opacity: 1; + background-color: rgb(229 231 235 / var(--tw-bg-opacity, 1)); +} + +.bg-gray-800 { + --tw-bg-opacity: 1; + background-color: rgb(31 41 55 / var(--tw-bg-opacity, 1)); +} + +.bg-white { + --tw-bg-opacity: 1; + background-color: rgb(255 255 255 / var(--tw-bg-opacity, 1)); +} + +.bg-gradient-to-r { + background-image: linear-gradient(to right, var(--tw-gradient-stops)); +} + +.from-blue-800 { + --tw-gradient-from: #1e40af var(--tw-gradient-from-position); + --tw-gradient-to: rgb(30 64 175 / 
0) var(--tw-gradient-to-position); + --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to); +} + +.to-blue-600 { + --tw-gradient-to: #2563eb var(--tw-gradient-to-position); +} + +.p-4 { + padding: 1rem; +} + +.p-6 { + padding: 1.5rem; +} + +.py-4 { + padding-top: 1rem; + padding-bottom: 1rem; +} + +.py-8 { + padding-top: 2rem; + padding-bottom: 2rem; +} + +.text-center { + text-align: center; +} + +.text-3xl { + font-size: 1.875rem; + line-height: 2.25rem; +} + +.text-5xl { + font-size: 3rem; + line-height: 1; +} + +.text-lg { + font-size: 1.125rem; + line-height: 1.75rem; +} + +.font-bold { + font-weight: 700; +} + +.font-semibold { + font-weight: 600; +} + +.leading-normal { + line-height: 1.5; +} + +.tracking-normal { + letter-spacing: 0em; +} + +.text-blue-800 { + --tw-text-opacity: 1; + color: rgb(30 64 175 / var(--tw-text-opacity, 1)); +} + +.text-gray-800 { + --tw-text-opacity: 1; + color: rgb(31 41 55 / var(--tw-text-opacity, 1)); +} + +.text-white { + --tw-text-opacity: 1; + color: rgb(255 255 255 / var(--tw-text-opacity, 1)); +} + +.shadow-lg { + --tw-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1); + --tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color); + box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); +} + +.shadow-md { + --tw-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1); + --tw-shadow-colored: 0 4px 6px -1px var(--tw-shadow-color), 0 2px 4px -2px var(--tw-shadow-color); + box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); +} + +h1, + h2, + h3, + h4, + h5, + h6 { + margin: 0; + /* Reset margin for consistency */ +} + +h1 { + font-size: 2.5rem; + /* 40px */ + font-weight: 700; + /* bold */ +} + +h2 { + font-size: 2rem; + /* 32px */ + font-weight: 600; + /* semi-bold */ +} + +h3 { + font-size: 1.75rem; + /* 28px */ + font-weight: 
500; + /* medium */ +} + +h4 { + font-size: 1.5rem; + /* 24px */ + font-weight: 400; + /* normal */ +} + +h5 { + font-size: 1.25rem; + /* 20px */ + font-weight: 300; + /* light */ +} + +h6 { + font-size: 1rem; + /* 16px */ + font-weight: 200; + /* extra light */ +} + +.scrollbar-hide { + /* Hide scrollbar for Chrome, Safari, and Edge */ + -ms-overflow-style: none; + /* Internet Explorer 10+ */ + scrollbar-width: none; + /* Firefox */ + overflow: -moz-scrollbars-none; + /* Older Firefox */ + overflow-y: scroll; + /* Add this to ensure the content is scrollable */ + &::-webkit-scrollbar { + display: none; + /* Hide scrollbar for Chrome, Safari, and Edge */ + } +} + +/* global.css or within a Tailwind plugin */ + +.pulse { + display: inline-block; + transition: transform 0.3s ease-in-out; +} + +.pulse:hover { + transform: scale(1.1); +} + +@keyframes pulse { + 0% { + transform: scale(1); + } + + 50% { + transform: scale(1.5); + } + + 100% { + transform: scale(1); + } +} + +.pulse-hover { + display: inline-block; +} + +.pulse-hover:hover { + animation: pulse 1s infinite; +} + +/* Reset default scrollbar */ + +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +/* Light theme scrollbar */ + +::-webkit-scrollbar-track { + background: #2c2c2c; + border-radius: 4px; + opacity: 0.5; +} + +.dark ::-webkit-scrollbar-track { + background: #24486b; + border-radius: 4px; + opacity: 0.5; +} + +::-webkit-scrollbar-thumb { + background: linear-gradient(145deg, #00aa7f, #aaffff, #00aaff); + border-radius: 4px; + -webkit-transition: background-color 0.3s ease; + transition: background-color 0.3s ease; +} + +::-webkit-scrollbar-thumb:hover { + background: #555500; +} + +.dark ::-webkit-scrollbar-thumb { + background: #ffffff; +} + +::-webkit-scrollbar-thumb:active { + background: linear-gradient(135deg, #aa55ff, #aaaaff, #ff55ff); +} + +/* Optional: Add transitions for more natural feel */ + +ython.assistant-unused .note:-webkit-scrollbar { + scroll-behavior: smooth; +} + +/* 
Simulate a placeholder on the contenteditable div */ + +#userInput:empty:before { + content: attr(data-placeholder); + color: #9ca3af; +} + +/* Always ensure an extra empty row at the bottom */ + +#userInput::after { + content: "\A"; + /* Inserts a newline */ + white-space: pre; + display: block; + visibility: hidden; + height: 2.4em; + /* Adjust this value to match the height of an empty row */ +} + +@keyframes modalEnter { + from { + transform: scale(0); + opacity: 0; + } + + to { + transform: scale(1); + opacity: 1; + } +} + +@keyframes modalExit { + from { + transform: scale(1); + opacity: 1; + } + + to { + transform: scale(0); + opacity: 0; + } +} + +.animate-enter { + animation: modalEnter 0.4s ease-out forwards; +} + +.animate-exit { + animation: modalExit 0.3s ease-in forwards; +} + +@keyframes singleRipple { + 0% { + transform: scale(0.8); + opacity: 1; + } + + 100% { + transform: scale(2.5); + opacity: 0; + } +} + +.ripple-single-1 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + border-image: linear-gradient(45deg, #ff8a65, #ff7043) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-2 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + border-image: linear-gradient(45deg, #ff8a65, #55aaff) 1; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-3 { + position: absolute; + border: 3px solid; + -o-border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-image: linear-gradient(45deg, #55ff7f, #ff7043) 1; + border-radius: 50%; + width: 80%; + height: 80%; + animation: singleRipple 1.8s infinite; + pointer-events: none; +} + +.ripple-single-1 { + animation-delay: 0s; +} + +.ripple-single-2 { + animation-delay: 0.6s; +} + +.ripple-single-3 { + animation-delay: 1.2s; +} + +/* Light code theme*/ + +/* Dark 
code theme*/ + +.hljs { + background-color: #282c34; + color: #abb2bf; + padding: 15px; + border-radius: 8px; + line-height: 1.5; + font-family: "Fira Code", monospace; +} + +.hljs-keyword { + color: #c678dd; + font-weight: bold; +} + +.hljs-built_in { + color: #e06c74; +} + +.hljs-string { + color: #98c379; +} + +.hljs-number { + color: #d19a66; +} + +.hljs-comment { + color: #5c6370; + font-style: italic; +} + +.hljs-function { + color: #61afef; +} + +.hljs-params { + color: #abb2bf; +} + +.hljs-variable { + color: #d19a66; +} + +.hljs-class { + color: #e5c07b; +} + +.hljs-title { + color: #61afef; +} + +.hljs-attribute { + color: #d19a66; +} + +.hljs-symbol { + color: #61afef; +} + +.hljs-bullet { + color: #abb2bf; +} + +.hljs-meta { + color: #5c6370; +} + +.hljs-link { + color: #61afef; + text-decoration: underline; +} + +.hljs-doctag { + color: #c678dd; + font-weight: bold; +} + +.hljs-tag { + color: #e06c74; +} + +.hljs-name { + color: #61afef; +} + +.hljs-attr { + color: #d19a66; + color: #00aaff; +} + +.hljs-literal { + color: #d19a66; +} + +.hover\:underline:hover { + text-decoration-line: underline; +} diff --git a/setup.py b/setup.py index 2d6a010..1e2733a 100644 --- a/setup.py +++ b/setup.py @@ -1,86 +1,125 @@ -'''Build package.''' +"""Build package.""" +# SYSTEM DEPS +# sudo apt-get tesseract-ocr +# sudo apt-get install poppler-utils + import os import subprocess + from setuptools import find_namespace_packages, setup def sri(): - if os.name == 'posix': + if os.name == "posix": result = subprocess.run( - ['dpkg', '-l', 'poppler-utils'], stdout=subprocess.PIPE, text=True) + ["dpkg", "-l", "poppler-utils"], stdout=subprocess.PIPE, text=True + ) if result.returncode != 0: print("Requirement poppler-utils installing") - subprocess.run(['sudo', 'apt', 'install', 'poppler-utils']) + subprocess.run(["sudo", "apt", "install", "poppler-utils"]) - result = subprocess.run( - ['dpkg', '-l', 'speedtest-cli'], stdout=subprocess.PIPE, text=True) - if 
result.returncode != 0: - print("Requirement speedtest-cli -> installing") - subprocess.run(['sudo', 'apt', 'install', 'speedtest-cli']) +def dos_req(): + if os.name == "posix": + subprocess.run( + ["pip", "install", "pdf2docx"], stdout=subprocess.PIPE, text=True + ) -DESCRIPTION = 'Open source Python CLI toolkit for conversion, manipulation, Analysis' -EXCLUDE_FROM_PACKAGES = ["build", "dist", "test"] + +DESCRIPTION = "Open source Python CLI toolkit for conversion, manipulation, Analysis of files (All major file operations)" +EXCLUDE_FROM_PACKAGES = ["build", "dist", "test", "src", "*~", "fweb"] sri() +dos_req() setup( - name="filemac", - author='wambua', - author_email='wambuamwiky2001@gmail.com', - version=open("version.txt").read(), + name="filewarp", + author="wambua", + author_email="swskye17@gmail.com", + version=open(os.path.abspath("version.txt")).read(), packages=find_namespace_packages(exclude=EXCLUDE_FROM_PACKAGES), description=DESCRIPTION, - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + url="https://pypi.org/project/filewarp/", entry_points={ "console_scripts": [ - "filemac=filemac:main" - ]}, - - - python_requires=">=3.6", - install_requires=[ - 'argparse', - 'pdfminer.six', - 'python-docx', - 'python-pptx', - 'gTTS', - 'pypandoc', - 'pydub', - 'requests', - 'Pillow', - 'pandas', - 'opencv-python', - 'pytesseract', - 'PyPDF2', - 'pdf2docx', - 'requests', - 'moviepy', - 'reportlab', - 'numpy', - 'pdf2image' + "filewarp=filewarp:main", + "warp=filewarp:main", + "Filewarp=filewarp:main", + "audiobot=filewarp:audiobot", ], - + }, + python_requires=">=3.8", + install_requires=[ + "click", + "rich", + "argparse", + "pdfminer.six", + "python-docx", + "python-pptx", + "gTTS", + "pypandoc", + "pydub", + "Pillow", + "pandas", + "opencv-python", + "pytesseract", + "PyPDF2", + "pdf2docx", # Needs pymupdf + "pymupdf", + 
"requests", + "moviepy", + "reportlab", + "numpy", + "pdf2image", + #"poppler", # Required by pdf2image + "openpyxl", + "rich", + "tqdm", + "ffmpeg-python", + "librosa", + "python-magic", + "matplotlib", + "numpy", + "soundfile", + "SpeechRecognition", + "colorama", + "scipy", + "PyMuPDF", + "pyautogui", + "imageio", + "pynput", + "pyaudio", + "cairosvg", + "sounddevice", + "wavio" + ], include_package_data=True, zip_safe=False, - license="GPL v3", - keywords=["file-conversion", "file-analysis", "file-manipulation", "ocr", "image-conversion"], - + license="GNU v3", + keywords=[ + "file-conversion", + "document-conversion", + "file-analysis", + "image-conversion", + "file-manipulation", + "audio-conversion", + "ocr", + "image-conversion", + "audio_effects", + "voice_shift", + "pdf", + "docx", + ], classifiers=[ "Environment :: Console", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ], - - ) diff --git a/test_enhanced_cli.py b/test_enhanced_cli.py new file mode 100644 index 0000000..057c736 --- /dev/null +++ b/test_enhanced_cli.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Test script for the enhanced FileMAC CLI +""" + +from filewarp.cli.app import enhanced_argsdev, RichConsoleUtils, EnhancedHelpSystem, ClipboardManager +import sys +import os + +# Add the project root to Python path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + + +def test_basic_functionality(): + """Test basic functionality of the enhanced CLI""" + print("Testing Enhanced FileMAC CLI...") + + # Test Rich console utilities + print("\n1. 
Testing Rich Console Utilities:") + RichConsoleUtils.print_info("This is an info message") + RichConsoleUtils.print_success("This is a success message") + RichConsoleUtils.print_warning("This is a warning message") + RichConsoleUtils.print_error("This is an error message") + RichConsoleUtils.print_header("Test Header", "Subtitle") + + # Test clipboard manager + print("\n2. Testing Clipboard Manager:") + print(f"Clipboard available: {ClipboardManager.is_available()}") + + if ClipboardManager.is_available(): + # Test copy to clipboard + test_text = "FileMAC Enhanced CLI Test" + if ClipboardManager.copy_to_clipboard(test_text): + # Test paste from clipboard + pasted = ClipboardManager.paste_from_clipboard() + if pasted == test_text: + RichConsoleUtils.print_success("Clipboard test passed!") + else: + RichConsoleUtils.print_warning("Clipboard paste test failed") + + # Test help system + print("\n3. Testing Help System:") + print("Showing quick start guide...") + EnhancedHelpSystem.show_quick_start() + + print("\n4. 
Testing Enhanced CLI Entry Point:") + print("This would normally call enhanced_argsdev(), but we'll skip it for testing") + + RichConsoleUtils.print_success("All basic tests completed!") + + +if __name__ == "__main__": + test_basic_functionality() diff --git a/version.txt b/version.txt index 6d7de6e..eca07e4 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.2 +2.1.2 diff --git a/voice/VoiceType.py b/voice/VoiceType.py new file mode 100644 index 0000000..b3617ea --- /dev/null +++ b/voice/VoiceType.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +import sys +import logging +import threading +import speech_recognition as sr +import pyautogui +import subprocess # For Linux typing fallback +from queue import Queue +from threading import Event, Lock +from pynput import keyboard # Replaces `keyboard` for hotkeys + +# Configuration +CONFIG = { + "hotkey_listen": "++v", + "hotkey_exit": "", + "energy_threshold": 300, + "pause_threshold": 0.8, + "timeout_listen": 5, + "lang": "en-US", + "fallback_clipboard": True, + "log_file": "voicetype.log", +} + + +class VoiceTypeEngine: + def __init__(self): + self.r = sr.Recognizer() + self.audio_queue = Queue() + self.is_listening = Event() + self.lock = Lock() + self.setup_logging() + self.configure_recognizer() + self.auto_select_microphone() # Auto-detect microphone + + def setup_logging(self): + logging.basicConfig( + filename=CONFIG["log_file"], + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + def configure_recognizer(self): + self.r.energy_threshold = CONFIG["energy_threshold"] + self.r.dynamic_energy_threshold = False + self.r.pause_threshold = CONFIG["pause_threshold"] + + def safe_type_text(self, text): + """Types text using GUI automation, avoiding root requirements.""" + try: + with self.lock: + if sys.platform == "linux": + # Linux alternative + subprocess.run(["xdotool", "type", text + " "]) + else: + pyautogui.write(text + " ") + except Exception as e: + logging.error("Typing 
failed: %s", e) + print(f"Typing failed: {e}") + if CONFIG["fallback_clipboard"]: + self.clipboard_fallback(text) + + def clipboard_fallback(self, text): + """Fallback method using clipboard if typing fails.""" + try: + import pyperclip + + pyperclip.copy(text) + pyautogui.hotkey("ctrl", "v") + except Exception as e: + logging.error("Clipboard fallback failed: %s", e) + + def process_audio(self): + """Processes recognized speech and converts it to text.""" + while self.is_listening.is_set() or not self.audio_queue.empty(): + try: + audio_data = self.audio_queue.get(timeout=1) + text = self.r.recognize_google(audio_data, language=CONFIG["lang"]) + logging.info("Recognized: %s", text) + print(f"Recognized: \033[1m{text}\033[0m") + self.safe_type_text(text) + except sr.UnknownValueError: + logging.warning("Speech not recognized") + except sr.RequestError as e: + logging.error("API unreachable: %s", e) + except Exception as e: + logging.error("Unexpected error: %s", e) + + def listen_worker(self): + """Listens for speech input and sends it to processing queue.""" + with sr.Microphone(device_index=self.microphone_index) as source: + while self.is_listening.is_set(): + try: + audio = self.r.listen( + source, timeout=CONFIG["timeout_listen"], phrase_time_limit=10 + ) + self.audio_queue.put(audio) + except sr.WaitTimeoutError: + continue + except Exception as e: + logging.error("Recording error: %s", e) + + def auto_select_microphone(self): + """Automatically selects the default microphone.""" + try: + with sr.Microphone() as source: + print(f"Using default microphone: {source}") + self.microphone_index = None # Auto-select default mic + except Exception as e: + logging.error("Microphone access error: %s", e) + sys.exit("Error: Unable to access microphone") + + def start(self): + """Starts the VoiceType engine with hotkey support.""" + logging.info("VoiceType Pro Started") + print( + f"VoiceType Pro Active\nStart typing: {CONFIG['hotkey_listen']}\nExit: 
{CONFIG['hotkey_exit']}" + ) + + listener = keyboard.GlobalHotKeys( + { + CONFIG["hotkey_listen"]: self.toggle_listening, + CONFIG["hotkey_exit"]: self.shutdown, + } + ) + + listener.start() + listener.join() # Keep listening for hotkeys + + def toggle_listening(self): + """Toggles the voice listening state.""" + if not self.is_listening.is_set(): + self.is_listening.set() + threading.Thread(target=self.listen_worker, daemon=True).start() + threading.Thread(target=self.process_audio, daemon=True).start() + else: + self.is_listening.clear() + + def shutdown(self): + """Gracefully shuts down the program.""" + self.is_listening.clear() + logging.info("VoiceType Pro Shutdown") + print("\nVoiceType Pro terminated") + sys.exit(0) + + +if __name__ == "__main__": + try: + engine = VoiceTypeEngine() + engine.start() + except Exception as e: + logging.critical("Critical failure: %s", e) + print(f"Critical error: {str(e)}") + sys.exit(1) diff --git a/voice/__init__.py b/voice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/voice/voice_typing.py b/voice/voice_typing.py new file mode 100644 index 0000000..b3617ea --- /dev/null +++ b/voice/voice_typing.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +import sys +import logging +import threading +import speech_recognition as sr +import pyautogui +import subprocess # For Linux typing fallback +from queue import Queue +from threading import Event, Lock +from pynput import keyboard # Replaces `keyboard` for hotkeys + +# Configuration +CONFIG = { + "hotkey_listen": "++v", + "hotkey_exit": "", + "energy_threshold": 300, + "pause_threshold": 0.8, + "timeout_listen": 5, + "lang": "en-US", + "fallback_clipboard": True, + "log_file": "voicetype.log", +} + + +class VoiceTypeEngine: + def __init__(self): + self.r = sr.Recognizer() + self.audio_queue = Queue() + self.is_listening = Event() + self.lock = Lock() + self.setup_logging() + self.configure_recognizer() + self.auto_select_microphone() # Auto-detect microphone + + 
def setup_logging(self): + logging.basicConfig( + filename=CONFIG["log_file"], + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + def configure_recognizer(self): + self.r.energy_threshold = CONFIG["energy_threshold"] + self.r.dynamic_energy_threshold = False + self.r.pause_threshold = CONFIG["pause_threshold"] + + def safe_type_text(self, text): + """Types text using GUI automation, avoiding root requirements.""" + try: + with self.lock: + if sys.platform == "linux": + # Linux alternative + subprocess.run(["xdotool", "type", text + " "]) + else: + pyautogui.write(text + " ") + except Exception as e: + logging.error("Typing failed: %s", e) + print(f"Typing failed: {e}") + if CONFIG["fallback_clipboard"]: + self.clipboard_fallback(text) + + def clipboard_fallback(self, text): + """Fallback method using clipboard if typing fails.""" + try: + import pyperclip + + pyperclip.copy(text) + pyautogui.hotkey("ctrl", "v") + except Exception as e: + logging.error("Clipboard fallback failed: %s", e) + + def process_audio(self): + """Processes recognized speech and converts it to text.""" + while self.is_listening.is_set() or not self.audio_queue.empty(): + try: + audio_data = self.audio_queue.get(timeout=1) + text = self.r.recognize_google(audio_data, language=CONFIG["lang"]) + logging.info("Recognized: %s", text) + print(f"Recognized: \033[1m{text}\033[0m") + self.safe_type_text(text) + except sr.UnknownValueError: + logging.warning("Speech not recognized") + except sr.RequestError as e: + logging.error("API unreachable: %s", e) + except Exception as e: + logging.error("Unexpected error: %s", e) + + def listen_worker(self): + """Listens for speech input and sends it to processing queue.""" + with sr.Microphone(device_index=self.microphone_index) as source: + while self.is_listening.is_set(): + try: + audio = self.r.listen( + source, timeout=CONFIG["timeout_listen"], phrase_time_limit=10 + ) + self.audio_queue.put(audio) + except 
sr.WaitTimeoutError: + continue + except Exception as e: + logging.error("Recording error: %s", e) + + def auto_select_microphone(self): + """Automatically selects the default microphone.""" + try: + with sr.Microphone() as source: + print(f"Using default microphone: {source}") + self.microphone_index = None # Auto-select default mic + except Exception as e: + logging.error("Microphone access error: %s", e) + sys.exit("Error: Unable to access microphone") + + def start(self): + """Starts the VoiceType engine with hotkey support.""" + logging.info("VoiceType Pro Started") + print( + f"VoiceType Pro Active\nStart typing: {CONFIG['hotkey_listen']}\nExit: {CONFIG['hotkey_exit']}" + ) + + listener = keyboard.GlobalHotKeys( + { + CONFIG["hotkey_listen"]: self.toggle_listening, + CONFIG["hotkey_exit"]: self.shutdown, + } + ) + + listener.start() + listener.join() # Keep listening for hotkeys + + def toggle_listening(self): + """Toggles the voice listening state.""" + if not self.is_listening.is_set(): + self.is_listening.set() + threading.Thread(target=self.listen_worker, daemon=True).start() + threading.Thread(target=self.process_audio, daemon=True).start() + else: + self.is_listening.clear() + + def shutdown(self): + """Gracefully shuts down the program.""" + self.is_listening.clear() + logging.info("VoiceType Pro Shutdown") + print("\nVoiceType Pro terminated") + sys.exit(0) + + +if __name__ == "__main__": + try: + engine = VoiceTypeEngine() + engine.start() + except Exception as e: + logging.critical("Critical failure: %s", e) + print(f"Critical error: {str(e)}") + sys.exit(1)