+"""
+
+from .cli import cli, ArgumentsProcessor
+from .utils.logging_utils import LoggingFormatter, colored_logger
+from .utils.visualizer import audiowave_visualizer
+from .utils.metadata_utils import get_audio_bitrate
+from .core.codec import AudioSegmentArrayCodec
+from .core.effects import VoiceEffectProcessor
+from .core.audio.core import AudioModulator, AudioDenoiser
+
+# Package version string.
+# NOTE(review): audiobot/version.txt added in this same change declares 0.2.1 -
+# confirm which of the two values is authoritative.
+__version__ = "0.2.0"
+# Public API of the package: exactly the names re-exported by the imports above.
+__all__ = [
+    "cli",
+    "ArgumentsProcessor",
+    "LoggingFormatter",
+    "colored_logger",
+    "audiowave_visualizer",
+    "get_audio_bitrate",
+    "AudioSegmentArrayCodec",
+    "VoiceEffectProcessor",
+    "AudioModulator",
+    "AudioDenoiser",
+]
+LOGO = """
+ ///////] /// /// ///////] (O) //////] ///// //////] /////////
+ // // /// /// // // /// /// /// // / /// /// ///
+ ///////// /// /// // / /// /// /// ///// /// /// ///
+ // // /// /// // / /// // // // / // // ///
+// // ////////// /////// / /// /////// /////// /////// ///
+"""
diff --git a/audiobot/cli.py b/audiobot/cli.py
new file mode 100644
index 0000000..4230ab3
--- /dev/null
+++ b/audiobot/cli.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python3
+
+
+"""
+CLI entry point for audiobot.\n
+Implements:\n
+    ArgumentsProcessor, cli -> command-line argument handling, invoked either from other packages or directly from the CLI
+"""
+
+import argparse
+import logging
+
+import os
+
+import magic
+from .utils.metadata_utils import transcribe_audio
+from filewarp.utils.colors import fg, rs
+from .core.processor import VideoProcessor, AudioProcessor
+from .utils.logging_utils import colored_logger
+from .config.core import Config
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
+class ArgumentsProcessor:
+    """Dispatch parsed CLI arguments to the audio or video processing pipeline.
+
+    Args:
+        args: The ``argparse.Namespace`` produced by the parser.
+        parser: The ``argparse.ArgumentParser`` that produced *args*; only
+            used to print the help text.
+    """
+
+    def __init__(self, args, parser):
+        self.args = args
+        self.parser = parser
+        # libmagic handle configured to report MIME types.
+        self.mime = magic.Magic(mime=True)
+        # Results go to --output when given, otherwise to the current directory.
+        self.output_dir = self.args.output if self.args.output else os.getcwd()
+
+    def process(self):
+        """Prepare logging and the output directory, then run single or batch mode."""
+        # NOTE(review): help is printed whenever --audio_effect is set, although
+        # the usage text advertises that flag - confirm this is intended.
+        if not self.args or self.args.audio_effect:
+            self.parser.print_help()
+            return
+
+        if self.args.verbose:
+            logging.getLogger().setLevel(logging.DEBUG)
+            # The colored logger is pinned to INFO and does not propagate, so
+            # raising the root level alone never reveals its debug records.
+            Clogger.setLevel(logging.DEBUG)
+
+        if self.args.output and not os.path.exists(self.args.output):
+            os.makedirs(self.args.output)
+
+        if self.args.batch:
+            self.batch_processor()
+        else:
+            self.mono_processor()
+
+    def _detect_type(self, path):
+        """Return the MIME type libmagic reports for *path* and log it."""
+        file_type = self.mime.from_file(path)
+        Clogger.info(f"{fg.BLUE}Detected file type: {file_type}{RESET}")
+        return file_type
+
+    def _run_pipeline(self, path, file_type):
+        """Process *path* according to *file_type*; return False if unsupported."""
+        if file_type.startswith("audio"):
+            if self.args.transcribe:
+                transcribe_audio(path)
+            AudioProcessor().process_audio_file(
+                path,
+                self.args.effect,
+                self.output_dir,
+                self.args.verbose,
+                self.args.visualize,
+            )
+            return True
+        if file_type.startswith("video"):
+            VideoProcessor().process_video_file(
+                path,
+                self.args.effect,
+                self.output_dir,
+                self.args.verbose,
+                self.args.visualize,
+            )
+            return True
+        return False
+
+    def mono_processor(self):
+        """Process the single file named by ``args.file``; errors are logged."""
+        try:
+            file_type = self._detect_type(self.args.file)
+            if not self._run_pipeline(self.args.file, file_type):
+                Clogger.warning(
+                    f"Unsupported file type: {file_type}. Only audio and video files are supported."
+                )
+        except Exception as e:
+            Clogger.error(e)
+
+    def batch_processor(self):
+        """Process every file found under the directory ``args.file``.
+
+        Each file is handled independently: a failure is logged at error
+        level and the walk continues with the next file.
+        """
+        for root, _, files in os.walk(self.args.file):
+            for file in files:
+                full_path = os.path.join(root, file)
+                try:
+                    file_type = self._detect_type(full_path)
+                    if not self._run_pipeline(full_path, file_type):
+                        Clogger.warning(f"Ignoring unsupported file type: {file}")
+                except Exception as e:
+                    Clogger.error(f"Error processing {full_path}: {e}")
+
+
+def cli(argsv=None):
+    """
+    Receive and process arguments for applying audio effects to audio/video files.
+
+    Args:
+        argsv: Optional list of argument strings. When it is None or empty the
+            arguments are read from ``sys.argv``.
+
+    The denoise settings (``--cutoff``, ``--noise``) are stored in the shared
+    ``Config`` instance before processing is dispatched to ``ArgumentsProcessor``.
+    """
+    # Single source of truth for the effect names, shared by usage and choices.
+    effect_choices = [
+        "robotic",
+        "deep",
+        "high",
+        "echo",
+        "reverb",
+        "whisper",
+        "demonic",
+        "chipmunk",
+        "hacker",
+        "lowpass",
+        "highpass",
+        "distortion",
+        "denoise",
+    ]
+    parser = argparse.ArgumentParser(
+        description="Audiobot: A tool for audio effects on audio and video files.",
+        usage=(
+            "filewarp --audio_effect [-h] [--file FILE] \n"
+            f"        [-e {{{','.join(effect_choices)}}}] \n"
+            "        [--cutoff CUTOFF] [-N {low,high,both}] \n"
+            "        [-o OUTPUT] [-v] [-b] [--visualize] [--transcribe] \n"
+            "        [--audio_effect]"
+        ),
+    )
+    parser.add_argument(
+        "file",
+        help=f"{fg.CYAN}The input audio, video file, or directory.{RESET}",
+    )
+    parser.add_argument(
+        "-e",
+        "--effect",
+        choices=effect_choices,
+        help=f"{fg.CYAN}The voice effect to apply.{RESET}",
+    )
+    parser.add_argument(
+        "--cutoff",
+        type=int,
+        help=f"Cutoff frequency for denoise operation, default={fg.YELLOW}200{RESET}",
+    )
+    parser.add_argument(
+        "-N",
+        "--noise",
+        choices=["low", "high", "both"],
+        type=str,
+        default="low",
+        help=f"Specifies the type of noise to remove choices:[{fg.BLUE}low, high, both{RESET}] default={fg.YELLOW}low{RESET}",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help=f"{fg.CYAN}Output directory for modified files.{RESET}",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help=f"{fg.CYAN}Increase output verbosity.{RESET}",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch",
+        action="store_true",
+        help=f"{fg.CYAN}Batch process all files in a directory.{RESET}",
+    )
+    parser.add_argument(
+        "--visualize",
+        action="store_true",
+        help=f"{fg.CYAN}Visualize the audio waveform before and after modification.{RESET}",
+    )
+    parser.add_argument(
+        "--transcribe",
+        action="store_true",
+        help=f"{fg.CYAN}Transcribe the audio content before applying the effect.{RESET}",
+    )
+    parser.add_argument("--audio_effect", action="store_true", help=argparse.SUPPRESS)
+
+    # parse_args(None) falls back to sys.argv, matching the no-argument call.
+    args = parser.parse_args(argsv if argsv else None)
+
+    # --noise must be honoured even when --cutoff is omitted, so the shared
+    # configuration is always updated; the cutoff is stored only when supplied.
+    config = Config()
+    if args.cutoff is not None:
+        config.options["cutoff"] = args.cutoff
+    config.options["noise"] = args.noise
+
+    # Call argument processor
+    ArgumentsProcessor(args, parser).process()
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/audiobot/config/__init__.py b/audiobot/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/audiobot/config/core.py b/audiobot/config/core.py
new file mode 100644
index 0000000..5efa61d
--- /dev/null
+++ b/audiobot/config/core.py
@@ -0,0 +1,8 @@
+class Config:
+    """Singleton settings holder.
+
+    Every ``Config()`` call returns the same object, whose ``options`` dict
+    is created empty the first time the class is instantiated.
+    """
+
+    _instance = None
+
+    def __new__(cls):
+        existing = cls._instance
+        if existing is not None:
+            return existing
+        created = super().__new__(cls)
+        created.options = {}
+        cls._instance = created
+        return created
diff --git a/audiobot/core/__init__.py b/audiobot/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/audiobot/core/audio/core.py b/audiobot/core/audio/core.py
new file mode 100644
index 0000000..623cab6
--- /dev/null
+++ b/audiobot/core/audio/core.py
@@ -0,0 +1,283 @@
+import numpy as np
+from ...utils.logging_utils import colored_logger
+import librosa
+from pydub import AudioSegment, effects
+from scipy.signal import butter, lfilter, sosfilt
+from ...config.core import Config
+from filewarp.utils.colors import fg, rs
+
+RESET = rs
+
+Clogger = colored_logger()
+config = Config()
+
+
+class AudioModulator:
+    """Collection of voice/audio effects.
+
+    Some methods operate on pydub ``AudioSegment`` objects (``pitch_shift``,
+    ``hacker``, ``whisper``, ``highpass``, ``lowpass``, ``normalize``); the
+    others operate on NumPy sample arrays (``echo``, ``reverb``,
+    ``lowpass_filter``, ``distort``).
+    """
+
+    def __init__(self):
+        # Optional user-supplied cutoff; when set (truthy) it overrides the
+        # ``cutoff`` argument of the filter methods below.
+        self._cutoff = config.options.get("cutoff")
+
+    def pitch_shift(self, audio_segment, n_steps):
+        """Return *audio_segment* pitch-shifted by *n_steps* via librosa.
+
+        Stereo input is folded down to mono first because the samples are
+        handed to librosa as one 1-D signal; the returned segment is then mono.
+        """
+        # Down-mix BEFORE extracting samples, otherwise interleaved L/R values
+        # would be treated as one signal.
+        if audio_segment.channels == 2:
+            audio_segment = audio_segment.set_channels(1)
+
+        raw = audio_segment.get_array_of_samples()
+        # Integer type matching the segment's sample width ('b', 'h' or 'i').
+        pcm_dtype = np.dtype(raw.typecode)
+        samples = np.array(raw, dtype=np.float32)
+
+        shifted_samples = librosa.effects.pitch_shift(
+            samples, sr=audio_segment.frame_rate, n_steps=n_steps
+        )
+
+        # Saturate instead of wrapping around when converting back to PCM.
+        limits = np.iinfo(pcm_dtype)
+        pcm = np.clip(shifted_samples, limits.min, limits.max).astype(pcm_dtype)
+
+        return AudioSegment(
+            pcm.tobytes(),
+            frame_rate=audio_segment.frame_rate,
+            sample_width=audio_segment.sample_width,
+            channels=audio_segment.channels,
+        )
+
+    def hacker(self, audio_segment):
+        """Applies a deep, robotic voice effect used for anonymity.
+
+        Returns the processed ``AudioSegment`` or None when a step fails.
+        """
+
+        # Step 1: Pitch shift down (lower the pitch)
+        Clogger.info("Applying deep pitch shift for hacker voice")
+        deep_voice = self.pitch_shift(audio_segment, n_steps=-10)
+
+        # Step 2: Speed up for robotic effect
+        Clogger.info("Speeding up for robotic effect")
+        robotic_voice = effects.speedup(deep_voice, playback_speed=1.1)
+        if robotic_voice is None:
+            Clogger.error("Speedup failed")
+            return None
+
+        # Step 3: Overlay a delayed copy as echo (check for validity)
+        Clogger.info("Adding subtle echo for distortion")
+        if not isinstance(robotic_voice, AudioSegment):
+            Clogger.error("Robotic voice generation failed")
+            return None
+
+        # Shorter delay for subtle echo
+        delay = AudioSegment.silent(duration=500)
+
+        Clogger.info("Overlaying echo effect")
+
+        try:
+            # NOTE(review): "- 5000" attenuates the delayed copy by 5000 dB,
+            # which is effectively silence - confirm the intended gain.
+            echo_effect = robotic_voice.overlay(delay + robotic_voice - 5000)
+        except Exception as e:
+            Clogger.error(f"Error during overlay: {e}")
+            return None
+
+        # Step 4: Apply low-pass filter (optional)
+        hacker_voice_effect = (
+            effects.low_pass_filter(echo_effect, cutoff=2500) if echo_effect else None
+        )
+        if hacker_voice_effect is None:
+            Clogger.error("Low pass filter failed")
+            return None
+
+        return hacker_voice_effect
+
+    def echo(self, samples, delay=0.2, decay=0.5, sample_rate=44100):
+        """Apply echo effect with a specified delay (seconds) and decay.
+
+        The slicing below only lines up for 1-D *samples*. The result is a
+        float array truncated to the original length.
+        """
+        delay_samples = int(sample_rate * delay)
+        echo_signal = np.zeros(len(samples) + delay_samples)
+
+        echo_signal[: len(samples)] = samples
+        echo_signal[delay_samples:] += decay * samples  # Delayed echo signal
+
+        return echo_signal[: len(samples)]  # Truncate to original length
+
+    def reverb(self, samples, decay=0.7, delay=0.05, sample_rate=44100):
+        """Add one delayed, attenuated copy of the signal to itself.
+
+        Args:
+            samples (numpy.ndarray): 1-D samples or 2-D ``(frames, channels)``.
+            decay (float): Gain applied to the delayed copy.
+            delay (float): Delay in seconds.
+            sample_rate (int): Sample rate in Hz used to convert *delay*.
+
+        Returns:
+            numpy.ndarray: Array with the shape and dtype of *samples*; integer
+            output saturates at the dtype limits. None is returned (and the
+            error logged) if processing fails.
+        """
+        try:
+            samples = np.asarray(samples)
+            delay_samples = int(sample_rate * delay)
+            frames = len(samples)
+
+            # Start from the dry signal so the first `delay_samples` frames
+            # are kept instead of being left at zero.
+            mixed = samples.astype(np.float64)
+            if 0 <= delay_samples < frames:
+                # Vectorised along axis 0, so any number of channels works.
+                mixed[delay_samples:] += decay * samples[: frames - delay_samples]
+
+            if np.issubdtype(samples.dtype, np.integer):
+                limits = np.iinfo(samples.dtype)
+                mixed = np.clip(mixed, limits.min, limits.max)
+            return mixed.astype(samples.dtype)
+        except Exception as e:
+            Clogger.error(e)
+            return None
+
+    def lowpass_filter(self, samples, cutoff=200, sample_rate=44100):
+        """
+        Apply a low-pass filter to remove frequencies higher than the specified cutoff.
+
+        This function uses a 6th-order Butterworth filter to attenuate frequencies above the
+        cutoff frequency, effectively smoothing the audio signal.
+
+        Args:
+            samples (numpy.ndarray): The audio samples, 1-D or ``(frames, channels)``.
+            cutoff (int, optional): The cutoff frequency in Hz. Defaults to 200.
+                Ignored when a cutoff was configured via ``Config``.
+            sample_rate (int, optional): The sample rate of the audio in Hz. Defaults to 44100.
+
+        Returns:
+            numpy.ndarray: The filtered audio samples as a NumPy array.
+        """
+
+        cutoff = self._cutoff if self._cutoff else cutoff
+        Clogger.debug(f"{fg.BLUE}cutoff: {fg.CYAN}{cutoff}{RESET}")
+        Clogger.info("Apply a low-pass filter to remove frequencies higher than cutoff")
+        nyquist = 0.5 * sample_rate
+        normal_cutoff = cutoff / nyquist
+        b, a = butter(6, normal_cutoff, btype="low", analog=False)
+        # axis=0 filters along time; the default (last axis) would filter
+        # across the channels of a (frames, channels) array.
+        filtered_samples = lfilter(b, a, samples, axis=0)
+
+        return filtered_samples
+
+    def distort(self, samples, gain=10, threshold=0.3):
+        """Apply distortion by amplifying and hard-clipping the waveform.
+
+        *threshold* is a fraction of full scale. Integer PCM input is
+        normalised to [-1, 1] before gain/clipping and returned in its
+        original dtype; float input is processed as-is.
+        """
+        Clogger.info("Apply distortion by clipping the waveform.")
+        samples = np.asarray(samples)
+        if np.issubdtype(samples.dtype, np.integer):
+            # Work in float to avoid integer overflow when applying the gain.
+            full_scale = float(np.iinfo(samples.dtype).max)
+            normalized = samples.astype(np.float64) / full_scale
+            clipped = np.clip(normalized * gain, -threshold, threshold)
+            return np.rint(clipped * full_scale).astype(samples.dtype)
+        return np.clip(samples * gain, -threshold, threshold)  # Clip at threshold
+
+    def whisper(self, audio_segment):
+        """Low-pass the segment at 70 Hz and reduce its gain by 10 dB."""
+        return effects.low_pass_filter(audio_segment, 70).apply_gain(-10)
+
+    def highpass(self, audio_segment, cutoff: int = 200):
+        """High-pass filter the segment; a configured cutoff overrides *cutoff*."""
+        cutoff = self._cutoff if self._cutoff else cutoff
+        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
+        return effects.high_pass_filter(audio_segment, cutoff=cutoff)
+
+    def lowpass(self, audio_segment, cutoff: int = 2200):
+        """Low-pass filter the segment; a configured cutoff overrides *cutoff*."""
+        cutoff = self._cutoff if self._cutoff else cutoff
+        Clogger.info(f"Cutoff: {fg.BBLUE}{cutoff}{RESET}")
+        return effects.low_pass_filter(audio_segment, cutoff=cutoff)
+
+    def normalize(self, audio_segment):
+        """Return the segment normalised with ``pydub.effects.normalize``."""
+        return effects.normalize(audio_segment)
+
+
+class AudioDenoiser:
+    """Butterworth low-pass / high-pass filtering of NumPy sample arrays.
+
+    Args:
+        sample_rate (int): Sample rate in Hz of the audio to be filtered.
+    """
+
+    def __init__(self, sample_rate=44100):
+        self.sample_rate = sample_rate
+        # Caches of filter coefficients keyed by (cutoff, order).
+        self._sos_low = {}
+        self._sos_high = {}
+        # Optional user-supplied cutoff; when set (truthy) it overrides the
+        # ``cutoff`` argument of both filters.
+        self._cutoff = config.options.get("cutoff")
+        Clogger.debug(f"{fg.BLUE}cutoff: {fg.CYAN}{self._cutoff}{RESET}")
+
+    def lowpass_filter(
+        self, samples: np.ndarray, cutoff: int = 2200, order: int = 6
+    ) -> np.ndarray:
+        """
+        Apply a low-pass Butterworth filter to remove frequencies above the cutoff.
+
+        Args:
+            samples (np.ndarray): Input samples, 1-D or ``(frames, channels)``.
+            cutoff (int, optional): Cutoff frequency in Hz. Defaults to 2200.
+                A cutoff at or above Nyquist is lowered to 90% of Nyquist.
+            order (int, optional): Order of the filter. Defaults to 6.
+
+        Returns:
+            np.ndarray: The low-pass filtered audio samples.
+
+        Raises:
+            ValueError: If *samples* is not a NumPy array or the cutoff is not positive.
+        """
+        cutoff = self._cutoff if self._cutoff else cutoff
+
+        if not isinstance(samples, np.ndarray):
+            raise ValueError("Input samples must be a NumPy array")
+        if cutoff <= 0:
+            raise ValueError("Cutoff frequency must be positive")
+
+        nyquist = 0.5 * self.sample_rate
+        if cutoff >= nyquist:
+            Clogger.warning(f"Cutoff frequency must be less than Nyquist ({nyquist} Hz)")
+            cutoff = nyquist - (nyquist * 0.1)
+
+        # The coefficients depend on the order as well as the cutoff, so both
+        # are part of the cache key.
+        key = (cutoff, order)
+        if key not in self._sos_low:
+            self._sos_low[key] = butter(
+                order, cutoff / nyquist, btype="low", analog=False, output="sos"
+            )
+
+        # axis=0 filters along time; the default (last axis) would filter
+        # across the channels of a (frames, channels) array.
+        return sosfilt(self._sos_low[key], samples, axis=0)
+
+    def highpass_filter(
+        self, samples: np.ndarray, cutoff: int = 200, order: int = 30
+    ) -> np.ndarray:
+        """
+        Apply a high-pass Butterworth filter to remove frequencies below the cutoff.
+
+        Args:
+            samples (np.ndarray): Input samples, 1-D or ``(frames, channels)``.
+            cutoff (int, optional): Cutoff frequency in Hz. Defaults to 200.
+            order (int, optional): Order of the filter. Defaults to 30.
+
+        Returns:
+            np.ndarray: The high-pass filtered audio samples.
+
+        Raises:
+            ValueError: If *samples* is not a NumPy array, or the cutoff is not
+                strictly between 0 and the Nyquist frequency.
+        """
+
+        cutoff = self._cutoff if self._cutoff else cutoff
+
+        if not isinstance(samples, np.ndarray):
+            raise ValueError("Input samples must be a NumPy array")
+
+        nyquist = 0.5 * self.sample_rate
+        if cutoff <= 0:
+            raise ValueError("Cutoff frequency must be positive")
+        if cutoff >= nyquist:
+            raise ValueError(
+                f"Cutoff frequency must be less than Nyquist ({nyquist} Hz)"
+            )
+
+        key = (cutoff, order)
+        if key not in self._sos_high:
+            self._sos_high[key] = butter(
+                order, cutoff / nyquist, btype="high", analog=False, output="sos"
+            )
+
+        return sosfilt(self._sos_high[key], samples, axis=0)
+
+    def denoise(
+        self,
+        samples: np.ndarray,
+        lowpass_cutoff: int = 2200,
+        highpass_cutoff: int = 200,
+        order: int = 6,
+    ) -> np.ndarray:
+        """
+        Denoise the audio according to the configured ``noise`` mode.
+
+        The mode is read from ``Config`` (falling back to ``"low"``):
+        ``"low"`` applies the low-pass filter, ``"high"`` the high-pass filter,
+        and ``"both"`` applies low-pass followed by high-pass.
+
+        NOTE(review): a configured cutoff overrides BOTH cutoffs, so in
+        ``"both"`` mode the two filters share one frequency - confirm intent.
+
+        Args:
+            samples (np.ndarray): The input audio samples.
+            lowpass_cutoff (int, optional): Cutoff frequency for low-pass filtering. Defaults to 2200 Hz.
+            highpass_cutoff (int, optional): Cutoff frequency for high-pass filtering. Defaults to 200 Hz.
+            order (int, optional): Order of the filters. Defaults to 6.
+
+        Returns:
+            np.ndarray: The denoised audio samples.
+
+        Raises:
+            ValueError: If the configured noise mode is not low/high/both.
+        """
+        noise = config.options.get("noise") if config.options.get("noise") else "low"
+
+        Clogger.info(
+            f"{fg.BLUE}Noise: {fg.CYAN}{config.options.get('noise')}{RESET}"
+        )
+        if noise == "low":
+            # Remove high-frequency noise
+            return self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order)
+        if noise == "high":
+            # Remove low-frequency noise
+            return self.highpass_filter(samples, cutoff=highpass_cutoff, order=order)
+        if noise == "both":
+            # Remove high-frequency noise
+            filtered = self.lowpass_filter(samples, cutoff=lowpass_cutoff, order=order)
+            # Remove low-frequency noise
+            return self.highpass_filter(filtered, cutoff=highpass_cutoff, order=order)
+        # Fail loudly rather than silently handing None to the caller.
+        raise ValueError(f"Unknown noise mode: {noise!r}")
diff --git a/audiobot/core/codec.py b/audiobot/core/codec.py
new file mode 100644
index 0000000..e107d65
--- /dev/null
+++ b/audiobot/core/codec.py
@@ -0,0 +1,67 @@
+from pydub import AudioSegment
+import numpy as np
+
+
+class AudioSegmentArrayCodec:
+    """
+    This class provides functionality to convert between pydub AudioSegments and NumPy arrays.
+
+    It allows for the following conversions:\n
+        1. AudioSegments to NumPy arrays.
+        2. NumPy arrays to AudioSegments.
+    """
+
+    # Signed integer dtype used for each supported PCM sample width (in bytes).
+    _PCM_DTYPES = {1: np.int8, 2: np.int16, 4: np.int32}
+
+    def __init__(self):
+        """Initializes the codec; it keeps no per-instance state."""
+
+    def numpy_to_audiosegment(self, samples, sample_rate, sample_width, channels):
+        """
+        Converts a NumPy array to a pydub AudioSegment.
+
+        Samples whose dtype does not match *sample_width* (for example the
+        float output of a filter) are rounded, saturated to the integer range
+        and cast before serialisation, so the raw bytes are valid PCM.
+
+        Args:
+            samples (numpy.ndarray): The NumPy array representing the audio samples.
+            sample_rate (int): The sample rate of the audio in Hz.
+            sample_width (int): The sample width in bytes (e.g., 2 for 16-bit audio).
+            channels (int): The number of audio channels (1 for mono, 2 for stereo).
+
+        Returns:
+            pydub.AudioSegment: An AudioSegment object created from the NumPy array.
+        """
+        samples = np.asarray(samples)
+
+        # Interleave the channels of a (frames, 2) stereo array.
+        if samples.ndim == 2 and channels == 2:
+            samples = samples.flatten()
+
+        # Unknown widths are passed through unchanged.
+        pcm_dtype = self._PCM_DTYPES.get(sample_width)
+        if pcm_dtype is not None and samples.dtype != pcm_dtype:
+            limits = np.iinfo(pcm_dtype)
+            samples = np.clip(np.rint(samples), limits.min, limits.max).astype(pcm_dtype)
+
+        # Convert the NumPy array to raw audio data
+        raw_data = samples.tobytes()
+
+        # Create a new AudioSegment using the raw audio data
+        return AudioSegment(
+            data=raw_data,
+            sample_width=sample_width,
+            frame_rate=sample_rate,
+            channels=channels,
+        )
+
+    def audiosegment_to_numpy(self, audio_segment):
+        """
+        Converts a pydub AudioSegment to a NumPy array.
+
+        Args:
+            audio_segment (pydub.AudioSegment): The AudioSegment object to convert.
+
+        Returns:
+            tuple: A tuple containing:
+                - numpy.ndarray: The audio samples; shaped ``(n_samples, 2)``
+                  for stereo input, flat otherwise.
+                - int: The sample rate of the audio in Hz.
+        """
+        samples = np.array(audio_segment.get_array_of_samples())
+
+        # If stereo, reshape to (n_samples, 2)
+        if audio_segment.channels == 2:
+            samples = samples.reshape((-1, 2))
+
+        return samples, audio_segment.frame_rate
diff --git a/audiobot/core/effects.py b/audiobot/core/effects.py
new file mode 100644
index 0000000..36e14da
--- /dev/null
+++ b/audiobot/core/effects.py
@@ -0,0 +1,115 @@
+from pydub import effects
+from .codec import AudioSegmentArrayCodec
+from .audio.core import AudioModulator
+from ..utils.logging_utils import colored_logger
+from pydub import AudioSegment
+
+# logger = colored_logger()
+
+
+class VoiceEffectProcessor:
+    """Apply a named voice effect to a pydub ``AudioSegment``.
+
+    Args:
+        audio_segment: The segment to process.
+        effect (str): Effect name (case-insensitive); see ``_get_effects`` for
+            the supported names. None or an empty string means "no effect".
+        verbosity (bool): When True, an unknown effect name is logged.
+    """
+
+    def __init__(self, audio_segment, effect: str, verbosity: bool = False):
+        # The CLI's -e/--effect option is optional, so *effect* may be None.
+        self.effect = effect.lower() if effect else ""
+        self.audio_segment = audio_segment
+        self.verbosity = verbosity
+        self.handler = AudioSegmentArrayCodec()
+        self.logger = colored_logger()
+
+    def _apply_on_samples(self, transform):
+        """Run ``transform(samples, sample_rate)`` on the segment's samples and
+        wrap the result back into an AudioSegment with the original
+        sample width and channel count."""
+        samples, sample_rate = self.handler.audiosegment_to_numpy(self.audio_segment)
+        processed = transform(samples, sample_rate)
+        return self.handler.numpy_to_audiosegment(
+            processed,
+            sample_rate,
+            self.audio_segment.sample_width,
+            self.audio_segment.channels,
+        )
+
+    def _apply_chipmunk(self):
+        return AudioModulator().pitch_shift(
+            effects.speedup(self.audio_segment, 1.01), n_steps=9
+        )
+
+    def _apply_high(self):
+        return AudioModulator().pitch_shift(self.audio_segment, n_steps=4)
+
+    def _apply_lowpass(self):
+        return AudioModulator().lowpass(self.audio_segment)
+
+    def _apply_highpass(self):
+        return AudioModulator().highpass(self.audio_segment)
+
+    def _apply_robotic(self):
+        return AudioModulator().pitch_shift(
+            effects.speedup(self.audio_segment, 1.01), n_steps=-10
+        )
+
+    def _apply_demonic(self):
+        # Pitched-down voice overlaid with a delayed, faded copy of the original.
+        return (
+            AudioModulator()
+            .pitch_shift(effects.speedup(self.audio_segment, 1.01), n_steps=-10)
+            .overlay(
+                AudioSegment.silent(duration=700) + self.audio_segment.fade_out(500)
+            )
+        )
+
+    def _apply_hacker(self):
+        return AudioModulator().hacker(self.audio_segment)
+
+    def _apply_distortion(self):
+        return self._apply_on_samples(
+            lambda samples, _rate: AudioModulator().distort(samples)
+        )
+
+    def _apply_deep(self):
+        return AudioModulator().pitch_shift(self.audio_segment, n_steps=-4)
+
+    def _apply_echo(self):
+        delay = AudioSegment.silent(duration=1000)
+        return self.audio_segment.overlay(delay + self.audio_segment)
+
+    def _apply_whisper(self):
+        return AudioModulator().whisper(self.audio_segment)
+
+    def _apply_reverb(self):
+        # Pass the real sample rate so the delay is correct for non-44.1 kHz audio.
+        return self._apply_on_samples(
+            lambda samples, rate: AudioModulator().reverb(samples, sample_rate=rate)
+        )
+
+    def denoise(self):
+        """Return the segment filtered by ``AudioDenoiser.denoise``."""
+        # AudioDenoiser is defined next to AudioModulator in .audio.core.
+        from .audio.core import AudioDenoiser
+
+        return self._apply_on_samples(
+            lambda samples, rate: AudioDenoiser(sample_rate=rate).denoise(samples)
+        )
+
+    def _get_effects(self):
+        """Return the mapping of effect name -> bound handler."""
+        return {
+            "chipmunk": self._apply_chipmunk,
+            "high": self._apply_high,
+            "lowpass": self._apply_lowpass,
+            "robotic": self._apply_robotic,
+            "demonic": self._apply_demonic,
+            "hacker": self._apply_hacker,
+            "distortion": self._apply_distortion,
+            "deep": self._apply_deep,
+            "echo": self._apply_echo,
+            "whisper": self._apply_whisper,
+            "reverb": self._apply_reverb,
+            "denoise": self.denoise,
+            "highpass": self._apply_highpass,
+        }
+
+    def apply_effect(self):
+        """Apply the configured effect and return the resulting segment.
+
+        An unknown effect leaves the audio unmodified (logged only when
+        ``verbosity`` is set).
+        """
+        effect_handler = self._get_effects().get(self.effect)
+        if effect_handler:
+            return effect_handler()
+        elif self.verbosity:
+            self.logger.critical(f"Unknown voice effect: {self.effect}")
+        return self.audio_segment  # Return unmodified audio if effect is unknown
diff --git a/audiobot/core/processor.py b/audiobot/core/processor.py
new file mode 100644
index 0000000..88a50ed
--- /dev/null
+++ b/audiobot/core/processor.py
@@ -0,0 +1,154 @@
+import os
+from .audio.core import AudioModulator
+from moviepy import AudioFileClip, VideoFileClip
+from ..utils.logging_utils import colored_logger
+from pydub import AudioSegment
+from ..utils.visualizer import audiowave_visualizer
+from ..utils.metadata_utils import get_audio_bitrate
+from .effects import VoiceEffectProcessor
+from filewarp.utils.colors import fg, rs
+import sys
+# import io
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
+class VideoProcessor:
+    """Apply a voice effect to the audio track of a video file."""
+
+    def __init__(self):
+        pass
+
+    def process_video_file(
+        self,
+        input_file,
+        effect,
+        output_dir,
+        verbosity: bool = False,
+        visualize: bool = False,
+    ):
+        """
+        Process video file by applying audio effects and retaining original bitrate.
+
+        The audio track is extracted to a temporary WAV file, processed,
+        normalised and muxed back; the result is written to
+        ``<output_dir>/<effect>_<input basename>``. Intermediate files live in
+        a private temporary directory that is removed on success and failure.
+
+        Args:
+            input_file: Path of the video to process.
+            effect: Name of the voice effect to apply.
+            output_dir: Directory receiving the processed video.
+            verbosity: When True, log each processing step.
+            visualize: When True, plot the waveforms before and after.
+
+        Errors are logged rather than raised; Ctrl-C exits with status 1.
+        """
+        # Function-scope import: tempfile is only needed by this method.
+        import tempfile
+
+        Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}")
+        Clogger.info(f"Processing video file: {input_file}")
+
+        video = None
+        new_audio = None
+        try:
+            # Get the original video bitrate
+            original_bitrate = get_audio_bitrate(input_file, verbosity)
+            if verbosity and original_bitrate:
+                Clogger.info(
+                    f"Original video bitrate: {fg.YELLOW}{original_bitrate}{RESET}"
+                )
+
+            # Load the video
+            video = VideoFileClip(input_file)
+            if video.audio is None:
+                # Fail with a clear message instead of an AttributeError below.
+                raise ValueError("video has no audio track")
+
+            with tempfile.TemporaryDirectory(prefix="audiobot_") as workdir:
+                audio_file = os.path.join(workdir, "temp_audio.wav")
+                modified_file = os.path.join(workdir, "modified_audio.wav")
+                try:
+                    # Extract audio and save it to a file
+                    if verbosity:
+                        Clogger.info("Extract audio and write it to file")
+                    video.audio.write_audiofile(audio_file)
+                    audio_segment = AudioSegment.from_file(audio_file)
+
+                    # Apply the selected voice effect
+                    Clogger.info(
+                        f"Applying the [{fg.BBWHITE}{effect}{RESET}{fg.GREEN}] effect"
+                    )
+                    modified_audio = VoiceEffectProcessor(
+                        audio_segment, effect
+                    ).apply_effect()
+
+                    # Normalize the modified audio
+                    modified_audio = AudioModulator().normalize(modified_audio)
+
+                    # Export the modified audio to a WAV file
+                    if verbosity:
+                        Clogger.info("Export the modified audio to a WAV file")
+                    modified_audio.export(modified_file, format="wav")
+
+                    # Load the modified audio file back into an AudioFileClip
+                    new_audio = AudioFileClip(modified_file)
+
+                    # Set the video to use the modified audio
+                    if verbosity:
+                        Clogger.info("Set the video audio to the new modified audio")
+                    final_video = video.with_audio(new_audio)
+
+                    # Define the output file path
+                    output_file = os.path.join(
+                        output_dir, f"{effect}_{os.path.basename(input_file)}"
+                    )
+
+                    # Use the original bitrate or default to 5000k if unavailable
+                    if verbosity:
+                        Clogger.info(
+                            f"Set:\n\tCodec = [{fg.MAGENTA}libx264{fg.GREEN}\n"
+                            f"\tCodec type = [{fg.MAGENTA}aac{fg.GREEN}\n"
+                            f"\tBitrate = [{fg.MAGENTA}{original_bitrate or '5000k'}{RESET}]"
+                        )
+
+                    final_video.write_videofile(
+                        output_file,
+                        codec="libx264",
+                        audio_codec="aac",
+                        bitrate=original_bitrate or "5000k",
+                    )
+
+                    Clogger.info(f"Modified video saved as: {output_file}")
+                    Clogger.debug(f"Final bitrate = {get_audio_bitrate(output_file)}")
+                    # Optional: visualize the before and after audio
+                    if visualize:
+                        audiowave_visualizer(audio_file, modified_file)
+                finally:
+                    # Release the audio reader before the directory is deleted.
+                    if new_audio is not None:
+                        new_audio.close()
+
+        except KeyboardInterrupt:
+            Clogger.info("Quit")
+            sys.exit(1)
+        except Exception as e:
+            Clogger.error(f"Error processing video file {input_file}: {e}")
+        finally:
+            # Always release the video reader, whatever happened above.
+            if video is not None:
+                video.close()
+
+
+class AudioProcessor:
+    """Apply a voice effect to a standalone audio file."""
+
+    def __init__(self):
+        pass
+
+    def process_audio_file(
+        self, input_file, effect, output_dir, verbosity, visualize=False
+    ):
+        """Load *input_file*, apply *effect*, normalise and export the result.
+
+        The output is exported in WAV format to
+        ``<output_dir>/<effect>_<input basename>``; failures are logged.
+        """
+        Clogger.info(f"Set Voice effect : {fg.MAGENTA}{effect}{RESET}")
+
+        Clogger.info(f"Processing audio file: {fg.MAGENTA}{input_file}{RESET}")
+
+        try:
+            source = AudioSegment.from_file(input_file)
+            if verbosity:
+                print(f"- INFO - Audio channels: {source.channels}")
+                print(f"- INFO - Audio sample width: {source.sample_width}")
+
+            effected = VoiceEffectProcessor(source, effect).apply_effect()
+            normalized = AudioModulator().normalize(effected)
+
+            # NOTE(review): the file keeps the input's extension although the
+            # data is always exported as WAV - confirm this is intended.
+            target_name = f"{effect}_{os.path.basename(input_file)}"
+            output_file = os.path.join(output_dir, target_name)
+            normalized.export(output_file, format="wav")
+            Clogger.info(f"Modified audio saved as: {output_file}")
+
+            if visualize:
+                audiowave_visualizer(input_file, output_file)
+
+        except Exception as e:
+            Clogger.error(f"Error processing audio file {input_file}: {e}")
diff --git a/audiobot/utils/logging_utils.py b/audiobot/utils/logging_utils.py
new file mode 100644
index 0000000..09914a5
--- /dev/null
+++ b/audiobot/utils/logging_utils.py
@@ -0,0 +1,40 @@
+import logging
+from filewarp.utils.colors import fg, rs
+
+RESET = rs
+
+
+class LoggingFormatter(logging.Formatter):
+    """Formatter that wraps each formatted record in a per-level colour code."""
+
+    # Colour prefix for each standard logging level.
+    COLORS = {
+        logging.DEBUG: fg.BBLUE,
+        logging.INFO: fg.GREEN,
+        logging.WARNING: fg.YELLOW,
+        logging.ERROR: fg.RED,
+        logging.CRITICAL: fg.MAGENTA,
+    }
+
+    def format(self, record):
+        """Return the base-formatted message prefixed by its colour and followed by RESET."""
+        # Levels without an entry in COLORS fall back to white.
+        prefix = self.COLORS.get(record.levelno, fg.WHITE)
+        body = super().format(record)
+        return f"{prefix}{body}{RESET}"
+
+
+def colored_logger(logger_name="colored_logger") -> logging.Logger:
+    """
+    Return the named logger, configuring it on first use.
+
+    On first use the logger gets one stream handler with the colouring
+    formatter, level INFO, and propagation to the root logger disabled.
+    A logger that already has handlers is returned unchanged.
+
+    Returns:
+        logging.Logger: The configured logger.
+    """
+    logger = logging.getLogger(logger_name)
+
+    # Already configured: avoid attaching a second handler.
+    if logger.handlers:
+        return logger
+
+    stream = logging.StreamHandler()
+    stream.setFormatter(LoggingFormatter("- %(levelname)s - %(message)s"))
+    logger.addHandler(stream)
+    logger.setLevel(logging.INFO)
+
+    # Prevent log messages from propagating to the root logger.
+    logger.propagate = False
+
+    return logger
diff --git a/audiobot/utils/metadata_utils.py b/audiobot/utils/metadata_utils.py
new file mode 100644
index 0000000..1763fec
--- /dev/null
+++ b/audiobot/utils/metadata_utils.py
@@ -0,0 +1,55 @@
+import speech_recognition as sr
+import ffmpeg
+from .logging_utils import colored_logger
+from filewarp.utils.colors import fg, rs
+
+RESET = rs
+
+Clogger = colored_logger()
+
+
+def get_audio_bitrate(input_file, verbosity=False):
+    """
+    Probe a media file with ffmpeg and return the bitrate of its first video stream.
+
+    Args:
+        input_file (str): The path to the media file.
+        verbosity (bool): When True, log that the probe is being performed.
+
+    Returns:
+        The ``bit_rate`` value reported for the first video stream, or None
+        when the file has no video stream, the stream reports no bitrate, or
+        probing fails. Failures are logged, not raised.
+    """
+    if verbosity:
+        Clogger.info(
+            f"Fetch the original bitrate of the video file using {fg.YELLOW}ffmpeg{RESET}."
+        )
+    try:
+        metadata = ffmpeg.probe(input_file)
+        # Iterate over the streams and find the video stream
+        for stream in metadata["streams"]:
+            if stream.get("codec_type") == "video":
+                return stream.get("bit_rate", None)
+        return None
+    # `ffmpeg.Error or Exception` evaluates to ffmpeg.Error alone; catching
+    # Exception covers it together with every other probing failure.
+    except Exception as e:
+        Clogger.error(f"Error fetching bitrate for {input_file}: {e}")
+        return None
+
+
+def transcribe_audio(input_file):
+    """Transcribe *input_file* with ``recognize_google`` and return the text.
+
+    The transcription is logged; on any failure the error is logged and
+    None is returned.
+    """
+    Clogger.info(f"Transcribing audio: {input_file}")
+    try:
+        engine = sr.Recognizer()
+        with sr.AudioFile(input_file) as audio_source:
+            recording = engine.record(audio_source)
+        text = engine.recognize_google(recording)
+        Clogger.info(f"Transcription: {text}")
+    except Exception as e:
+        Clogger.error(f"Error transcribing audio file {input_file}: {e}")
+        return None
+    return text
diff --git a/audiobot/utils/visualizer.py b/audiobot/utils/visualizer.py
new file mode 100644
index 0000000..d4a5baa
--- /dev/null
+++ b/audiobot/utils/visualizer.py
@@ -0,0 +1,25 @@
+import matplotlib.pyplot as plt
+import soundfile as sf
+from .logging_utils import colored_logger
+
+
+Clogger = colored_logger()
+
+
+def audiowave_visualizer(original_file, modified_file):
+    """Plot the waveforms of two audio files stacked in one figure.
+
+    Both files are read with soundfile; any failure is logged.
+    """
+    Clogger.info(f"Visualizing audio: {original_file} and {modified_file}")
+    try:
+        # The sample rates returned by sf.read are not needed for the plot.
+        original_data, _ = sf.read(original_file)
+        modified_data, _ = sf.read(modified_file)
+
+        plt.figure(figsize=(14, 5))
+        panels = (
+            (1, original_data, "Original Audio Waveform"),
+            (2, modified_data, "Modified Audio Waveform"),
+        )
+        for position, waveform, heading in panels:
+            plt.subplot(2, 1, position)
+            plt.plot(waveform)
+            plt.title(heading)
+        plt.show()
+
+    except Exception as e:
+        Clogger.error(f"Error visualizing audio: {e}")
diff --git a/audiobot/version.txt b/audiobot/version.txt
new file mode 100644
index 0000000..0c62199
--- /dev/null
+++ b/audiobot/version.txt
@@ -0,0 +1 @@
+0.2.1
diff --git a/build/lib/filemac/AudioExtractor.py b/build/lib/filemac/AudioExtractor.py
deleted file mode 100644
index 65172b1..0000000
--- a/build/lib/filemac/AudioExtractor.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-import sys
-from moviepy.editor import VideoFileClip
-import logging
-import logging.handlers
-###############################################################################
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class ExtractAudio:
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- ls = ["mp4", "mkv"]
- if os.path.isfile(file_path) and any(file_path.lower().endswith(ext) for ext in ls):
- files_to_process.append(file_path)
-
- return files_to_process
- except Exception as e:
- print(e)
-
- def moviepyextract(self):
- try:
- video_list = self.preprocess()
- for input_video in video_list:
- print("\033[1;33mExtracting..\033[1;36m")
- video = VideoFileClip(input_video)
- audio = video.audio
- basename, _ = os.path.splitext(input_video)
- outfile = basename + ".wav"
- audio.write_audiofile(outfile)
- # print(f"\033[1;32mFile saved as \033[36m{outfile}\033[0m")
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-
-if __name__ == "__main__":
- vi = ExtractAudio(
- "/home/skye/Music/Melody in My Mind.mp4")
- vi.moviepyextract()
diff --git a/build/lib/filemac/OCRTextExtractor.py b/build/lib/filemac/OCRTextExtractor.py
deleted file mode 100644
index 392ff6d..0000000
--- a/build/lib/filemac/OCRTextExtractor.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import os
-import sys
-import cv2
-import pytesseract
-from PIL import Image
-import logging
-import logging.handlers
-###############################################################################
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-###############################################################################
-'''Do OCR text extraction from a given image file and display the extracted
- text
- to the screen finally save it to a text file assuming the name of the input
- file'''
-
-###############################################################################
-
-
-class ExtractText:
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def OCR(self):
- image_list = self.preprocess()
- ls = ['png', 'jpg']
- image_list = [
- item for item in image_list if any(item.lower().endswith(ext)
- for ext in ls)]
-
- def ocr_text_extraction(image_path):
- '''Load image using OpenCV'''
- img = cv2.imread(image_path)
-
- logger.info(f"\033[2;95mprocessing {image_path}...\033[0m")
-
- try:
- '''Preprocess image for better OCR results'''
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
- thresh = cv2.threshold(
- gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
- img_pil = Image.fromarray(thresh)
-
- '''Perform OCR using pytesseract'''
- config = ("-l eng --oem 3 --psm 6")
- text = pytesseract.image_to_string((img_pil), config=config)
-
- '''Remove extra whitespaces and newlines
- text = ' '.join(text.split()).strip()'''
- logger.info("\033[36mFound:\n\033[0m")
- print(text)
- current_path = os.getcwd()
- file_path = os.path.join(current_path, OCR_file)
- ''' Save the extracted text to specified file '''
- logger.info("\033[1;92mGenerating text file for the extracted \
-text..\033[0m")
-
- with open(file_path, 'w') as file:
- file.write(text)
- logger.info(
- f"File saved as \033[1;93m{OCR_file}\033[0m:")
- '''If there are multiple candidate images for text extraction,
- wait for key press before proceeding to the next
- image otherwise don't wait
- size = [i for i in enumerate(image_list)]'''
- if len(image_list) >= 2:
- input("\033[5;97mPress Enter to continue\033[0m")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(0)
- except FileNotFoundError as e:
- logger.error(f"Error: {str(e)}")
- except IOError as e:
- logger.error(
- f"Could not write to output file '{OCR_file}'. \
-Reason: {str(e)}\033[0m")
- except Exception as e:
- logger.error(f"Error: {type(e).__name__}: {str(e)}")
- except Exception as e:
- logger.error(f"Error:>>\033[31m{e}\033[0m")
- return text
-
- for image_path in image_list:
- OCR_file = image_path[:-4] + ".txt"
- ocr_text_extraction(image_path)
diff --git a/build/lib/filemac/Simple_v_Analyzer.py b/build/lib/filemac/Simple_v_Analyzer.py
deleted file mode 100644
index 3b492bc..0000000
--- a/build/lib/filemac/Simple_v_Analyzer.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import sys
-import cv2
-import numpy as np
-
-
-class SA:
-
- def __init__(self, video):
- self.video = video
-
- def SimpleAnalyzer(self):
- try:
- # Read the video file
- cap = cv2.VideoCapture(self.video)
- print("\033[1;33mInitializing..\033[0m")
- # Initialize variables
- frame_count = 0
- total_area = 0
- duration = 0
-
- print("\033[1;36mWorking on it")
- while True:
- ret, frame = cap.read()
-
- if not ret:
- break
- # Increase frame count and accumulate area
- frame_count += 1
- total_area += np.prod(frame.shape[:2])
-
- # Calculate current frame duration
- fps = cap.get(cv2.CAP_PROP_FPS)
- duration += 1 / fps
-
- # Display the resulting frame
- cv2.imshow('Frame', frame)
-
- # Break the loop after pressing 'q'
- if cv2.waitKey(1) == ord('q'):
- break
-
- # Release the video capture object and close all windows
- cap.release()
- cv2.destroyAllWindows()
-
- # Print results
- print(f"Total Frames: \033[1;32m{frame_count}\033[0m")
- print(f"Average Frame Area: \033[1;32m{total_area / frame_count}\033[0m")
- print(f"Duration: \033[1;32m{duration}\033[0m seconds")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- print(e)
- sys.exit(1)
-
-
-if __name__ == "__main__":
- vi = SA("/home/skye/Music/Melody in My Mind.mp4")
- vi.SimpleAnalyzer()
diff --git a/build/lib/filemac/__init__.py b/build/lib/filemac/__init__.py
deleted file mode 100644
index e32c40a..0000000
--- a/build/lib/filemac/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .fmac import main
diff --git a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc
deleted file mode 100644
index 36b350c..0000000
Binary files a/build/lib/filemac/__pycache__/AudioExtractor.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc b/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc
deleted file mode 100644
index 2e0efeb..0000000
Binary files a/build/lib/filemac/__pycache__/OCRTextExtractor.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc b/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc
deleted file mode 100644
index a29f114..0000000
Binary files a/build/lib/filemac/__pycache__/Simple_v_Analyzer.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/converter.cpython-311.pyc b/build/lib/filemac/__pycache__/converter.cpython-311.pyc
deleted file mode 100644
index cbc7e1f..0000000
Binary files a/build/lib/filemac/__pycache__/converter.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/__pycache__/formats.cpython-311.pyc b/build/lib/filemac/__pycache__/formats.cpython-311.pyc
deleted file mode 100644
index d2b6f26..0000000
Binary files a/build/lib/filemac/__pycache__/formats.cpython-311.pyc and /dev/null differ
diff --git a/build/lib/filemac/colors.py b/build/lib/filemac/colors.py
deleted file mode 100644
index 7e03e49..0000000
--- a/build/lib/filemac/colors.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-
-from colorama import Fore, Style, init
-
-init(autoreset=True)
-
-if os.name == "posix":
- RESET = '\033[0m'
- RED = '\033[91m'
- DRED = '\033[1;91m'
- GREEN = '\033[92m'
- DGREEN = '\033[1;92m'
- YELLOW = '\033[93m'
- DYELLOW = '\033[1;93m'
- BLUE = '\033[94m'
- DBLUE = '\033[1;94m'
- MAGENTA = '\033[95m'
- DMAGENTA = '\033[1;95m'
- CYAN = '\033[96m'
- DCYAN = '\033[1;96m'
- ICYAN = '\033[3;96m'
-
-elif os.name == "nt":
- RESET = Style.RESET_ALL
- RED = Fore.LIGHTRED_EX
- DRED = Fore.RED
- GREEN = Fore.LIGHTGREEN_EX
- DGREEN = Fore.GREEN
- YELLOW = Fore.LIGHTYELLOW_EX
- DYELLOW = Fore.YELLOW
- BLUE = Fore.LIGHTBLUE_EX
- DBLUE = Fore.BLUE
- MAGENTA = Fore.LIGHTMAGENTA_EX
- DMAGENTA = Fore.MAGENTA
- CYAN = Fore.LIGHTCYAN_EX
- DCYAN = Fore.CYAN
- ICYAN = Fore.WHITE
-
-#return RESET, RED, DRED, GREEN, DGREEN, YELLOW, DYELLOW, BLUE, DBLUE,
-#MAGENTA, DMAGENTA, CYAN, DCYAN
diff --git a/build/lib/filemac/converter.py b/build/lib/filemac/converter.py
deleted file mode 100644
index a46a46f..0000000
--- a/build/lib/filemac/converter.py
+++ /dev/null
@@ -1,1027 +0,0 @@
-#############################################################################
-import logging
-import logging.handlers
-# import math
-import os
-import re
-import sqlite3
-import subprocess
-import sys
-import time
-import traceback
-# import pdfminer.high_level
-# from typing import Iterable
-from pdf2image import convert_from_path
-import cv2
-import pandas as pd
-import pydub
-import PyPDF2
-# import pytesseract
-import requests
-import speedtest
-from docx import Document
-# from pydub.playback import play
-from gtts import gTTS
-# from PyPDF2 import PdfFileReader
-from moviepy.editor import VideoFileClip
-from pdf2docx import parse
-from PIL import Image
-from pptx import Presentation
-from pydub import AudioSegment
-from .colors import (RESET, GREEN, DGREEN, YELLOW, DYELLOW, CYAN, BLUE, DBLUE,
- MAGENTA, DMAGENTA, RED, DRED, ICYAN)
-from reportlab.lib.pagesizes import letter
-from reportlab.platypus import Paragraph, SimpleDocTemplate
-
-from .formats import (SUPPORTED_AUDIO_FORMATS, SUPPORTED_IMAGE_FORMATS,
- SUPPORTED_VIDEO_FORMATS)
-
-# import pygame
-# from aspose.words import Document as aspose_document
-# from aspose.slides import Presentation as aspose_presentation
-# from show_progress import progress_show
-# from PIL import ImageDraw, ImageFont
-###############################################################################
-
-PYGAME_DETECT_AVX2 = 1
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class MakeConversion:
-
- '''Initialize the class'''
-
- def __init__(self, input_file):
- self.input_file = input_file
-
- '''Check input object whether it's a file or a directory if a file append
- the file to a set and return it otherwise append directory full path
- content to the set and return the set file. The returned set will be
- evaluated in the next step as required on the basis of requested operation
- For every requested operation, the output file if any is automatically
- generated on the basis of the input filename and saved in the sam
- directory as the input file
- '''
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
- except Exception as e:
- print(e)
-
-###############################################################################
-# Convert word file to pdf document (docx)
-###############################################################################
- def word_to_pdf(self):
- word_list = self.preprocess()
- ls = ["doc", "docx"]
- word_list = [
- item for item in word_list if any(item.lower().endswith(ext) for ext in ls)]
- for word_file in word_list:
- if word_file.lower().endswith("doc"):
- pdf_file = word_file[:-3] + "pdf"
- elif word_file.lower().endswith("docx"):
- pdf_file = word_file[:-4] + "pdf"
-
- try:
- print(
- f'{BLUE}Converting: {RESET}{word_file} {BLUE}to {RESET}{pdf_file}')
- if os.name == 'posix': # Check if running on Linux
- # Use subprocess to run the dpkg and grep commands
- result = subprocess.run(
- ['dpkg', '-l', 'libreoffice'], stdout=subprocess.PIPE, text=True)
- if result.returncode != 0:
- print(
- "Please install libreoffice to use this functionality !")
- sys.exit(1)
- subprocess.run(['soffice', '--convert-to',
- 'pdf', word_file, pdf_file])
- # print(f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}")
- elif os.name == "nt":
- try:
- from docx2pdf import convert
- except ImportError:
- print("Run pip install docx2pdf for this function to work")
- sys.exit(1)
- convert(word_file, pdf_file)
- print(
- f"{DMAGENTA} Successfully converted {word_file} to {pdf_file}{RESET}")
-
- except Exception as e:
- print(f"Error converting {word_file} to {pdf_file}: {e}")
-
-###############################################################################
-# Convert pdf file to word document (docx)
-###############################################################################
- def pdf_to_word(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
- for pdf_file in pdf_list:
- if pdf_file.lower().endswith("pdf"):
- word_file = pdf_file[:-3] + "docx"
-
- try:
-
- parse(pdf_file, word_file, start=0, end=None)
-
- print(f'{GREEN}Converting to word..{RESET}', end='\r')
-
- logger.info(f"{DMAGENTA} Successfully converted{pdf_file} \
-to {word_file}{RESET}")
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- logger.info(f'{DRED}All conversion attempts have failed: \
-{e}{RESET}')
-
-###############################################################################
-# Convert text file(s) to pdf document (docx)
-###############################################################################
- def txt_to_pdf(input_file, output_file):
- """Convert a .txt file to a PDF."""
-
- # Read the contents of the input .txt file
- with open(input_file, 'r', encoding='utf-8') as file:
- text_contents = file.readlines()
-
- # Initialize the PDF document
- doc = SimpleDocTemplate(output_file, pagesize=letter)
-
- # Create a story to hold the elements of the PDF
- story = []
-
- # Iterate through each line in the input .txt file and add it to the PDF
- for line in text_contents:
- story.append(Paragraph(line.strip(), style="normalText"))
-
- # Build and write the PDF document
- doc.build(story)
-
-###############################################################################
-# Convert word file(s) to pptx document (pptx/ppt)
-###############################################################################
- def word_to_pptx(self):
- word_list = self.preprocess()
- word_list = [item for item in word_list if item.lower().endswith(
- "docx") or item.lower().endswith("doc")]
-
- for word_file in word_list:
-
- if word_list is None:
- print("Please provide appropriate file type")
- sys.exit(1)
- if word_file.lower().endswith("docx"):
- pptx_file = word_file[:-4] + "pptx"
- elif word_file.lower().endswith("doc"):
- pptx_file = word_file[:-3] + "pptx"
- try:
- # Load the Word document
- print(F"{DYELLOW}Load the Word document..{RESET}")
- doc = Document(word_file)
-
- # Create a new PowerPoint presentation
- print(F"{DYELLOW}Create a new PowerPoint presentation..{RESET}")
- prs = Presentation()
-
- # Iterate through each paragraph in the Word document
- print(
- f"{DGREEN}Populating pptx slides with {DYELLOW}{len(doc.paragraphs)}{DGREEN} entries..{RESET}")
- count = 0
- for paragraph in doc.paragraphs:
- count += 1
- perc = (count/len(doc.paragraphs))*100
- print(
- f"{DMAGENTA}Progress:: \033[1;36m{perc:.2f}%{RESET}", end="\r")
- # Create a new slide in the PowerPoint presentation
- slide = prs.slides.add_slide(prs.slide_layouts[1])
-
- # Add the paragraph text to the slide
- slide.shapes.title.text = paragraph.text
-
- # Save the PowerPoint presentation
- prs.save(pptx_file)
- print(f"\n{DGREEN}Done{RESET}")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- logger.error(e)
-
-###############################################################################
-# Convert word file to txt file'''
-###############################################################################
-
- def word_to_txt(self):
- word_list = self.preprocess()
- word_list = [item for item in word_list if item.lower().endswith(
- "docx") or item.lower().endswith("doc")]
- for file_path in word_list:
- if file_path.lower().endswith("docx"):
- txt_file = file_path[:-4] + "txt"
- elif file_path.lower().endswith("doc"):
- txt_file = file_path[:-3] + "txt"
- try:
- doc = Document(file_path)
- print("INFO Processing...")
-
- with open(txt_file, 'w', encoding='utf-8') as f:
- Par = 0
- for paragraph in doc.paragraphs:
- f.write(paragraph.text + '\n')
- Par += 1
-
- print(f"Par:{BLUE}{Par}/{len(doc.paragraphs)}{RESET}", end='\r')
- logger.info(f"{DMAGENTA}Conversion of file to txt success{RESET}")
-
- except KeyboardInterrupt:
- print("\nExit")
- sys.exit()
- except Exception as e:
- logger.error(
- f"Dear user something went amiss while attempting the conversion:\n {e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(f"Couldn't convert {file_path} to {txt_file}:\
-REASON->{e}")
-
-###############################################################################
-# Convert pdf file to text file
-###############################################################################
- def pdf_to_txt(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
- for file_path in pdf_list:
- txt_file = file_path[:-3] + "txt"
- try:
- with open(file_path, 'rb') as file:
- pdf_reader = PyPDF2.PdfReader(file)
- text = ''
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- text += page.extract_text()
- with open(txt_file, 'w', encoding='utf-8') as f:
- f.write(text)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
-{txt_file}{RESET}")
- except Exception as e:
- logger.error(
- f"Oops somethin went astray while converting {file_path} \
-to {txt_file}: {e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Error converting {file_path} to {txt_file}: {e}\n")
-
-###############################################################################
-# Convert ppt file to word document
-###############################################################################
- def ppt_to_word(self):
- ppt_list = self.preprocess()
- ppt_list = [item for item in ppt_list if item.lower().endswith(
- "pptx") or item.lower().endswith("ppt")]
- for file_path in ppt_list:
- if file_path.lower().endswith("pptx"):
- word_file = file_path[:-4] + "docx"
- elif file_path.lower().endswith("ppt"):
- word_file = file_path[:-3] + "docx"
- try:
- presentation = Presentation(file_path)
- document = Document()
-
- for slide in presentation.slides:
- for shape in slide.shapes:
- if shape.has_text_frame:
- text_frame = shape.text_frame
- for paragraph in text_frame.paragraphs:
- new_paragraph = document.add_paragraph()
- for run in paragraph.runs:
- new_run = new_paragraph.add_run(run.text)
- # Preserve bold formatting
- new_run.bold = run.font.bold
- # Preserve italic formatting
- new_run.italic = run.font.italic
- # Preserve underline formatting
- new_run.underline = run.font.underline
- # Preserve font name
- new_run.font.name = run.font.name
- # Preserve font size
- new_run.font.size = run.font.size
- try:
- # Preserve font color
- new_run.font.color.rgb = run.font.color.rgb
- except AttributeError:
- # Ignore error and continue without
- # setting the font color
- pass
- # Add a new paragraph after each slide
- document.add_paragraph()
- document.save(word_file)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
- {word_file}{RESET}")
- except Exception as e:
- logger.error(
- f"Oops somethin gwent awry while attempting to convert \
- {file_path} to {word_file}:\n>>>{e}")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Oops something went astray while attempting \
- convert {file_path} to {word_file}:{e}\n")
-
-###############################################################################
-# Convert text file to word
-###############################################################################
- def text_to_word(self):
- flist = self.preprocess()
- flist = [item for item in flist if item.lower().endswith("txt")]
- for file_path in flist:
- if file_path.lower().endswith("txt"):
- word_file = file_path[:-3] + "docx"
-
- try:
- # Read the text file
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
- text_content = file.read()
-
- # Filter out non-XML characters
- filtered_content = re.sub(
- r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]+', '', text_content)
-
- # Create a new Word document
- doc = Document()
- # Add the filtered text content to the document
- doc.add_paragraph(filtered_content)
-
- # Save the document as a Word file
- doc.save(word_file)
- logger.info(f"{DMAGENTA}Successfully converted {file_path} to \
- {word_file}{RESET}")
- except FileExistsError as e:
- logger.error(f"{str(e)}")
- except Exception as e:
- logger.error(
- f"Oops Unable to perfom requested conversion: {e}\n")
- with open("conversion.log", "a") as log_file:
- log_file.write(
- f"Error converting {file_path} to {word_file}: \
-{e}\n")
-
-###############################################################################
-# Convert xlsx file(s) to word file(s)
-###############################################################################
- def convert_xls_to_word(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [item for item in xls_list if any(
- item.lower().endswith(ext) for ext in ls)]
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- for xls_file in xls_list:
- if xls_file.lower().endswith("xlsx"):
- word_file = xls_file[:-4] + "docx"
- elif xls_file.lower().endswith("xls"):
- word_file = xls_file[:-3] + "docx"
- try:
- '''Read the XLS file using pandas'''
-
- df = pd.read_excel(xls_file)
-
- '''Create a new Word document'''
- doc = Document()
-
- '''Iterate over the rows of the dataframe and add them to the
- Word document'''
- logger.info(f"{ICYAN}Converting {xls_file}..{RESET}")
- # time.sleep(2)
- total_rows = df.shape[0]
- for _, row in df.iterrows():
- current_row = _ + 1
- percentage = (current_row / total_rows)*100
- for value in row:
- doc.add_paragraph(str(value))
- print(f"Row {DYELLOW}{current_row}/{total_rows} \
-{DBLUE}{percentage:.1f}%{RESET}", end="\r")
- # print(f"\033[1;36m{row}{RESET}")
-
- # Save the Word document
- doc.save(word_file)
- print(F"{DGREEN}Conversion successful!{RESET}", end="\n")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- print("Oops Conversion failed:", str(e))
-
-###############################################################################
- '''Convert xlsx/xls file/files to text file format'''
-###############################################################################
-
- def convert_xls_to_text(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [
- item for item in xls_list if any(item.lower().endswith(ext)
- for ext in ls)]
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- for xls_file in xls_list:
- if xls_file .lower().endswith("xlsx"):
- txt_file = xls_file[:-4] + "txt"
- elif xls_file .lower().endswith("xls"):
- txt_file = xls_file[:-3] + "txt"
- try:
- # Read the XLS file using pandas
- logger.info(f"Converting {xls_file}..")
- df = pd.read_excel(xls_file)
-
- # Convert the dataframe to plain text
- text = df.to_string(index=False)
- chars = len(text)
- words = len(text.split())
- lines = len(text.splitlines())
-
- print(
- f"Preparing to write: {DYELLOW}{chars} \033[1;30m \
-characters{DYELLOW} {words}\033[1;30m words {DYELLOW}{lines}\033[1;30m \
-lines {RESET}", end="\n")
- # Write the plain text to the output file
- with open(txt_file, 'w') as file:
- file.write(text)
-
- print(F"{DGREEN}Conversion successful!{RESET}", end="\n")
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- print("Oops Conversion failed:", str(e))
-
-###############################################################################
- '''Convert xlsx/xls file to csv(comma seperated values) format'''
-###############################################################################
-
- def convert_xlsx_to_csv(self):
- xls_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xls_list = [
- item for item in xls_list if any(item.lower().endswith(ext)
- for ext in ls)]
- for xls_file in xls_list:
- if xls_file.lower().endswith("xlsx"):
- csv_file = xls_file[:-4] + "csv"
- elif xls_file.lower().endswith("xls"):
- csv_file = xls_file[:-3] + "csv"
- try:
- '''Load the Excel file'''
- print(F"{DGREEN}Initializing conversion sequence{RESET}")
- df = pd.read_excel(xls_file)
- logger.info(f"Converting {xls_file}..")
- total_rows = df.shape[0]
- print(f"Writing {DYELLOW}{total_rows} rows {RESET}", end="\n")
- for i in range(101):
- print(f"Progress: {i}%", end="\r")
- '''Save the DataFrame to CSV'''
- df.to_csv(csv_file, index=False)
- print(F"{DMAGENTA} Conversion successful{RESET}")
- except KeyboardInterrupt:
- print("Exiting")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-###############################################################################
-# Convert xlsx file(s) to sqlite
-###############################################################################
-
- def convert_xlsx_to_database(self):
- xlsx_list = self.preprocess()
- ls = ["xlsx", "xls"]
- xlsx_list = [
- item for item in xlsx_list if any(item.lower().endswith(ext)
- for ext in ls)]
- for xlsx_file in xlsx_list:
- if xlsx_file.lower().endswith("xlsx"):
- sqlfile = xlsx_file[:-4]
- elif xlsx_file.lower().endswith("xls"):
- sqlfile = xlsx_file[:-3]
- try:
- db_file = input(
- F"{DBLUE}Please enter desired sql filename: {RESET}")
- table_name = input(
- "Please enter desired table name: ")
- # res = ["db_file", "table_name"]
- if any(db_file) == "":
- db_file = sqlfile + "sql"
- table_name = sqlfile
- if not db_file.endswith(".sql"):
- db_file = db_file + ".sql"
- column = 0
- for i in range(20):
- column += 0
- # Read the Excel file into a pandas DataFrame
- print(f"Reading {xlsx_file}...")
- df = pd.read_excel(xlsx_file)
- print(f"{DGREEN}Initializing conversion sequence{RESET}")
- print(f"{DGREEN} Connected to sqlite3 database::{RESET}")
- # Create a connection to the SQLite database
- conn = sqlite3.connect(db_file)
- print(F"{DYELLOW} Creating database table::{RESET}")
- # Insert the DataFrame into a new table in the database
- df.to_sql(table_name, column, conn,
- if_exists='replace', index=False)
- print(
- f"Operation successful{RESET} file saved as \033[32{db_file}{RESET}")
- # Close the database connection
- conn.close()
- except KeyboardInterrupt:
- print("\nExiting")
- sys.exit(1)
- except Exception as e:
- logger.error(f"{e}")
-
-###############################################################################
-# Create image objects from given files
-###############################################################################
- def doc2image(self, outf="png"):
- outf_list = ['png', 'jpg']
- if outf not in outf_list:
- outf = "png"
- path_list = self.preprocess()
- ls = ["pdf", "doc", "docx"]
- file_list = [
- item for item in path_list if any(item.lower().endswith(ext)
- for ext in ls)]
- imgs = []
- for file in file_list:
- if file.lower().endswith("pdf"):
- # Convert the PDF to a list of PIL image objects
- print("Generate image objects ..")
- images = convert_from_path(file)
-
- # Save each image to a file
- fname = file[:-4]
- print(f"{YELLOW}Target images{BLUE} {len(images)}{RESET}")
- for i, image in enumerate(images):
- print(f"{DBLUE}{i}{RESET}", end="\r")
- yd = f"{fname}_{i+1}.{outf}"
- image.save(yd)
- imgs.append(yd)
- print(f"{GREEN}Ok{RESET}")
-
- return imgs
-
-
-class Scanner:
-
- def __init__(self, input_file):
- self.input_file = input_file
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def scanPDF(self):
- pdf_list = self.preprocess()
- pdf_list = [item for item in pdf_list if item.lower().endswith("pdf")]
-
- for pdf in pdf_list:
- out_f = pdf[:-3] + 'txt'
- print(f"{YELLOW}Read pdf ..{RESET}")
-
- with open(pdf, 'rb') as f:
- reader = PyPDF2.PdfReader(f)
- text = ''
-
- pg = 0
- for page_num in range(len(reader.pages)):
- pg += 1
-
- print(f"{DYELLOW}Progress:{RESET}", end="")
- print(f"{CYAN}{pg}/{len(reader.pages)}{RESET}", end="\r")
- page = reader.pages[page_num]
- text += page.extract_text()
-
- print(f"\n{text}")
- print(F"\n{YELLOW}Write text to {GREEN}{out_f}{RESET}")
- with open(out_f, 'w') as f:
- f.write(text)
-
- print(F"{DGREEN}Ok{RESET}")
-
- def scanAsImgs(self):
- file = self.input_file
- mc = MakeConversion(file)
- img_objs = mc.doc2image()
- # print(img_objs)
- from .OCRTextExtractor import ExtractText
- text = ''
- for i in img_objs:
- extract = ExtractText(i)
- tx = extract.OCR()
- if tx is not None:
- text += tx
- print(text)
- print(f"{GREEN}Ok{RESET}")
- return text
-
-
-class FileSynthesis:
-
- def __init__(self, input_file):
- self.input_file = input_file
- # self.CHUNK_SIZE = 20_000
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- @staticmethod
- def join_audios(files, output_file):
- masterfile = output_file + "_master.mp3"
- print(
- f"{DBLUE}Create a master file {DMAGENTA}{masterfile}{RESET}", end='\r')
- # Create a list to store files
- ogg_files = []
- # loop through the directory while adding the ogg files to the list
- print(files)
- for filename in files:
- print(f"Join {DBLUE}{len(files)}{RESET} files")
- # if filename.endswith('.ogg'):
- # ogg_file = os.path.join(path, filename)
- ogg_files.append(AudioSegment.from_file(filename))
-
- # Concatenate the ogg files
- combined_ogg = ogg_files[0]
- for i in range(1, len(files)):
- combined_ogg += ogg_files[i]
-
- # Export the combined ogg to new mp3 file or ogg file
- combined_ogg.export(output_file + "_master.ogg", format='ogg')
- print(F"{DGREEN}Master file:Ok {RESET}")
-
- def Synthesise(self, text: str, output_file: str, CHUNK_SIZE: int = 20_000, ogg_folder: str = 'tempfile', retries: int = 5) -> None:
- """Converts given text to speech using Google Text-to-Speech API."""
- out_ls = []
- try:
- if not os.path.exists(ogg_folder):
- os.mkdir(ogg_folder)
- print(f"{DYELLOW}Get initial net speed..{RESET}")
- st = speedtest.Speedtest() # get initial network speed
- st.get_best_server()
- download_speed: float = st.download() # Keep units as bytes
- logger.info(
-
- f"{GREEN} Conversion to mp3 sequence initialized start\
-speed {CYAN}{download_speed/1_000_000:.2f}Kbps{RESET}")
-
- for attempt in range(retries):
- try:
- '''Split input text into smaller parts and generate
- individual gTTS objects'''
- counter = 0
- for i in range(0, len(text), CHUNK_SIZE):
- chunk = text[i:i+CHUNK_SIZE]
- output_filename = f"{output_file}_{counter}.ogg"
- counter += 1
- # print(output_filename)
- if os.path.exists(output_filename):
- output_filename = f"{output_file}_{counter+1}.ogg"
- # print(output_filename)
- tts = gTTS(text=chunk, lang='en', slow=False)
- tts.save(output_filename)
- out_ls.append(output_filename)
- break
- # print(out_ls)
- '''Handle any network related issue gracefully'''
- except Exception in (ConnectionError, ConnectionAbortedError,
- ConnectionRefusedError,
- ConnectionResetError) as e:
- logger.error(f"Sorry boss connection problem encountered: {e} in {attempt+1}/{retries}:")
- time.sleep(5) # Wait 5 seconds before retrying
-
- # Handle connectivity/network error
- except requests.exceptions.RequestException as e:
- logger.error(f"{e}")
- except Exception as e:
- logger.error(f'{DRED} Error during conversion attempt \
-{attempt+1}/{retries}:{e}{RESET}')
- tb = traceback.extract_tb(sys.exc_info()[2])
- logger.info("\n".join([f" > {line}"
- for line in map(str, tb)]))
- time.sleep(3) # Wait 5 seconds before retrying
- pass
-
- if attempt >= retries:
- logger.error(
- f"Conversion unsuccessful after {retries} attempts.")
- sys.exit(2)
-
- finally:
- # print(out_ls)
- # Combine generated gTTS objects
- if len(out_ls) >= 1:
- FileSynthesis.join_audios(out_ls, output_file)
-
- st = speedtest.Speedtest()
- logger.info("Done")
- print("Get final speed ...")
- logger.info(
-
- f"{YELLOW}Final Network Speed: {st.download()/(10**6):.2f} Kbps{RESET}")
-
- @staticmethod
- def pdf_to_text(pdf_path):
- logger.info('''Processing the file...\n''')
- logger.info(
- F'{GREEN} Initializing pdf to text conversion sequence...{RESET}')
- try:
- with open(pdf_path, 'rb') as file:
- pdf_reader = PyPDF2.PdfReader(file)
- text = ''
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- text += page.extract_text()
- print(F"{DGREEN}Ok{RESET}")
- return text
- except Exception as e:
- logger.error(
- f"{DRED}Failed to extract text from '{YELLOW}{pdf_path}'{RESET}:\n {e}")
-
- @staticmethod
- def text_file(input_file):
- try:
- with open(input_file, 'r', errors='ignore') as file:
- text = file.read().replace('\n', ' ')
- return text
- except FileNotFoundError:
- logger.error("File '{}' was not found.".format(input_file))
- except Exception as e:
- logger.error(
- F"{DRED}Error converting {input_file} to text: {str(e)}\
-{RESET}")
-
- @staticmethod
- def docx_to_text(docx_path):
- try:
- logger.info(f"{BLUE} Converting {docx_path} to text...{RESET}")
- doc = Document(docx_path)
- paragraphs = [paragraph.text for paragraph in doc.paragraphs]
- return '\n'.join(paragraphs)
- except FileNotFoundError:
- logger.error(f"File '{docx_path}' was not found.")
- except Exception as e:
- logger.error(
- F"{DRED}Error converting {docx_path} to text: {e}\
-{RESET}")
-
- '''Handle input files based on type to initialize conversion sequence'''
-
- def audiofy(self):
- input_list = self.preprocess()
- extdoc = ["docx", "doc"]
- ls = {"pdf", "docx", "doc", "txt"}
- input_list = [item for item in input_list if item.lower().endswith(tuple(ls))]
- for input_file in input_list:
- if input_file.endswith('.pdf'):
- text = FileSynthesis.pdf_to_text(input_file)
- output_file = input_file[:-4]
-
- elif input_file.lower().endswith(tuple(extdoc)):
-
- text = FileSynthesis.docx_to_text(input_file)
- output_file = input_file[:-5]
-
- elif input_file.endswith('.txt'):
- text = FileSynthesis.text_file(input_file)
- output_file = input_file[:-4]
-
- else:
- logger.error('Unsupported file format. Please provide \
-a PDF, txt, or Word document.')
- sys.exit(1)
- try:
- FileSynthesis.Synthesise(None, text, output_file)
- except KeyboardInterrupt:
- sys.exit(1)
-
-
-###############################################################################
-# Convert video file to from one format to another'''
-###############################################################################
-
-
-class VideoConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def CONVERT_VIDEO(self):
- try:
- input_list = self.preprocess()
- out_f = self.out_format.upper()
- input_list = [item for item in input_list if any(
- item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)]
- print(F"{DYELLOW}Initializing conversion..{RESET}")
-
- for file in input_list:
- if out_f.upper() in SUPPORTED_VIDEO_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + '.' + out_f.lower()
- print(output_filename)
- else:
- print("Unsupported output format")
- sys.exit(1)
- format_codec = {
- "MP4": "mpeg4",
- "AVI": "rawvideo",
- # "OGV": "avc",
- "WEBM": "libvpx",
- "MOV": "mpeg4",
- "MKV": "MPEG4",
- "FLV": "flv"
- # "WMV": "WMV"
- }
- '''Load the video file'''
- print(f"{DBLUE}oad file{RESET}")
- video = VideoFileClip(file)
- '''Export the video to a different format'''
- print(f"{DMAGENTA}Converting file to {output_filename}{RESET}")
- video.write_videofile(
- output_filename, codec=format_codec[out_f])
- '''Close the video file'''
- print(f"{DGREEN}Done{RESET}")
- video.close()
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
- except Exception as e:
- print(e)
-
-
-###############################################################################
-# Convert Audio file to from one format to another'''
-###############################################################################
-
-
-class AudioConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
-
- def pydub_conv(self):
- input_list = self.preprocess()
- out_f = self.out_format
- input_list = [item for item in input_list if any(
- item.lower().endswith(ext) for ext in SUPPORTED_AUDIO_FORMATS)]
- print(F"{DYELLOW}Initializing conversion..{RESET}")
- for file in input_list:
- if out_f.lower() in SUPPORTED_AUDIO_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + '.' + out_f
- else:
- print("Unsupported output format")
- sys.exit(1)
- fmt = ext[1:]
- print(fmt, out_f)
- audio = pydub.AudioSegment.from_file(file, fmt)
- print(f"{DMAGENTA}Converting to {output_filename}{RESET}")
- audio.export(output_filename, format=out_f)
- # new_audio = pydub.AudioSegment.from_file('output_audio.')
- print(f"{DGREEN}Done{RESET}")
- # play(new_audio)
- # new_audio.close()
-
-
-###############################################################################
-# Convert images file to from one format to another
-###############################################################################
-
-
-class ImageConverter:
-
- def __init__(self, input_file, out_format):
- self.input_file = input_file
- self.out_format = out_format
-
- def preprocess(self):
- try:
- files_to_process = []
-
- if os.path.isfile(self.input_file):
- files_to_process.append(self.input_file)
- elif os.path.isdir(self.input_file):
- if os.listdir(self.input_file) is None:
- print("Cannot work with empty folder")
- sys.exit(1)
- for file in os.listdir(self.input_file):
- file_path = os.path.join(self.input_file, file)
- if os.path.isfile(file_path):
- files_to_process.append(file_path)
-
- return files_to_process
- except FileNotFoundError:
- print("File not found")
- sys.exit(1)
-
- def convert_image(self):
- try:
- input_list = self.preprocess()
- out_f = self.out_format.upper()
-
- input_list = [item for item in input_list if any(
- item.lower().endswith(ext) for ext in SUPPORTED_IMAGE_FORMATS[out_f])]
- for file in input_list:
- print(file)
- if out_f.upper() in SUPPORTED_IMAGE_FORMATS:
- _, ext = os.path.splitext(file)
- output_filename = _ + \
- SUPPORTED_IMAGE_FORMATS[out_f].lower()
- else:
- print("Unsupported output format")
- sys.exit(1)
- '''Load the image using OpenCV: '''
- print(F"{DYELLOW}Reading input image..{RESET}")
- img = cv2.imread(file)
- '''Convert the OpenCV image to a PIL image: '''
- print(f"{DMAGENTA}Converting to PIL image{RESET}")
- pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
- '''Save the PIL image to a different format: '''
- print(f"\033[1;36mSaving image as {output_filename}{RESET}")
- pil_img.save(output_filename, out_f)
- print(f"{DGREEN}Done{RESET}")
- '''Load the image back into OpenCV: '''
- print(f"{DMAGENTA}Load and display image{RESET}")
- opencv_img = cv2.imread(output_filename)
- '''Display the images: '''
- cv2.imshow('OpenCV Image', opencv_img)
- # pil_img.show()
- '''Wait for the user to press a key and close the windows: '''
- cv2.waitKey(0)
- cv2.destroyAllWindows()
- except KeyboardInterrupt:
- print("\nExiting..")
- sys.exit(1)
diff --git a/build/lib/filemac/dd.py b/build/lib/filemac/dd.py
deleted file mode 100644
index 90fbe1f..0000000
--- a/build/lib/filemac/dd.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from OCRTextExtractor import ExtractText
-img_objs = ['/home/skye/Software Engineering/Y2/SEM2/RV/SPE 2210 Client Side Programming Year II Semester II_1.png']
-text = ''
-for i in img_objs:
- extract = ExtractText(i)
- tx = extract.OCR()
- print(tx)
- if tx is not None:
- text += tx
-print(text)
diff --git a/build/lib/filemac/fmac.py b/build/lib/filemac/fmac.py
deleted file mode 100644
index 91b28ba..0000000
--- a/build/lib/filemac/fmac.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3.11.7
-# multimedia_cli/main.py
-import argparse
-import logging
-import logging.handlers
-import sys
-
-from . import handle_warnings
-from .AudioExtractor import ExtractAudio
-from .colors import (RESET, DYELLOW)
-from .converter import (AudioConverter, FileSynthesis, ImageConverter,
- MakeConversion, Scanner, VideoConverter)
-from .formats import (SUPPORTED_AUDIO_FORMATS_SHOW, SUPPORTED_DOC_FORMATS,
- SUPPORTED_IMAGE_FORMATS_SHOW,
- SUPPORTED_VIDEO_FORMATS_SHOW)
-from .image_op import Compress_Size
-from .OCRTextExtractor import ExtractText
-from .Simple_v_Analyzer import SA
-
-# from .formats import SUPPORTED_INPUT_FORMATS, SUPPORTED_OUTPUT_FORMATS
-handle_warnings
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Eval:
-
- def __init__(self, file, outf):
- self.file = file
- self.outf = outf
-
- def document_eval(self):
- ls = ["docx", "doc"]
- sheetls = ["xlsx", "xls"]
- try:
- conv = MakeConversion(self.file)
- if self.file.lower().endswith(tuple(sheetls)):
- if self.outf.lower() == "csv":
- conv.convert_xlsx_to_csv()
- elif self.outf.lower() == "txt":
- conv.convert_xls_to_text()
- elif self.outf.lower() == "doc" or self.outf == "docx":
- conv.convert_xls_to_word()
- elif self.outf.lower() == "db":
- conv.convert_xlsx_to_database()
-
- elif self.file.lower().endswith(tuple(ls)):
- if self.outf.lower() == "txt":
- conv.word_to_txt()
- elif self.outf.lower() == "pdf":
- conv.word_to_pdf()
- elif self.outf.lower() == "pptx":
- conv.word_to_pptx()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.endswith('txt'):
- if self.outf.lower() == "pdf":
- conv.txt_to_pdf()
- elif self.outf.lower() == "doc" or self.outf == "docx" or self.outf == "word":
- conv.text_to_word()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- elif self.file.lower().endswith('ppt') or self.file.lower().endswith('pptx'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.ppt_to_word()
-
- elif self.file.lower().endswith('pdf'):
- if self.outf.lower() == "doc" or self.outf.lower() == "docx" or self.outf == "word":
- conv.pdf_to_word()
- elif self.outf.lower() == "txt":
- conv.pdf_to_txt()
- elif self.outf.lower() == "audio" or self.outf.lower() == "ogg":
- conv = FileSynthesis(self.file)
- conv.audiofy()
-
- else:
- print(f"{DYELLOW}Unsupported Conversion type{RESET}")
- except Exception as e:
- logger.error(e)
-
-
-def main():
- parser = argparse.ArgumentParser(
- description="Multimedia Element Operations")
-
- parser.add_argument(
- "--convert_doc", help=f"Converter document file(s) to different format ie pdf_to_docx.\
- example {DYELLOW}filemac --convert_doc example.docx -t pdf{RESET}")
-
- parser.add_argument(
- "--convert_audio", help=f"Convert audio file(s) to and from different format ie mp3 to wav\
- example {DYELLOW}filemac --convert_audio example.mp3 -t wav{RESET}")
-
- parser.add_argument(
- "--convert_video", help=f"Convert video file(s) to and from different format ie mp4 to mkv.\
- example {DYELLOW}filemac --convert_video example.mp4 -t mkv{RESET}")
-
- parser.add_argument(
- "--convert_image", help=f"Convert image file(s) to and from different format ie png to jpg.\
- example {DYELLOW}filemac --convert_image example.jpg -t png{RESET}")
-
- parser.add_argument(
-
- "--convert_doc2image", help=f"Convert documents to images ie png to jpg.\
- example {DYELLOW}filemac --convert_doc2image example.pdf -t png{RESET}")
-
- parser.add_argument("-xA", "--extract_audio",
- help=f"Extract audio from a video.\
- example {DYELLOW}filemac -xA example.mp4 {RESET}")
-
- parser.add_argument(
- "-Av", "--Analyze_video", help=f"Analyze a given video.\
- example {DYELLOW}filemac --analyze_video example.mp4 {RESET}")
-
- parser.add_argument("-t", "--target_format",
- help="Target format for conversion (optional)")
-
- parser.add_argument(
- "--resize_image", help=f"change size of an image compress/decompress \
- example {DYELLOW}filemac --resize_image example.png -t png {RESET}")
-
- parser.add_argument("-t_size", help="used in combination with resize_image \
- to specify target image size")
-
- parser.add_argument(
- "-S", "--scan", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scan example.pdf {RESET}")
-
- parser.add_argument(
- "-SA", "--scanAsImg", help=f"Scan pdf file and extract text\
- example {DYELLOW}filemac --scanAsImg example.pdf {RESET}")
-
- parser.add_argument("--OCR", help=f"Extract text from an image.\
- example {DYELLOW}filemac --OCR image.png{RESET}")
-
- args = parser.parse_args()
-
-
-# Call function to handle document conversion inputs before begining conversion
- if args.convert_doc == 'help':
- print(SUPPORTED_DOC_FORMATS)
- sys.exit(1)
- if args.convert_doc:
- ev = Eval(args.convert_doc, args.target_format)
- ev.document_eval()
-
-
-# Call function to handle video conversion inputs before begining conversion
- elif args.convert_video:
- if args.convert_video == 'help' or args.convert_video is None:
- print(SUPPORTED_VIDEO_FORMATS_SHOW)
- sys.exit(1)
- ev = VideoConverter(args.convert_video, args.target_format)
- ev.CONVERT_VIDEO()
-# Call function to handle image conversion inputs before begining conversion
-
- elif args.convert_image:
- if args.convert_image == 'help' or args.convert_image is None:
- print(SUPPORTED_IMAGE_FORMATS_SHOW)
- sys.exit(1)
- conv = ImageConverter(args.convert_image, args.target_format)
- conv.convert_image()
-
-# Handle image resizing
- elif args.resize_image:
- res = Compress_Size(args.resize_image)
- res.resize_image(args.t_size)
-
-# Handle documents to images conversion
- elif args.convert_doc2image:
- conv = MakeConversion(args.convert_doc2image)
- conv.doc2image(args.target_format)
-
-# Call function to handle audio conversion inputs before begining conversion
- elif args.convert_audio:
- if args.convert_audio == 'help' or args.convert_audio is None:
- print(SUPPORTED_AUDIO_FORMATS_SHOW)
- sys.exit(1)
- ev = AudioConverter(args.convert_audio, args.target_format)
- ev.pydub_conv()
-
-
-# Call module to evaluate audio files before making audio extraction from input video files conversion
- elif args.extract_audio:
- vi = ExtractAudio(args.extract_audio)
- vi.moviepyextract()
-
-# Call module to scan the input and extract text
- elif args.scan:
- sc = Scanner(args.scan)
- sc.scanPDF()
-
-# Call module to scan the input FILE as image object and extract text
- elif args.scanAsImg:
- sc = Scanner(args.scanAsImg)
- tx = sc.scanAsImgs()
-# Call module to handle Candidate images for text extraction inputs before begining conversion
- elif args.OCR:
- conv = ExtractText(args.OCR)
- conv.OCR()
-
- elif args.Analyze_video:
- analyzer = SA(args.Analyze_video)
- analyzer.SimpleAnalyzer()
-
-
-if __name__ == "__main__":
- main()
diff --git a/build/lib/filemac/formats.py b/build/lib/filemac/formats.py
deleted file mode 100644
index 6490294..0000000
--- a/build/lib/filemac/formats.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# multimedia_cli/formats.py
-from .colors import CYAN, DBLUE, DMAGENTA, DYELLOW, RESET
-
-SUPPORTED_DOC_FORMATS = f"""
-|---------------------------------------------------------------------------
-|{DBLUE}Input format{RESET} |{DBLUE}Output format{RESET} |
-|________________________________|__________________________________________|
-| xlsx {DYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) |
-| | |
-| doc/docx{DYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) |
-| | |
-| txt {DYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) |
-| | |
-| pdf {DYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) |
-| | |
-| pptx/ppt{DYELLOW}-------------------->{RESET}|doc/docx |
-| |
-|___________________________________________________________________________|
-"""
-
-
-def p():
- print(SUPPORTED_DOC_FORMATS)
-
-
-# Add supported input and output formats for each media type
-SUPPORTED_AUDIO_FORMATS = ["wav", # Waveform Audio File Format
- "mp3", # MPEG Audio Layer III
- "ogg",
- "flv",
- "ogv",
- "webm",
- "aac", # Advanced Audio Codec
- "bpf",
- "aiff",
- "flac"] # Free Lossless Audio Codec)
-
-SUPPORTED_AUDIO_FORMATS_SHOW = f'''
-|==============================|
-| {DBLUE}Supported I/O formats {RESET} |
-|==============================|
-| {CYAN} wav {DYELLOW} |
-| {CYAN} mp3 {DYELLOW} |
-| {CYAN} ogg {DYELLOW} |
-| {CYAN} flv {DYELLOW} |
-| {CYAN} ogv {DYELLOW} |
-| {CYAN} matroska {DYELLOW} |
-| {CYAN} mov {DYELLOW} |
-| {CYAN} webm {DYELLOW} |
-| {CYAN} aac {DYELLOW} |
-| {CYAN} bpf {DYELLOW} |
---------------------------------
-
-'''
-
-SUPPORTED_VIDEO_FORMATS = ["MP4", # MPEG-4 part 14
- "AVI", # Audio Video Interleave
- "OGV",
- "WEBM",
- "MOV", # QuickTime Movie
- "MKV", # Matroska Multimedia Container - MKV is known for its support of high-quality content.
- "FLV", #
- "WMV"]
-
-SUPPORTED_VIDEO_FORMATS_SHOW = f'''
-,_______________________________________,
-|x| {DBLUE}Supported I/O formats{RESET} |x|
-|x|-----------------------------------{DYELLOW}|x|
-|x| {DMAGENTA} MP4 {DYELLOW} |x|
-|x| {DMAGENTA} AVI {DYELLOW} |x|
-|x| {DMAGENTA} OGV {DYELLOW} |x|
-|x| {DMAGENTA} WEBM{DYELLOW} |x|
-|x| {DMAGENTA} MOV {DYELLOW} |x|
-|x| {DMAGENTA} MKV {DYELLOW} |x|
-|x| {DMAGENTA} FLV {DYELLOW} |x|
-|x| {DMAGENTA} WMV {DYELLOW} |x|
-|,|___________________________________|,|{DYELLOW}
-'''
-
-SUPPORTED_IMAGE_FORMATS = {
- "JPEG": ".jpg", # Joint Photographic Experts Group -Lossy compression
- "PNG": ".png", # Joint Photographic Experts Group - not lossy
- "GIF": ".gif", # Graphics Interchange Format
- "BM": ".bmp",
- "BMP": ".dib",
- "DXF": ".dxf", # Autocad format 2D
- "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression
- "EXR": ".exr",
- "pic": ".pic",
- "pict": "pct",
- "PDF": ".pdf",
- "WebP": ".webp",
- "ICNS": ".icns",
- "PSD": ".psd",
- "SVG": ".svg", # Scalable vector Graphics
- "EPS": ".eps",
- "PostSciript": ".ps",
- "PS": ".ps"}
-
-SUPPORTED_IMAGE_FORMATS_SHOW = f'''
-__________________________________________
-|x|{DBLUE}Supported I/O formats{RESET} |x|
-|x|_____________________________________{DYELLOW}|x|
-|x| {DMAGENTA} JPEG {DYELLOW} |x|
-|x| {DMAGENTA} PNG {DYELLOW} |x|
-|x| {DMAGENTA} GIF {DYELLOW} |x|
-|x| {DMAGENTA} BM {DYELLOW} |x|
-|x| {DMAGENTA} TIFF {DYELLOW} |x|
-|x| {DMAGENTA} EXR {DYELLOW} |x|
-|x| {DMAGENTA} PDF {DYELLOW} |x|
-|x| {DMAGENTA} WebP{DYELLOW} |x|
-|x| {DMAGENTA} ICNS {DYELLOW} |x|
-|x| {DMAGENTA} PSD {DYELLOW} |x|
-|x| {DMAGENTA} SVG {DYELLOW} |x|
-|x| {DMAGENTA} EPS {DYELLOW} |x|
-|x| {DMAGENTA} Postscript {DYELLOW} |x|
-|_|_____________________________________|x|
-'''
-
-SUPPORTED_DOCUMENT_FORMATS = ['pdf', 'doc', 'docx', 'csv', 'xlsx', 'xls',
- 'ppt', 'pptx', 'txt', 'ogg', 'mp3', 'audio']
diff --git a/build/lib/filemac/handle_warnings.py b/build/lib/filemac/handle_warnings.py
deleted file mode 100644
index 3e592d1..0000000
--- a/build/lib/filemac/handle_warnings.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import warnings
-
-warnings.simplefilter("ignore", RuntimeWarning)
-with warnings.catch_warnings():
- warnings.filterwarnings(
- "ignore", message="Your system is avx2 capable but pygame was not built with support for it.", category=RuntimeWarning)
diff --git a/build/lib/filemac/image_op.py b/build/lib/filemac/image_op.py
deleted file mode 100644
index 61cfe6d..0000000
--- a/build/lib/filemac/image_op.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from PIL import Image
-import os
-import logging
-import logging.handlers
-
-logging.basicConfig(level=logging.INFO, format='%(levelname)-8s %(message)s')
-logger = logging.getLogger(__name__)
-
-
-class Compress_Size:
-
- def __init__(self, input_image_path):
- self.input_image_path = input_image_path
-
- def resize_image(self, target_size):
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- original_image = Image.open(input_image_path)
- original_size = original_image.size
- size = os.path.getsize(input_image_path)
- print(f"Original image size \033[93m{size/1000_000:.2f}MiB")
-
- # Calculate the aspect ratio of the original image
- aspect_ratio = original_size[0] / original_size[1]
-
- # Convert the target sixze to bytes
- tz = int(target_size[:-2])
- if target_size[-2:].lower() == 'mb':
- target_size_bytes = tz * 1024 * 1024
- elif target_size[-2:].lower() == 'kb':
- target_size_bytes = tz * 1024
- else:
- logger.warning("Invalid units. Please use either \033[1;95m'MB'\033[0m\
- or \033[1;95m'KB'\033[0m")
-
- # Calculate the new dimensions based on the target size
- new_width, new_height = Compress_Size.calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes)
- print("\033[94mProcessing ..\033[0m")
- resized_image = original_image.resize((new_width, new_height))
- resized_image.save(output_image_path)
- t_size = os.path.getsize(output_image_path)/1000_000
- print("\033[1;92mOk\033[0m")
- print(f"Image resized to \033[1;93m{t_size:.2f}\033[0m and saved to \033[1;93m{output_image_path}")
-
- def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes):
- # Calculate the new dimensions based on the target size in bytes
- original_size_bytes = original_size[0] * original_size[1] * 3 # Assuming 24-bit color depth
- scale_factor = (target_size_bytes / original_size_bytes) ** 0.5
-
- new_width = int(original_size[0] * scale_factor)
- new_height = int(original_size[1] * scale_factor)
-
- return new_width, new_height
-
-
-if __name__ == "__main__":
- input_image_path = input("Enter the path to the input image: ")
- target_size = input("Enter the target output size (MB or KB): ")
- ext = input_image_path[-3:]
- output_image_path = os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
-
- init = Compress_Size(input_image_path)
- init.resize_image(target_size)
diff --git a/docs/CLI_ENHANCEMENT_PLAN.md b/docs/CLI_ENHANCEMENT_PLAN.md
new file mode 100644
index 0000000..5728251
--- /dev/null
+++ b/docs/CLI_ENHANCEMENT_PLAN.md
@@ -0,0 +1,342 @@
+# FileMAC CLI Enhancement Plan
+
+## Overview
+
+This document outlines the comprehensive plan to enhance FileMAC's command-line interface using the Rich and pyperclip libraries to create a more robust, user-friendly experience.
+
+## Current State Analysis
+
+### Strengths
+- ✅ Rich library already integrated for progress bars
+- ✅ Pyperclip available in environment
+- ✅ Existing color support via custom utilities
+- ✅ Comprehensive functionality across 40+ commands
+- ✅ Well-structured operation mapping system
+
+### Opportunities for Improvement
+- ❌ Basic argparse interface could be more user-friendly
+- ❌ Text-based help lacks visual appeal
+- ❌ Limited interactive elements
+- ❌ No clipboard integration
+- ❌ Inconsistent progress feedback
+
+## Enhancement Strategy
+
+### Phase 1: Foundation (Week 1-2)
+
+**Objective**: Establish core utilities and infrastructure
+
+**Tasks**:
+1. **Create Rich Console Wrapper** (`filewarp/utils/rich_utils.py`)
+ - Custom theme matching existing color scheme
+ - Standardized message formats (info, success, error, warning)
+ - Console initialization and configuration
+
+2. **Implement Clipboard Utilities** (`filewarp/utils/clipboard.py`)
+ - `copy_to_clipboard()` function
+ - `paste_from_clipboard()` function
+ - Error handling for clipboard operations
+
+3. **Basic Rich Integration**
+ - Replace `print()` statements with Rich console methods
+ - Add color consistency across modules
+ - Create standard message formats
+
+### Phase 2: Core Enhancements (Week 3-4)
+
+**Objective**: Enhance core CLI functionality with Rich features
+
+**Tasks**:
+1. **Enhanced Help System** (`filewarp/cli/help.py`)
+ - Rich-formatted command tables
+ - Categorized command display
+ - Interactive help navigation
+
+2. **Progress Bars for All Operations** (`filewarp/utils/progress.py`)
+ - Standardized progress bar creation
+ - Consistent styling across modules
+ - Time estimates and completion percentages
+
+3. **Enhanced Error Handling** (Enhance `filewarp/core/exceptions.py`)
+ - Rich-formatted error panels
+ - Contextual error information
+ - Suggested solutions and troubleshooting
+
+### Phase 3: Advanced Features (Week 5-6)
+
+**Objective**: Add interactive elements and workflow improvements
+
+**Tasks**:
+1. **Interactive File Selection** (`filewarp/cli/interactive.py`)
+ - Visual file listing with tables
+ - Multi-file selection interface
+ - File preview capabilities
+
+2. **Clipboard Workflow Integration** (`filewarp/cli/clipboard_workflows.py`)
+ - Clipboard-based input workflows
+ - Result copying to clipboard
+ - Batch operation support
+
+3. **Operation Summary Display** (`filewarp/cli/summary.py`)
+ - Visual operation summaries
+ - Success/error breakdowns
+ - Clipboard copy options
+
+### Phase 4: Integration (Week 7)
+
+**Objective**: Full integration with existing CLI
+
+**Tasks**:
+1. **Enhanced CLI Entry Point** (Modify `filewarp/cli/cli.py`)
+ - Rich welcome message
+ - Clipboard support flag
+ - Enhanced argument parsing
+
+2. **Operation Mapper Enhancement** (Extend `OperationMapper`)
+ - Rich progress display
+ - Clipboard integration
+ - Enhanced completion messages
+
+## Implementation Details
+
+### Rich Utilities Implementation
+
+```python
+# filewarp/utils/rich_utils.py
+from rich.console import Console
+from rich.theme import Theme
+
+custom_theme = Theme({
+ "info": "cyan",
+ "warning": "yellow",
+ "error": "bold red",
+ "success": "bold green",
+ "debug": "magenta",
+ "prompt": "bold blue"
+})
+
+console = Console(theme=custom_theme)
+
+def print_info(message):
+ console.print(f"[info]ℹ {message}[/info]")
+
+def print_success(message):
+ console.print(f"[success]✓ {message}[/success]")
+
+def print_error(message):
+ console.print(f"[error]❌ {message}[/error]")
+
+def print_warning(message):
+ console.print(f"[warning]⚠ {message}[/warning]")
+```
+
+### Clipboard Utilities Implementation
+
+```python
+# filewarp/utils/clipboard.py
+import pyperclip
+from .rich_utils import console, print_success, print_error
+
+def copy_to_clipboard(text):
+ """Copy text to system clipboard"""
+ try:
+ pyperclip.copy(text)
+ print_success("Copied to clipboard!")
+ return True
+ except Exception as e:
+ print_error(f"Failed to copy to clipboard: {str(e)}")
+ return False
+
+def paste_from_clipboard():
+ """Get text from system clipboard"""
+ try:
+ content = pyperclip.paste()
+ return content if content else None
+ except Exception as e:
+ print_error(f"Failed to access clipboard: {str(e)}")
+ return None
+```
+
+### Enhanced Help System
+
+```python
+# filewarp/cli/help.py
+from rich.panel import Panel
+from rich.table import Table
+from rich.box import ROUNDED
+from ..utils.rich_utils import console
+
+def show_main_help():
+ """Display enhanced help with Rich formatting"""
+ table = Table(
+ title="📁 FileMAC Commands",
+ show_header=True,
+ header_style="bold magenta",
+ box=ROUNDED,
+ border_style="blue"
+ )
+
+ table.add_column("Command", style="cyan", no_wrap=True)
+ table.add_column("Description", style="white")
+ table.add_column("Example", style="green")
+
+ commands = [
+ ("--convert_doc", "Convert documents between formats", "filewarp --convert_doc file.docx -to pdf"),
+ ("--convert_audio", "Convert audio files", "filewarp --convert_audio file.mp3 -to wav"),
+ # ... more commands
+ ]
+
+ for cmd, desc, example in commands:
+ table.add_row(cmd, desc, example)
+
+ panel = Panel.fit(
+ table,
+ title="[bold]FileMAC Help System[/bold]",
+ border_style="blue",
+ subtitle="Advanced file conversion toolkit"
+ )
+
+ console.print(panel)
+```
+
+## Migration Strategy
+
+### Backward Compatibility
+- ✅ Keep all existing command-line arguments
+- ✅ Maintain current functionality
+- ✅ Add new features as optional flags
+- ✅ Preserve existing workflows
+
+### Gradual Rollout Plan
+1. **Week 1-2**: Foundation utilities
+2. **Week 3-4**: Core Rich enhancements
+3. **Week 5-6**: Advanced interactive features
+4. **Week 7**: Full integration and testing
+
+### Risk Assessment
+
+**Low Risk**:
+- Rich already in dependencies
+- Gradual migration approach
+- Backward compatibility maintained
+
+**Medium Risk**:
+- User adaptation to new UI
+- Clipboard permissions on some systems
+- Performance impact of Rich rendering
+
+**Mitigation**:
+- Provide fallback to text mode
+- Add configuration options
+- Comprehensive error handling
+- User education
+
+## Benefits Realization
+
+### Immediate Benefits
+- ✅ Better visual feedback for users
+- ✅ Professional, modern CLI appearance
+- ✅ Consistent color scheme and formatting
+- ✅ Enhanced error messages with context
+
+### Medium-Term Benefits
+- ✅ Faster workflows with clipboard integration
+- ✅ Better user experience with progress indicators
+- ✅ Interactive file selection and processing
+- ✅ Visual operation summaries
+
+### Long-Term Benefits
+- ✅ Foundation for advanced CLI features
+- ✅ Improved user adoption and satisfaction
+- ✅ Competitive advantage in CLI tools
+- ✅ Easier maintenance and extension
+
+## Testing Approach
+
+### Unit Testing
+- Test Rich utilities in isolation
+- Verify clipboard functionality
+- Validate progress bar behavior
+
+### Integration Testing
+- Test with existing CLI commands
+- Verify backward compatibility
+- Check error handling
+
+### User Testing
+- Gather feedback on new UI
+- Test interactive workflows
+- Validate clipboard integration
+
+### Performance Testing
+- Measure Rich rendering impact
+- Test with large file operations
+- Validate progress bar performance
+
+## Documentation Requirements
+
+### Updated Documentation
+- ✅ README.md with Rich features
+- ✅ Examples of new clipboard workflows
+- ✅ Visual guides for enhanced UI
+- ✅ Updated help system documentation
+
+### User Education
+- ✅ Migration guide for existing users
+- ✅ New feature tutorials
+- ✅ Best practices for Rich CLI usage
+- ✅ Troubleshooting guide
+
+## Implementation Timeline
+
+```mermaid
+gantt
+ title FileMAC CLI Enhancement Timeline
+ dateFormat YYYY-MM-DD
+ section Phase 1: Foundation
+ Rich Utilities :a1, 2023-11-01, 5d
+ Clipboard Helpers :a2, 2023-11-06, 3d
+ Basic Integration :a3, 2023-11-09, 2d
+
+ section Phase 2: Core Enhancements
+ Enhanced Help :b1, 2023-11-13, 4d
+ Progress Bars :b2, 2023-11-17, 3d
+ Error Handling :b3, 2023-11-20, 3d
+
+ section Phase 3: Advanced Features
+ Interactive Selection :c1, 2023-11-24, 5d
+ Clipboard Workflows :c2, 2023-11-29, 4d
+ Operation Summaries :c3, 2023-12-03, 3d
+
+ section Phase 4: Integration
+ CLI Enhancement :d1, 2023-12-06, 5d
+ Testing & Debugging :d2, 2023-12-11, 4d
+ Documentation :d3, 2023-12-15, 3d
+```
+
+## Success Metrics
+
+### Quantitative Metrics
+- ✅ Reduction in user errors
+- ✅ Increase in command usage
+- ✅ Faster operation completion times
+- ✅ Higher user satisfaction scores
+
+### Qualitative Metrics
+- ✅ Positive user feedback
+- ✅ Increased feature adoption
+- ✅ Improved documentation clarity
+- ✅ Enhanced professional appearance
+
+## Conclusion
+
+This enhancement plan provides a clear, low-risk path to transform FileMAC's CLI from functional to exceptional. By leveraging the existing Rich integration and adding strategic pyperclip functionality, we can significantly improve user experience and productivity while maintaining all existing functionality.
+
+The gradual migration approach ensures minimal disruption and allows for continuous feedback and improvement throughout the process.
+
+**Next Steps**:
+1. Implement Phase 1 foundation utilities
+2. Begin gradual integration with existing modules
+3. Test thoroughly and gather user feedback
+4. Proceed through phases as planned
+5. Document and communicate changes effectively
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..86d894e
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ FileMAC - Multimedia File Operation Kit
+
+
+
+
+
+
+
+
FileMAC
+
+ A Comprehensive Multimedia File Operation Kit
+
+
+
+
+
+
+
+
+
Introduction
+
+ FileMAC is a Python-based command-line interface (CLI) utility
+ designed for efficient file conversion, manipulation, and analysis. It
+ supports various multimedia operations, including document conversion,
+ file analysis, and text-to-speech conversion using Google's
+ Text-to-Speech (gTTS) library.
+
+
+
+
+
+
Features
+
+
Convert documents between various formats.
+
Analyze and manipulate multimedia files.
+
Generate audio files from text using gTTS.
+
+ Command-line interface for easy integration into scripts and
+ workflows.
+
+
Supports Linux operating systems.
+
+ Builds on well-established multimedia libraries for robust performance.
+
+
+
+
+
+
+
Installation
+
Install FileMAC using pip:
+
pip install filewarp
+
+ Alternatively, install directly from the GitHub repository:
+
Detail-oriented Electrical and Electronics Technician with specialized training in power systems and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance, troubleshooting, and circuit analysis.
+
+
+
+
EDUCATION
+
+ 2021 - 2024
+ Ikutha Technical and Vocational College
+ Diploma in Electrical and Electronics (Power Option)
+ Completed: April 3, 2024
+
+
+
+
+
PROFESSIONAL EXPERIENCE
+
+ May 2023 - July 2023
+ KenGen - Olkaria Geothermal Power Plants
+ Electrical Maintenance Intern
+
+
+
Performed maintenance of electrical systems and power distribution equipment
+
Maintained turbine generators and auxiliary systems
+ Detail-oriented Electrical and Electronics Technician with specialized training in power systems
+ and hands-on experience in geothermal power plant operations. Skilled in electrical system maintenance,
+ troubleshooting, and circuit analysis. Seeking to leverage technical expertise and problem-solving
+ abilities in a challenging electrical engineering role.
+
+
+
+
+
Education
+
+
+
2021 - 2024
+
Ikutha Technical and Vocational College
+
Diploma in Electrical and Electronics (Power Option)
+
Completed: April 3, 2024
+
+
+
+
January 2016 - November 2019
+
Kea Secondary School
+
Kenya Certificate of Secondary Education (KCSE)
+
Mean Grade: C- (Minus)
+
+
+
+
+
Technical Skills
+
+
Electrical System Maintenance
+
Power System Operations
+
Circuit Analysis
+
PLC Programming
+
Solar Installation
+
Transformer Maintenance
+
Battery Systems
+
Technical Reporting
+
+
+
+
+ """
diff --git a/filewarp/core/html/styles/__init__.py b/filewarp/core/html/styles/__init__.py
new file mode 100644
index 0000000..208878b
--- /dev/null
+++ b/filewarp/core/html/styles/__init__.py
@@ -0,0 +1,4 @@
+from .css_parser import CSSParser
+from .style_applier import StyleApplier
+
+__all__ = ["CSSParser", "StyleApplier"]
diff --git a/filewarp/core/html/styles/css_parser.py b/filewarp/core/html/styles/css_parser.py
new file mode 100644
index 0000000..cbbffda
--- /dev/null
+++ b/filewarp/core/html/styles/css_parser.py
@@ -0,0 +1,69 @@
+"""
+Advanced CSS parsing functionality
+"""
+
+import re
+from typing import Dict, List
+
+
+class CSSParser:
+ """Advanced CSS parser with support for various CSS features"""
+
+ def __init__(self):
+ self.styles = {}
+
+ def parse_css(self, css_content: str) -> Dict[str, Dict]:
+ """Parse CSS content into style dictionary"""
+ # Remove comments
+ css_content = re.sub(r"/\*.*?\*/", "", css_content, flags=re.DOTALL)
+
+ # Parse rules
+ rules = re.findall(r"([^{]+)\{([^}]+)\}", css_content)
+
+ for selector, properties in rules:
+ selector = selector.strip()
+ style_dict = self._parse_properties(properties)
+
+ if selector:
+ self.styles[selector] = style_dict
+
+ return self.styles
+
+ def _parse_properties(self, properties: str) -> Dict[str, str]:
+ """Parse CSS properties string"""
+ style_dict = {}
+ declarations = [d.strip() for d in properties.split(";") if d.strip()]
+
+ for declaration in declarations:
+ if ":" in declaration:
+ prop, value = declaration.split(":", 1)
+ prop = prop.strip().lower()
+ value = value.strip()
+ style_dict[prop] = value
+
+ return style_dict
+
+ def get_styles_for_element(
+ self, tag: str, classes: List[str] = None, element_id: str = None
+ ) -> Dict[str, str]:
+ """Get combined styles for an element based on tag, classes, and ID"""
+ combined_styles = {}
+
+ # Tag styles
+ if tag in self.styles:
+ combined_styles.update(self.styles[tag])
+
+ # Class styles
+ if classes:
+ for class_name in classes:
+ class_selector = f".{class_name}"
+ if class_selector in self.styles:
+ combined_styles.update(self.styles[class_selector])
+
+ # ID styles
+ if element_id:
+ id_selector = f"#{element_id}"
+ if id_selector in self.styles:
+ combined_styles.update(self.styles[id_selector])
+
+ return combined_styles
diff --git a/filewarp/core/html/styles/style_applier.py b/filewarp/core/html/styles/style_applier.py
new file mode 100644
index 0000000..feb7bb6
--- /dev/null
+++ b/filewarp/core/html/styles/style_applier.py
@@ -0,0 +1,83 @@
+"""
+Style application logic for different CSS properties
+"""
+
+from docx.shared import Pt, RGBColor
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from typing import Dict
+import re
+
+from ..utils.color_utils import ColorConverter
+
+
+class StyleApplier:
+ """Applies CSS styles to DOCX elements"""
+
+ def __init__(self):
+ self.color_converter = ColorConverter()
+
+ def apply_text_styles(self, run, styles: Dict[str, str]):
+ """Apply text-related styles to a run"""
+ for prop, value in styles.items():
+ self._apply_text_style(run, prop, value)
+
+ def _apply_text_style(self, run, prop: str, value: str):
+ """Apply a single text style property"""
+ try:
+ if prop == "color":
+ color = self.color_converter.parse_color(value)
+ if color:
+ run.font.color.rgb = color
+
+ elif prop == "font-size":
+ size = self._parse_font_size(value)
+ if size:
+ run.font.size = Pt(size)
+
+ elif prop == "font-family":
+ run.font.name = value.split(",")[0].strip().strip("\"'")
+
+ elif prop == "font-weight":
+ if value in ["bold", "bolder", "700", "800", "900"]:
+ run.font.bold = True
+ elif value in ["normal", "lighter", "400"]:
+ run.font.bold = False
+
+ elif prop == "font-style":
+ if value == "italic":
+ run.font.italic = True
+ elif value == "normal":
+ run.font.italic = False
+
+ elif prop == "text-decoration":
+ if "underline" in value:
+ run.font.underline = True
+ if "line-through" in value:
+ run.font.strike = True
+
+ elif prop == "text-transform":
+ if value == "uppercase":
+ run.text = run.text.upper()
+ elif value == "lowercase":
+ run.text = run.text.lower()
+ elif value == "capitalize":
+ run.text = run.text.title()
+
+ except Exception:
+ pass
+
+ def _parse_font_size(self, size_str: str) -> float:
+ """Parse font size to points"""
+ try:
+ if "px" in size_str:
+ return float(size_str.replace("px", "").strip()) * 0.75
+ elif "pt" in size_str:
+ return float(size_str.replace("pt", "").strip())
+ elif "em" in size_str:
+ return float(size_str.replace("em", "").strip()) * 11 # Default size
+ elif "%" in size_str:
+ return (float(size_str.replace("%", "").strip()) / 100) * 11
+ else:
+ return float(size_str)
+ except (ValueError, TypeError):
+ return None
diff --git a/filewarp/core/html/tests.py b/filewarp/core/html/tests.py
new file mode 100644
index 0000000..c4dda59
--- /dev/null
+++ b/filewarp/core/html/tests.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Test script for the CV Converter library
+"""
+
+import os
+import sys
+
+# Add the library to path
+# NOTE(review): this prepends "<directory of this file>/cv_converter" to
+# sys.path, while the imports below use the absolute "filewarp.core.html"
+# package path -- this looks like a leftover from an earlier layout; confirm
+# before relying on it.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "cv_converter"))
+
+from filewarp.core.html import HTML2Word
+from filewarp.core.html.examples.templates import Templates
+
+
+def test_basic_conversion():
+ """Test basic conversion"""
+ print("Testing basic CV conversion...")
+
+ converter = HTML2Word()
+ html_content = Templates.get_basic_template()
+
+ converter.convert(html_content, "test_basic_cv.docx")
+ print("✓ Basic CV created: test_basic_cv.docx")
+
+
+def test_advanced_conversion():
+ """Test advanced conversion with styling"""
+ print("Testing advanced CV conversion...")
+
+ converter = HTML2Word()
+ html_content = Templates.get_advanced_cv()
+
+ converter.convert(html_content, "test_advanced_cv.docx")
+ print("✓ Advanced CV created: test_advanced_cv.docx")
+
+
+def test_file_conversion():
+ """Test conversion from HTML file"""
+ print("Testing file-based conversion...")
+
+ # Create test HTML file
+ with open("test_cv.html", "w", encoding="utf-8") as f:
+ f.write(Templates.get_basic_template())
+
+ converter = HTML2Word()
+ converter.convert_file("test_cv.html", "test_file_cv.docx")
+ print("✓ File-based CV created: test_file_cv.docx")
+
+
+def main():
+ """Run all tests"""
+ print("CV Converter Library Test Suite")
+ print("=" * 40)
+
+ try:
+ test_basic_conversion()
+ test_advanced_conversion()
+ test_file_conversion()
+
+ print("\n" + "=" * 40)
+ print("All tests completed successfully! 🎉")
+ print("\nGenerated files:")
+ for file in [
+ "test_basic_cv.docx",
+ "test_advanced_cv.docx",
+ "test_file_cv.docx",
+ ]:
+ if os.path.exists(file):
+ print(f" - {file}")
+
+ except Exception as e:
+ print(f"\n❌ Error during testing: {e}")
+ import traceback
+
+ traceback.print_exc()
+
+
+if __name__ == "__main__":
+ # main()
+ converter = HTML2Word()
+ converter.convert_file("/home/skye/Downloads/MWG-CV.html", "test.docx")
diff --git a/filewarp/core/html/utils/__init__.py b/filewarp/core/html/utils/__init__.py
new file mode 100644
index 0000000..d779482
--- /dev/null
+++ b/filewarp/core/html/utils/__init__.py
@@ -0,0 +1,9 @@
+from .color_utils import ColorConverter
+from .validation import validate_css, validate_html, validate_file_path
+
+__all__ = [
+ "ColorConverter",
+ "validate_css",
+ "validate_html",
+ "validate_file_path",
+]
diff --git a/filewarp/core/html/utils/color_utils.py b/filewarp/core/html/utils/color_utils.py
new file mode 100644
index 0000000..a13d09c
--- /dev/null
+++ b/filewarp/core/html/utils/color_utils.py
@@ -0,0 +1,121 @@
+"""
+Color conversion and parsing utilities
+"""
+
+import re
+from docx.shared import RGBColor
+from typing import Optional
+
+
+class ColorConverter:
+ """Converts various color formats to RGBColor"""
+
+ def __init__(self):
+ self.named_colors = {
+ "black": RGBColor(0, 0, 0),
+ "white": RGBColor(255, 255, 255),
+ "red": RGBColor(255, 0, 0),
+ "green": RGBColor(0, 128, 0),
+ "blue": RGBColor(0, 0, 255),
+ "yellow": RGBColor(255, 255, 0),
+ "cyan": RGBColor(0, 255, 255),
+ "magenta": RGBColor(255, 0, 255),
+ "gray": RGBColor(128, 128, 128),
+ "grey": RGBColor(128, 128, 128),
+ "orange": RGBColor(255, 165, 0),
+ "purple": RGBColor(128, 0, 128),
+ "brown": RGBColor(165, 42, 42),
+ "pink": RGBColor(255, 192, 203),
+ "navy": RGBColor(0, 0, 128),
+ "teal": RGBColor(0, 128, 128),
+ "olive": RGBColor(128, 128, 0),
+ "maroon": RGBColor(128, 0, 0),
+ "silver": RGBColor(192, 192, 192),
+ "lime": RGBColor(0, 255, 0),
+ "aqua": RGBColor(0, 255, 255),
+ "fuchsia": RGBColor(255, 0, 255),
+ }
+
+ def parse_color(self, color_str: str) -> Optional[RGBColor]:
+ """
+ Parse color string and return RGBColor
+
+ Supports:
+ - Hex: #RRGGBB, #RGB
+ - RGB: rgb(r, g, b)
+ - RGBA: rgba(r, g, b, a) - alpha ignored
+ - Named colors: red, blue, etc.
+ """
+ if not color_str:
+ return None
+
+ color_str = color_str.strip().lower()
+
+ # Named colors
+ if color_str in self.named_colors:
+ return self.named_colors[color_str]
+
+ # Hex colors
+ hex_match = re.match(r"#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})", color_str)
+ if hex_match:
+ r, g, b = [int(x, 16) for x in hex_match.groups()]
+ return RGBColor(r, g, b)
+
+ # Short hex colors
+ short_hex_match = re.match(r"#([0-9a-f])([0-9a-f])([0-9a-f])", color_str)
+ if short_hex_match:
+ r, g, b = [int(x * 2, 16) for x in short_hex_match.groups()]
+ return RGBColor(r, g, b)
+
+ # RGB colors
+ rgb_match = re.match(r"rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)", color_str)
+ if rgb_match:
+ r, g, b = [int(x) for x in rgb_match.groups()]
+ return RGBColor(r, g, b)
+
+ # RGBA colors (ignore alpha)
+ rgba_match = re.match(
+ r"rgba\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*[\d.]+\s*\)", color_str
+ )
+ if rgba_match:
+ r, g, b = [int(x) for x in rgba_match.groups()[:3]]
+ return RGBColor(r, g, b)
+
+ # HSL colors (basic conversion)
+ hsl_match = re.match(r"hsl\(\s*(\d+)\s*,\s*(\d+)%\s*,\s*(\d+)%\s*\)", color_str)
+ if hsl_match:
+ h, s, l = [int(x) for x in hsl_match.groups()]
+ return self._hsl_to_rgb(h, s, l)
+
+ return None
+
+ def _hsl_to_rgb(self, h: int, s: int, l: int) -> RGBColor:
+ """Convert HSL color to RGB (simplified)"""
+ # Normalize values
+ h = h % 360
+ s = max(0, min(100, s)) / 100
+ l = max(0, min(100, l)) / 100
+
+ # Simplified conversion
+ c = (1 - abs(2 * l - 1)) * s
+ x = c * (1 - abs((h / 60) % 2 - 1))
+ m = l - c / 2
+
+ if 0 <= h < 60:
+ r, g, b = c, x, 0
+ elif 60 <= h < 120:
+ r, g, b = x, c, 0
+ elif 120 <= h < 180:
+ r, g, b = 0, c, x
+ elif 180 <= h < 240:
+ r, g, b = 0, x, c
+ elif 240 <= h < 300:
+ r, g, b = x, 0, c
+ else:
+ r, g, b = c, 0, x
+
+ r = int((r + m) * 255)
+ g = int((g + m) * 255)
+ b = int((b + m) * 255)
+
+ return RGBColor(r, g, b)
diff --git a/filewarp/core/html/utils/validation.py b/filewarp/core/html/utils/validation.py
new file mode 100644
index 0000000..4a0d2d6
--- /dev/null
+++ b/filewarp/core/html/utils/validation.py
@@ -0,0 +1,93 @@
+"""
+Validation utilities for the converter
+"""
+
+import os
+import re
+from pathlib import Path
+
+
+def validate_html(html_content: str) -> bool:
+ """
+ Validate HTML content
+
+ Args:
+ html_content: HTML string to validate
+
+ Returns:
+ bool: True if valid
+
+ Raises:
+ ValueError: If HTML content is invalid
+ """
+ if not html_content or not isinstance(html_content, str):
+ raise ValueError("HTML content must be a non-empty string")
+
+ if len(html_content.strip()) == 0:
+ raise ValueError("HTML content cannot be empty or whitespace only")
+
+ # Basic check for HTML tags
+ if not re.search(r"<[^>]+>", html_content):
+ raise ValueError("HTML content must contain valid HTML tags")
+
+ return True
+
+
+def validate_file_path(file_path: str, file_type: str = "input") -> bool:
+ """
+ Validate file path
+
+ Args:
+ file_path: Path to validate
+ file_type: Type of file ('input' or 'output')
+
+ Returns:
+ bool: True if valid
+
+ Raises:
+ ValueError: If file path is invalid
+ FileNotFoundError: If input file doesn't exist
+ """
+ if (
+ not file_path
+ or not isinstance(file_path, str)
+ and not isinstance(file_path, Path)
+ ):
+ raise ValueError(f"{file_type} file path must be a non-empty string")
+
+ if file_type == "input":
+ if not os.path.exists(file_path):
+ raise FileNotFoundError(f"Input file not found: {file_path}")
+
+ if not os.path.isfile(file_path):
+ raise ValueError(f"Input path is not a file: {file_path}")
+
+ elif file_type == "output":
+ output_dir = os.path.dirname(file_path)
+ if output_dir and not os.path.exists(output_dir):
+ try:
+ os.makedirs(output_dir)
+ except OSError as e:
+ raise ValueError(f"Cannot create output directory: {e}")
+
+ # Check file extension
+ if not file_path.lower().endswith((".html", ".htm", ".docx")):
+ raise ValueError(f"File must have .html, .htm, or .docx extension: {file_path}")
+
+ return True
+
+
+def validate_css(css_content: str) -> bool:
+ """
+ Validate CSS content
+
+ Args:
+ css_content: CSS string to validate
+
+ Returns:
+ bool: True if valid
+ """
+ if not css_content or not isinstance(css_content, str):
+ raise ValueError("CSS content must be a non-empty string")
+
+ return True
diff --git a/filewarp/core/image/core.py b/filewarp/core/image/core.py
new file mode 100644
index 0000000..1b2ebb1
--- /dev/null
+++ b/filewarp/core/image/core.py
@@ -0,0 +1,770 @@
+import shutil
+from reportlab.pdfgen import canvas
+from reportlab.lib.pagesizes import letter
+import re
+from pathlib import Path
+from docx.shared import Inches, Mm
+from docx import Document
+import os
+import sys
+from tqdm import tqdm
+from PIL import Image
+import cv2
+from typing import List, Tuple, Union, Optional
+from ...utils.simple import logger
+from ...utils.decorators import Decorators
+from ...utils.formats import SUPPORTED_IMAGE_FORMATS
+from ...utils.file_utils import modify_filename_if_exists, DirectoryScanner
+from ...utils.colors import fg, rs
+
+RESET = rs
+
+
+class ImageCompressor:
+ def __init__(self, input_image_path):
+ self.input_image_path = input_image_path
+
+ def resize_image(self, target_size):
+ try:
+ input_image_path = self.input_image_path
+ ext = input_image_path[-3:]
+ output_image_path = (
+ os.path.splitext(input_image_path)[0] + f"_resized.{ext}"
+ )
+
+ original_image = Image.open(input_image_path)
+ original_size = original_image.size
+ size = os.path.getsize(input_image_path)
+ print(f"Original image size {fg.YELLOW}{size / 1000_000:.2f}MiB{RESET}")
+
+ # Calculate the aspect ratio of the original image
+ aspect_ratio = original_size[0] / original_size[1]
+
+ # Convert the target sixze to bytes
+ tz = int(target_size[:-2])
+ if target_size[-2:].lower() == "mb":
+ target_size_bytes = tz * 1024 * 1024
+ elif target_size[-2:].lower() == "kb":
+ target_size_bytes = tz * 1024
+ else:
+ logger.warning(
+ f"Invalid units. Please use either {fg.BMAGENTA}'MB'{RESET}\
+ or {fg.BMAGENTA}'KB'{RESET}"
+ )
+
+ # Calculate the new dimensions based on the target size
+ new_width, new_height = ImageCompressor.calculate_new_dimensions(
+ original_size, aspect_ratio, target_size_bytes
+ )
+ resized_image = original_image.resize((new_width, new_height))
+ resized_image.save(output_image_path, optimize=True, format="png")
+ t_size = os.path.getsize(output_image_path) / 1000_000
+
+ except KeyboardInterrupt:
+ print("\nQuit⏹️")
+ sys.exit(1)
+ except KeyError:
+ print("KeyError")
+ except Exception as e:
+ print(f"{fg.RED}{e}{RESET}")
+
+ def calculate_new_dimensions(original_size, aspect_ratio, target_size_bytes):
+ try:
+ # Calculate the new dimensions based on the target size in bytes
+ original_size_bytes = (
+ original_size[0] * original_size[1] * 3
+ ) # Assuming 24-bit color depth
+ scale_factor = (target_size_bytes / original_size_bytes) ** 0.5
+
+ new_width = int(original_size[0] * scale_factor)
+ new_height = int(original_size[1] * scale_factor)
+
+ return new_width, new_height
+ except KeyboardInterrupt:
+ print("\nQuit⏹️")
+ sys.exit(1)
+ except KeyError:
+ print("KeyError")
+ except Exception as e:
+ print(f"{fg.RED}{e}{RESET}")
+
+
+class ImageConverter:
+ """Convert images file to from one format to another"""
+
+ def __init__(self, input_file, out_format):
+ self.input_file = input_file
+ self.out_format = out_format
+
+ def preprocess(self) -> list:
+ try:
+ files_to_process = []
+
+ if os.path.isfile(self.input_file):
+ files_to_process.append(self.input_file)
+ elif os.path.isdir(self.input_file):
+ if os.listdir(self.input_file) is None:
+ print("Cannot work with empty folder")
+ sys.exit(1)
+ for file in os.listdir(self.input_file):
+ file_path = os.path.join(self.input_file, file)
+ if os.path.isfile(file_path):
+ files_to_process.append(file_path)
+
+ return files_to_process
+ except FileNotFoundError:
+ print("File not found❕")
+ sys.exit(1)
+
+ def convert_image(self) -> os.PathLike:
+ try:
+ input_list = self.preprocess()
+ out_f = self.out_format.upper()
+ out_f = "JPEG" if out_f == "JPG" else out_f
+ input_list = [
+ item
+ for item in input_list
+ if any(
+ item.lower().endswith(ext)
+ for ext in SUPPORTED_IMAGE_FORMATS.values()
+ )
+ ]
+
+ for file in tqdm(input_list, unit="files"):
+ if out_f.upper() in SUPPORTED_IMAGE_FORMATS:
+ _ = os.path.splitext(file)[0]
+ output_filename = _ + SUPPORTED_IMAGE_FORMATS[out_f].lower()
+ else:
+ print("Unsupported output format")
+ sys.exit(1)
+ """Load the image using OpenCV: """
+ img = cv2.imread(file)
+ """Convert the OpenCV image to a PIL image: """
+ pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+
+ pil_img.save(output_filename, out_f)
+
+ return output_filename
+ except KeyboardInterrupt:
+ print("\nQuit❕")
+ sys.exit(1)
+ except AssertionError:
+ print("Assertion failed.")
+ except KeyError:
+ print(
+ f"{fg.RED}ERROR:\tPending Implementation for{fg.ICYAN} {out_f} {fg.BWHITE}format{RESET}"
+ )
+ except Exception as e:
+ print(f"{fg.RED}{e}{RESET}")
+
+
+class GrayscaleConverter:
+ """
+ Class for converting images to grayscale and saving the processed output.
+
+ Attributes:
+ input_obj (Optional[Union[list[str], str, os.PathLike]]): Input file(s) or directory.
+ output_file (Optional[Union[list[str], str, os.PathLike]]): Output file path or directory.
+ """
+
+ def __init__(
+ self,
+ input_obj: Union[List[str], Tuple[str], str, os.PathLike],
+ output_file: Optional[Union[list[str], str, os.PathLike]] = None,
+ ):
+ """
+ Initializes the GrayscaleConverter object.
+
+ Args:
+ input_obj: Input file(s) or directory.
+ output_file: Output file path or directory.
+ """
+ self.input_obj = input_obj
+ self.output_file = output_file
+
+ def get_output_file(
+ self, image_path: Optional[Union[str, os.PathLike]] = None
+ ) -> Union[str, os.PathLike]:
+ """
+ Computes the correct output file path for a given input file.
+
+ Args:
+ image_path: Path to the input file.
+
+ Returns:
+ The computed output file path.
+ """
+ if self.output_file and self.output_file.endswith(
+ tuple(SUPPORTED_IMAGE_FORMATS.values())
+ ):
+ return os.path.abspath(self.output_file)
+ if self.output_file:
+ return os.path.abspath(os.path.splitext(self.output_file)[0] + ".png")
+ if image_path:
+ return os.path.abspath(
+ os.path.splitext(os.path.basename(image_path))[0] + ".png"
+ )
+ return "default_output.txt"
+
+ def run(self):
+ """
+ Runs the image to grayscale conversion operation on the input files.
+
+ Applies the for_loop_decorator to process each image in the input list.
+ """
+ file_list = DirectoryScanner(self.input_obj).run()
+
+ @Decorators().for_loop_decorator(file_list)
+ def process_image(self, image_path):
+ """Processes a single image, converting it to grayscale and saving."""
+ try:
+ img = cv2.imread(image_path)
+ if img is None:
+ raise FileNotFoundError(f"Could not read image: {image_path}")
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ _, thresh = cv2.threshold(
+ gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
+ )
+ self.save_pil_image(thresh, image_path)
+ except FileNotFoundError as e:
+ logger.error(f"{fg.RED}{e}{RESET}")
+ except Exception as e:
+ raise
+ logger.error(f"An unexpected error occurred: {fg.RED}{e}{RESET}")
+
+ process_image(self)
+
+ def save_pil_image(self, thresh, image_path):
+ """
+ Saves a NumPy array representing a grayscale image as a PIL Image.
+
+ Args:
+ thresh: The NumPy array representing the grayscale image.
+ image_path: The path of the original image, used to derive the output filename.
+ """
+ try:
+ img_pil = Image.fromarray(thresh)
+ filename = self.get_output_file(image_path)
+ filename = modify_filename_if_exists(filename)
+ img_pil.save(filename)
+ except Exception as e:
+ raise
+ logger.error(f"Unable to save the image: {fg.RED}{e}{RESET}")
+
+
+class ImageDocxConverter:
+ """
+ A class for converting images to DOCX documents.
+ """
+
+ def __init__(
+ self,
+ image_list: Union[Tuple[str], List[str]] = None,
+ input_dir: Union[str, os.PathLike] = None,
+ output_path: Union[str, os.PathLike] = None,
+ image_size: Tuple[float, float] = (6, 8), # Default to 6x8 inches
+ margin_mm: float = 25, # Default margin of 25mm (approx 1 inch)
+ ) -> None:
+ """
+ Initializes the ImageToDocxConverter object.
+
+ Args:
+ output_path: Path to save the output DOCX file + the file name e.g ~/Document/output.docx.
+ filename: Name of the output DOCX file.
+ image_size: Tuple (width, height) in inches.
+ margin_mm: Margin in millimeters.
+ """
+ self.image_list = image_list
+ self.input_dir = input_dir
+ self.output_path = output_path if output_path else self.ensure_output_file()
+ self.image_size = image_size
+ self.margin_mm = margin_mm
+ self.document = Document() # Create a new document object filename
+
+ # Set document margins in the constructor
+ sections = self.document.sections
+ for section in sections:
+ section.top_margin = Mm(self.margin_mm)
+ section.bottom_margin = Mm(self.margin_mm)
+ section.left_margin = Mm(self.margin_mm)
+ section.right_margin = Mm(self.margin_mm)
+ self.create_output_directory() # Create output directory in constructor
+
+ def ensure_output_file(self) -> os.PathLike:
+ file_name = "filewarp_image2docx.docx"
+ if self.input_dir:
+ base_dir = self.input_dir
+ else:
+ base_dir = Path(self.image_list[0]).parent
+
+ file_path = os.path.join(base_dir, file_name)
+
+ return file_path
+
+ def create_output_directory(self) -> None:
+ """
+ Creates the output directory if it does not exist.
+ """
+ Path(self.output_path).parent.mkdir(parents=True, exist_ok=True)
+
+ def get_valid_images(self, image_paths: List[str]) -> List[str]:
+ """
+ Filters the list of image paths, returning only those with supported formats.
+
+ Args:
+ image_paths: A list of file paths to images.
+
+ Returns:
+ A list of file paths to valid images.
+ """
+ valid_images = []
+ for image_path in image_paths:
+ try:
+ if Image.open(image_path).format.lower() in [
+ _formats[1:] for _formats in SUPPORTED_IMAGE_FORMATS.values()
+ ]:
+ valid_images.append(image_path)
+ else:
+ print(
+ f"{fg.MAGENTA}Skipping unsupported image format: {fg.CYAN}{image_path}{RESET}"
+ )
+ except Exception as e:
+ print(
+ f"{fg.RED}Error processing image {fg.YELLOW}{image_path} - {fg.RED} {e}{RESET}"
+ )
+ return valid_images
+
+ def convert_images_to_docx(self, image_paths: List[str]) -> os.PathLike:
+ """
+ Converts a list of images to a single DOCX document.
+
+ Args:
+ image_paths: List of image file paths.
+ """
+
+ valid_images = self.get_valid_images(image_paths)
+ if not valid_images:
+ print("No valid images to convert.")
+ return
+
+ for image_path in valid_images:
+ try:
+ # Add a paragraph for each image
+ paragraph = self.document.add_paragraph()
+ run = paragraph.add_run()
+ run.add_picture(
+ image_path,
+ width=Inches(self.image_size[0]),
+ height=Inches(self.image_size[1]),
+ )
+ # Add a page break after each image, except the last one
+ if image_path != valid_images[-1]:
+ self.document.add_page_break()
+ except Exception as e:
+ print(
+ f"{fg.RED}Error processing image {fg.YELLOW}{image_path}:{fg.RED} {e}{RESET}"
+ )
+
+ docx_file_path = (
+ self.output_path
+ if self.output_path.endswith(("docx", "doc"))
+ else f"{self.output_path}.docx"
+ )
+ self.document.save(docx_file_path)
+ return docx_file_path
+
+ def convert_images_in_directory(self, input_dir, output_path) -> os.PathLike:
+ """
+ Converts all images in a directory to a PDF.
+
+ Args:
+ input_dir (str): The directory containing the images.
+ output_path (str): The path to save the generated Word File.
+ file_extensions (tuple, optional): Tuple of image file extensions to include.
+ """
+
+ if not os.path.exists(input_dir):
+ raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+ image_paths = sorted(
+ [os.path.join(input_dir, f) for f in os.listdir(input_dir)]
+ )
+
+ image_paths = self.get_valid_images(image_paths)
+
+ if not image_paths:
+ raise ValueError(f"No images found in directory: {input_dir}")
+
+ self.create_pdf_from_images(image_paths, output_path)
+ return output_path
+
+ def run(self) -> os.PathLike:
+ """
+ Runs the conversion process.
+
+ Args:
+ image_paths: List of image file paths to convert.
+ """
+ if not any((self.image_list, self.input_dir)):
+ print("No image paths provided.")
+ sys.exit()
+
+ if self.image_list and self.output_path:
+ if all(os.path.exists(img) for img in self.image_list):
+ docx_file_path = self.convert_images_to_docx(self.image_list)
+ elif self.input_dir and self.output_path:
+ if os.path.exists(self.input_dir):
+ docx_file_path = self.convert_images_in_directory(
+ self.input_dir, self.output_path
+ )
+
+ if docx_file_path:
+ # print(f"{fg.GREEN_RG}Successfully created DOCX: {fg.BLUE}{docx_file_path}{RESET}")
+ pass
+
+ return docx_file_path
+
+ def cli(self, args: List[str]) -> None:
+ """
+ Main function to parse command line arguments and perform the conversion.
+
+ Args:
+ args: List of command line arguments.
+ """
+ if not args or "-h" in args or "--help" in args:
+ print(
+ """
+ Usage: python image_to_docx.py [options] image1 image2 ... imageN
+
+ Options:
+ -h, --help show this help message and exit
+ -o, --output PATH path to save the output DOCX file (default: current directory)
+ -n, --name FILENAME name of the output DOCX file (default: output_document)
+ -s, --size WIDTHxHEIGHT size of images in inches (e.g., 6x8) (default: 6x8)
+ -m, --margin MARGIN_MM margin in millimeters (default: 25)
+ """
+ )
+ sys.exit()
+
+ image_paths = []
+ output_path = "." # Current directory
+ filename = "output_document"
+ image_size = (6, 8) # Default 6x8 inches
+ margin_mm = 25
+
+ i = 1
+ while i < len(args):
+ if args[i] in ("-o", "--output"):
+ output_path = args[i + 1]
+ i += 2
+ elif args[i] in ("-n", "--name"):
+ filename = args[i + 1]
+ i += 2
+ elif args[i] in ("-s", "--size"):
+ try:
+ size_str = args[i + 1]
+ width, height = map(float, size_str.split("x"))
+ image_size = (width, height)
+ except ValueError:
+ print("Invalid size format. Please use WIDTHxHEIGHT (e.g., 6x8).")
+ sys.exit(1)
+ i += 2
+ elif args[i] in ("-m", "--margin"):
+ try:
+ margin_mm = float(args[i + 1])
+ except ValueError:
+ print("Invalid margin format. Please provide a numeric value.")
+ sys.exit(1)
+ i += 2
+ else:
+ if not args[i].startswith("-"):
+ image_paths.append(args[i])
+ i += 1
+ else:
+ print(f"Unknown argument: {args[i]}")
+ sys.exit(1)
+
+ converter = ImageDocxConverter(output_path, filename, image_size, margin_mm)
+ converter.run(image_paths)
+
+
+class ImagePdfConverter:
+ """
+ A class for converting images to PDF.
+ """
+
+ def __init__(
+ self,
+ image_list: Union[List[str], Tuple[str]] = None,
+ input_dir=None,
+ output_pdf_path=None,
+ page_size=letter,
+ order: bool = False,
+ base: bool = False,
+ walk: bool = False,
+ clean: bool = False,
+ ):
+ self.image_list = image_list
+ self.input_dir = input_dir
+ self.page_size = page_size
+ self.order = order
+ self.base = base
+ self.walk = walk
+ self.clean = clean
+ self.output_pdf_path = (
+ output_pdf_path if output_pdf_path else self.ensure_output_file()
+ )
+
+ def ensure_output_file(self) -> os.PathLike:
+ file_name = "filewarp_image2pdf.pdf"
+ if self.input_dir:
+ base_dir = self.input_dir
+ if self.base:
+ one_file = os.listdir(self.input_dir)[0]
+ base_name, ext = os.path.splitext(one_file)
+ if "_img_" in base_name:
+ base_name = base_name.split("_img_")[0]
+ file_name = base_name + ".pdf"
+ else:
+ file_name = self.input_dir.split("_imgs")[0] + ".pdf"
+ else:
+ base_dir = Path(self.image_list[0]).parent
+
+ file_path = os.path.join(base_dir, file_name)
+
+ return file_path
+
+ def _clean(self, dirs: list):
+ for d in dirs:
+ abspath = os.path.abspath(d)
+ # print(f"{fg.BWHITE}Nuke: {fg.BYELLOW}{abspath}{fg.RESET}")
+ if (
+ os.path.exists(d) and os.path.isdir(d)
+ # and Path(d).is_relative_to(os.path.expanduser("~"))
+ ):
+ shutil.rmtree(abspath)
+
+ def create_pdf_from_images(
+ self, image_paths, output_pdf_path, resize_to_fit=True
+ ) -> os.PathLike:
+ """
+ Creates a PDF from a list of image paths.
+
+ Args:
+ image_paths (list): A list of image file paths.
+ output_pdf_path (str): The path to save the generated PDF.
+ resize_to_fit (bool, optional): Whether to resize images to fit the page. Defaults to True.
+
+ Raises:
+ FileNotFoundError: If any image path is invalid.
+ ValueError: If image_paths is empty or contains non-image files.
+ Exception: for pillow image opening errors, or reportlab canvas errors.
+ """
+
+ if not image_paths:
+ raise ValueError("Image paths list is empty.")
+
+ for image_path in image_paths:
+ if not os.path.exists(image_path):
+ raise FileNotFoundError(f"Image not found: {image_path}")
+ try:
+ Image.open(image_path)
+ except Exception as e:
+ raise ValueError(f"Error opening image {image_path}: {e}")
+
+ try:
+ c = canvas.Canvas(output_pdf_path, pagesize=self.page_size)
+ width, height = self.page_size
+
+ for image_path in image_paths:
+ img = Image.open(image_path)
+ img_width, img_height = img.size
+
+ if resize_to_fit:
+ ratio = min(width / img_width, height / img_height)
+ new_width = img_width * ratio
+ new_height = img_height * ratio
+ x = (width - new_width) / 2
+ y = (height - new_height) / 2
+ else:
+ x = (width - img_width) / 2
+ y = (height - img_height) / 2
+ new_width = img_width
+ new_height = img_height
+
+ c.drawImage(
+ image_path,
+ x,
+ y,
+ width=new_width,
+ height=new_height,
+ preserveAspectRatio=True,
+ )
+ c.showPage()
+
+ c.save()
+
+ return output_pdf_path
+ except Exception as e:
+ raise Exception(f"Error creating PDF: {e}")
+
+ @staticmethod
+ def ensure_format(input_image) -> os.PathLike:
+ from ..imagepy.converter import ImageConverter
+
+ converter = ImageConverter(input_image, "png")
+ output_image = converter.convert_image()
+ return output_image
+
+ def extract_img_number(self, filename):
+ match = re.search(r"_img_(\d+)", filename)
+ return int(match.group(1)) if match else float("inf")
+
+ def _sort(self, obj, ext):
+ if self.order:
+ if isinstance(obj, list):
+ return sorted(
+ obj,
+ key=lambda f: self.extract_img_number(f),
+ )
+ return sorted(
+ [
+ os.path.join(obj, f)
+ for f in os.listdir(obj)
+ if f.lower().endswith(ext)
+ ],
+ key=lambda f: self.extract_img_number(f),
+ )
+ else:
+ return sorted(
+ [
+ os.path.join(obj, f)
+ for f in os.listdir(obj)
+ if f.lower().endswith(ext)
+ ]
+ )
+
+ def convert_images_in_directory_recursive(
+ self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png")
+ ):
+ """
+ Recursively walks through a directory and its subdirectories,
+ converting images in each folder into a separate PDF.
+
+ Args:
+ input_dir (str): Root directory containing images.
+ output_root (str): Directory to save the generated PDFs.
+ file_extensions (tuple): Supported image extensions.
+ """
+ try:
+ if not os.path.exists(input_dir):
+ raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+ # if not os.path.exists(output_root):
+ # os.makedirs(output_root)
+ dclean = []
+ for root, _, files in os.walk(input_dir):
+ image_paths = [
+ os.path.join(root, f)
+ for f in files
+ if f.lower().endswith(file_extensions)
+ ]
+
+ if not image_paths:
+ continue # No valid images in this directory
+
+ # Optional: sort images with your custom logic
+ image_paths = self._sort(image_paths, file_extensions)
+
+ # Ensure formats are valid
+ for index, image in enumerate(image_paths):
+ if not image.lower().endswith(file_extensions):
+ image_paths[index] = self.ensure_format(image)
+
+ # Create a relative PDF name based on the subdir structure
+ fname = os.path.split(root)[-1].split("_imgs")[0] + ".pdf"
+ relative_path = os.path.join(
+ os.path.dirname((os.path.relpath(root, input_dir))), fname
+ )
+ # Host dir for images to be cleaned is clean is on
+ dname = os.path.relpath(root, input_dir)
+ dclean.append(dname)
+
+ # pdf_name = relative_path.replace(os.sep, "_") + ".pdf"
+ # pdf_output_path = os.path.join(output_root, pdf_name)
+
+ # Create the PDF for this folder
+ self.create_pdf_from_images(image_paths, relative_path)
+ print(f"{fg.BWHITE}Created PDF{RESET}: {relative_path}")
+ if self.clean:
+ self._clean(dclean)
+ except Exception as e:
+ print(f"\033[31m{e}\033[0m")
+ sys.exit(1)
+
+ def convert_images_in_directory(
+ self, input_dir, output_pdf_path, file_extensions=(".jpg", ".jpeg", ".png")
+ ) -> os.PathLike:
+ try:
+ """
+ Converts all images in a directory to a PDF.
+
+ Args:
+ input_dir (str): The directory containing the images.
+ output_pdf_path (str): The path to save the generated PDF.
+ file_extensions (tuple, optional): Tuple of image file extensions to include.
+ """
+
+ if not os.path.exists(input_dir):
+ raise FileNotFoundError(f"Directory not found: {input_dir}")
+
+ image_paths = self._sort(input_dir, ext=file_extensions)
+
+ for index, image in enumerate(image_paths):
+ if not image.endswith(file_extensions):
+ image_paths[index] = self.ensure_format(image)
+
+ if not image_paths:
+ raise ValueError(
+ f"\033[31mNo images found in directory:\033[1m {input_dir}\033[0m"
+ )
+
+ self.create_pdf_from_images(image_paths, output_pdf_path)
+ return output_pdf_path
+ except ValueError as e:
+ print(e)
+ sys.exit(1)
+
+ def run(self) -> os.PathLike:
+ """
+ Runs the PDF creation based on the object's initialization parameters.
+ """
+ if self.image_list and self.output_pdf_path:
+ if all(os.path.exists(img) for img in self.image_list):
+ output_pdf_path = self.create_pdf_from_images(
+ self.image_list, self.output_pdf_path
+ )
+ # print(f"{fg.GREEN}PDF created successfully from directory!{RESET}")
+ # print(f"{fg.GREEN}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}")
+ else:
+ print(f"{fg.RED}One or more images in the list do not exist.{RESET}")
+ elif self.input_dir and self.output_pdf_path:
+ if os.path.exists(self.input_dir):
+ if self.walk:
+ output_pdf_path = self.convert_images_in_directory_recursive(
+ self.input_dir, self.output_pdf_path
+ )
+ else:
+ output_pdf_path = self.convert_images_in_directory(
+ self.input_dir, self.output_pdf_path
+ )
+ # print(f"{fg.GREEN}PDF created successfully from directory!{RESET}")
+ # print(f"{fg.BWHITE}Output:{RESET} {fg.BLUE}{output_pdf_path}{RESET}")
+ else:
+ print(f"Directory {fg.YELLOW}{self.input_dir}{RESET} does not exist.")
+ else:
+ print(
+ "Please provide either image_list and output_pdf_path or input_dir and output_pdf_path during object instantiation."
+ )
+ return
+ return output_pdf_path
diff --git a/filewarp/core/image/extractor.py b/filewarp/core/image/extractor.py
new file mode 100644
index 0000000..c12872c
--- /dev/null
+++ b/filewarp/core/image/extractor.py
@@ -0,0 +1,269 @@
+import sys
+import fitz # PyMuPDF for PDF
+from docx import Document
+from PIL import Image
+from io import BytesIO
+from typing import List, Union, Tuple
+from pathlib import Path
+import os
+from ...utils.colors import fg, rs
+from ...utils.file_utils import dirbuster
+
# Alias for the imported ``rs`` value; the messages in this module append it
# after coloured text (presumably the colour-reset sequence — confirm in
# utils.colors).
RESET = rs
+
+
class ImageExtractor:
    """
    Base class for extracting images from document files.

    Subclasses implement ``_extract_images``; this class filters the images
    by size and saves them to ``<document dir>/<document stem>_imgs``.
    """

    def __init__(self, output_path: str = None, tsize: tuple = (20, 20)) -> None:
        """
        Initializes the ImageExtractor object.

        Args:
            output_path: Base directory; ``"FilemacExctracts"`` is appended
                to it (the current working directory is used when omitted).
                Note that ``_save_images`` later replaces ``self.output_path``
                with a folder next to the processed document.
            tsize: ``(width, height)`` threshold; images that are not larger
                than this in both dimensions are skipped when saving.
        """
        base_path = (
            os.path.join(output_path, "FilemacExctracts")
            if output_path
            else os.path.join(os.path.abspath(os.getcwd()), "FilemacExctracts")
        )
        self.output_path = base_path
        self.tsize = tsize
        # Directory of the document being processed; set by
        # extract_and_save_images before anything is saved.
        self.output_base = None

    def _extract_images(self, file_path: str) -> List[Image.Image]:
        """
        Extract images from the given file; implemented by subclasses.

        Args:
            file_path: Path to the document file.

        Returns:
            A list of PIL Image objects.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement this method")

    def extract_and_save_images(self, file_path: str) -> None:
        """
        Extracts and saves images from the given file.

        Args:
            file_path: Path to the document file.
        """
        images = self._extract_images(file_path)
        self.output_base = os.path.split(file_path)[0]
        if not images:
            print(f"No images found in {file_path}")
            return

        base_filename = Path(file_path).stem
        self._save_images(images, base_filename)

    def is_page_sized_image(self, img, target_size=(595, 842), tolerance=1):
        """
        Return True when ``img`` is strictly larger than the size threshold.

        The threshold is ``self.tsize`` when set, otherwise ``target_size``.
        Both width and height must exceed it. ``tolerance`` is accepted for
        compatibility but is not used by the current comparison.
        """
        img_width, img_height = img.size
        target_width, target_height = self.tsize if self.tsize else target_size
        return img_width > target_width and img_height > target_height

    def _save_images(self, images: List[Image.Image], base_filename: str) -> None:
        """
        Saves the extracted images to ``<output_base>/<base_filename>_imgs``.

        Images are named ``<base_filename>_img_<n>.<format>`` where ``n`` is
        the 1-based position in ``images`` (skipped images keep their number).

        Args:
            images: A list of PIL Image objects.
            base_filename: Stem used for the folder and the file names.

        Raises:
            Exception: Whatever saving an image raises; the failure is
                reported on stdout first and then re-raised.
        """
        self.output_path = os.path.join(self.output_base, f"{base_filename}_imgs")
        os.makedirs(self.output_path, exist_ok=True)  # Ensure directory exists

        for i, img in enumerate(images):
            try:
                if self.tsize and not self.is_page_sized_image(img):
                    print(
                        f"Skipping image {i + 1}: ({fg.CYAN}{img.size}{RESET}) <= {fg.BLUE}{self.tsize}{RESET}"
                    )
                    continue

                img_format = img.format or "PNG"  # Default to PNG if format is None
                safe_filename = f"{base_filename}_img_{i + 1}.{img_format.lower()}"

                img_path = Path(self.output_path) / safe_filename
                img.save(img_path)
                print(f"Saved image: {fg.GREEN}{img_path}{RESET}")
            except Exception as e:
                # Report which image failed before propagating; this message
                # used to sit after the raise and could never be printed.
                print(f"Error saving image {i + 1} from {base_filename}: {e}")
                raise
+
+
class PdfImageExtractor(ImageExtractor):
    """
    Extracts images from PDF files.
    """

    def __init__(self, output_path, size):
        # A falsy size falls back to the (20, 20) threshold.
        super().__init__(output_path, size or (20, 20))

    def _extract_images(self, file_path: str) -> List[Image.Image]:
        """
        Extracts images from a PDF file using PyMuPDF.

        Errors are printed, not raised: an image that cannot be decoded is
        skipped, and a failure on the document itself ends extraction with
        the images collected so far.

        Args:
            file_path: Path to the PDF file.

        Returns:
            A list of PIL Image objects.
        """
        print(f"{fg.BWHITE}File: {fg.BLUE}{file_path}{RESET}")
        images: List[Image.Image] = []
        pdf_document = None
        try:
            pdf_document = fitz.open(file_path)
            for page_index in range(len(pdf_document)):
                page = pdf_document.load_page(page_index)
                image_list = page.get_images(full=True)  # Get detailed image info
                for img_index, img_info in enumerate(image_list):
                    xref = img_info[0]  # Get the XREF of the image
                    base_image = pdf_document.extract_image(xref)
                    image_bytes = base_image["image"]
                    try:
                        images.append(Image.open(BytesIO(image_bytes)))
                    except Exception as e:
                        print(
                            f"Error processing image {img_index + 1} from PDF page {page_index + 1}: {e}"
                        )
        except Exception as e:
            print(f"Error processing PDF file: {file_path} - {e}")
        finally:
            # Close the document even when extraction failed half-way.
            if pdf_document is not None:
                pdf_document.close()
        return images
+
+
class DocxImageExtractor(ImageExtractor):
    """
    Extracts images from DOCX files.
    """

    def __init__(self, output_path, size):
        # A falsy size falls back to the (20, 20) threshold.
        super().__init__(output_path, size or (20, 20))

    def _extract_images(self, file_path: str) -> List[Image.Image]:
        """
        Extracts the images embedded in a DOCX file.

        Every relationship of the main document part whose target reference
        contains ``"image"`` is read. Errors are printed, not raised.

        Args:
            file_path: Path to the DOCX file.

        Returns:
            A list of PIL Image objects.
        """
        images: List[Image.Image] = []
        try:
            docx_document = Document(file_path)
            for rel in docx_document.part.rels.values():
                if "image" not in rel.target_ref:
                    continue
                # Linked (external) images have no embedded part; asking for
                # target_part would raise and abort the remaining images.
                if rel.is_external:
                    continue
                image_bytes = rel.target_part.blob
                try:
                    images.append(Image.open(BytesIO(image_bytes)))
                except Exception as e:
                    print(f"Error processing image from DOCX: {e}")
        except Exception as e:
            print(f"Error processing DOCX file: {file_path} - {e}")
        return images
+
+
def process_files(
    file_paths: Union[Tuple[str], List[str]],
    output_path: str = None,
    tsize: tuple = None,
) -> None:
    """
    Processes the given files and extracts images from them.

    Directories are expanded with ``dirbuster`` and processed recursively
    with the same ``output_path`` and ``tsize``. ``.pdf`` and ``.docx`` files
    (case-insensitive) are handled; anything else is reported and skipped.
    Ctrl-C prints ``Quit`` and exits the process.

    Args:
        file_paths: Paths of the files or directories to process.
        output_path: Path handed to the extractors; defaults to the current
            working directory at call time.
        tsize: Optional ``(width, height)`` size threshold for the extractors.
    """
    if output_path is None:
        # Resolved per call; a default of os.getcwd() in the signature would
        # be frozen at import time.
        output_path = os.getcwd()

    try:
        for file_path in file_paths:
            if os.path.isdir(file_path):
                process_files(dirbuster(file_path), output_path, tsize)
                # A directory is fully handled by the recursive call.
                continue

            lowered = file_path.lower()
            if lowered.endswith(".pdf"):
                extractor = PdfImageExtractor(output_path, tsize)
            elif lowered.endswith(".docx"):
                extractor = DocxImageExtractor(output_path, tsize)
            else:
                print(f"Skipping unsupported file format: {file_path}")
                continue
            extractor.extract_and_save_images(file_path)
    except KeyboardInterrupt:
        print("\nQuit")
        sys.exit()
+
+
def main(args: List[str]) -> None:
    """
    Parse command line arguments and perform image extraction.

    Args:
        args: Command line arguments WITHOUT the program name (the script
            entry point passes ``sys.argv[1:]``).

    Exits with status 1 on an unknown option, on ``-o``/``--output`` without
    a value, or when no input file is given; prints usage and exits when
    called without arguments or with ``-h``/``--help``.
    """
    if not args or "-h" in args or "--help" in args:
        print(
            """
    Usage: python extract_images.py [options] file1 file2 ... fileN

    Options:
      -h, --help            show this help message and exit
      -o, --output PATH     path to save the extracted images (default: extracted_images)
    """
        )
        sys.exit()

    file_paths = []
    output_path = "extracted_images"  # Default output path

    # args has no program name in it, so scanning starts at index 0.
    i = 0
    while i < len(args):
        arg = args[i]
        if arg in ("-o", "--output"):
            if i + 1 >= len(args):
                print(f"Missing value for {arg}")
                sys.exit(1)
            output_path = args[i + 1]
            i += 2
        elif not arg.startswith("-"):
            file_paths.append(arg)
            i += 1
        else:
            print(f"Unknown argument: {arg}")
            sys.exit(1)

    if not file_paths:
        print("No files provided for image extraction.")
        sys.exit(1)

    process_files(file_paths, output_path)


if __name__ == "__main__":
    main(sys.argv[1:])
diff --git a/filewarp/core/ocr.py b/filewarp/core/ocr.py
new file mode 100644
index 0000000..1ee6d70
--- /dev/null
+++ b/filewarp/core/ocr.py
@@ -0,0 +1,199 @@
+import logging
+import os
+import sys
+from typing import Union, List, Optional
+
+import cv2
+import pytesseract
+from PIL import Image
+from rich.progress import Progress
+from ..utils.colors import fg, bg, rs
+from ..utils.file_utils import modify_filename_if_exists, DirectoryScanner
+
+
# Alias for the imported ``rs`` value; appended after coloured text in this
# module's messages (presumably the colour-reset sequence — confirm in
# utils.colors).
RESET = rs
+
# Define constants for better readability and maintainability
SUPPORTED_IMAGE_FORMATS = {"png", "jpg", "jpeg"}
DEFAULT_CONFIG = "-l eng --oem 3 --psm 6"
DEFAULT_SEPARATOR = "\n"

# Configure logging at the module level
logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
logger = logging.getLogger(__name__)


class ExtractText:
    """
    Extracts text from images using OCR, with options for file/directory input,
    output file naming, and text separation.
    """

    # Symbolic separator names accepted by the constructor.
    _SEPARATOR_NAMES = {
        "newline": "\n",
        "tab": "\t",
        "space": " ",
        "none": "",
    }

    def __init__(
        self,
        input_obj: Optional[Union[list[str], tuple[str], str, os.PathLike]],
        sep: str = DEFAULT_SEPARATOR,
    ):
        """
        Initializes the ExtractText object.

        Args:
            input_obj: Path to an image file or directory, or a list/tuple of
                such paths.
            sep: Separator placed between the lines of extracted text. The
                names ``"newline"``, ``"tab"``, ``"space"`` and ``"none"`` are
                translated; any other string is used as given after escape
                normalization (see ``_normalize_separator``).

        Raises:
            TypeError: If ``input_obj`` is not a str, list, tuple or PathLike.
        """
        if not isinstance(input_obj, (str, list, tuple, os.PathLike)):
            raise TypeError(
                f"input_obj must be a string, list, tuple or os.PathLike, not {type(input_obj)}"
            )
        self.input_obj = input_obj
        self.sep = self._normalize_separator(self._SEPARATOR_NAMES.get(sep, sep))

    @staticmethod
    def _normalize_separator(sep: str) -> str:
        """
        Turn literal ``\\n``/``\\t`` escapes and carriage returns in ``sep``
        into real newline/tab characters.
        """
        return (
            sep.replace("\r\n", "\n")
            .replace("\\n", "\n")
            # Must run before the bare "\r" rule, which would otherwise
            # consume the "\r" and make this replacement unreachable.
            .replace("\r\t", "\t")
            .replace("\\t", "\t")
            .replace("\r", "\n")
        )

    def _process_image(self, image_path: str, output_file: str) -> str:
        """
        Extracts text from a single image and saves it to a file.

        The image is converted to grayscale and Otsu-thresholded before OCR.
        The recognized lines are joined with ``self.sep``, echoed to stdout
        and written to ``output_file``.

        Args:
            image_path: Path to the image file.
            output_file: Path to the output text file.

        Returns:
            The extracted text. Returns an empty string on error (the error
            is logged).
        """
        try:
            # Load image using OpenCV
            img = cv2.imread(image_path)
            if img is None:
                raise ValueError(f"Could not read image: {image_path}")

            # Preprocess image for better OCR results
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            img_pil = Image.fromarray(thresh)

            # Re-normalize in case the attribute was reassigned after __init__.
            self.sep = self._normalize_separator(self.sep)

            text = pytesseract.image_to_string(img_pil, config=DEFAULT_CONFIG)
            text = self.sep.join(text.splitlines())
            logger.info("")
            logger.info(f"Extracted text from {image_path}")
            print(f"{fg.YELLOW}{text}{RESET}")

            # Save text to file
            with open(output_file, "w", encoding="utf-8") as file:
                file.write(text)
            return text

        except FileNotFoundError as e:
            logger.error(f"File not found: {e}")
        except IOError as e:
            logger.error(f"IOError: {e}")
        except pytesseract.TesseractError as e:
            logger.error(f"Tesseract error: {e}")
        except cv2.error as e:
            logger.error(f"OpenCV error processing {image_path}: {e}")
        except Exception as e:
            logger.error(
                f"An unexpected error occurred while processing {image_path}: {e}"
            )

        return ""  # Return empty string on error

    def run(
        self, output_file: Optional[Union[list[str], str, os.PathLike]] = None
    ) -> Optional[List[str]]:
        """
        Runs the OCR extraction process on the input file(s) or directory.

        Args:
            output_file: Optional path to a single output file. If provided,
                the text of all images (each followed by ``self.sep``) is
                written to this file. If None, one ``.txt`` file per image is
                created in the working directory, named after the image.

        Returns:
            A list of extracted texts, or None if no images were found or an
            unexpected error occurred. If output_file is provided, the list
            holds a single combined string.
        """
        image_list = DirectoryScanner(self.input_obj).run()
        num_images = len(image_list)

        if num_images == 0:
            logger.warning("No images found to process.")
            return None

        try:
            if output_file:
                # _process_image always writes its text somewhere; as before,
                # it uses the ".txt" sibling of output_file as scratch target.
                scratch_txt = os.path.splitext(output_file)[0] + ".txt"
                pieces = [
                    self._process_image(image_path, scratch_txt) + self.sep
                    for image_path in image_list
                ]
                all_text = "".join(pieces)
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(all_text)
                return [all_text]  # Return a list containing the combined text

            # Process each image individually, creating separate output files
            extracted_texts = []
            with Progress() as progress:
                task = progress.add_task(
                    "[yellow]Extracting text...", total=num_images
                )
                for image_path in image_list:
                    _output_file = (
                        os.path.splitext(os.path.basename(image_path))[0] + ".txt"
                    )
                    _output_file = modify_filename_if_exists(_output_file)
                    extracted_texts.append(
                        self._process_image(image_path, _output_file)
                    )
                    progress.update(task, advance=1)
            return extracted_texts

        except KeyboardInterrupt:
            print(
                f"\n[{bg.YELLOW}X{RESET}]Operation interrupted by {fg.UBLUE}user{RESET}.[/]"
            )
            sys.exit(0)
        except Exception as e:
            logger.error(f"An unexpected error occurred: {bg.RED}{e}{RESET}")
            return None  # Ensure None is returned on error
diff --git a/filewarp/core/pdf/core.py b/filewarp/core/pdf/core.py
new file mode 100644
index 0000000..790cb7b
--- /dev/null
+++ b/filewarp/core/pdf/core.py
@@ -0,0 +1,403 @@
+import os
+import subprocess
+import sys
+
+import PyPDF2
+from pdf2image import convert_from_path
+from PIL import Image # ImageSequence
+from tqdm.auto import tqdm
+from ...utils.simple import logger
+from ..document import DocumentConverter
+from ..exceptions import FilemacError, FileSystemError
+from ...utils.colors import fg, bg, rs
+from ..ocr import ExtractText
+
+
# Alias for the imported ``rs`` value; appended after coloured text in this
# module's messages (presumably the colour-reset sequence — confirm in
# utils.colors).
RESET = rs
# Default for the ``sep`` argument of PDFScanner, which forwards it to
# ExtractText.
DEFAULT_SEPARATOR = "\n"
+
+
class PDF2LongImageConverter:
    """
    Render a document as one tall PNG with all pages stacked vertically.

    PDF files are rendered directly; ``.doc``/``.docx`` files go through
    ``DocumentConverter.word_to_pdf`` first and ``.odt`` files through the
    ``soffice`` command line tool.
    """

    def __init__(self, pdf_file):
        self.pdf_file = pdf_file

    def preprocess(self):
        """
        Dispatch on the file extension and return the path of the long image.

        Returns:
            The PNG path produced by ``convert``, or ``None`` for an
            unsupported extension.
        """
        ext = self.pdf_file.split(".")[-1].lower()
        if ext == "pdf":
            return self.convert(self.pdf_file)
        if ext in ("doc", "docx"):
            path = DocumentConverter(self.pdf_file).word_to_pdf()
            return self.convert(path)
        if ext == "odt":
            return self.subprocess_executor()
        return None

    def subprocess_executor(self):
        """
        Convert ``self.pdf_file`` to PDF with ``soffice`` (written next to the
        input file), then render that PDF as a long image.
        """
        # A bare file name has an empty dirname; use "." so the PDF is looked
        # up beside the input instead of at the filesystem root.
        out_dir = os.path.dirname(self.pdf_file) or "."
        subprocess.call(
            [
                "soffice",
                "--convert-to",
                "pdf",
                self.pdf_file,
                "--outdir",
                out_dir,
            ]
        )
        # Only the last extension is replaced, so dotted names such as
        # "my.notes.odt" map to "my.notes.pdf".
        stem = os.path.splitext(os.path.basename(self.pdf_file))[0]
        pdf_file = os.path.abspath(os.path.join(out_dir, stem + ".pdf"))
        return self.convert(pdf_file)

    @staticmethod
    def convert(pdf_file):
        """
        Render every page of ``pdf_file`` and stack the pages into one PNG.

        The PNG is saved under the PDF's name with a ``.png`` extension; its
        width is that of the widest page.

        Returns:
            str: Path of the saved PNG.

        Raises:
            FileSystemError: If the PDF does not exist.
            FilemacError: For any other failure, including a PDF without pages.
        """
        try:
            images = convert_from_path(pdf_file)
            if not images:
                raise ValueError(f"No pages could be rendered from {pdf_file}")
            out_img = os.path.splitext(pdf_file)[0] + ".png"
            total_height = sum(img.size[1] for img in images)
            max_width = max(img.size[0] for img in images)

            new_im = Image.new("RGB", (max_width, total_height))

            y_offset = 0
            for img in images:
                new_im.paste(img, (0, y_offset))
                y_offset += img.size[1]
            new_im.save(out_img)
            return out_img
        except FileNotFoundError:
            raise FileSystemError(f"{fg.RED}File not found!{RESET}")
        except KeyboardInterrupt:
            # Assumes `logger` is a standard logging.Logger, which has
            # debug() but no DEBUG() method — TODO confirm in utils.simple.
            logger.debug("\nQuit❕")
            sys.exit()
        except Exception as e:
            raise FilemacError(f"{fg.RED}{e}{RESET}") from e
+
+
class PageExtractor:
    """
    Extract a page range from a PDF file and save it as a new file.

    Args:
        pdf: Path of the PDF file to operate on.
        start: 1-based first page to extract (default 1).
        stop: 1-based last page to extract, inclusive. ``-1`` (default) means
            "through the last page"; ``None`` means "only the start page".

    Attributes:
        outf: Output path, ``<pdf without extension>_<start>_<stop>_extract.pdf``.
    """

    def __init__(
        self,
        pdf,
        start: int = 1,
        stop: int = -1,
    ):
        self.pdf = pdf
        # Convert the 1-based start to a 0-based index; 0 and negative values
        # are treated as the first page.
        self.start = max(start - 1, 0)
        # stop needs no shift: a 1-based inclusive page number equals the
        # 0-based exclusive end index.
        self.stop = stop

        if self.stop is None:
            self.stop = int(self.start) + 1

        # splitext keeps directory parts and dotted names intact
        # ("./a/my.report.pdf" -> "./a/my.report").
        self.outf = f"{os.path.splitext(pdf)[0]}_{start}_{self.stop}_extract.pdf"

    def getPages(self):
        """
        Write the selected page range to ``self.outf``.

        A ``stop`` of -1, or one beyond the end of the document, is replaced
        by the document's page count.

        Returns:
            The output path, or ``None`` when an error occurred (the error is
            printed). Ctrl-C exits the process with status 2.
        """
        try:
            reader = PyPDF2.PdfReader(self.pdf)
            total_pages = len(reader.pages)

            if self.stop == -1 or self.stop > total_pages:
                self.stop = total_pages

            pdf_writer = PyPDF2.PdfWriter()
            for page_num in range(self.start, self.stop):
                pdf_writer.add_page(reader.pages[page_num])

            with open(self.outf, "wb") as out_file:
                pdf_writer.write(out_file)
            return self.outf
        except KeyboardInterrupt:
            print("\n [!] Quit")
            sys.exit(2)
        except FileNotFoundError as e:
            print(f"[{bg.BRED}-{RESET}] {fg.RED}{e}{RESET}")
        except Exception as e:
            print(e)

    @staticmethod
    def run(kwargs):
        """
        Run an extraction from a positional argument list.

        Args:
            kwargs (list): ``[pdf, start]`` or ``[pdf, start, stop]``; the page
                numbers may be strings. Shorter lists are ignored.

        Returns:
            None
        """
        if len(kwargs) > 2:
            arg1, arg2, arg3 = kwargs
            PageExtractor(arg1, int(arg2), int(arg3)).getPages()
        elif len(kwargs) == 2:
            arg1, arg2 = kwargs
            PageExtractor(arg1, int(arg2)).getPages()
+
+
class PDFCombine:
    """
    Combine PDF files, either one after another or page-interleaved.

    Args:
        obj1: A list/tuple of PDF paths, or the path of the first PDF.
        obj2: Path of the second PDF when ``obj1`` is a single path.
        outf: Output path. When omitted it is derived from the first two
            inputs as ``<dir of first>/<first>_<second>_filewarp.pdf``, or
            ``"Filemac_pdfjoin.pdf"`` if that is not possible.
        order: ``"AB"``, ``"BA"``, ``"ABA"``, ``"BAB"`` interleave pages;
            ``"AA"``, ``"BB"``, ``"AAB"``, ``"BBA"`` append whole files.
    """

    _INTERLEAVE_ORDERS = frozenset({"AB", "BA", "ABA", "BAB"})
    _APPEND_ORDERS = frozenset({"AA", "BB", "AAB", "BBA"})

    def __init__(self, obj1, obj2=None, outf=None, order="AA"):
        self.obj1 = obj1
        self.obj2 = obj2
        self.outf = outf
        self.order = order

        if self.outf is None:
            self.outf = self._default_output_name()

    def _input_files(self):
        """Return the input PDFs as a list, whichever way they were given."""
        if isinstance(self.obj1, (list, tuple)):
            return list(self.obj1)
        files = [self.obj1]
        if self.obj2 is not None:
            files.append(self.obj2)
        return files

    def _default_output_name(self):
        """Derive the output name from the first two inputs."""
        try:
            # Unpacking fails (ValueError) when fewer than two inputs exist.
            first, second = self._input_files()[:2]
            stems = [
                os.path.splitext(os.path.basename(path))[0]
                for path in (first, second)
            ]
            return os.path.join(
                os.path.dirname(first), f"{stems[0]}_{stems[1]}_filewarp.pdf"
            )
        except (TypeError, ValueError):
            return "Filemac_pdfjoin.pdf"

    def _write(self, pdf_writer):
        """Write ``pdf_writer`` to ``self.outf`` (which stays a path)."""
        with open(self.outf, "wb") as out_file:
            pdf_writer.write(out_file)

    def controller(self):
        """Run the combine method that matches ``self.order``."""
        if self.order in self._INTERLEAVE_ORDERS:
            self.combine_pdfs_ABA_interleave()
        elif self.order in self._APPEND_ORDERS:
            if isinstance(self.obj1, list):
                self.merge_All_AAB()
            else:
                self.combine_pdfs_AAB_order()
        else:
            print(f"{fg.RED}Unknown order: {self.order}{RESET}")

    def combine_pdfs_ABA_interleave(self):
        """
        Interleave pages: page 1 of every input, then page 2 of every input,
        and so on; shorter inputs simply run out.
        """
        try:
            pdf_writer = PyPDF2.PdfWriter()
            pdf_readers = [PyPDF2.PdfReader(file) for file in self._input_files()]

            max_pages = max(len(reader.pages) for reader in pdf_readers)

            for page_num in range(max_pages):
                for reader in pdf_readers:
                    if page_num < len(reader.pages):
                        pdf_writer.add_page(reader.pages[page_num])

            self._write(pdf_writer)
        except KeyboardInterrupt:
            print("\nQuit!")
            sys.exit(1)
        except Exception as e:
            print(f"{fg.RED}{e}{RESET}")

    def combine_pdfs_AAB_order(self):
        """Append all pages of ``obj2`` after all pages of ``obj1``."""
        try:
            pdf_writer = PyPDF2.PdfWriter()
            for source in (self.obj1, self.obj2):
                for page in PyPDF2.PdfReader(source).pages:
                    pdf_writer.add_page(page)

            self._write(pdf_writer)
        except KeyboardInterrupt:
            print("\nQuit!")
            sys.exit(1)
        except Exception as e:
            print(f"{fg.RED}{e}{RESET}")

    def merge_All_AAB(self):
        """Append the pages of every file in ``obj1``, in list order."""
        try:
            pdf_writer = PyPDF2.PdfWriter()
            for file in self.obj1:
                for page in PyPDF2.PdfReader(file).pages:
                    pdf_writer.add_page(page)

            self._write(pdf_writer)
        except KeyboardInterrupt:
            print("\nQuit!")
            sys.exit(1)
        except Exception as e:
            print(f"{fg.RED}{e}{RESET}")
+
+
class PDFScanner:
    """
    Extract text from PDF files, either from the embedded text layer
    (``scanPDF``) or through OCR (``scanAsImgs``, ``scanAsLongImg``).

    Args:
        input_file: A PDF file, or a directory whose PDF files are scanned.
        sep: Separator handed to ``ExtractText`` for the OCR-based methods.
    """

    def __init__(self, input_file, sep: str = DEFAULT_SEPARATOR):
        self.input_file = input_file
        self.sep = sep

    def preprocess(self):
        """
        Return the files to process: ``input_file`` itself when it is a file,
        or the regular files directly inside it when it is a directory.
        """
        files_to_process = []

        if os.path.isfile(self.input_file):
            files_to_process.append(self.input_file)
        elif os.path.isdir(self.input_file):
            for file in os.listdir(self.input_file):
                file_path = os.path.join(self.input_file, file)
                if os.path.isfile(file_path):
                    files_to_process.append(file_path)

        return files_to_process

    def _pdf_files(self):
        """Return the entries of ``preprocess()`` whose name ends in "pdf"."""
        return [item for item in self.preprocess() if item.lower().endswith("pdf")]

    def scanPDF(self, obj=None):
        """
        Write the text layer of each PDF to a ``.txt`` file beside it.

        Args:
            obj: A single PDF path to scan instead of ``input_file``.
        """
        pdf_list = [obj] if obj else self._pdf_files()

        for pdf in pdf_list:
            out_f = os.path.splitext(pdf)[0] + ".txt"

            with open(pdf, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # `or ""` guards against pages for which no text is returned.
                text = "".join(page.extract_text() or "" for page in reader.pages)

            with open(out_f, "w", encoding="utf-8") as f:
                f.write(text)

    def scanAsImgs(self):
        """
        OCR the document page by page.

        The pages are rendered to images with ``DocumentConverter.doc2image``,
        each image is run through ``ExtractText`` and the combined text is
        appended to ``<input_file without its last 4 chars>_filewarp.txt``.
        The page images and their per-image ``.txt`` files are removed
        afterwards.

        Returns:
            str: The combined extracted text.
        """
        img_objs = DocumentConverter(self.input_file).doc2image()

        pieces = []
        for i in tqdm(img_objs, desc="Extracting", leave=False):
            _text = ExtractText(i, self.sep).run()
            if _text is not None:
                pieces.extend(_text)
        text = "".join(pieces)

        # Written once, after all pages, so no page text is duplicated.
        with open(
            f"{self.input_file[:-4]}_filewarp.txt", "a", encoding="utf-8"
        ) as _writer:
            _writer.write(text)

        # Remove the intermediate page images and their text files.
        for obj in img_objs:
            if os.path.exists(obj):
                os.remove(obj)
            txt_file = f"{obj[:-4]}.txt"
            if os.path.exists(txt_file):
                os.remove(txt_file)

        return text

    def scanAsLongImg(self) -> bool:
        """
        OCR each PDF after rendering it as one long image, printing the text.

        Returns:
            bool: True when text was extracted from at least one PDF, False
            otherwise (including when an error occurred, which is printed).
        """
        extracted_any = False
        try:
            for file in self._pdf_files():
                long_image = PDF2LongImageConverter(file).preprocess()

                result = ExtractText(long_image, self.sep).run()
                # run() returns None when nothing could be processed.
                if result is not None:
                    print("".join(result))
                    extracted_any = True
        except Exception as e:
            print(e)
            return False
        return extracted_any
diff --git a/filewarp/core/recorder.py b/filewarp/core/recorder.py
new file mode 100644
index 0000000..a8843db
--- /dev/null
+++ b/filewarp/core/recorder.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+import numpy as np
+import sounddevice as sd
+import wavio
+import time
+from pynput import keyboard
+import sys
+
+
+class SoundRecorder:
+    """Record audio from the default input device into a WAV file.
+
+    While recording, SPACE toggles pause/resume and ENTER stops the recording;
+    the captured chunks are then concatenated and written with ``wavio``.
+    """
+
+    def __init__(self, frequency=44100, channels=2, dtype=np.int16, filename=None):
+        """
+        Args:
+            frequency: Sample rate in samples per second.
+            channels: Number of input channels to capture.
+            dtype: NumPy sample type passed to the input stream.
+            filename: Output file name.  When omitted the user is prompted for
+                one (the original, interactive behaviour).
+        """
+        self.fs = frequency
+        # Previously hard-coded to 2, which silently ignored the argument.
+        self.channels = channels
+        self.dtype = dtype
+
+        self.paused = False  # Toggled by SPACE
+        self.recording = []  # Buffer of recorded chunks
+        self.start_time = 0  # Wall-clock time at which recording started
+        self.elapsed_time = 0  # Seconds of audio captured so far
+        self.running = True  # Cleared by ENTER
+        self.filename = (
+            self._with_wav_extension(filename) if filename else self.filename_prober()
+        )
+
+    @staticmethod
+    def _with_wav_extension(name):
+        """Append ``.wav`` when *name* carries no extension at all."""
+        return name if "." in name else f"{name}.wav"
+
+    def format_time(self, seconds):
+        """Return *seconds* as a colourised ``HH:MM:SS`` string."""
+        hours = int(seconds // 3600)
+        minutes = int((seconds % 3600) // 60)
+        sec = int(seconds % 60)
+        return f"\033[34m{hours:02d}\033[35m:{minutes:02d}\033[32m:{sec:02d} \033[0m"
+
+    def on_press(self, key):
+        """Keyboard handler: SPACE pauses/resumes, ENTER stops the listener."""
+        try:
+            if key == keyboard.Key.space:
+                self.paused = not self.paused
+                if self.paused:
+                    print("\nPaused... Press SPACE to resume.")
+                else:
+                    print("\nRecording resumed... Press SPACE to pause.")
+            elif key == keyboard.Key.enter:
+                self.running = False
+                print("\nRecording finished.")
+                return False  # Returning False stops the pynput listener
+        except Exception as e:
+            print(f"Error: {e}")
+
+    def record_audio(self):
+        """Capture audio until ENTER is pressed.
+
+        Returns:
+            numpy.ndarray: The recorded chunks concatenated along axis 0, or an
+            empty array of ``self.dtype`` when nothing was captured.
+        """
+        print("Press SPACE to pause/resume, ENTER to stop and save.")
+        self.start_time = time.time()
+        self.elapsed_time = 0
+
+        def callback(indata, frames, callback_time, status):
+            if not self.paused:
+                self.recording.append(indata.copy())
+                # Count captured frames rather than wall-clock time so the
+                # display does not jump forward after a pause.
+                self.elapsed_time += frames / self.fs
+                print(f"Elapsed Time: {self.format_time(self.elapsed_time)}", end="\r")
+
+        with sd.InputStream(
+            samplerate=self.fs,
+            channels=self.channels,
+            dtype=self.dtype,
+            callback=callback,
+        ):
+            with keyboard.Listener(on_press=self.on_press) as listener:
+                while self.running:
+                    time.sleep(0.1)  # Prevents high CPU usage
+                listener.stop()
+
+        if not self.recording:
+            return np.array([], dtype=self.dtype)
+        return np.concatenate(self.recording, axis=0)
+
+    def run(self):
+        """Record, save and return the output file name (exits on Ctrl-C)."""
+        try:
+            r_data = self.record_audio()
+            self.save_audio(r_data)
+            return self.filename
+        except KeyboardInterrupt:
+            sys.exit()
+
+    def save_audio(self, recording):
+        """Write *recording* to ``self.filename``; report when it is empty."""
+        if recording.size == 0:
+            print("No audio recorded.")
+        else:
+            wavio.write(self.filename, recording, self.fs, sampwidth=2)
+            print(f"Recording saved as {self.filename}")
+
+    @staticmethod
+    def filename_prober():
+        """Prompt until a non-blank file name is entered; default to ``.wav``."""
+        _filename = None
+
+        while not _filename:
+            # strip() so a whitespace-only answer is treated as "no answer".
+            _filename = input("\033[94mEnter Desired File Name\033[0;1;89m:").strip()
+
+        return SoundRecorder._with_wav_extension(_filename)
+
+
+if __name__ == "__main__":
+    try:
+        # SoundRecorder() asks for the output file name itself; the extra
+        # prompt that used to live here made the user type the name twice and
+        # then discarded the first answer.
+        recorder = SoundRecorder()
+        file = recorder.run()
+    except KeyboardInterrupt:
+        print("\nQuit!")
+        sys.exit(1)
diff --git a/filewarp/core/svg/core.py b/filewarp/core/svg/core.py
new file mode 100644
index 0000000..fcbede0
--- /dev/null
+++ b/filewarp/core/svg/core.py
@@ -0,0 +1,47 @@
+import cairosvg
+
+
+class SVGConverter:
+    """
+    Thin static wrappers around CairoSVG for converting SVG input.
+    Output formats: PNG, PDF and SVG.
+    """
+
+    @staticmethod
+    def _source_kwargs(input_svg: str, is_string: bool) -> dict:
+        """Build the CairoSVG input keyword: raw bytes or a file/URL reference."""
+        if is_string:
+            return {"bytestring": input_svg.encode()}
+        return {"url": input_svg}
+
+    @staticmethod
+    def to_png(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Write *input_svg* as a PNG file.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output PNG file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source_kwargs(input_svg, is_string)
+        cairosvg.svg2png(write_to=output_path, **source)
+
+    @staticmethod
+    def to_pdf(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Write *input_svg* as a PDF file.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output PDF file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source_kwargs(input_svg, is_string)
+        cairosvg.svg2pdf(write_to=output_path, **source)
+
+    @staticmethod
+    def to_svg(input_svg: str, output_path: str, is_string: bool = False):
+        """
+        Re-emit *input_svg* as an SVG file via CairoSVG.
+        :param input_svg: Path to SVG file or raw SVG string.
+        :param output_path: Output SVG file path.
+        :param is_string: Set True if input_svg is raw SVG data.
+        """
+        source = SVGConverter._source_kwargs(input_svg, is_string)
+        cairosvg.svg2svg(write_to=output_path, **source)
diff --git a/filewarp/core/text/core.py b/filewarp/core/text/core.py
new file mode 100644
index 0000000..167b9b9
--- /dev/null
+++ b/filewarp/core/text/core.py
@@ -0,0 +1,111 @@
+"""Create a word document directly from a text file."""
+
+from docx import Document
+from docx.shared import Pt, RGBColor
+
+from ...utils.colors import fg, rs
+
+RESET = rs
+
+
+class StyledText:
+    """
+    Convert a lightly marked-up text file into a Word document.
+
+    Args:
+        obj: path of the input text file
+        out_obj: output file name; defaults to ``<input without extension>_filewarp.docx``
+        fsize: body font size in points (default 12)
+        fstyle: font name used for headings and body (default Times New Roman)
+
+    Markup understood in the input file:
+        A line starting with '#' is a heading; the number of leading '#'
+        selects the level.  Only three heading levels are supported
+        ('#', '##', '###'); deeper levels are rendered as level 3.
+        Every other line becomes a body paragraph.
+    """
+
+    # Deepest heading level emitted; longer '#' runs are clamped to this.
+    MAX_HEADING_LEVEL = 3
+
+    def __init__(
+        self, obj, out_obj=None, fsize: int = 12, fstyle: str = "Times New Roman"
+    ):
+        import os  # local import: this module does not import os at file level
+
+        self.obj = obj
+        self.out_obj = out_obj
+        self.fsize = fsize
+        self.fstyle = fstyle
+        if self.out_obj is None:
+            # splitext only strips the final extension, so dotted directories
+            # ("./notes.v2/file.txt") no longer truncate the output path.
+            self.out_obj = f"{os.path.splitext(self.obj)[0]}_filewarp.docx"
+
+    @staticmethod
+    def _split_heading(line):
+        """Return ``(level, text)`` for *line*; level 0 means body text.
+
+        Only the run of '#' at the very start of the line counts as heading
+        markers, so a '#' inside the title (e.g. "# C# notes") is kept.
+        """
+        markers = len(line) - len(line.lstrip("#"))
+        if markers == 0:
+            return 0, line.strip()
+        level = min(markers, StyledText.MAX_HEADING_LEVEL)
+        # Slice by the real marker count, not the clamped level, so "#####"
+        # headings do not keep stray '#' characters in their text.
+        return level, line[markers:].strip()
+
+    def text_to_word(self):
+        """
+        Build the Word document and save it to ``self.out_obj``.
+
+        The text file is read line by line.  Heading lines get the matching
+        built-in "Heading N" paragraph style plus an explicit size and colour;
+        all other lines become body paragraphs in black at ``fsize`` points.
+        Both headings and body use the ``fstyle`` font.
+        """
+
+        print(f"{fg.BWHITE}Set Font: {fg.CYAN}{self.fsize}{RESET}")
+        print(f"{fg.BWHITE}Set Style: {fg.CYAN}{self.fstyle}{RESET}")
+        doc = Document()
+
+        # Explicit run formatting applied on top of the heading styles.
+        heading_styles = {
+            1: {"font_size": Pt(18), "font_color": RGBColor(126, 153, 184)},
+            2: {"font_size": Pt(16), "font_color": RGBColor(0, 120, 212)},
+            3: {"font_size": Pt(14), "font_color": RGBColor(0, 120, 212)},
+        }
+
+        body_font_size = Pt(self.fsize)
+        body_font_color = RGBColor(0, 0, 0)  # Black
+
+        with open(self.obj, "r") as file:
+            lines = file.readlines()
+
+        for number, line in enumerate(lines, start=1):
+            print(
+                f"{fg.BWHITE}Line: {fg.DCYAN}{number}{fg.YELLOW} of {fg.BLUE}{len(lines)}{RESET}",
+                end="\r",
+            )
+            level, content = self._split_heading(line)
+            p = doc.add_paragraph()
+            run = p.add_run(content)
+            # Body text previously ignored fstyle and was always Times New Roman.
+            run.font.name = self.fstyle
+            if level:
+                style = heading_styles[level]
+                run.font.size = style["font_size"]
+                run.font.color.rgb = style["font_color"]
+                # Styles are looked up by name ("Heading 1"); "Heading1" is the
+                # style id, which python-docx only accepts as a deprecated fallback.
+                p.style = f"Heading {level}"
+            else:
+                run.font.size = body_font_size
+                run.font.color.rgb = body_font_color
+
+        print("\n")
+        doc.save(self.out_obj)
+        print(
+            f"{fg.BWHITE}Text file converted to Word document: {fg.MAGENTA}{self.out_obj}{RESET}"
+        )
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Take the input (and optional output) path from the command line instead
+    # of a path that only exists on the original author's machine.
+    if len(sys.argv) < 2:
+        sys.exit("usage: <text-file> [output.docx]")
+
+    init = StyledText(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
+    init.text_to_word()
diff --git a/filewarp/core/tts/core.py b/filewarp/core/tts/core.py
new file mode 100644
index 0000000..e69de29
diff --git a/filewarp/core/tts/gtts.py b/filewarp/core/tts/gtts.py
new file mode 100644
index 0000000..1f1918b
--- /dev/null
+++ b/filewarp/core/tts/gtts.py
@@ -0,0 +1,562 @@
+import json
+import math
+import os
+import PyPDF2
+import shutil
+import sys
+from docx import Document
+from threading import Lock, Thread
+from typing import List, Union
+import requests
+from gtts import gTTS
+from pydub import AudioSegment
+from rich.errors import MarkupError
+from ..document import DocumentConverter
+from ...utils.colors import fg, rs
+from ...utils.simple import logger
+
+RESET = rs
+
+_ext_word = ["doc", "docx"]
+
+
+class GoogleTTS:
+ """Definition of audiofying class"""
+
+    def __init__(
+        self,
+        obj: Union[os.PathLike, str, List[Union[os.PathLike, str]]],
+        resume: bool = True,
+    ):
+        """Remember what to convert and whether earlier progress may be reused.
+
+        Args:
+            obj: A file, a directory, or a list of files to turn into audio.
+            resume: When ``False`` an existing temporary chunk folder is
+                removed before synthesis starts.
+        """
+        self.resume = resume
+        self.obj = obj
+
+    @staticmethod
+    def join_audios(files, output_file):
+        """Concatenate audio files into ``<output_file>_master.ogg``.
+
+        Args:
+            files: Paths of the audio files to join, in playback order.
+            output_file: Path prefix of the master file.
+
+        Returns:
+            The path of the written master file, or ``None`` when *files* is
+            empty and nothing was written.
+        """
+        if not files:
+            print(f"{fg.RED}No audio files to join{RESET}")
+            return None
+
+        # The announced name now matches the file that is actually exported
+        # (the message used to say "_master.mp3").
+        masterfile = output_file + "_master.ogg"
+        print(
+            f"{fg.BBLUE}Create a master file {fg.BMAGENTA}{masterfile}{RESET}",
+            end="\r",
+        )
+        # Reported once, not once per file.
+        print(f"Join {fg.BBLUE}{len(files)}{RESET} files")
+
+        segments = iter(AudioSegment.from_file(filename) for filename in files)
+        combined = next(segments)
+        for segment in segments:
+            combined += segment
+
+        combined.export(masterfile, format="ogg")
+        print(
+            f"{fg.BGREEN}Master file:Ok                                                                             {RESET}"
+        )
+        return masterfile
+
+    def Synthesise(
+        self,
+        text: str,
+        output_file: str,
+        CHUNK_SIZE: int = 1_000,
+        _tmp_folder_: str = "tmp_dir",
+        thread_name: str = None,
+        max_retries: int = 30,
+    ) -> None:
+        """Convert *text* to speech with gTTS, one chunk file at a time.
+
+        The text is cut into ``CHUNK_SIZE``-character chunks; chunk ``n`` is
+        saved as ``<tmp folder>/<output file name>_<n>.ogg`` and the index of
+        the last finished chunk is recorded through ``ConfigManager`` so an
+        interrupted run can continue.  Failed chunks are retried with a capped
+        exponential back-off.
+
+        Args:
+            text: Text to synthesise.
+            output_file: Target audio file; its directory hosts the temp folder.
+            CHUNK_SIZE: Characters per gTTS request.
+            _tmp_folder_: Name of the chunk folder, created beside *output_file*.
+            thread_name: Key of this job in the configuration file.  Derived
+                from *output_file* when omitted.
+            max_retries: Failed attempts tolerated over the whole job before
+                giving up with ``sys.exit(2)``.
+
+        Raises:
+            ValueError: If ``CHUNK_SIZE`` is not positive.
+        """
+        import time  # local import: only the retry back-off needs it
+
+        if CHUNK_SIZE <= 0:
+            raise ValueError("CHUNK_SIZE must be a positive integer")
+        if not text:
+            # Extraction helpers return None on failure; nothing to synthesise.
+            logger.error(f"{fg.RED}No text to synthesise for {output_file}{RESET}")
+            return
+
+        config = ConfigManager()
+        out_dir, _file_ = os.path.split(output_file)
+
+        # Honour the caller's key; it used to be overwritten unconditionally,
+        # which could point at a config entry that was never created.
+        if thread_name is None:
+            thread_name = f"thread_{os.path.split(output_file.split('.')[0])[-1]}"
+
+        _tmp_folder_ = os.path.join(out_dir, _tmp_folder_)
+
+        if os.path.exists(_tmp_folder_) and self.resume is False:
+            shutil.rmtree(_tmp_folder_)
+
+        # Without a chunk folder there is nothing to resume from, whatever a
+        # stale configuration entry may claim.
+        fresh_start = not os.path.exists(_tmp_folder_)
+        if fresh_start:
+            logger.info(
+                f"{fg.BYELLOW}Create temporary directory = {fg.BBLUE}{_tmp_folder_}{RESET}"
+            )
+            os.makedirs(_tmp_folder_, exist_ok=True)
+
+        _full_output_path_ = os.path.join(_tmp_folder_, _file_)
+        total_chunks = math.ceil(len(text) / CHUNK_SIZE)
+
+        # The config stores the index of the last finished chunk.  Restart at
+        # that chunk (it is simply overwritten) and tolerate a missing entry.
+        saved_chunk = None if fresh_start else config.read_config_file(thread_name)
+        counter = int(saved_chunk) if saved_chunk is not None else 0
+        counter = min(max(counter, 0), total_chunks - 1)
+
+        max_backoff = 60  # seconds; 2**attempt grows far too large otherwise
+        attempt = 0
+
+        print(f"{fg.BYELLOW}Start thread:: {thread_name}{RESET}")
+
+        while counter < total_chunks:
+            print(
+                f"Processing: chunk {fg.BMAGENTA}{counter}/{total_chunks} {fg.DCYAN}{counter / total_chunks * 100:.2f}%{RESET}\n",
+                end="\r",
+            )
+            start = counter * CHUNK_SIZE
+            chunk = text[start : start + CHUNK_SIZE]
+
+            if not chunk.strip():
+                # gTTS refuses blank input; retrying can never succeed.
+                logger.info(f"Skip blank chunk {counter}")
+                config.update_config_entry(thread_name, current_chunk=counter)
+                counter += 1
+                continue
+
+            try:
+                tts = gTTS(text=chunk, lang="en", slow=False)
+                tts.save(f"{_full_output_path_}_{counter}.ogg")
+            except Exception as e:  # any failure of this chunk is retried
+                attempt += 1
+                logger.error(
+                    f"{fg.BMAGENTA}{attempt}/{max_retries}:{fg.RED}{self._describe_error(e)}{RESET}"
+                )
+                if attempt > max_retries:
+                    print(
+                        f"{fg.RED}Maximum retries reached. Unable to complete the operation after {fg.BMAGENTA} {max_retries} attempts.{RESET}"
+                    )
+                    sys.exit(2)
+
+                # Real, capped exponential back-off (the old countdown printed
+                # 2**attempt lines without ever waiting).
+                for _sec_ in range(min(2**attempt, max_backoff), 0, -1):
+                    print(
+                        f"{fg.BWHITE}Resume in {fg.BBLUE}{_sec_}{RESET}",
+                        end="\r",
+                    )
+                    time.sleep(1)
+                continue
+
+            config.update_config_entry(thread_name, current_chunk=counter)
+            counter += 1
+
+        print(
+            f"{fg.BMAGENTA}Conversion success✅. \n  {fg.CYAN}INFO\t Create masterfile{RESET}"
+        )
+
+        # NOTE(review): with one or two chunk files nothing is joined and the
+        # chunks stay in the temp folder - threshold kept as found; confirm
+        # what JoinAudios expects before changing it.
+        if len(os.listdir(_tmp_folder_)) > 2:
+            from .JoinAudios import JoinAudios
+
+            joiner = JoinAudios(_tmp_folder_, masterfile=output_file)
+            joiner.worker()
+            # Chunks are only removed once they have been joined.
+            shutil.rmtree(_tmp_folder_)
+
+    @staticmethod
+    def _describe_error(error):
+        """Return a short log message for a failed synthesis attempt."""
+        if isinstance(error, requests.exceptions.HTTPError):
+            # Status and reason live on the attached response, not the exception.
+            response = getattr(error, "response", None)
+            status = getattr(response, "status_code", "?")
+            reason = getattr(response, "reason", "")
+            return f"HTTP error: {status} - {reason}"
+        if isinstance(error, (requests.exceptions.ConnectionError, ConnectionError)):
+            return "ConnectionError"
+        return str(error)
+
+    @staticmethod
+    def pdf_to_text(pdf_path):
+        """Return the concatenated text of every page of the PDF at *pdf_path*.
+
+        Progress is logged per page.  On any failure the error is logged and
+        ``None`` is returned implicitly.
+        """
+        logger.info(f"{fg.GREEN} Initializing pdf to text conversion{RESET}")
+        try:
+            with open(pdf_path, "rb") as handle:
+                reader = PyPDF2.PdfReader(handle)
+                collected = ""
+                print(f"{fg.YELLOW}Convert pages..{RESET}")
+                page_total = len(reader.pages)
+                for number in range(1, page_total + 1):
+                    logger.info(f"Page {fg.BBLUE}{number}{RESET}/{len(reader.pages)}")
+                    collected += reader.pages[number - 1].extract_text()
+                print(f"{fg.BGREEN}Ok{RESET}\n")
+                return collected
+        except Exception as e:
+            logger.error(
+                f"{fg.RED}Failed to extract text from '{fg.YELLOW}{pdf_path}'{RESET}:\n {e}"
+            )
+
+    @staticmethod
+    def text_file(input_file):
+        """Read *input_file* and return its text with newlines turned into spaces.
+
+        Undecodable bytes are ignored.  Errors are logged and ``None`` is
+        returned implicitly.
+        """
+        try:
+            with open(input_file, "r", errors="ignore") as source:
+                content = source.read()
+                return " ".join(content.split("\n"))
+        except FileNotFoundError:
+            logger.error("File '{}' was not found.📁".format(input_file))
+        except Exception as e:
+            logger.error(f"{fg.RED}{str(e)}{RESET}")
+
+    @staticmethod
+    def docx_to_text(docx_path):
+        """Return the paragraphs of the Word file at *docx_path*, newline-joined.
+
+        Errors are logged and ``None`` is returned implicitly.
+        """
+        try:
+            logger.info(f"{fg.BLUE} Converting {docx_path} to text{RESET}")
+            document = Document(docx_path)
+            return "\n".join(part.text for part in document.paragraphs)
+        except FileNotFoundError:
+            logger.error(f"File '{docx_path}' was not found.📁")
+        except Exception as e:
+            logger.error(
+                f"{fg.RED}Error converting {docx_path} to text: {e} {RESET}"
+            )
+
+    class ThreadClient:
+        """Run the documents of a ``GoogleTTS`` instance through worker threads.
+
+        ``instance.obj`` may be a list of files, a single file or a directory;
+        supported documents are processed in batches of ``num_threads``.
+        """
+
+        SUPPORTED_EXTENSIONS = (".pdf", ".docx", ".doc", ".txt", ".ppt", ".pptx")
+
+        def __init__(self, instance):
+            self.instance = instance
+            self.lock = Lock()
+            self.config = ConfigManager()
+
+        @staticmethod
+        def _stem(path):
+            """File name up to its first dot; used in thread and temp-dir names."""
+            return os.path.basename(path).split(".")[0]
+
+        def _collect_files(self):
+            """Return absolute paths of all supported documents to process."""
+            source = self.instance.obj
+            if isinstance(source, list):
+                candidates = [os.path.abspath(item) for item in source]
+            elif os.path.isfile(source):
+                candidates = [os.path.abspath(source)]
+            elif os.path.isdir(source):
+                # Join with the directory: listdir() yields bare names, which
+                # abspath() alone would resolve against the working directory.
+                candidates = [
+                    os.path.abspath(os.path.join(source, name))
+                    for name in sorted(os.listdir(source))
+                ]
+            else:
+                candidates = []
+
+            return [
+                path
+                for path in candidates
+                if os.path.isfile(path)
+                and path.lower().endswith(self.SUPPORTED_EXTENSIONS)
+            ]
+
+        def _create_thread(self, item):
+            """Register *item* in the configuration and build its worker thread."""
+            stem = self._stem(item)
+            thread_name = f"thread_{stem}"
+            # Unique temp dir per file
+            temp_dir = f"tmp_dir_{stem}"
+
+            with self.lock:
+                self.config.add_config_entry(
+                    thread_name, os.path.splitext(item)[0], temp_dir, 0
+                )
+
+            return Thread(
+                target=self.worker,
+                args=(item, temp_dir, thread_name),
+                name=thread_name,
+            )
+
+        def audiofy(self, num_threads=3):
+            """Convert every supported document, ``num_threads`` files at a time.
+
+            Each batch is started together and joined before the next begins.
+            """
+            batch_size = max(1, int(num_threads))  # 0 used to divide by zero
+            files = self._collect_files()
+
+            for start in range(0, len(files), batch_size):
+                threads = [
+                    self._create_thread(item)
+                    for item in files[start : start + batch_size]
+                ]
+                for thread in threads:
+                    thread.start()
+                for thread in threads:
+                    thread.join()
+
+        def worker(self, input_file, _temp_dir_, thread_name):
+            """Extract the text of *input_file* and synthesise it to ``<stem>.ogg``."""
+            output_file = self._stem(input_file) + ".ogg"
+            print(f"Thread {thread_name} processing file: {input_file}")
+
+            try:
+                extension = os.path.splitext(input_file)[1].lower()
+                if extension == ".pdf":
+                    text = GoogleTTS.pdf_to_text(input_file)
+                elif extension in (".doc", ".docx"):
+                    text = GoogleTTS.docx_to_text(input_file)
+                elif extension == ".txt":
+                    text = GoogleTTS.text_file(input_file)
+                elif extension in (".ppt", ".pptx"):
+                    # Slides go through Word to plain text first.
+                    conv = DocumentConverter(input_file)
+                    word = conv.ppt_to_word()
+                    conv = DocumentConverter(word)
+                    text = GoogleTTS.text_file(conv.word_to_txt())
+                else:
+                    raise ValueError(
+                        "Unsupported file format. Please provide a PDF, txt, or Word document."
+                    )
+
+                self.instance.Synthesise(
+                    text, output_file, _tmp_folder_=_temp_dir_, thread_name=thread_name
+                )
+                print(f"Thread {thread_name} completed processing {input_file}")
+
+            except Exception as e:
+                print(f"Error in thread {thread_name}: {e}")
+            except KeyboardInterrupt:
+                print(f"Thread {thread_name} interrupted.")
+                sys.exit(1)
+
+
+class ConfigManager:
+    """JSON-backed registry of worker threads and their resume position.
+
+    The file holds a list of entries with the keys ``thread_name``,
+    ``associated_file``, ``tmp_dir`` and ``current_chunk``.  Several worker
+    threads update the same file through separate instances, so every
+    read-modify-write cycle is serialised by one class-wide lock and the file
+    is replaced atomically.
+    """
+
+    # Shared by all instances: they all target the same file by default.
+    _file_lock = Lock()
+    # Failures that are reported instead of raised (I/O, bad JSON, bad shape).
+    _HANDLED_ERRORS = (OSError, ValueError, TypeError, KeyError)
+
+    def __init__(self, config_path="filewarp_config.json"):
+        self.config_path = config_path
+
+    def _load(self):
+        """Return the parsed file, or ``None`` if it is missing.  Lock held by caller."""
+        if not os.path.exists(self.config_path):
+            return None
+        with open(self.config_path, "r") as config_file:
+            return json.load(config_file)
+
+    def _save(self, config_data):
+        """Write *config_data* atomically.  Lock held by caller."""
+        output_dir = os.path.dirname(self.config_path)
+        if output_dir and not os.path.exists(output_dir):
+            os.makedirs(output_dir)
+
+        # Write beside the target, then swap, so a reader never sees a
+        # half-written file.
+        tmp_path = f"{self.config_path}.tmp"
+        with open(tmp_path, "w") as config_file:
+            json.dump(config_data, config_file, indent=4)
+        os.replace(tmp_path, self.config_path)
+
+        print(f"Configuration file '{self.config_path}' created successfully.")
+
+    def create_config_file(self, config_data):
+        """
+        Create or overwrite the configuration file.
+
+        Args:
+            config_data(list): A list of dictionaries containing thread name, associated file name, temp dir, and current chunk.
+        """
+        try:
+            with self._file_lock:
+                self._save(config_data)
+        except self._HANDLED_ERRORS as e:
+            print(f"Error creating configuration file: {e}")
+
+    def read_config_file(self, thread=None):
+        """
+        Read the configuration file.
+
+        Args:
+            thread (str): Thread name to look up.  If None, the full config is returned.
+
+        Returns:
+            The ``current_chunk`` of *thread*, or the full configuration list
+            when *thread* is None.
+            None: If the file doesn't exist, cannot be read, or the thread is not found.
+        """
+        try:
+            with self._file_lock:
+                config = self._load()
+
+            if config is None:
+                print(f"Configuration file '{self.config_path}' not found.")
+                return None
+
+            if thread is None:
+                return config
+
+            for entry in config:
+                if entry["thread_name"] == thread:
+                    return entry.get("current_chunk", None)
+
+            print(f"Entry for thread '{thread}' not found.")
+            return None
+
+        except self._HANDLED_ERRORS as e:
+            print(f"Error reading configuration file: {e}")
+            return None
+
+    def add_config_entry(self, thread_name, associated_file, tmp_dir, current_chunk):
+        """
+        Add a new entry to the configuration file; existing entries are kept as is.
+
+        Args:
+            thread_name (str): The name of the thread to be added.
+            associated_file (str): The associated file name for the thread.
+            tmp_dir (str): Temporary directory for the thread.
+            current_chunk (int): The current chunk number for the thread.
+        """
+        try:
+            with self._file_lock:
+                config_data = self._load() or []
+
+                if any(entry["thread_name"] == thread_name for entry in config_data):
+                    print(
+                        f"Thread '{thread_name}' already exists. Use 'update_config_entry' to update it."
+                    )
+                    return
+
+                config_data.append(
+                    {
+                        "thread_name": thread_name,
+                        "associated_file": associated_file,
+                        "tmp_dir": tmp_dir,
+                        "current_chunk": current_chunk,
+                    }
+                )
+                self._save(config_data)
+
+        except self._HANDLED_ERRORS as e:
+            print(f"Error adding config entry: {e}")
+
+    def update_config_entry(
+        self, thread_name, associated_file=None, tmp_dir=None, current_chunk=None
+    ):
+        """
+        Update an existing entry in the configuration file.
+
+        Args:
+            thread_name (str): The name of the thread to update.
+            associated_file (str, optional): The updated associated file name. Defaults to None.
+            tmp_dir (str, optional): The updated temporary directory. Defaults to None.
+            current_chunk (int, optional): The updated current chunk number. Defaults to None.
+
+        Returns:
+            bool: True when the entry was found and saved, False otherwise.
+        """
+        try:
+            with self._file_lock:
+                config_data = self._load() or []
+
+                for entry in config_data:
+                    if entry["thread_name"] != thread_name:
+                        continue
+                    if associated_file:
+                        entry["associated_file"] = associated_file
+                    if tmp_dir:
+                        entry["tmp_dir"] = tmp_dir
+                    if current_chunk is not None:
+                        entry["current_chunk"] = current_chunk
+
+                    self._save(config_data)
+                    print(f"Thread '{thread_name}' updated successfully.")
+                    return True
+
+            print(f"Thread '{thread_name}' not found in the configuration.")
+
+        except self._HANDLED_ERRORS as e:
+            print(f"Error updating config entry: {e}")
+        return False
diff --git a/filewarp/core/validator.py b/filewarp/core/validator.py
new file mode 100644
index 0000000..51b70be
--- /dev/null
+++ b/filewarp/core/validator.py
@@ -0,0 +1,19 @@
+from typing import Tuple
+from pathlib import Path
+
+
+class SystemValidator:
+    """Validates system requirements and dependencies."""
+
+    @staticmethod
+    def validate_file_permissions(temp_dir: Path) -> Tuple[bool, str]:
+        """Check that files can be created in *temp_dir*.
+
+        A uniquely named probe file is created and removed again.  When
+        *temp_dir* is a file, its parent directory is probed instead.
+
+        Returns:
+            ``(True, message)`` when the probe could be written, otherwise
+            ``(False, message)`` with the OS error text.
+        """
+        import tempfile  # local import: not part of this module's import block
+
+        try:
+            temp_dir = Path(temp_dir)
+            if temp_dir.is_file():
+                temp_dir = temp_dir.parent
+            # A unique name instead of the fixed "permission_test.txt", which
+            # would overwrite and then delete a real file of that name.
+            with tempfile.NamedTemporaryFile(
+                mode="w", dir=temp_dir, prefix="permission_test_", suffix=".txt"
+            ) as probe:
+                probe.write("test")
+            return True, "Write permissions verified"
+        except (OSError, IOError) as e:
+            return False, f"Insufficient permissions: {str(e)}"
diff --git a/filewarp/core/video/Editor.py b/filewarp/core/video/Editor.py
new file mode 100644
index 0000000..a603caf
--- /dev/null
+++ b/filewarp/core/video/Editor.py
@@ -0,0 +1,527 @@
+import subprocess
+import tempfile
+import json
+import re
+import shutil
+from pathlib import Path
+from typing import Union, List, Tuple, Optional
+
+# from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
+from tqdm import tqdm
+from .models import VideoCodec, AudioCodec, VideoQuality, VideoInfo, TrimRange
+from ...utils.logging_utils import logger
+
+
+class VideoEditor:
+ """
+ Optimized video editor with progress tracking and frame-accurate seeking.
+ """
+
+    def __init__(self, ffmpeg_path: str = "ffmpeg", ffprobe_path: str = "ffprobe"):
+        """
+        Args:
+            ffmpeg_path: Name or path of the ffmpeg executable.
+            ffprobe_path: Name or path of the ffprobe executable.
+
+        Raises:
+            RuntimeError: If either executable cannot be run.
+        """
+        self.ffmpeg_path = ffmpeg_path
+        self.ffprobe_path = ffprobe_path
+
+        # Thread lock for progress bar safety if using ThreadPoolExecutor
+        self._progress_lock = Lock()
+
+        # Verify the tools before creating the scratch directory so a failed
+        # check does not leave an orphaned temp dir behind.
+        self._check_ffmpeg()
+        self._temp_dir = Path(tempfile.mkdtemp(prefix="video_editor_"))
+
+    def _check_ffmpeg(self):
+        """Run ``-version`` on ffmpeg, then ffprobe; raise RuntimeError on failure."""
+        for executable in (self.ffmpeg_path, self.ffprobe_path):
+            try:
+                subprocess.run([executable, "-version"], capture_output=True, check=True)
+            except (subprocess.CalledProcessError, FileNotFoundError) as e:
+                raise RuntimeError(
+                    "FFmpeg/FFprobe not found. Please install FFmpeg and ensure it's in PATH."
+                ) from e
+
+    def get_video_info(self, video_path: Union[str, Path]) -> VideoInfo:
+        """Probe *video_path* with ffprobe and summarise it as a ``VideoInfo``.
+
+        Only the first video stream and the first audio stream are inspected.
+
+        Raises:
+            FileNotFoundError: If *video_path* does not exist.
+            subprocess.CalledProcessError: If ffprobe exits with an error.
+        """
+        video_path = Path(video_path)
+        if not video_path.exists():
+            raise FileNotFoundError(f"Video file not found: {video_path}")
+
+        probe = subprocess.run(
+            [
+                self.ffprobe_path,
+                "-v",
+                "quiet",
+                "-print_format",
+                "json",
+                "-show_format",
+                "-show_streams",
+                str(video_path),
+            ],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        report = json.loads(probe.stdout)
+
+        container = report.get("format", {})
+        duration = float(container.get("duration", 0))
+        file_size = int(container.get("size", 0))
+
+        # First stream of each kind, or None when the file has none.
+        streams = report.get("streams", [])
+        video_stream = next(
+            (item for item in streams if item.get("codec_type") == "video"), None
+        )
+        audio_stream = next(
+            (item for item in streams if item.get("codec_type") == "audio"), None
+        )
+
+        # Defaults used when the corresponding stream is absent.
+        width, height, fps, video_codec, bitrate = 0, 0, 0, "", 0
+        audio_codec, audio_channels = None, None
+
+        if video_stream:
+            width = int(video_stream.get("width", 0))
+            height = int(video_stream.get("height", 0))
+
+            # avg_frame_rate is a "numerator/denominator" string.
+            rate_parts = video_stream.get("avg_frame_rate", "0/0").split("/")
+            if len(rate_parts) == 2:
+                numerator, denominator = rate_parts
+                if float(denominator) != 0:
+                    fps = float(numerator) / float(denominator)
+
+            video_codec = video_stream.get("codec_name", "")
+            bitrate = int(video_stream.get("bit_rate", 0))
+
+        if audio_stream:
+            audio_codec = audio_stream.get("codec_name", "")
+            audio_channels = int(audio_stream.get("channels", 0))
+
+        return VideoInfo(
+            path=video_path,
+            duration=duration,
+            width=width,
+            height=height,
+            fps=fps,
+            codec=video_codec,
+            bitrate=bitrate,
+            audio_codec=audio_codec,
+            audio_channels=audio_channels,
+            file_size=file_size,
+            has_video=video_stream is not None,
+            has_audio=audio_stream is not None,
+        )
+
+    def trim_video(
+        self,
+        input_path: Union[str, Path],
+        output_path: Union[str, Path],
+        trim_ranges: Union[
+            TrimRange, List[TrimRange], Tuple[float, float], List[Tuple[float, float]]
+        ],
+        video_codec: VideoCodec = VideoCodec.H264,
+        audio_codec: AudioCodec = AudioCodec.AAC,
+        quality: VideoQuality = VideoQuality.MEDIUM,
+        crf: Optional[int] = None,
+        preserve_audio: bool = True,
+        copy_streams: bool = False,
+        show_progress: bool = True,
+        seek_buffer: float = 5.0,  # Seconds before target for accurate seeking
+    ) -> Path:
+        """
+        Cut one or more time ranges out of a video.
+
+        The ranges are normalised and validated, the output directory is
+        created, and the work is handed to the single-range or multi-range
+        implementation depending on how many ranges were given.
+
+        Args:
+            seek_buffer: Seconds to seek before target for accurate keyframe alignment (default 5s)
+        """
+        source = Path(input_path)
+        target = Path(output_path)
+
+        ranges = self._normalize_trim_ranges(trim_ranges)
+        self._validate_trim_ranges(source, ranges)
+        target.parent.mkdir(parents=True, exist_ok=True)
+
+        # Options forwarded unchanged to whichever implementation runs.
+        shared_options = (
+            video_codec,
+            audio_codec,
+            quality,
+            crf,
+            preserve_audio,
+            copy_streams,
+            show_progress,
+            seek_buffer,
+        )
+
+        if len(ranges) == 1:
+            return self._trim_single_range(source, target, ranges[0], *shared_options)
+        return self._trim_multiple_ranges(source, target, ranges, *shared_options)
+
+    def _build_ffmpeg_command(
+        self,
+        input_path: Path,
+        output_path: Path,
+        trim_range: Optional[TrimRange] = None,
+        video_codec: VideoCodec = VideoCodec.H264,
+        audio_codec: AudioCodec = AudioCodec.AAC,
+        quality: VideoQuality = VideoQuality.MEDIUM,
+        crf: Optional[int] = None,
+        preserve_audio: bool = True,
+        copy_streams: bool = False,
+        seek_buffer: float = 5.0,
+    ) -> List[str]:
+        """
+        Build the FFmpeg argument list for one trim/encode job.
+
+        Seeking uses the double ``-ss`` technique:
+        1. Input-side ``-ss`` jumps to ``seek_buffer`` seconds before the start
+        2. Output-side ``-ss`` then skips that buffer to reach the exact start
+        3. ``-t`` limits the output to the range's duration
+
+        Args:
+            crf: Constant rate factor.  ``None`` selects the default (23 for
+                H.264); an explicit ``0`` is passed through to the encoder.
+
+        Returns:
+            The complete command, ending with the output path.
+        """
+        cmd = [self.ffmpeg_path, "-hide_banner", "-y"]
+
+        # Input seeking (fast, lands on a keyframe)
+        if trim_range and trim_range.start > 0:
+            seek_pos = max(0, trim_range.start - seek_buffer)
+            cmd.extend(["-ss", str(seek_pos)])
+
+        cmd.extend(["-i", str(input_path)])
+
+        # Output seeking (accurate, from keyframe to exact frame)
+        if trim_range:
+            if trim_range.start > 0:
+                # Skip the buffer we added earlier
+                cmd.extend(["-ss", str(seek_buffer)])
+
+            cmd.extend(["-t", str(trim_range.duration)])
+
+        # Video encoding options
+        if copy_streams:
+            cmd.extend(["-c:v", "copy"])
+            # Fix timestamp gaps when copying streams
+            cmd.extend(
+                ["-avoid_negative_ts", "make_zero", "-fflags", "+genpts", "-async", "1"]
+            )
+        else:
+            quality_value = quality if isinstance(quality, str) else quality.value
+            cmd.extend(
+                [
+                    "-c:v",
+                    video_codec.value,
+                    "-preset",
+                    quality_value,
+                    "-pix_fmt",
+                    "yuv420p",  # Ensure compatibility
+                ]
+            )
+
+            # ``is not None`` rather than truthiness: crf=0 is a valid request
+            # and used to be silently replaced by the default of 23.
+            if crf is not None:
+                cmd.extend(["-crf", str(crf)])
+            elif video_codec == VideoCodec.H264:
+                cmd.extend(["-crf", "23"])
+
+        # Audio encoding options
+        if preserve_audio and audio_codec != AudioCodec.NONE:
+            if copy_streams:
+                cmd.extend(["-c:a", "copy"])
+            else:
+                cmd.extend(["-c:a", audio_codec.value])
+        else:
+            cmd.extend(["-an"])
+
+        # Additional flag to prevent the "long video with short content" bug
+        if copy_streams:
+            cmd.extend(["-vsync", "cfr"])  # Constant frame rate to fix timing
+
+        cmd.extend([str(output_path)])
+        return cmd
+
+ def _parse_ffmpeg_time(self, line: str) -> Optional[float]:
+ """Parse time from FFmpeg stderr output (format: time=00:01:23.45)"""
+ match = re.search(r"time=(\d+):(\d+):(\d+\.\d+)", line)
+ if match:
+ hours, minutes, seconds = map(float, match.groups())
+ return hours * 3600 + minutes * 60 + seconds
+ return None
+
+ def _trim_single_range(
+ self,
+ input_path: Path,
+ output_path: Path,
+ trim_range: TrimRange,
+ video_codec: VideoCodec,
+ audio_codec: AudioCodec,
+ quality: VideoQuality,
+ crf: Optional[int],
+ preserve_audio: bool,
+ copy_streams: bool,
+ show_progress: bool,
+ seek_buffer: float,
+ ) -> Path:
+ """Process single trim range with progress tracking."""
+
+ cmd = self._build_ffmpeg_command(
+ input_path,
+ output_path,
+ trim_range,
+ video_codec,
+ audio_codec,
+ quality,
+ crf,
+ preserve_audio,
+ copy_streams,
+ seek_buffer,
+ )
+
+ logger.debug(f"FFmpeg command: {' '.join(cmd)}")
+
+ try:
+ process = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT, # FFmpeg outputs to stderr, capture both
+ universal_newlines=True,
+ bufsize=1,
+ )
+
+ if show_progress:
+ # Create progress bar
+ pbar = tqdm(
+ total=int(trim_range.duration),
+ desc=f"Trimming {input_path.name[:20]}",
+ unit="s",
+ bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}s [{elapsed}<{remaining}]",
+ )
+
+ last_update = 0
+
+ for line in process.stdout:
+ current_time = self._parse_ffmpeg_time(line)
+ if current_time is not None:
+ # Update progress (cap at duration)
+ progress = min(int(current_time), int(trim_range.duration))
+ if progress > last_update:
+ pbar.update(progress - last_update)
+ last_update = progress
+
+ pbar.close()
+ else:
+ # Just wait for completion without progress
+ process.communicate()
+
+ return_code = process.wait()
+
+ if return_code != 0:
+ raise RuntimeError(f"FFmpeg exited with code {return_code}")
+
+ # Verify output file exists and has size
+ if not output_path.exists() or output_path.stat().st_size == 0:
+ raise RuntimeError("Output file is empty or was not created")
+
+ except Exception as e:
+ # Clean up partial output on failure
+ if output_path.exists():
+ try:
+ output_path.unlink()
+ except:
+ pass
+ raise RuntimeError(f"Failed to trim video: {e}") from e
+
+ return output_path
+
+ def _trim_multiple_ranges(
+ self,
+ input_path: Path,
+ output_path: Path,
+ ranges: List[TrimRange],
+ video_codec: VideoCodec,
+ audio_codec: AudioCodec,
+ quality: VideoQuality,
+ crf: Optional[int],
+ preserve_audio: bool,
+ copy_streams: bool,
+ show_progress: bool,
+ seek_buffer: float,
+ ) -> Path:
+ """Extract multiple ranges and concatenate with progress tracking."""
+ temp_files = []
+ concat_file = None
+
+ try:
+ # Process segments sequentially (FFmpeg isn't thread-safe for encoding)
+ total_duration = sum(r.duration for r in ranges)
+
+ with tqdm(
+ total=int(total_duration), desc="Total Progress", unit="s"
+ ) as main_pbar:
+ for i, trim_range in enumerate(ranges):
+ temp_file = self._temp_dir / f"segment_{i:03d}.mp4"
+ temp_files.append(temp_file)
+
+ # Process segment
+ self._trim_single_range(
+ input_path,
+ temp_file,
+ trim_range,
+ video_codec,
+ audio_codec,
+ quality,
+ crf,
+ preserve_audio,
+ copy_streams,
+ False,
+ seek_buffer, # Disable individual progress
+ )
+
+ # Update main progress
+ main_pbar.update(int(trim_range.duration))
+
+ # Create concat list
+ concat_file = self._temp_dir / "concat.txt"
+ with open(concat_file, "w") as f:
+ for temp_file in temp_files:
+ # Escape single quotes in path for FFmpeg concat demuxer
+ path_str = str(temp_file.absolute()).replace("'", "'\\''")
+ f.write(f"file '{path_str}'\n")
+
+ # Concatenate with progress
+ concat_cmd = [
+ self.ffmpeg_path,
+ "-hide_banner",
+ "-y",
+ "-f",
+ "concat",
+ "-safe",
+ "0",
+ "-i",
+ str(concat_file),
+ "-c",
+ "copy",
+ "-avoid_negative_ts",
+ "make_zero",
+ str(output_path),
+ ]
+
+ logger.debug(f"Concat command: {' '.join(concat_cmd)}")
+
+ result = subprocess.run(
+ concat_cmd, capture_output=True, text=True, check=True
+ )
+
+ except Exception as e:
+ if output_path.exists():
+ output_path.unlink()
+ raise RuntimeError(f"Failed to concatenate segments: {e}") from e
+
+ finally:
+ # Cleanup temp files
+ for temp_file in temp_files:
+ if temp_file.exists():
+ temp_file.unlink()
+ if concat_file and concat_file.exists():
+ concat_file.unlink()
+
+ return output_path
+
+ def _normalize_trim_ranges(self, ranges):
+ """Normalize various input formats to list of TrimRange objects."""
+ if isinstance(ranges, TrimRange):
+ return [ranges]
+ elif isinstance(ranges, tuple) and len(ranges) == 2:
+ return [TrimRange(ranges[0], ranges[1])]
+ elif isinstance(ranges, list):
+ normalized = []
+ for r in ranges:
+ if isinstance(r, TrimRange):
+ normalized.append(r)
+ elif isinstance(r, tuple) and len(r) == 2:
+ normalized.append(TrimRange(r[0], r[1]))
+ else:
+ raise ValueError(f"Invalid trim range format: {r}")
+ return normalized
+ else:
+ raise ValueError(f"Invalid trim ranges format: {ranges}")
+
+ def _validate_trim_ranges(self, input_path: Path, ranges: List[TrimRange]):
+ """Validate trim ranges against video duration."""
+ video_info = self.get_video_info(input_path)
+
+ for i, trim_range in enumerate(ranges):
+ if trim_range.start < 0:
+ raise ValueError(f"Trim range {i}: start time cannot be negative")
+ if trim_range.start >= video_info.duration:
+ raise ValueError(
+ f"Trim range {i}: start time {trim_range.start}s "
+ f"exceeds video duration {video_info.duration}s"
+ )
+ if trim_range.end > video_info.duration:
+ logger.warning(
+ f"Trim range {i}: end time {trim_range.end}s "
+ f"exceeds video duration {video_info.duration}s. "
+ f"Truncating to video end."
+ )
+ trim_range.end = video_info.duration
+ if trim_range.end <= trim_range.start:
+ raise ValueError(
+ f"Trim range {i}: end time must be greater than start time"
+ )
+
+ def batch_trim(
+ self,
+ video_paths: List[Union[str, Path]],
+ output_dir: Union[str, Path],
+ trim_specs: Union[TrimRange, List[TrimRange], dict],
+ **kwargs,
+ ) -> List[Path]:
+ """Batch trim with global progress tracking."""
+ output_dir = Path(output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ output_paths = []
+
+ for video_path in tqdm(video_paths, desc="Processing videos", unit="file"):
+ video_path = Path(video_path)
+
+ if isinstance(trim_specs, dict):
+ trim_spec = trim_specs.get(str(video_path), trim_specs.get(video_path))
+ if trim_spec is None:
+ raise ValueError(f"No trim specification for video: {video_path}")
+ else:
+ trim_spec = trim_specs
+
+ stem = video_path.stem
+ output_path = output_dir / f"{stem}_trimmed{video_path.suffix}"
+
+ try:
+ result = self.trim_video(
+ video_path, output_path, trim_spec, show_progress=True, **kwargs
+ )
+ output_paths.append(result)
+ except Exception as e:
+ logger.error(f"Failed to process {video_path.name}: {e}")
+ raise
+
+ return output_paths
+
+ def __del__(self):
+ """Cleanup temporary directory on object destruction."""
+ try:
+ if (
+ hasattr(self, "_temp_dir")
+ and self._temp_dir
+ and self._temp_dir.exists()
+ ):
+ shutil.rmtree(self._temp_dir, ignore_errors=True)
+ except Exception as e:
+ logger.warning(f"Failed to clean up temp directory: {e}")
diff --git a/filewarp/core/video/avEditor.py b/filewarp/core/video/avEditor.py
new file mode 100644
index 0000000..6582dd6
--- /dev/null
+++ b/filewarp/core/video/avEditor.py
@@ -0,0 +1,42 @@
+import av
+
+
+def trim_pyav(input_path, output_path, start_time, end_time):
+ input_container = av.open(str(input_path))
+ output_container = av.open(str(output_path), mode="w")
+
+ # Setup streams
+ in_streams = [
+ stream
+ for stream in input_container.streams
+ if stream.type in ("video", "audio")
+ ]
+ out_streams = {
+ s: output_container.add_stream(codec_name="libx264") for s in in_streams
+ }
+
+ # Seek to nearest keyframe before start
+ input_container.seek(int(start_time * av.time_base), any_frame=False)
+
+ for packet in input_container.demux(in_streams):
+ if packet.pts is None:
+ continue
+
+ time = float(packet.pts * packet.time_base)
+
+ if time < start_time:
+ continue
+ if time > end_time:
+ break
+
+ packet.stream = out_streams[packet.stream]
+ output_container.mux(packet)
+
+ output_container.close()
+ input_container.close()
+
+
+if __name__ == "__main__":
+    # Manual smoke run. NOTE(review): both paths are specific to the author's
+    # machine; adjust them before running this module directly.
+    input_path = "/home/skye/Videos/Im.Nobody.S01E21.1080p.x264-[T4TSA.cc].mkv"
+    output_path = "/home/skye/Videos/trimed.mkv"
+    trim_pyav(input_path, output_path, 166, 350)
diff --git a/filewarp/core/video/core.py b/filewarp/core/video/core.py
new file mode 100644
index 0000000..d0e7377
--- /dev/null
+++ b/filewarp/core/video/core.py
@@ -0,0 +1,182 @@
+"""
+Convert a video file from one format to another.
+"""
+
+import os
+import subprocess
+import sys
+
+import cv2
+from moviepy import VideoFileClip
+from pydub import AudioSegment
+from tqdm import tqdm
+
+from ...utils.colors import fg, bg, rs
+from ...utils.formats import SUPPORTED_VIDEO_FORMATS, Video_codecs
+
+
+RESET = rs
+
+
+class VideoConverter:
+ def __init__(self, input_file, out_format=None):
+ self.input_file = input_file
+ self.out_format = out_format
+
+ def preprocess(self):
+ if self.out_format is None:
+ return None
+ files_to_process = []
+
+ if os.path.isfile(self.input_file):
+ files_to_process.append(self.input_file)
+ elif os.path.isdir(self.input_file):
+ if os.listdir(self.input_file) is None:
+ print(f"{bg.RED}Cannot work with empty folder{RESET}")
+ sys.exit(1)
+ for file in os.listdir(self.input_file):
+ file_path = os.path.join(self.input_file, file)
+ if os.path.isfile(file_path):
+ files_to_process.append(file_path)
+
+ return files_to_process
+
+ def ffmpeg_merger(self, obj: list = None):
+ video_list = self.preprocess(), obj
+ for input_video in video_list:
+ base, ext = input_video.split(".", 1)
+ output_file = f"{base}_new_.{ext}"
+
+ # keep the original video quality by using -c:v copy, which avoids re-encoding.
+ subprocess.run(
+ [
+ "ffmpeg",
+ "-i",
+ input_video,
+ "-i",
+ "audio.mp3",
+ "-c:v",
+ "copy",
+ "-c:a",
+ "aac",
+ "-strict",
+ "experimental",
+ output_file,
+ ]
+ )
+
+ def pydub_merger(self, obj: list = None):
+ video_list = self.preprocess() or obj
+ for input_video in video_list:
+ output_file = [f"{_}_new_.{ext}" for _, ext in [input_video.split(".", 1)]][
+ 0
+ ]
+ # Process or manipulate audio with Pydub (e.g., adjust volume)
+ audio = AudioSegment.from_file("audio.mp3")
+ audio = audio + 6 # Increase volume by 6 dB
+ audio.export("processed_audio.mp3", format="mp3")
+
+ # Merge processed audio with video using FFmpeg
+ subprocess.run(
+ [
+ "ffmpeg",
+ "-i",
+ input_video,
+ "-i",
+ "processed_audio.mp3",
+ "-c:v",
+ "copy",
+ "-c:a",
+ "aac",
+ output_file,
+ ]
+ )
+
+ def cv2_merger(self, obj: list = None):
+ video_list = self.preprocess(), obj
+ for input_video in video_list:
+ # Read video and save frames (without audio)
+ cap = cv2.VideoCapture(input_video)
+
+ # Retrieve width and height from the video
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+ # _, ext = input_video.split('.')[0]
+ # output_file = f"{_}_new{ext}"
+ output_file = [f"{_}_new_.{ext}" for _, ext in [input_video.split(".", 1)]][
+ 0
+ ]
+ # Define the VideoWriter with the video dimensions
+ out = cv2.VideoWriter(
+ output_file, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+ )
+
+ # Read frames from the original video and write them to the output
+ while cap.isOpened():
+ ret, frame = cap.read()
+ if not ret:
+ break
+ out.write(frame)
+
+ # Release resources
+ cap.release()
+ out.release()
+
+ # Merge with audio using FFmpeg
+ subprocess.run(
+ [
+ "ffmpeg",
+ "-i",
+ "video_no_audio.mp4",
+ "-i",
+ "audio.mp3",
+ "-c:v",
+ "copy",
+ "-c:a",
+ "aac",
+ output_file,
+ ]
+ )
+
+ def CONVERT_VIDEO(self):
+ try:
+ input_list = self.preprocess()
+ out_f = self.out_format.upper()
+ input_list = [
+ item
+ for item in input_list
+ if any(item.upper().endswith(ext) for ext in SUPPORTED_VIDEO_FORMATS)
+ ]
+
+ for file in tqdm(input_list):
+ if out_f.upper() in Video_codecs.keys():
+ _, ext = os.path.splitext(file)
+ output_filename = _ + "." + out_f.lower()
+ # print(output_filename)
+ elif (
+ out_f.upper() in SUPPORTED_VIDEO_FORMATS
+ and out_f.upper() not in Video_codecs.keys()
+ ):
+ print(
+ f"{fg.RED}Unsupported output format --> Pending Implementation{RESET}"
+ )
+ sys.exit(1)
+ else:
+ print(f"{fg.RED}Unsupported output format{RESET}")
+ sys.exit(1)
+
+ """Load the video file"""
+ video = VideoFileClip(file)
+
+ """Export the video to a different format"""
+ video.write_videofile(output_filename, codec=Video_codecs[out_f])
+
+ """Close the video file"""
+ video.close()
+ except KeyboardInterrupt:
+ print("\nQuit❕")
+ sys.exit(1)
+ except Exception as e:
+ print(e)
diff --git a/filewarp/core/video/models.py b/filewarp/core/video/models.py
new file mode 100644
index 0000000..50ea156
--- /dev/null
+++ b/filewarp/core/video/models.py
@@ -0,0 +1,68 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional
+from pathlib import Path
+
+
+class VideoCodec(Enum):
+    """Supported video codecs for encoding.
+
+    Each member's value is the encoder name handed to FFmpeg's ``-c:v``
+    option when a command line is built.
+    """
+
+    H264 = "libx264"
+    H265 = "libx265"
+    VP9 = "libvpx-vp9"
+    COPY = "copy"  # Stream copy (no re-encoding)
+
+
+class AudioCodec(Enum):
+    """Supported audio codecs for encoding.
+
+    Member values are the names handed to FFmpeg's ``-c:a`` option, except
+    ``NONE``, which callers treat as a request to drop the audio track.
+    """
+
+    AAC = "aac"
+    MP3 = "libmp3lame"
+    COPY = "copy"  # Stream copy (no re-encoding)
+    NONE = "none"  # Remove audio
+
+
+class VideoQuality(Enum):
+ """Preset quality settings"""
+
+ ULTRA_FAST = "ultrafast" # Fastest encoding, largest file
+ FAST = "fast"
+ MEDIUM = "medium" # Default balance
+ SLOW = "slow" # Better compression, slower encoding
+ VERYS_LOW = "veryslow" # Best compression, very slow
+
+
+@dataclass
+class VideoInfo:
+    """Container for video metadata"""
+
+    # NOTE(review): units are not established in this module; the notes
+    # below are assumptions to confirm against the code that fills VideoInfo.
+    path: Path  # file the metadata describes
+    duration: float  # compared against TrimRange bounds, so presumably seconds
+    width: int  # presumably pixels
+    height: int  # presumably pixels
+    fps: float
+    codec: str
+    bitrate: int
+    audio_codec: Optional[str]  # Optional: may be None (presumably when there is no audio)
+    audio_channels: Optional[int]  # Optional: may be None (presumably when there is no audio)
+    file_size: int  # presumably bytes
+    has_video: bool
+    has_audio: bool
+
+
+@dataclass
+class TrimRange:
+ """Represents a trim range in seconds"""
+
+ start: float
+ end: float
+
+ def __post_init__(self):
+ if self.start < 0:
+ raise ValueError("Start time cannot be negative")
+ if self.end <= self.start:
+ raise ValueError("End time must be greater than start time")
+
+ @property
+ def duration(self) -> float:
+ return self.end - self.start
diff --git a/filewarp/core/video/moviepyEditor.py b/filewarp/core/video/moviepyEditor.py
new file mode 100644
index 0000000..d6b22cd
--- /dev/null
+++ b/filewarp/core/video/moviepyEditor.py
@@ -0,0 +1,25 @@
+# from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
+from moviepy import VideoFileClip
+# import os
+
+
+def trim_moviepy(input_path, output_path, start, end):
+ # This has progress bar support built-in
+ clip = VideoFileClip(str(input_path))
+ subclip = clip.subclipped(start, end)
+
+ # Progress bar appears automatically
+ subclip.write_videofile(
+ str(output_path),
+ codec="libx264",
+ audio_codec="aac",
+ # verbose=True, # Shows progress
+ # logger=None, # Or use 'bar' for tqdm style
+ )
+ clip.close()
+
+
+if __name__ == "__main__":
+    # Manual smoke run. NOTE(review): both paths are specific to the author's
+    # machine; adjust them before running this module directly.
+    input_path = "/home/skye/Videos/Im.Nobody.S01E21.1080p.x264-[T4TSA.cc].mkv"
+    output_path = "/home/skye/Videos/trimed.mkv"
+    trim_moviepy(input_path, output_path, 166, 350)
diff --git a/filewarp/core/warning.py b/filewarp/core/warning.py
new file mode 100644
index 0000000..eece477
--- /dev/null
+++ b/filewarp/core/warning.py
@@ -0,0 +1,13 @@
+import warnings
+
+
+def default_supressor():
+ # warnings.filterwarnings(action="ignore", category=warnings.defaultaction, module="numexpr")
+ warnings.simplefilter("ignore", RuntimeWarning)
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore",
+ message="Your system is avx2 capable but pygame was not built with support for it.",
+ category=RuntimeWarning,
+ )
+ return True
diff --git a/filewarp/miscellaneous/VKITPro.py b/filewarp/miscellaneous/VKITPro.py
new file mode 100644
index 0000000..77b7c9c
--- /dev/null
+++ b/filewarp/miscellaneous/VKITPro.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+import logging
+import os
+
+import cv2
+from colorama import Fore, Style, init
+from moviepy import AudioFileClip, VideoFileClip
+
+# import numpy as np
+from tqdm import tqdm
+
+# Initialize colorama
+init(autoreset=True)
+
+# Custom formatter class to add colors
+
+
+class CustomFormatter(logging.Formatter):
+ COLORS = {
+ logging.DEBUG: Fore.BLUE,
+ logging.INFO: Fore.GREEN,
+ logging.WARNING: Fore.YELLOW,
+ logging.ERROR: Fore.RED,
+ logging.CRITICAL: Fore.MAGENTA,
+ }
+
+ def format(self, record):
+ log_color = self.COLORS.get(record.levelno, Fore.WHITE)
+ log_message = super().format(record)
+ return f"{log_color}{log_message}{Style.RESET_ALL}"
+
+
+# Set up logging
+logger = logging.getLogger("colored_logger")
+handler = logging.StreamHandler()
+handler.setFormatter(CustomFormatter("- %(levelname)s - %(message)s"))
+
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+class AudioMan:
+ def __init__(self, obj):
+ self.obj = obj
+ # Load the video file
+ self.video = VideoFileClip(self.obj)
+ basename, _ = os.path.splitext(self.obj)
+ self.outfile = basename + ".wav"
+
+ def Extract_audio(self):
+ # audio = video.audio
+ self.video.audio.write_audiofile(self.outfile)
+
+ def Write_audio(self, outfile):
+ # Load the audio file
+ audio = AudioFileClip(outfile)
+ new = self.video.set_audio(audio)
+ # Export the final video
+ return new.write_videofile(
+ "output_@vkitpro.mp4", codec="libx264", audio_codec="aac", bitrate="125.4k"
+ )
+
+
+class VideoRepair:
+ def __init__(self, obj):
+ self.obj = obj
+
+ logger.info("Open the file")
+ self.cap = cv2.VideoCapture(obj)
+ if not self.cap.isOpened():
+ logger.error("Could not open video file.")
+ return
+
+ # Collect file metadata
+ self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = self.cap.get(cv2.CAP_PROP_FPS)
+
+ logger.info(
+ "File info:\n"
+ f"\tFrames: \033[95m{self.frame_count}\033[0;32m\n"
+ f"\tFrame Width: \033[0;95m{width}\033[0;32m\n"
+ f"\tFrame Height: \033[0;95m{height}\033[0;32m\n"
+ f"\tFPS: \033[0;95m{fps}\033[0m"
+ )
+
+ def get_frame_size_in_bytes(frame):
+ return frame.nbytes # Get the size of the frame in bytes
+
+ def Repair(self, batch: int = 2):
+ logger.info("Find missing frames and index them")
+ """batch_size = batch * 1024 * 1024
+ l_frame = None
+ r_frame = None
+ current_batch_size = 0
+ frames_batch = []"""
+
+ for _ in tqdm(range(self.frame_count), desc="Progress"):
+ ret, frame = self.cap.read()
+ if not ret:
+ # If no frame is captured, break the loop
+ self.frames.append(None)
+ else:
+ self.frames.append(frame)
+
+ self.cap.release()
+
+
+class cv2Repair:
+ def __init__(self):
+ self = self
+
+ def preprocessor(input_video_path):
+ cap = cv2.VideoCapture(input_video_path)
+
+ while cap.isOpened():
+ ret, frame = cap.read()
+ if not ret:
+ pass
+ else:
+ yield frame # Yield frame one by one (lazy loading)
+
+ cap.release()
+
+ def repair(self):
+ # Process the frames using the generator
+ for frame in tqdm(self.preprocessor("/home/skye/Videos/FixedSupercar.mp4")):
+ run = AudioMan()
+ run.Write_audio()
+
+
+if __name__ == "__main__":
+    # Manual run. NOTE(review): both paths are specific to the author's machine.
+    run = AudioMan("/home/skye/Videos/FixedSupercar.mp4")
+    run.Write_audio("/home/skye/Videos/supercar.wav")
diff --git a/filewarp/miscellaneous/VRKit.py b/filewarp/miscellaneous/VRKit.py
new file mode 100644
index 0000000..07e511f
--- /dev/null
+++ b/filewarp/miscellaneous/VRKit.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+import logging
+import cv2
+from colorama import Fore, Style, init
+
+# import numpy as np
+from tqdm import tqdm
+
+# Initialize colorama
+init(autoreset=True)
+
+# Custom formatter class to add colors
+
+
+class CustomFormatter(logging.Formatter):
+ COLORS = {
+ logging.DEBUG: Fore.BLUE,
+ logging.INFO: Fore.GREEN,
+ logging.WARNING: Fore.YELLOW,
+ logging.ERROR: Fore.RED,
+ logging.CRITICAL: Fore.MAGENTA,
+ }
+
+ def format(self, record):
+ log_color = self.COLORS.get(record.levelno, Fore.WHITE)
+ log_message = super().format(record)
+ return f"{log_color}{log_message}{Style.RESET_ALL}"
+
+
+# Set up logging
+logger = logging.getLogger("colored_logger")
+handler = logging.StreamHandler()
+handler.setFormatter(CustomFormatter("- %(levelname)s - %(message)s"))
+
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def detect_missing_frames(frames):
+ """Implementation for missing frame detection and index them, append index
+ of missing frames to a list"""
+ missing_frames = []
+ logger.info("Index missing frames")
+ for i in tqdm(range(1, len(frames) - 1), desc="Progress"):
+ if frames[i] is None:
+ missing_frames.append(i)
+
+ # Exit when no missing frames are found
+ if not missing_frames:
+ exit(0)
+ return missing_frames
+
+
+def interpolate_frame(prev_frame, next_frame):
+ """Based on number and size of missing frames use this logic to create a
+ dummy frame by interpolating.
+ combine the frame before and after the missing frame and find the missing
+ frame by calculating middle weight."""
+ logger.info("Interpolating")
+ return cv2.addWeighted(prev_frame, 0.5, next_frame, 0.5, 0)
+
+
+def repair_video(input_path, output_path):
+ logger.info("Open the file")
+ cap = cv2.VideoCapture(input_path)
+ if not cap.isOpened():
+ logger.error("Could not open video file.")
+ return
+
+ # Collect file metadata
+ frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = cap.get(cv2.CAP_PROP_FPS)
+
+ logger.info(
+ "File info:\n"
+ f"\tFrames: \033[95m{frame_count}\033[0;32m\n"
+ f"\tFrame Width: \033[0;95m{width}\033[0;32m\n"
+ f"\tFPS: \033[0;95m{fps}\033[0m"
+ )
+
+ frames = []
+ # Remove missing frames
+ logger.info("Find missing frames and index them")
+ for _ in tqdm(range(frame_count), desc="Progress"):
+ ret, frame = cap.read()
+ if not ret:
+ frames.append(None)
+ else:
+ frames.append(frame)
+
+ cap.release()
+
+ """ Call function to detect missing frames and decide on the method to apply
+ depending on number of missing frames. If number is larger than frame_count * 0.1
+remove the missing frames else interpolate."""
+
+ missing_frames = detect_missing_frames(frames)
+ if (
+ len(missing_frames) > frame_count * 0.1
+ ): # Arbitrary threshold for many missing frames
+ frames = [f for f in frames if f is not None]
+ else:
+ for i in missing_frames:
+ """ Based on missing frame `i` find previous frame `frames[i-1]` and preceeding frame `frames[i+1]` wher both previous and preceeding are not missing. Use them to create the middle frame."""
+ if (
+ i > 0
+ and i < frame_count - 1
+ and frames[i - 1] is not None
+ and frames[i + 1] is not None
+ ):
+ frames[i] = interpolate_frame(frames[i - 1], frames[i + 1])
+ else:
+ """Where ..."""
+ frames[i] = (
+ frames[i - 1] if frames[i - 1] is not None else frames[i + 1]
+ )
+
+ # Create writer objectfor the frames
+ out = cv2.VideoWriter(
+ output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+ )
+
+ # Write the new video to file
+ for frame in frames:
+ "Don't write empty frames"
+ if frame is not None:
+ out.write(frame)
+
+ out.release()
+ print("Video repair complete and saved to:", output_path)
+
+
+# Usage
+input_video_path = "/home/skye/Videos/supercar.mp4"
+output_video_path = "output_video.mp4"
+repair_video(input_video_path, output_video_path)
diff --git a/filewarp/miscellaneous/video_analyzer.py b/filewarp/miscellaneous/video_analyzer.py
new file mode 100644
index 0000000..6394c8d
--- /dev/null
+++ b/filewarp/miscellaneous/video_analyzer.py
@@ -0,0 +1,121 @@
+"""A basic/simple file analyzer"""
+
+import sys
+import cv2
+import numpy as np
+from ..utils.colors import fg, rs
+import ffmpeg
+
+RESET = rs
+
+
+class SimpleAnalyzer:
+ """Video - video object subject for analysis
+ return video`s: duration, total_area and frame_count"""
+
+ def __init__(self, video):
+ self.video = video
+
+ @staticmethod
+ def get_metadata(input_file):
+ """Fetch the original bitrate of the video file using ffmpeg."""
+ try:
+ probe = ffmpeg.probe(input_file)
+ # print(probe.get("streams")[1])
+ bitrate = None
+ # Iterate over the streams and find the video stream
+ for stream in probe["streams"]:
+ bitrate = (
+ stream.get("bit_rate", None)
+ if stream["codec_type"] == "video"
+ else None
+ )
+ aspect_ratio = (
+ stream.get("sample_aspect_ratio")
+ if stream["sample_aspect_ratio"]
+ else None
+ )
+ codec_name = stream.get("codec_name") if stream["codec_name"] else None
+ channels = stream.get("channels")
+
+ encoder = stream.get("encoder") if stream.get("encoder") else None
+ break
+ return bitrate, aspect_ratio, codec_name, channels, encoder
+ except ffmpeg.Error as e:
+ raise
+ print(f"Error: {e}")
+ except Exception as e:
+ raise
+ print(f"Error: {e}")
+
+ def analyze(self):
+ """Read the video file/obj
+ Increase frame count and accumulate area
+ Calculate current frame duration
+ Display the resulting frame"""
+
+ try:
+ # Read the video file
+ cap = cv2.VideoCapture(self.video)
+ # print(f"{fg.BYELLOW}Initializing..{RESET}")
+ # Initialize variables
+ # Frame rate (fps)
+ bitrate, aspect_ratio, codec_name, channels, encoder = self.get_metadata(
+ self.video
+ )
+ frame_count = 0
+ total_area = 0
+ duration = 0
+
+ # print(f"{fg.DCYAN}Working on it{RESET}")
+ while True:
+ ret, frame = cap.read()
+
+ if not ret:
+ break
+ # Increase frame count and accumulate area
+ frame_count += 1
+ total_area += np.prod(frame.shape[:2])
+
+ # Calculate current frame duration
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ duration += 1 / fps
+
+ # Display the resulting frame
+ cv2.imshow("Frame", frame)
+
+ # Break the loop after pressing 'q'
+ if cv2.waitKey(1) == ord("q"):
+ break
+
+ # Release the video capture object and close all windows
+ cap.release()
+ cv2.destroyAllWindows()
+
+ # Print results
+ # print(f"Size {fg.BGREEN}{size}{RESET}Kb")
+ print(f"Channels: {fg.BGREEN}{channels}{RESET}")
+ print(f"Encoder {fg.BGREEN}{encoder}{RESET}")
+ print(f"Bitrate {fg.BGREEN}{bitrate}{RESET}")
+ print(f"Aspect ratio{fg.BGREEN}{aspect_ratio}{RESET}")
+ print(f"Codec name {fg.BGREEN}{codec_name}{RESET}")
+ print(f"Total Frames: {fg.BGREEN}{frame_count}{RESET}")
+ print(
+ f"Average Frame Area: {fg.BGREEN}{total_area / frame_count}{RESET}"
+ )
+ print(f"Duration: {fg.BGREEN}{duration:.2f}{RESET} seconds")
+ return frame_count, total_area, duration
+ except KeyboardInterrupt:
+ print("\nExiting")
+ sys.exit(1)
+ except TypeError:
+ pass
+ except Exception as e:
+ print(e)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Manual run. NOTE(review): the path is specific to the author's machine.
+    vi = SimpleAnalyzer("/home/skye/Videos/demo.mkv")
+    # SimpleAnalyzer.get_metadata("/home/skye/Videos/demo.mkv")
+    vi.analyze()
diff --git a/filewarp/utils/__init__.py b/filewarp/utils/__init__.py
new file mode 100644
index 0000000..b814662
--- /dev/null
+++ b/filewarp/utils/__init__.py
@@ -0,0 +1,3 @@
+from . import formats
+
+__all__ = ["formats"]
diff --git a/filewarp/utils/colors.py b/filewarp/utils/colors.py
new file mode 100644
index 0000000..def6fc7
--- /dev/null
+++ b/filewarp/utils/colors.py
@@ -0,0 +1,271 @@
+import os
+
+from colorama import Fore, Style, init
+
+init(autoreset=True)
+
+
+class ForegroundColor:
+    """Text colour escape sequences, chosen by platform at class creation.
+
+    On POSIX the attributes are raw ANSI SGR sequences; on Windows they map
+    to colorama ``Fore`` constants, which cannot express every style.
+    NOTE(review): when ``os.name`` is neither "posix" nor "nt" no attribute
+    is defined at all.
+    """
+
+    if os.name == "posix":
+        RESET = "\033[0m"  # Reset to default text color
+
+        # Red Variants
+        RED = "\033[91m"  # Normal RED
+        BRED = "\033[1;91m"  # Bold RED
+        FRED = "\033[2;91m"  # Faint red
+        IRED = "\033[3;91m"  # Italic RED
+        LRED = "\033[4;91m"  # Underlined RED
+        URED = "\033[5;91m"  # Blinking RED
+
+        # Green Variants
+        GREEN = "\033[92m"  # Normal green
+        BGREEN = "\033[1;92m"  # Bold green
+        FGREEN = "\033[2;92m"  # Faint green
+        IGREEN = "\033[3;92m"  # Italic GREEN
+        LGREEN = "\033[4;92m"  # Underlined GREEN
+        UGREEN = "\033[5;92m"  # Blinking GREEN
+
+        # Yellow Variants
+        YELLOW = "\033[93m"  # Normal yellow
+        BYELLOW = "\033[1;93m"  # Bold YELLOW
+        FYELLOW = "\033[2;93m"  # Faint YELLOW
+        IYELLOW = "\033[3;93m"  # Italic YELLOW
+        LYELLOW = "\033[4;93m"  # Underlined YELLOW
+        UYELLOW = "\033[5;93m"  # Blinking YELLOW
+
+        # Blue Variants
+        BLUE = "\033[94m"  # Normal BLUE
+        BBLUE = "\033[1;94m"  # Bold BLUE
+        FBLUE = "\033[2;94m"  # Faint Blue
+        IBLUE = "\033[3;94m"  # Italic BLUE
+        LBLUE = "\033[4;94m"  # Underlined BLUE
+        UBLUE = "\033[5;94m"  # Blinking BLUE
+
+        # Magenta Variants
+        MAGENTA = "\033[95m"  # Normal MAGENTA
+        BMAGENTA = "\033[1;95m"  # Bold MAGENTA
+        FMAGENTA = "\033[2;95m"  # Faint MAGENTA
+        IMAGENTA = "\033[3;95m"  # Italic MAGENTA
+        LMAGENTA = "\033[4;95m"  # Underlined MAGENTA
+        UMAGENTA = "\033[5;95m"  # Blinking MAGENTA
+
+        # Cyan Variants
+        CYAN = "\033[96m"  # Normal cyan
+        DCYAN = "\033[1;96m"  # Bold CYAN
+        FCYAN = "\033[2;96m"  # Faint cyan
+        ICYAN = "\033[3;96m"  # Italic CYAN
+        LCYAN = "\033[4;96m"  # Underlined CYAN
+        UCYAN = "\033[5;96m"  # Blinking CYAN
+
+        # White Variants
+        BWHITE = "\033[1m"  # Bold only; no colour code is set
+        BBWHITE = "\033[5;97;1m"  # Bold Blinking white
+        WHITE = "\033[97m"  # Normal white
+        DWHITE = "\033[1;97m"  # Bold white
+        FWHITE = "\033[2;97m"  # Faint white
+        IWHITE = "\033[3;97m"  # Italic white
+        LWHITE = "\033[4;97m"  # Underlined white
+        UWHITE = "\033[5;97m"  # Blinking white
+
+    if os.name == "nt":
+        RESET = Style.RESET_ALL
+
+        # Red Variants
+        RED = Fore.LIGHTRED_EX
+        BRED = Fore.RED
+        FRED = Fore.RED
+        IRED = Fore.RED
+        LRED = Fore.LIGHTRED_EX  # Underline not expressed; light RED used
+        URED = Fore.RED  # Blinking not directly supported, using RED
+
+        # Green Variants
+        GREEN = Fore.LIGHTGREEN_EX
+        BGREEN = Fore.GREEN
+        FGREEN = Fore.GREEN
+        IGREEN = Fore.GREEN
+        LGREEN = Fore.LIGHTGREEN_EX  # Underline not expressed; light GREEN used
+        UGREEN = Fore.GREEN  # Blinking not directly supported, using GREEN
+
+        # Yellow Variants
+        YELLOW = Fore.LIGHTYELLOW_EX
+        BYELLOW = Fore.YELLOW
+        FYELLOW = Fore.YELLOW
+        IYELLOW = Fore.YELLOW
+        LYELLOW = Fore.LIGHTYELLOW_EX  # Underline not expressed; light YELLOW used
+        UYELLOW = Fore.YELLOW  # Blinking not directly supported, using YELLOW
+
+        # Blue Variants
+        BLUE = Fore.LIGHTBLUE_EX
+        BBLUE = Fore.BLUE
+        FBLUE = Fore.BLUE
+        IBLUE = Fore.BLUE
+        LBLUE = Fore.LIGHTBLUE_EX  # Underline not expressed; light BLUE used
+        UBLUE = Fore.BLUE  # Blinking not directly supported, using BLUE
+
+        # Magenta Variants
+        MAGENTA = Fore.LIGHTMAGENTA_EX
+        BMAGENTA = Fore.MAGENTA
+        FMAGENTA = Fore.MAGENTA
+        IMAGENTA = Fore.LIGHTMAGENTA_EX
+        LMAGENTA = Fore.LIGHTMAGENTA_EX  # Underline not expressed; light MAGENTA used
+        UMAGENTA = Fore.MAGENTA  # Blinking not directly supported, using MAGENTA
+
+        # Cyan Variants
+        CYAN = Fore.LIGHTCYAN_EX
+        DCYAN = Fore.CYAN
+        ICYAN = Fore.WHITE  # NOTE(review): WHITE, unlike the other italic variants; confirm
+        FCYAN = Fore.CYAN
+        LCYAN = Fore.LIGHTCYAN_EX  # Underline not expressed; light CYAN used
+        UCYAN = Fore.CYAN  # Blinking not directly supported, using CYAN
+
+        # White Variants
+        BWHITE = Fore.WHITE
+        BBWHITE = Fore.WHITE  # Blinking not directly supported, using WHITE
+        WHITE = Fore.WHITE
+        DWHITE = Fore.WHITE  # Bold white (not distinct in colorama)
+        FWHITE = Fore.WHITE  # Faint white (not distinct in colorama)
+        IWHITE = Fore.WHITE  # Italic white (not distinct in colorama)
+        LWHITE = Fore.WHITE  # Underlined white (not distinct in colorama)
+        UWHITE = Fore.WHITE  # Blinking not directly supported, using WHITE
+
+
+class BackgroundColor:
+ if os.name == "posix":
+ RESET = "\033[0m" # Reset to default text color
+
+ # Red Variants
+ RED = "\033[91m" # Normal RED
+ BRED = "\033[1;41m" # Deep RED
+ FRED = "\033[2;41m" # Faint red
+ IRED = "\033[3;41m" # Indented RED
+ LRED = "\033[4;41m" # Underlined RED
+ URED = "\033[5;41m" # Blinking RED
+
+ # Green Variants
+ GREEN = "\033[42m" # Normal green
+ BGREEN = "\033[1;42m" # Deep green
+ FGREEN = "\033[2;42m" # Faint green
+ IGREEN = "\033[3;42m" # Indented GREEN
+ LGREEN = "\033[4;42m" # Underlined GREEN
+ UGREEN = "\033[5;42m" # Blinking GREEN
+
+ # Yellow Variants
+ YELLOW = "\033[43m" # Normal yellow
+ BYELLOW = "\033[1;43m" # Deep YELLOW
+ FYELLOW = "\033[2;43m" # Faint YELLOW
+ IYELLOW = "\033[3;43m" # Indented YELLOW
+ LYELLOW = "\033[4;43m" # Underlined YELLOW
+ UYELLOW = "\033[5;43m" # Blinking YELLOW
+
+ # Blue Variants
+ BLUE = "\033[44m" # Normal BLUE
+ BBLUE = "\033[1;44m" # Deep BLUE
+ FBLUE = "\033[2;44m" # Faint Blue
+ IBLUE = "\033[3;44m" # Indented BLUE
+ LBLUE = "\033[4;44m" # Underlined BLUE
+ UBLUE = "\033[5;44m" # Blinking BLUE
+
+ # Magenta Variants
+ MAGENTA = "\033[45m" # Normal MAGENTA
+ BMAGENTA = "\033[1;45m" # Deep MAGENTA
+ FMAGENTA = "\033[2;45m" # Faint MAGENTA
+ IMAGENTA = "\033[3;45m" # Indented MAGENTA
+ LMAGENTA = "\033[4;45m" # Underlined MAGENTA
+ UMAGENTA = "\033[5;45m" # Blinking MAGENTA
+
+ # Cyan Variants
+ CYAN = "\033[46m" # Normal cyan
+ DCYAN = "\033[1;46m" # Deep CYAN
+ FCYAN = "\033[2;46m" # Faint cyan
+ ICYAN = "\033[3;46m" # Indented CYAN
+ LCYAN = "\033[4;46m" # Underlined CYAN
+ UCYAN = "\033[5;46m" # Blinking CYAN
+
+ # White Variants
+ BWHITE = "\033[1m" # Bold white
+ BBWHITE = "\033[5;47;1m" # Bold Blinking white
+ WHITE = "\033[47m" # Normal white
+ DWHITE = "\033[1;47m" # Deep white
+ FWHITE = "\033[2;47m" # Faint white
+ IWHITE = "\033[3;47m" # Indented white
+ LWHITE = "\033[4;47m" # Underlined white
+ UWHITE = "\033[5;47m" # Blinking white
+
+ BLACK = "\033[40m" # Black Background
+
+ if os.name == "nt":
+ RESET = Style.RESET_ALL
+
+ # Red Variants
+ RED = Fore.LIGHTRED_EX
+ BRED = Fore.RED
+ FRED = Fore.RED
+ IRED = Fore.RED
+ LRED = Fore.LIGHTRED_EX # Underlined RED
+ URED = Fore.RED # Blinking not directly supported, using RED
+
+ # Green Variants
+ GREEN = Fore.LIGHTGREEN_EX
+ BGREEN = Fore.GREEN
+ FGREEN = Fore.GREEN
+ IGREEN = Fore.GREEN
+ LGREEN = Fore.LIGHTGREEN_EX # Underlined GREEN
+ UGREEN = Fore.GREEN # Blinking not directly supported, using GREEN
+
+ # Yellow Variants
+ YELLOW = Fore.LIGHTYELLOW_EX
+ BYELLOW = Fore.YELLOW
+ FYELLOW = Fore.YELLOW
+ IYELLOW = Fore.YELLOW
+ LYELLOW = Fore.LIGHTYELLOW_EX # Underlined YELLOW
+ UYELLOW = Fore.YELLOW # Blinking not directly supported, using YELLOW
+
+ # Blue Variants
+ BLUE = Fore.LIGHTBLUE_EX
+ BBLUE = Fore.BLUE
+ FBLUE = Fore.BLUE
+ IBLUE = Fore.BLUE
+ LBLUE = Fore.LIGHTBLUE_EX # Underlined BLUE
+ UBLUE = Fore.BLUE # Blinking not directly supported, using BLUE
+
+ # Magenta Variants
+ MAGENTA = Fore.LIGHTMAGENTA_EX
+ BMAGENTA = Fore.MAGENTA
+ FMAGENTA = Fore.MAGENTA
+ IMAGENTA = Fore.LIGHTMAGENTA_EX
+ LMAGENTA = Fore.LIGHTMAGENTA_EX # Underlined MAGENTA
+ UMAGENTA = Fore.MAGENTA # Blinking not directly supported, using MAGE
+
+ # Cyan Variants
+ CYAN = Fore.LIGHTCYAN_EX
+ DCYAN = Fore.CYAN
+ ICYAN = Fore.WHITE # Indented CYAN
+ FCYAN = Fore.CYAN
+ LCYAN = Fore.LIGHTCYAN_EX # Underlined CYAN
+ UCYAN = Fore.CYAN # Blinking not directly supported, using CYAN
+
+ # White Variants
+ BWHITE = Fore.WHITE
+ BBWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+ WHITE = Fore.WHITE
+ DWHITE = Fore.WHITE # Deep white (not distinct in colorama)
+ FWHITE = Fore.WHITE # Faint white (not distinct in colorama)
+ IWHITE = Fore.WHITE # Indented white (not distinct in colorama)
+ LWHITE = Fore.WHITE # Underlined white (not distinct in colorama)
+ UWHITE = Fore.WHITE # Blinking not directly supported, using WHITE
+
+
+# Shared instances imported by the rest of the package as colour namespaces
+fg = ForegroundColor()
+bg = BackgroundColor()
+# Reset sequence, taken from the foreground table
+rs = fg.RESET
+
+
+class OutputFormater:
+    """ANSI styles for output display"""
+
+    # Each marker is a coloured tag followed by the reset sequence
+    INFO = f"{fg.BLUE}[i]{rs}"
+    WARN = f"{fg.YELLOW}[!]{rs}"
+    ERR = f"{fg.RED}[x]{rs}"
+    EXP = f"{fg.MAGENTA}[⁉️]{rs}"  # For exceptions
+    OK = f"{fg.GREEN}[✓]{rs}"
+    RESET = rs
diff --git a/filewarp/utils/config.py b/filewarp/utils/config.py
new file mode 100644
index 0000000..b923bbb
--- /dev/null
+++ b/filewarp/utils/config.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+import os
+
+# NOTE: ``home()`` is a classmethod, so BASE_DIR is the current user's home
+# directory; it does not depend on where this file is located.
+BASE_DIR = Path.home()
+
+# Default destination for produced files.
+OUTPUT_DIR = BASE_DIR.joinpath("Documents")
+
+# Scratch space used by the package.
+CACHE_DIR = BASE_DIR.joinpath("tmp/filewarp")
+
+# Make sure the cache directory exists as soon as this module is imported.
+os.makedirs(CACHE_DIR, exist_ok=True)
diff --git a/filewarp/utils/decorators.py b/filewarp/utils/decorators.py
new file mode 100644
index 0000000..65c4652
--- /dev/null
+++ b/filewarp/utils/decorators.py
@@ -0,0 +1,25 @@
+class Decorators:
+    """Namespace for reusable method decorators."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def for_loop_decorator(data_list):
+        """
+        Build a decorator that calls the decorated method once per element.
+
+        The wrapped method is invoked as ``func(self, item, *args, **kwargs)``
+        for every ``item`` in ``data_list``. Return values of the individual
+        calls are discarded; the wrapper itself returns ``None``.
+
+        Args:
+            data_list: A list or tuple of data to iterate over.
+
+        Returns:
+            A decorator for instance methods.
+        """
+        from functools import wraps
+
+        def decorator(func):
+            # wraps() keeps the decorated method's __name__ and __doc__, so
+            # introspection and help() still describe the real method.
+            @wraps(func)
+            def wrapper(self, *args, **kwargs):
+                for item in data_list:
+                    func(self, item, *args, **kwargs)
+
+            return wrapper
+
+        return decorator
+
+
+# Short alias so callers can write ``@for_loop([...])``.
+for_loop = Decorators.for_loop_decorator
diff --git a/filewarp/utils/file_utils.py b/filewarp/utils/file_utils.py
new file mode 100644
index 0000000..a0e741b
--- /dev/null
+++ b/filewarp/utils/file_utils.py
@@ -0,0 +1,352 @@
+"""
+File utility functions for filewarp.
+"""
+
+import fnmatch
+import os
+import shutil
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Iterator, List, Optional, Union
+
+from tqdm.auto import tqdm
+
+# from .colors import fg, rs
+from ..core.exceptions import FileSystemError
+from .colors import OutputFormater as OF
+from .config import OUTPUT_DIR
+from .formats import SUPPORTED_IMAGE_FORMATS
+from .simple import logger
+
+
+def dirbuster(_dir_):
+    """
+    Recursively collect PDF and Word documents below ``_dir_``.
+
+    Args:
+        _dir_: Directory to walk.
+
+    Returns:
+        A list of paths whose extension is ``pdf``, ``doc`` or ``docx``
+        (case-insensitive). ``None`` is returned when the walk is interrupted
+        with Ctrl-C or a ``FileNotFoundError`` is raised.
+    """
+    wanted = (".pdf", ".doc", ".docx")
+    try:
+        target = []
+        for root, _dirs, files in os.walk(_dir_):
+            for file in files:
+                # splitext() only reports a real extension, so a file that is
+                # literally named "pdf" is not mistaken for a document.
+                ext = os.path.splitext(file)[1]
+
+                _path_ = os.path.join(root, file)
+                if ext.lower() in wanted and os.path.exists(_path_):
+                    target.append(_path_)
+        return target
+    except FileNotFoundError as e:
+        print(e)
+
+    except KeyboardInterrupt:
+        print("\nQuit!")
+        return
+
+
+def generate_filename(
+    basedir: Path, suffix: str, postfix: Optional[str] = "filewarp"
+) -> Path:
+    """
+    Build a unique file path inside ``basedir``.
+
+    The name has the form ``<uuid4 hex>-<postfix>.<suffix>``. Nothing is
+    created on disk.
+
+    Args:
+        basedir: Directory the generated path should live in.
+        suffix: File extension, with or without a leading dot.
+        postfix: Text appended after the random part of the name. ``None`` or
+            an empty string omits it together with the separating dash.
+    Returns:
+        The generated path.
+    """
+    # Accept both "pdf" and ".pdf" without producing a double dot.
+    extension = suffix.lstrip(".")
+
+    stem = uuid.uuid4().hex
+    if postfix:
+        stem = f"{stem}-{postfix}"
+
+    name = f"{stem}.{extension}" if extension else stem
+
+    return Path(basedir) / name
+
+
+class FileSystemHandler:
+    """
+    File-system helpers: searching, ignore filtering, deletion and naming.
+
+    Attributes:
+        ignore: Names of path components to exclude from search results.
+            ``None`` or an empty sequence disables filtering.
+    """
+
+    def __init__(self, ignore: list | tuple = None):
+        self.ignore = ignore
+
+    def find_files(self, paths, patterns, recursive=True) -> list:
+        """
+        Find files whose name matches any of ``patterns``.
+
+        Args:
+            paths: Directories to search; missing ones are skipped.
+            patterns: ``fnmatch`` patterns tested against the file name only.
+            recursive: Descend into sub-directories when True.
+
+        Returns:
+            Matching paths after ``ignore_pattern`` filtering.
+
+        Raises:
+            FileSystemError: Wraps any exception raised during the search.
+        """
+        try:
+            candidates = []
+            for path in paths:
+                path_obj = Path(path).expanduser().resolve()
+                if not path_obj.exists():
+                    continue
+                # The filter is the same either way; only the depth differs.
+                entries = path_obj.rglob("*") if recursive else path_obj.glob("*")
+                for file in tqdm(entries, desc="Searching", leave=False):
+                    if file.is_file() and any(
+                        fnmatch.fnmatch(file.name, pat) for pat in patterns
+                    ):
+                        candidates.append(file)
+            return self.ignore_pattern(candidates)
+        except Exception as e:
+            raise FileSystemError(e) from e
+
+    def find_directories(self, paths, patterns, recursive=True, empty=True) -> list:
+        """
+        Find empty directories below each of ``paths``.
+
+        NOTE(review): ``patterns`` and ``empty`` are accepted but not used by
+        this implementation; only empty directories are ever reported.
+
+        Args:
+            paths: Directories to search; missing ones are skipped.
+            patterns: Currently unused.
+            recursive: Walk the whole tree (following symlinks) when True,
+                otherwise inspect only direct children.
+            empty: Currently unused.
+
+        Returns:
+            Empty directories after ``ignore_pattern`` filtering.
+
+        Raises:
+            FileSystemError: Wraps any exception raised during the search.
+        """
+        try:
+            candidates = []
+            for path in paths:
+                path_obj = Path(path).expanduser().resolve()
+                if not path_obj.exists():
+                    continue
+                if recursive:
+                    for root, dirs, _files in tqdm(
+                        os.walk(path_obj, followlinks=True),
+                        desc="Searching",
+                        leave=False,
+                    ):
+                        for name in dirs:
+                            if not os.listdir(os.path.join(root, name)):
+                                candidates.append(Path(root) / name)
+
+                else:
+                    for name in tqdm(
+                        os.listdir(path_obj), desc="Searching", leave=False
+                    ):
+                        # listdir() yields bare names; resolve them against
+                        # the searched directory, not the working directory.
+                        entry = path_obj / name
+                        if entry.is_dir() and not os.listdir(entry):
+                            candidates.append(entry)
+
+            return self.ignore_pattern(candidates)
+        except Exception as e:
+            raise FileSystemError(e) from e
+
+    def ignore_pattern(self, items: list | tuple, ignore: list | tuple = None) -> list:
+        """
+        Drop every item that has an ignored name among its path components.
+
+        Args:
+            items: Paths to filter.
+            ignore: Component names to exclude; falls back to ``self.ignore``
+                when empty or None. Comparison is case-insensitive.
+
+        Returns:
+            The items that contain none of the ignored components, in their
+            original order. All items are returned when nothing is ignored.
+        """
+        ignore = self.ignore if not ignore else ignore
+        if not ignore:
+            return list(items)
+
+        ignored = {str(ig).lower() for ig in ignore}
+        return [
+            item
+            for item in items
+            if ignored.isdisjoint(part.lower() for part in Path(item).parts)
+        ]
+
+    @staticmethod
+    def _find_files(pattern: str, recursive: bool = True) -> Iterator[Path]:
+        """
+        Yield files matching ``pattern``.
+
+        An existing file path is yielded as-is; anything else is treated as a
+        glob pattern relative to the current working directory.
+        """
+        path = Path(pattern)
+
+        if path.exists() and path.is_file():
+            yield path
+            return
+
+        # Handle glob patterns
+        if recursive:
+            yield from Path(".").rglob(pattern)
+        else:
+            yield from Path(".").glob(pattern)
+
+    @staticmethod
+    def delete_files(files) -> bool:
+        """
+        Delete every existing file in ``files``.
+
+        Returns:
+            True once all files were processed, False when an unexpected
+            (non-OS) error occurred.
+
+        Raises:
+            FileSystemError: When the operating system refuses a deletion.
+        """
+        f = None  # keeps the error message usable if iteration itself fails
+        try:
+            for f in files:
+                if f.exists():
+                    f.unlink()
+                    print(f"{OF.OK} Deleted: {f}")
+            return True
+        except (PermissionError, OSError) as e:
+            raise FileSystemError(e) from e
+        except Exception as e:
+            print(f"{OF.ERR} Failed to delete {f}: {e}")
+            return False
+
+    @staticmethod
+    def delete_folders(files) -> bool:
+        """
+        Remove every existing directory in ``files`` with ``rmdir``.
+
+        ``rmdir`` only succeeds on empty directories.
+
+        Returns:
+            True once all directories were processed, False when an
+            unexpected (non-OS) error occurred.
+
+        Raises:
+            FileSystemError: When the operating system refuses a removal.
+        """
+        f = None  # keeps the error message usable if iteration itself fails
+        try:
+            for f in files:
+                if f.exists():
+                    f.rmdir()
+                    print(f"{OF.OK} Deleted: {f}")
+            return True
+        except (PermissionError, OSError) as e:
+            raise FileSystemError(e) from e
+        except Exception as e:
+            print(f"{OF.ERR} Failed to delete {f}: {e}")
+            return False
+
+    @staticmethod
+    def ensure_directory(path: Path) -> Path:
+        """Ensure directory exists, create if necessary."""
+        try:
+            path.mkdir(parents=True, exist_ok=True)
+            return path
+        except OSError as e:
+            raise FileSystemError(f"Failed to create directory {path}: {str(e)}") from e
+
+    @staticmethod
+    def safe_filename(name: str, max_length: int = 255) -> str:
+        """
+        Convert a string to a safe filename.
+
+        Characters other than alphanumerics and ``._- `` become underscores,
+        runs of whitespace become a single underscore, and names longer than
+        ``max_length`` are truncated and given an 8-character digest suffix.
+        """
+        import hashlib
+
+        # Replace unsafe characters
+        safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in name)
+
+        # Collapse whitespace runs into single underscores
+        safe_name = "_".join(safe_name.split())
+
+        # Trim to max length
+        if len(safe_name) > max_length:
+            # A content digest is stable between runs; the built-in hash() of
+            # a string is randomized per process.
+            digest = hashlib.sha256(safe_name.encode("utf-8")).hexdigest()[:8]
+            kept = safe_name[: max(max_length - 9, 0)]
+            safe_name = (kept + "_" + digest)[:max_length]
+
+        return safe_name
+
+
+class TemporaryFileManager:
+    """Tracks the temporary files and directories it creates for later removal."""
+
+    def __init__(self, prefix: str = "kcleaner_"):
+        self.prefix = prefix
+        self.temp_files = []
+        self.temp_dirs = []
+
+    def create_temp_file(self, suffix: str, content: str = "") -> Path:
+        """
+        Create a UTF-8 temporary file and remember it for ``cleanup``.
+
+        Args:
+            suffix: File name suffix passed to ``NamedTemporaryFile``.
+            content: Text written to the file; nothing is written when empty.
+
+        Returns:
+            Path of the created file (the file is not deleted on close).
+
+        Raises:
+            FileSystemError: When the file cannot be created or written.
+        """
+        try:
+            handle = tempfile.NamedTemporaryFile(
+                mode="w",
+                suffix=suffix,
+                prefix=self.prefix,
+                encoding="utf-8",
+                delete=False,
+            )
+            with handle:
+                if content:
+                    handle.write(content)
+                created = Path(handle.name)
+        except (OSError, IOError) as e:
+            raise FileSystemError(f"Failed to create temporary file: {str(e)}")
+
+        self.temp_files.append(created)
+        return created
+
+    def create_temp_dir(self) -> Path:
+        """
+        Create a temporary directory and remember it for ``cleanup``.
+
+        Raises:
+            FileSystemError: When the directory cannot be created.
+        """
+        try:
+            created = Path(tempfile.mkdtemp(prefix=self.prefix))
+        except OSError as e:
+            raise FileSystemError(f"Failed to create temporary directory: {str(e)}")
+
+        self.temp_dirs.append(created)
+        return created
+
+    def cleanup(self):
+        """Remove every tracked file and directory, logging failures as warnings."""
+        # Files first, then directories; each entry pairs the tracked paths
+        # with the call that removes them and the noun used in the warning.
+        removal_plan = (
+            (self.temp_files, Path.unlink, "file"),
+            (self.temp_dirs, shutil.rmtree, "directory"),
+        )
+        for tracked, remove, label in removal_plan:
+            for target in tracked:
+                try:
+                    if target.exists():
+                        remove(target)
+                except OSError as e:
+                    logger.warning(f"Failed to delete temporary {label} {target}: {e}")
+
+        self.temp_files.clear()
+        self.temp_dirs.clear()
+
+
+class DirectoryScanner:
+    """Resolves a file, a directory, or a list of both into image file paths."""
+
+    def __init__(self, input_obj: Optional[Union[str, list[str], os.PathLike]]):
+        self.input_obj = input_obj
+
+    def get_dir_files(self, path=None):
+        """
+        List the supported image files directly inside a directory.
+
+        -------
+        Args:
+            path: path to the directory/folder; defaults to ``self.input_obj``
+        Returns:
+        -------
+            list
+        Raises:
+            FileNotFoundError: when the directory holds no supported image.
+        """
+        directory = self.input_obj if path is None else path
+        files = [
+            os.path.join(directory, f)
+            for f in os.listdir(directory)
+            if os.path.isfile(os.path.join(directory, f))
+            and self._is_supported_image(f)
+        ]
+        if not files:  # Check for empty directory *after* filtering
+            raise FileNotFoundError(f"No supported image files found in: {directory}")
+        return files
+
+    def _is_supported_image(self, filename: str) -> bool:
+        """Checks if a file has a supported image extension."""
+        # fspath() lets path objects be tested as well as plain strings.
+        return (
+            os.fspath(filename)
+            .lower()
+            .endswith(tuple(SUPPORTED_IMAGE_FORMATS.values()))
+        )
+
+    def _get_image_files(self, files: list = None) -> List[str]:
+        """
+        Identifies image files to process, handling both single files and directories.
+
+        Args:
+            files: A path or a list of paths; defaults to ``self.input_obj``.
+
+        Returns:
+            A list of paths to image files. A single existing file is returned
+            as-is without an extension check.
+
+        Raises:
+            FileNotFoundError: if a directory has no supported images or an
+                entry is neither a file nor a directory.
+        """
+        files = self.input_obj if not files else files
+
+        if isinstance(files, (str, os.PathLike)):
+            if os.path.isfile(files):
+                return [files]
+            return self.get_dir_files(files)
+
+        files_to_process = []
+        for obj in files:
+            if os.path.isfile(obj):
+                if self._is_supported_image(obj):
+                    files_to_process.append(obj)
+                else:
+                    logger.warning(f"Skipping unsupported file: {obj}")
+
+            elif os.path.isdir(obj):
+                # get_dir_files() raises when the directory has no images.
+                files_to_process.extend(self.get_dir_files(obj))
+            else:
+                raise FileNotFoundError(
+                    f"Input is not a valid file or directory: {obj}"
+                )
+        return files_to_process
+
+    def run(self):
+        """Return the image files found for the configured input."""
+        supported_files = self._get_image_files(self.input_obj)
+        return supported_files
+
+
+def modify_filename_if_exists(filename):
+    """
+    Modifies the filename by adding "_filewarp" before the extension if the original filename exists.
+
+    Only the final path component is inspected for an extension, so dots in
+    directory names are left alone.
+
+    Args:
+        filename (str): The filename to modify.
+
+    Returns:
+        str: The modified filename, or the original filename if it doesn't exist.
+            Files without an extension get "_filewarp" appended.
+    """
+    if not os.path.exists(filename):
+        return filename
+
+    # splitext() looks at the last path component only; ext is "" when the
+    # file has no extension.
+    base, ext = os.path.splitext(filename)
+    return f"{base}_filewarp{ext}"
diff --git a/filewarp/utils/formats.py b/filewarp/utils/formats.py
new file mode 100644
index 0000000..c8ea2e9
--- /dev/null
+++ b/filewarp/utils/formats.py
@@ -0,0 +1,368 @@
+# filewarp/utils/formats.py
+from rich.table import Table
+from rich.panel import Panel
+from rich.text import Text
+from rich import box
+from rich.columns import Columns
+
+try:
+ from cli._entry_ import console
+except ImportError:
+ from rich.console import Console
+
+ console = Console()
+
+# Color mappings for consistent styling
+STYLES = {
+ "input": "bold cyan",
+ "output": "bold green",
+ "arrow": "yellow",
+ "pending": "dim italic red",
+ "header": "bold white on blue",
+ "format": "magenta",
+}
+
+
+def create_doc_formats_table():
+    """Build the Rich table that lists each document format and its conversion targets."""
+    doc_table = Table(
+        title="[bold]Document Format Conversions[/]",
+        title_style="bold cyan",
+        box=box.ROUNDED,
+        border_style="blue",
+        header_style="bold white on blue",
+        show_lines=True,
+        padding=(0, 2),
+    )
+
+    # (header, column options) in display order.
+    column_specs = (
+        ("Input Format", {"style": "bold cyan", "justify": "center"}),
+        ("→", {"style": "yellow", "justify": "center", "width": 3}),
+        ("Output Formats", {"style": "green", "justify": "left"}),
+    )
+    for header, options in column_specs:
+        doc_table.add_column(header, **options)
+
+    arrow = "→"
+    routes = (
+        ("xlsx", "csv, txt, doc/docx, db(sql)"),
+        ("doc/docx", "txt, pdf, ppt/pptx, audio(ogg)"),
+        ("txt", "pdf, docx/doc, audio(ogg)"),
+        ("pdf", "doc/docx, txt, audio(ogg)"),
+        ("pptx/ppt", "doc/docx"),
+    )
+    for source_format, targets in routes:
+        doc_table.add_row(source_format, arrow, targets)
+
+    return doc_table
+
+
+def create_audio_formats_table():
+    """Build the header-less Rich table of audio formats and their support status."""
+    audio_table = Table(
+        title="[bold]Supported Audio Formats[/]",
+        title_style="bold cyan",
+        box=box.ROUNDED,
+        border_style="magenta",
+        header_style="bold white on magenta",
+        show_header=False,
+        padding=(0, 3),
+    )
+
+    audio_table.add_column("Format", style="bold magenta", justify="center")
+    audio_table.add_column("Status", style="white", justify="center")
+
+    # Display order; formats listed in `pending` are shown as not implemented.
+    names = (
+        "WAV",
+        "MP3",
+        "OGG",
+        "FLV",
+        "OGV",
+        "MOV",
+        "WEBM",
+        "AAC",
+        "BPF",
+        "M4A",
+        "RAW",
+        "AIFF",
+        "FLAC",
+    )
+    pending = {"AAC", "BPF"}
+
+    for name in names:
+        if name in pending:
+            status = "⏳ Pending Implementation"
+        else:
+            status = "✅ Supported"
+        audio_table.add_row(name, status)
+
+    return audio_table
+
+
+def create_video_formats_table():
+    """Build the Rich table of video formats with their codec and support status."""
+    video_table = Table(
+        title="[bold]Supported Video Formats[/]",
+        title_style="bold cyan",
+        box=box.ROUNDED,
+        border_style="green",
+        header_style="bold white on green",
+        show_lines=True,
+    )
+
+    for header, style in (
+        ("Format", "bold green"),
+        ("Codec", "cyan"),
+        ("Status", "white"),
+    ):
+        video_table.add_column(header, style=style, justify="center")
+
+    # (format, codec) in display order; `pending` marks unimplemented ones.
+    codecs = (
+        ("MP4", "mpeg4"),
+        ("AVI", "rawvideo"),
+        ("OGV", "avc"),
+        ("WEBM", "libvpx"),
+        ("MOV", "mpeg4"),
+        ("MKV", "mpeg4"),
+        ("FLV", "flv"),
+        ("WMV", "WMV"),
+    )
+    pending = {"OGV", "WMV"}
+
+    for name, codec in codecs:
+        status = "⏳ Pending Implementation" if name in pending else "✅ Supported"
+        video_table.add_row(name, codec, status)
+
+    return video_table
+
+
+def create_image_formats_table():
+    """Build the Rich table of image formats with their extension and support status."""
+    image_table = Table(
+        title="[bold]Supported Image Formats[/]",
+        title_style="bold cyan",
+        box=box.ROUNDED,
+        border_style="yellow",
+        header_style="bold white on yellow",
+        show_lines=True,
+    )
+
+    for header, style in (
+        ("Format", "bold yellow"),
+        ("Extension", "cyan"),
+        ("Status", "white"),
+    ):
+        image_table.add_column(header, style=style, justify="center")
+
+    # (format, extension) in display order; `pending` marks unimplemented ones.
+    extensions = (
+        ("JPEG", ".jpeg"),
+        ("JPG", ".jpg"),
+        ("PNG", ".png"),
+        ("GIF", ".gif"),
+        ("BMP", ".bmp"),
+        ("DIB", ".dib"),
+        ("TIFF", ".tiff"),
+        ("PIC", ".pic"),
+        ("PDF", ".pdf"),
+        ("WEBP", ".webp"),
+        ("ICNS", ".icns"),
+        ("EPS", ".eps"),
+        ("PSD", ".psd"),
+        ("SVG", ".svg"),
+        ("EXR", ".exr"),
+        ("DXF", ".dxf"),
+        ("PICT", ".pct"),
+        ("PS", ".ps"),
+        ("POSTSCRIPT", ".ps"),
+    )
+    pending = {"PSD", "SVG", "EXR", "DXF", "PICT", "PS", "POSTSCRIPT"}
+
+    for name, extension in extensions:
+        status = "⏳ Pending Implementation" if name in pending else "✅ Supported"
+        image_table.add_row(name, extension, status)
+
+    return image_table
+
+
+def create_quick_reference():
+    """Build one panel that combines the document, audio, video and image tables."""
+    # (table builder, panel title, border color) in the original build order.
+    panel_specs = (
+        (create_doc_formats_table, "📄 Documents", "blue"),
+        (create_audio_formats_table, "🎵 Audio", "magenta"),
+        (create_video_formats_table, "🎬 Video", "green"),
+        (create_image_formats_table, "🖼️ Images", "yellow"),
+    )
+    tables = [build() for build, _title, _color in panel_specs]
+
+    # Create panels for each category
+    doc_panel, audio_panel, video_panel, image_panel = (
+        Panel(table, title=title, border_style=color, padding=(1, 2))
+        for table, (_build, title, color) in zip(tables, panel_specs)
+    )
+
+    # Arrange in columns for compact display
+    top_row = Columns([doc_panel, audio_panel], equal=True, expand=True)
+    bottom_row = Columns([video_panel, image_panel], equal=True, expand=True)
+    layout = Columns([top_row, bottom_row], equal=False)
+
+    return Panel(
+        layout,
+        title="[bold cyan]File Format Support Matrix[/]",
+        border_style="bright_white",
+        padding=(1, 2),
+    )
+
+
+def create_formats_help():
+    """Build the legend panel that explains the status icons used in the format tables."""
+    # (text, style) fragments appended in order to form the legend.
+    fragments = (
+        ("\n📋 ", "bold blue"),
+        ("Format Conversion Guide\n\n", "bold white"),
+        (" ✅ ", "green"),
+        ("Fully implemented and tested\n", "white"),
+        (" ⏳ ", "yellow"),
+        ("Pending implementation (coming soon)\n\n", "dim"),
+        (" 🔄 ", "cyan"),
+        ("Batch conversions supported\n", "white"),
+        (" 🎯 ", "magenta"),
+        ("Preserves metadata where applicable\n\n", "white"),
+        (" 💡 ", "bright_yellow"),
+        ("Tip: Use ", "white"),
+        ("--help ", "bold cyan"),
+        ("with any command for specific format options", "white"),
+    )
+
+    legend = Text()
+    for fragment, style in fragments:
+        legend.append(fragment, style=style)
+
+    return Panel(
+        legend, title="[bold]Format Help[/]", border_style="cyan", padding=(1, 2)
+    )
+
+
+# Export the table creation functions
+__all__ = [
+ "create_doc_formats_table",
+ "create_audio_formats_table",
+ "create_video_formats_table",
+ "create_image_formats_table",
+ "create_quick_reference",
+ "create_formats_help",
+]
+
+# Backward compatibility: the original constant names are kept below for
+# legacy code; the *_SHOW variants are now plain pre-formatted strings.
+SUPPORTED_DOC_FORMATS_SHOW = """
+Document Format Conversions:
+ xlsx → csv, txt, doc/docx, db(sql)
+ doc/docx → txt, pdf, ppt/pptx, audio(ogg)
+ txt → pdf, docx/doc, audio(ogg)
+ pdf → doc/docx, txt, audio(ogg)
+ pptx/ppt → doc/docx
+"""
+
+SUPPORTED_AUDIO_FORMATS = [
+ "wav",
+ "mp3",
+ "ogg",
+ "flv",
+ "ogv",
+ "webm",
+ "aiff",
+ "flac",
+ "m4a",
+ "raw",
+ "bpf",
+ "aac",
+]
+
+SUPPORTED_AUDIO_FORMATS_DIRECT = [
+ "mp3",
+ "wav",
+ "raw",
+ "ogg",
+ "aiff",
+ "flac",
+ "flv",
+ "webm",
+ "ogv",
+]
+
+SUPPORTED_AUDIO_FORMATS_SHOW = """
+Supported Audio Formats:
+ • WAV • MP3 • OGG • FLV • OGV
+ • MOV • WEBM • AAC* • BPF* • M4A
+ • RAW • AIFF • FLAC
+
+ * Pending Implementation
+"""
+
+SUPPORTED_VIDEO_FORMATS = ["MP4", "AVI", "OGV", "WEBM", "MOV", "MKV", "FLV", "WMV"]
+
+Video_codecs = {
+ "MP4": "mpeg4",
+ "AVI": "rawvideo",
+ "WEBM": "libvpx",
+ "MOV": "mpeg4",
+ "MKV": "mpeg4",
+ "FLV": "flv",
+}
+
+SUPPORTED_VIDEO_FORMATS_SHOW = """
+Supported Video Formats:
+ • MP4 (mpeg4) • AVI (rawvideo)
+ • OGV* (avc) • WEBM (libvpx)
+ • MOV (mpeg4) • MKV (mpeg4)
+ • FLV (flv) • WMV* (WMV)
+
+ * Pending Implementation
+"""
+
+SUPPORTED_IMAGE_FORMATS = {
+ "JPEG": ".jpeg",
+ "JPG": ".jpg",
+ "PNG": ".png",
+ "GIF": ".gif",
+ "BMP": ".bmp",
+ "DIB": ".dib",
+ "TIFF": ".tiff",
+ "PIC": ".pic",
+ "PDF": ".pdf",
+ "WEBP": ".webp",
+ "EPS": ".eps",
+ "ICNS": ".icns",
+ "PSD": ".psd",
+ "SVG": ".svg",
+ "EXR": ".exr",
+ "DXF": ".dxf",
+ "PICT": ".pct",
+ "PS": ".ps",
+ "POSTSCRIPT": ".ps",
+}
+
+SUPPORTED_IMAGE_FORMATS_SHOW = """
+Supported Image Formats:
+ • JPEG (.jpeg) • JPG (.jpg) • PNG (.png)
+ • GIF (.gif) • BMP (.bmp) • DIB (.dib)
+ • TIFF (.tiff) • PIC (.pic) • PDF (.pdf)
+ • WEBP (.webp) • ICNS (.icns) • EPS (.eps)
+ • PSD* (.psd) • SVG* (.svg) • EXR* (.exr)
+ • DXF* (.dxf) • PICT*(.pct) • PS* (.ps)
+
+ * Pending Implementation
+"""
+
+SUPPORTED_DOCUMENT_FORMATS = [
+ "pdf",
+ "doc",
+ "docx",
+ "csv",
+ "xlsx",
+ "xls",
+ "ppt",
+ "pptx",
+ "txt",
+ "ogg",
+ "mp3",
+ "audio",
+]
+
+
+# Function to display all formats in a beautiful layout
+def display_all_formats():
+    """Print the combined format matrix followed by the legend, separated by blank lines."""
+    spacer = "\n"
+
+    console.print(spacer)
+    console.print(create_quick_reference())
+    console.print(spacer)
+    console.print(create_formats_help())
+    console.print(spacer)
diff --git a/filewarp/utils/formats_old.py b/filewarp/utils/formats_old.py
new file mode 100644
index 0000000..9ab7aac
--- /dev/null
+++ b/filewarp/utils/formats_old.py
@@ -0,0 +1,168 @@
+# filewarp/utils/formats_old.py
+from .colors import fg, bg, rs
+
+
+RESET = rs
+
+SUPPORTED_DOC_FORMATS = f"""
+|---------------------------------------------------------------------------
+|{bg.BBLUE}Input format{RESET} |{bg.BBLUE}Output format{RESET} |
+|________________________________|__________________________________________|
+| xlsx {fg.BYELLOW}-------------------->{RESET}|csv txt doc/docx db(sql) |
+| | |
+| doc/docx{fg.BYELLOW}-------------------->{RESET}|txt pdf ppt/pptx audio(ogg) |
+| | |
+| txt {fg.BYELLOW}-------------------->{RESET}|pdf docx/doc audio(ogg) |
+| | |
+| pdf {fg.BYELLOW}-------------------->{RESET}|doc/docx txt audio(ogg) |
+| | |
+| pptx/ppt{fg.BYELLOW}-------------------->{RESET}|doc/docx |
+| |
+|___________________________________________________________________________|
+"""
+
+
+# Add supported input and output formats for each media type
+SUPPORTED_AUDIO_FORMATS = [
+ "wav", # Waveform Audio File Format
+ "mp3", # MPEG Audio Layer III
+ "ogg",
+ "flv",
+ "ogv",
+ "webm",
+ "aiff",
+ "flac",
+ "m4a",
+ "raw",
+ "bpf",
+ "aac",
+]  # aac = Advanced Audio Codec, flac = Free Lossless Audio Codec
+
+SUPPORTED_AUDIO_FORMATS_DIRECT = [
+ "mp3",
+ "wav",
+ "raw",
+ "ogg",
+ "aiff",
+ "flac",
+ "flv", # Flash Video
+ "webm",
+ "ogv",
+] # Video
+SUPPORTED_AUDIO_FORMATS_SHOW = f"""
+|==============================|
+| {bg.BBLUE}Supported I/O formats {RESET} |
+|==============================|
+| {fg.CYAN} wav {fg.BYELLOW} |
+| {fg.CYAN} mp3 {fg.BYELLOW} |
+| {fg.CYAN} ogg {fg.BYELLOW} |
+| {fg.CYAN} flv {fg.BYELLOW} |
+| {fg.CYAN} ogv {fg.BYELLOW} |
+| {fg.CYAN} mov {fg.BYELLOW} |
+| {fg.CYAN} webm {fg.BYELLOW} |
+| {fg.CYAN} aac {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+| {fg.CYAN} bpf {fg.BYELLOW}-------------->|{bg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+| {fg.CYAN} m4a {fg.BYELLOW} |
+| {fg.CYAN} raw {fg.BYELLOW} |
+| {fg.CYAN} aiff {fg.BYELLOW} |
+--------------------------------
+
+"""
+
+SUPPORTED_VIDEO_FORMATS = [
+ "MP4", # MPEG-4 part 14 Bitrate - 860kb/s
+ "AVI", # Audio Video Interleave
+ "OGV",
+ "WEBM",
+ "MOV", # QuickTime video Bitrate - 1.01mb/s
+ "MKV", # Matroska video - MKV is known for its support of high-quality content. Bitrate-1.01mb/s
+ "FLV", # Flash video Bitrate
+ "WMV",
+]
+
+
+Video_codecs = {
+ "MP4": "mpeg4",
+ "AVI": "rawvideo",
+ # "OGV": "avc",
+ "WEBM": "libvpx",
+ "MOV": "mpeg4", # QuickTime video
+ "MKV": "mpeg4", # Matroska video
+ "FLV": "flv",
+ # "WMV": "WMV"
+}
+# ASCII table of video formats for terminal display. The bottom border is a
+# plain underscore rule; it previously contained stray literal text.
+SUPPORTED_VIDEO_FORMATS_SHOW = f"""
+,_______________________________________,
+|x| {bg.BBLUE}Supported I/O formats{RESET} |x|
+|x|-----------------------------------{fg.BYELLOW}|x|
+|x| {fg.BMAGENTA} MP4 {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} AVI {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} OGV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} WEBM{fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} MOV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} MKV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} FLV {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} WMV {fg.BYELLOW}-------------->|x|{fg.IMAGENTA}Pending Implementation{RESET}{fg.BYELLOW}
+|,|___________________________________|,|{fg.BYELLOW}
+"""
+
+SUPPORTED_IMAGE_FORMATS = {
+ "JPEG": ".jpeg", # Joint Photographic Experts Group -Lossy compression
+ "JPG": ".jpg", # Same JPEG format (lossy); three-letter extension
+ "PNG": ".png",
+ "GIF": ".gif", # Graphics Interchange Format
+ "BMP": ".bmp", # Windows BMP image
+ "DIB": ".dib", # Windows BMP image
+ "TIFF": ".tiff", # Tagged Image File Format A flexible and high-quality image format that supports lossless compression
+ "PIC": ".pic",
+ "PDF": ".pdf",
+ "WEBP": ".webp",
+ "EPS": ".eps",
+ "ICNS": ".icns", # MacOS X icon
+ # Waiting Implementation 👇
+ "PSD": ".psd",
+ "SVG": ".svg", # Scalable vector Graphics
+ "EXR": ".exr",
+ "DXF": ".dxf", # Autocad format 2D
+ "PICT": ".pct",
+ "PS": ".ps", # PostScript
+ "POSTSCRIPT": ".ps",
+}
+
+SUPPORTED_IMAGE_FORMATS_SHOW = f"""
+__________________________________________
+|x|{bg.BBLUE}Supported I/O formats{RESET} |x|
+|x|_____________________________________{fg.BYELLOW}|x|
+|x| {fg.BMAGENTA} JPEG {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PNG {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} GIF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} BMP {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} DIB {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} TIFF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PIC {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} EXR {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} PDF {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} WebP {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} ICNS {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} PSD {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} SVG {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} EPS {fg.BYELLOW} |x|
+|x| {fg.BMAGENTA} Postscript {fg.FMAGENTA}---------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|x| {fg.BMAGENTA} PICT {fg.FMAGENTA}---------------->|x|{fg.FCYAN} Pending Implementation{RESET}{fg.BYELLOW}
+|_|_____________________________________|x|
+"""
+
+SUPPORTED_DOCUMENT_FORMATS = [
+ "pdf",
+ "doc",
+ "docx",
+ "csv",
+ "xlsx",
+ "xls",
+ "ppt",
+ "pptx",
+ "txt",
+ "ogg",
+ "mp3",
+ "audio",
+]
diff --git a/filewarp/utils/helpmaster.py b/filewarp/utils/helpmaster.py
new file mode 100644
index 0000000..6971426
--- /dev/null
+++ b/filewarp/utils/helpmaster.py
@@ -0,0 +1,25 @@
+# This module lives in the same package as colors.py, so the import is a
+# direct sibling import.
+from .colors import fg, rs
+
+
+RESET = rs
+
+
+def pdf_combine_help():
+    """
+    Build the colored help texts for the PDF join orders.
+
+    Returns:
+        A 3-tuple ``(options, helper, ex)``: the list of order codes, the
+        explanation of the linear and shifting methods, and an example
+        command line.
+    """
+    # Order codes accepted for each joining method.
+    options = f"""
+ _________________________
+ {fg.BWHITE}|Linear: {fg.YELLOW}AA/BB/AAB/BBA{RESET} |
+ {fg.BWHITE}|Shifted: {fg.YELLOW}AB/BA/ABA/BAB{RESET} |
+ _________________________"""
+
+    # Explanation of how pages are ordered by each method.
+    helper = f"""\n\t---------------------------------------------------------------------------------------------
+ {fg.BWHITE}|Currently There are 2 supported methods: {fg.FCYAN}Linear and Alternating/shifting.{RESET}\t\t |
+ |-------------------------------------------------------------------------------------------|
+ {fg.BWHITE}|->Linear pages are ordered in form of: {fg.CYAN}File1Page1,...Fil1Pagen{RESET} then {fg.CYAN}File2Page1,...Fil2Pagen{RESET}|\n\t{fg.BWHITE}|File2 is joined at the end of the file1.\t\t\t\t\t\t |
+ |-------------------------------------------------------------------------------------------|
+ {fg.BWHITE}|->Shifting method Picks: {fg.CYAN}File1Page1, File2Page1...File1pagen,File2Pagen{RESET}\t\t |
+ |--------------------------------------------------------------------------------------------"""
+
+    # Example invocation.
+    ex = f"""\t_____________________________________________________
+ \t|->{fg.BBLUE}filewarp --pdfjoin file1.pdf file2.pdf --order AAB{RESET}|
+ \t-----------------------------------------------------"""
+    return options, helper, ex
diff --git a/filewarp/utils/logging_utils.py b/filewarp/utils/logging_utils.py
new file mode 100644
index 0000000..4851bce
--- /dev/null
+++ b/filewarp/utils/logging_utils.py
@@ -0,0 +1,76 @@
+"""
+Logging configuration for filewarp.
+"""
+
+import logging
+import sys
+from typing import Optional
+
+
+def setup_logging(
+    level: int = logging.INFO,
+    format_string: Optional[str] = None,
+    log_file: Optional[str] = None,
+) -> logging.Logger:
+    """
+    Configure and return the ``filewarp`` logger.
+
+    Every call replaces the logger's handlers: handlers from earlier calls are
+    removed and closed, then a stdout handler (and optionally a file handler)
+    is attached.
+
+    Args:
+        level: Logging level
+        format_string: Custom format string
+        log_file: Optional log file path
+
+    Returns:
+        Configured logger
+    """
+    if format_string is None:
+        format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+    formatter = logging.Formatter(format_string)
+
+    # Package logger (not the root logger)
+    logger = logging.getLogger("filewarp")
+    logger.setLevel(level)
+
+    # Clear existing handlers; close them so a log file opened by an earlier
+    # call does not stay open.
+    for handler in logger.handlers[:]:
+        logger.removeHandler(handler)
+        handler.close()
+
+    # Console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # File handler if specified
+    if log_file:
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+    # Prevent propagation to root logger
+    logger.propagate = False
+
+    return logger
+
+
+class LoggingContext:
+    """
+    Context manager for temporary logging configuration.
+
+    On entry the ``filewarp`` logger is reconfigured with the given level and
+    optional log file; on exit it is reconfigured with the level it had
+    before and the temporary log file is closed.
+    """
+
+    def __init__(self, level: int = logging.INFO, log_file: Optional[str] = None):
+        self.level = level
+        self.log_file = log_file
+        self.original_level = None
+        self.file_handler = None
+
+    def __enter__(self):
+        target = logging.getLogger("filewarp")
+        self.original_level = target.level
+        setup_logging(level=self.level, log_file=self.log_file)
+        # Remember the file handler created for this context (if any) so
+        # __exit__ can release the file.
+        self.file_handler = next(
+            (h for h in target.handlers if isinstance(h, logging.FileHandler)),
+            None,
+        )
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        setup_logging(level=self.original_level)
+        if self.file_handler:
+            logging.getLogger("filewarp").removeHandler(self.file_handler)
+            self.file_handler.close()
+            self.file_handler = None
+
+
+logger = setup_logging()
diff --git a/filewarp/utils/screen.py b/filewarp/utils/screen.py
new file mode 100644
index 0000000..3c6d4de
--- /dev/null
+++ b/filewarp/utils/screen.py
@@ -0,0 +1,19 @@
+"""Provides screen actions like clearing screen etc."""
+
+import os
+import ctypes
+
+
+def clear_screen():
+    """
+    Clear the terminal with the platform's own command.
+
+    Runs ``cls`` on Windows and ``clear`` on Unix/Linux/macOS.
+    """
+    # The shell built-in clears the whole buffer regardless of console size.
+    os.system("cls" if os.name == "nt" else "clear")
diff --git a/filewarp/utils/security/vul_mitigate.py b/filewarp/utils/security/vul_mitigate.py
new file mode 100644
index 0000000..398fb6e
--- /dev/null
+++ b/filewarp/utils/security/vul_mitigate.py
@@ -0,0 +1,177 @@
+import html
+import importlib.metadata
+import json
+import logging
+import os
+import re
+import sqlite3
+import subprocess
+import tempfile
+
+# import shlex
+
+import requests
+from dotenv import load_dotenv
+
+from ...core.exceptions import ValidationError
+
+# from importlib import resources
+from ..colors import fg, rs
+
+RESET = rs
+
+pkg_resources = []
+
+
+class SecurePython:
+    """Small helpers that demonstrate common security mitigations.
+
+    Each public method wraps one standard-library or ``requests`` call in the
+    safer form of the operation (argument-list subprocesses, parameterized SQL,
+    JSON instead of pickle, HTML escaping, ...). ``entry_run`` exercises all of
+    them once as a smoke test.
+    """
+
+    # A sensitive key name, its separator (optionally quoted, ":" or "="), and
+    # the value that follows. Only the value is replaced when redacting.
+    _SECRET_PATTERN = re.compile(
+        r"\b(password|api[_-]?key)\b([\"']?\s*[:=]\s*[\"']?)[^\s\"',;&]+",
+        re.IGNORECASE,
+    )
+
+    def __init__(self):
+        """Initialize security mitigations."""
+        load_dotenv()  # Load environment variables for secret management
+        logging.basicConfig(level=logging.INFO)
+
+    # ✅ 1. Prevent Command Injection
+    def secure_subprocess(self, command_list):
+        """Run a command given as an argument list, without a shell.
+
+        Args:
+            command_list: Program and arguments as a ``list``.
+
+        Returns:
+            The command's captured standard output as text, or ``None`` when
+            the command exits with a non-zero status.
+
+        Raises:
+            ValidationError: If ``command_list`` is not a list.
+        """
+        if not isinstance(command_list, list):
+            raise ValidationError("Command must be a list")
+        try:
+            result = subprocess.run(
+                command_list, check=True, capture_output=True, text=True
+            )
+        except subprocess.CalledProcessError as e:
+            logging.error("Command failed: %s", e)
+            return None
+        return result.stdout
+
+    # ✅ 2. Prevent Path Traversal
+    @staticmethod
+    def _is_within(base_dir, candidate):
+        """Return True when ``candidate`` is ``base_dir`` or lies beneath it."""
+        base = os.path.normcase(os.path.abspath(base_dir))
+        target = os.path.normcase(os.path.abspath(candidate))
+        try:
+            # Compare whole path components: a plain string-prefix test would
+            # accept "/safe/directory-evil" for the base "/safe/dir".
+            return os.path.commonpath([base, target]) == base
+        except ValueError:
+            # commonpath raises when the paths cannot share a root
+            # (e.g. different drives on Windows).
+            return False
+
+    def safe_filepath(self, base_dir, user_input_path):
+        """Resolve ``user_input_path`` against ``base_dir`` and reject escapes.
+
+        The check is lexical (``os.path.abspath``); symbolic links are not
+        resolved.
+
+        Args:
+            base_dir: Directory the result must stay inside.
+            user_input_path: Untrusted path, joined onto ``base_dir``.
+
+        Returns:
+            The absolute, normalized path.
+
+        Raises:
+            ValueError: If the resolved path is outside ``base_dir``.
+        """
+        full_path = os.path.abspath(os.path.join(base_dir, user_input_path))
+
+        if not self._is_within(base_dir, full_path):
+            raise ValueError("Invalid file path: Path traversal attempt detected")
+        print(f"{fg.BBLUE}Return safe path: {fg.BGREEN}{full_path}{RESET}")
+        return full_path
+
+    # ✅ 3. Prevent SQL Injection
+    def safe_sql_query(self, db_path, query, params):
+        """Execute a parameterized SQL statement against a SQLite database.
+
+        Args:
+            db_path: Path handed to ``sqlite3.connect``.
+            query: SQL text using ``?`` placeholders.
+            params: Values bound to the placeholders.
+
+        Returns:
+            The fetched rows, or ``None`` if SQLite reports an error.
+        """
+        conn = sqlite3.connect(db_path)
+        try:
+            cursor = conn.cursor()
+            cursor.execute(query, params)
+            result = cursor.fetchall()
+            conn.commit()
+            return result
+        except sqlite3.Error as e:
+            logging.error("SQL error: %s", e)
+            return None
+        finally:
+            conn.close()
+
+    # ✅ 4. Secure File Handling
+    def secure_temp_file(self, content):
+        """Write ``content`` to a new temporary file and return its path.
+
+        The file is created with ``delete=False`` so that the returned path
+        still exists after this method returns; the caller is responsible for
+        removing it when done.
+
+        Args:
+            content: Text to store; it is encoded with ``str.encode()``.
+
+        Returns:
+            Path of the temporary file.
+        """
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_file.write(content.encode())
+        return temp_file.name
+
+    # ✅ 5. Secure Secret Management
+    def get_secret(self, key):
+        """Fetch a secret from the environment, warning when it is unset or empty.
+
+        Returns:
+            The value of ``os.getenv(key)`` (``None`` when the variable is unset).
+        """
+        secret = os.getenv(key)
+        if not secret:
+            logging.warning("Secret %s is missing!", key)
+        return secret
+
+    # ✅ 6. Prevent Insecure Deserialization
+    def safe_json_load(self, json_string):
+        """Parse ``json_string`` with ``json.loads``.
+
+        Returns:
+            The decoded object, or ``None`` when the text is not valid JSON.
+        """
+        try:
+            return json.loads(json_string)
+        except json.JSONDecodeError as e:
+            logging.error("Invalid JSON: %s", e)
+            return None
+
+    # ✅ 7. Prevent XSS Attacks
+    def sanitize_html(self, user_input):
+        """Return ``user_input`` with HTML special characters escaped."""
+        return html.escape(user_input)
+
+    # ✅ 8. Check Dependency Vulnerabilities
+    def check_dependencies(self):
+        """Compare installed distributions against a remote vulnerability list.
+
+        Installed distribution names are read with ``importlib.metadata`` and
+        each one is looked up in the JSON payload fetched from the advisory
+        URL. The outcome is only logged; nothing is returned or raised.
+        """
+        try:
+            installed_packages = {}
+            for dist in importlib.metadata.distributions():
+                name = dist.metadata.get("Name")
+                if name:
+                    # Lower-case, dash-separated form of the project name.
+                    key = name.lower().replace("_", "-")
+                    installed_packages[key] = dist.version
+
+            # A timeout keeps an unresponsive server from hanging the caller.
+            response = requests.get("https://pyup.io/api/v1/safety/", timeout=10)
+            if response.status_code == 200:
+                advisories = response.json()  # parse the payload once
+                vulnerable_packages = [
+                    package for package in installed_packages if package in advisories
+                ]
+                if vulnerable_packages:
+                    logging.warning(
+                        f"Vulnerable dependencies found: {vulnerable_packages}"
+                    )
+                else:
+                    logging.info("No known vulnerable dependencies detected.")
+            else:
+                logging.warning("Failed to fetch vulnerability database.")
+        except Exception as e:
+            # Deliberately best-effort: a failed check is logged, never raised.
+            logging.error("Error checking dependencies: %s", e)
+
+    # ✅ 9. Secure Logging
+    @classmethod
+    def _redact_secrets(cls, message):
+        """Return ``message`` with values of password / API-key fields masked."""
+        return cls._SECRET_PATTERN.sub(r"\1\2*****", message)
+
+    def secure_logging(self, message):
+        """Log ``message`` at INFO level with secret values replaced by ``*****``.
+
+        The value following a ``password`` or ``API_KEY`` key (separated by
+        ``:`` or ``=``) is masked; the key name itself is kept so the log line
+        stays readable.
+        """
+        logging.info(self._redact_secrets(message))
+
+    # ✅ 10. Run All Security Mitigations
+    def entry_run(self):
+        """Run every mitigation once with example inputs, logging the results."""
+        logging.info("🔒 Running security mitigations...")
+
+        # Example secure execution
+        self.secure_subprocess(["echo", "Secure Execution"])
+
+        # Example secure file path usage
+        try:
+            safe_path = self.safe_filepath("/safe/directory", "../etc/passwd")
+            logging.info(f"Safe path resolved: {safe_path}")
+        except ValueError as e:
+            logging.error(e)
+
+        # Example secure SQL execution. Each safe_sql_query call opens its own
+        # connection, and every ":memory:" connection is a separate empty
+        # database, so a throwaway on-disk database is used to let the INSERT
+        # see the table created by the first call.
+        with tempfile.TemporaryDirectory() as workdir:
+            db_path = os.path.join(workdir, "example.db")
+            self.safe_sql_query(
+                db_path, "CREATE TABLE test (id INTEGER, name TEXT)", ()
+            )
+            self.safe_sql_query(
+                db_path, "INSERT INTO test (id, name) VALUES (?, ?)", (1, "John Doe")
+            )
+
+        # Example secure file handling
+        temp_file = self.secure_temp_file("Secure data")
+        logging.info(f"Created secure temp file at {temp_file}")
+        os.remove(temp_file)  # the example file is no longer needed
+
+        # Example secret fetching
+        self.get_secret("API_KEY")
+
+        # Example safe JSON parsing
+        self.safe_json_load('{"key": "value"}')
+
+        # Example HTML sanitization
+        sanitized_html = self.sanitize_html("")
+        logging.info(f"Sanitized HTML: {sanitized_html}")
+
+        # Example dependency check
+        self.check_dependencies()
+
+        # Example secure logging
+        self.secure_logging("User attempted login with password: mypassword")
+
+        logging.info("✅ All security mitigations executed successfully!")
+
+
+# === Run SecurePython Class ===
+if __name__ == "__main__":
+    # Script entry point: build the helper and run every demonstration once.
+    SecurePython().entry_run()
diff --git a/filewarp/utils/simple.py b/filewarp/utils/simple.py
new file mode 100644
index 0000000..08ffceb
--- /dev/null
+++ b/filewarp/utils/simple.py
@@ -0,0 +1,8 @@
+import logging
+
+# Configure the root logger at import time: INFO threshold and
+# "LEVEL - message" formatted records.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(levelname)s - %(message)s",
+)
+# Shared package logger, looked up by the name "filewarp".
+logger = logging.getLogger("filewarp")
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..5422189
--- /dev/null
+++ b/index.html
@@ -0,0 +1,137 @@
+
+
+
+
+
+ FileMAC - Multimedia File Operation Kit
+
+
+
+
+
+
+
+
FileMAC
+
+ A Comprehensive Multimedia File Operation Kit
+
+
+
+
+
+
+
+
+
Introduction
+
+ FileMAC is a Python-based command-line interface (CLI) utility
+ designed for efficient file conversion, manipulation, and analysis. It
+ supports various multimedia operations, including document conversion,
+ file analysis, and text-to-speech conversion using Google's
+ Text-to-Speech (gTTS) library.
+
+
+
+
+
+
Features
+
+
Convert documents between various formats.
+
Analyze and manipulate multimedia files.
+
Generate audio files from text using gTTS.
+
+ Command-line interface for easy integration into scripts and
+ workflows.
+
+
Supports Linux operating systems.
+
+ Builds on reputable multimedia tools and libraries for robust performance.
+
+
+
+
+
+
+
Installation
+
Install FileMAC using pip:
+
pip install filewarp
+
+ Alternatively, install directly from the GitHub repository:
+