FerTeo · fernandodonea · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/.env.example b/.env.example
@@ -5,5 +5,8 @@ AI_PROVIDER=ollama
 # Daca AI_PROVIDER este 'google', introdu aici cheia ta secreta de API
 GOOGLE_API_KEY=your_google_api_key_here
 
+# Modelul implicit daca folosesti google
+GOOGLE_MODEL_NAME=gemini-2.5-flash
+
 # URL-ul de baza pentru containerul local de ollama
-OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_BASE_URL=http://localhost:11434
diff --git a/README.md b/README.md
@@ -139,24 +139,48 @@ To ensure consistent code quality and formatting, this project is configured to
 
 Once installed, `ruff` will automatically format your code on `git commit`. If changes are made by the formatter, the commit will abort—simply `git add` the updated files and run `git commit` again.
 
-## 🐳 Docker & Local AI Setup
+## 🐳 Environment & Local AI Setup
 
 The application leverages Docker to seamlessly run local AI models without complicating the host system.
 
-### Setup Instructions:
-1. Start the Docker containers:
+### Step-by-Step Setup Instructions:
+
+1. **Configure Environment Variables**:
    ```bash
-   docker-compose up -d
+   cp .env.example .env
    ```
-2. Create the custom AI model (Gemma 2 based) configured for precision:
+
+2. **Start the Docker container** (for Ollama):
+   ```bash
+   docker-compose up -d ollama
+   ```
+
+3. **Pull base model and Create Custom AI Models**:
+   ClutterKill uses two distinct models for processing (Classifier and Extractor):
    ```bash
+   # Pull the base model (Wait for the download to finish)
+   docker exec -it clutterkill_ollama ollama pull gemma2:2b
+   # Note: If you get a 'manifest does not exist' error on older machines, use 'gemma:2b' instead and update the Modelfiles.
+
+   # Create Agent 0 & 2 (Classifier)
    docker exec -it clutterkill_ollama ollama create ck-model -f /app/ai/Modelfile
+
+   # Create Agent 1 (Extractor)
+   docker exec -it clutterkill_ollama ollama create ck-extractor -f /app/ai/Modelfile.extractor
    ```
-3. Verify the model is running:
+
+4. **Verify the models are running**:
    ```bash
    curl http://localhost:11434/api/tags
    ```
 
-*Note: The `docker-compose.yml` configuration also provides environment variables (`AI_PROVIDER`, `GOOGLE_API_KEY`) to easily switch between local `ollama` processing and cloud-based alternatives like `google`.*
+5. **Run the Application**:
+   Activate your virtual environment and run the graphical interface:
+   ```bash
+   source .venv/bin/activate
+   python main.py
+   ```
+
+*Note: The `.env` file also provides environment variables (`AI_PROVIDER`, `GOOGLE_API_KEY`, `GOOGLE_MODEL_NAME`) to easily switch between local `ollama` processing and cloud-based alternatives like `google`.*
 
 For more detailed DevOps and QA instructions, please refer to [README_ingineri.md](README_ingineri.md).
diff --git a/ai/agent_decider.py b/ai/agent_decider.py
@@ -72,9 +72,15 @@ def sanitize_filename(cls, v: str) -> str:
 Instructions:
 1. If the Document Summary MATCHES the Rule Category, your status must be "move".
 2. If it DOES NOT match, or if you are unsure, your status must be "quarantine".
-3. Calculate the new filename based on the Naming Convention. If the naming convention includes {{original_filename}}, replace it with the actual original filename.
-4. If the status is "quarantine", the folder must be "Quarantine".
-5. If the status is "quarantine", the suggested_name MUST be exactly the Original Filename.
+3. Build the new filename using the Naming Convention as a TEMPLATE:
+   - The Naming Convention may contain camelCase or descriptive placeholder words like "abreviereaMateriei", "NumarulCursului", "Data", "Emitent", "Suma", etc.
+   - YOU MUST extract the actual values from the Document Summary and substitute them into each placeholder.
+   - Example: If Naming Convention is "abreviereaMateriei_Curs_NumarulCursului_Data" and the document is about "Algoritmi Avansati, Cursul 4, 01.01.2026", the result must be "AlgoritmiAvansati_Curs_4_01012026".
+   - If a placeholder value cannot be determined from the Document Summary, use a sensible short abbreviation (e.g. "Unknown").
+   - If the Naming Convention is literally "{{original_filename}}", keep the original filename unchanged.
+4. CRITICAL: The new filename MUST keep the exact same file extension as the Original Filename (e.g. .pdf, .docx).
+5. CRITICAL: Do NOT include spaces in the filename. Use underscores (_) instead.
+6. If the status is "quarantine", the folder must be "Quarantine".
 
 IMPORTANT: You must return ONLY the raw JSON object containing the actual values. Do NOT return a JSON schema, and do NOT wrap your answer in markdown fences (like ```json).
 

diff --git a/ai/llm_config.py b/ai/llm_config.py
@@ -30,16 +30,24 @@
 
 import logging
 import os
+from dotenv import load_dotenv
 
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_ollama import ChatOllama
 
+# Load environment variables from the .env file, if one exists
+load_dotenv()
+
 logger = logging.getLogger(__name__)
 
 # ─── Defaults (match docker-compose.yml & .env.example) ─────────────
 _DEFAULT_PROVIDER = "ollama"
 _DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434"
 _DEFAULT_REQUEST_TIMEOUT = 120.0  # seconds — OCR docs can be big
 
+_DEFAULT_GOOGLE_MODEL = "gemini-2.0-flash"  # NOTE(review): .env.example ships gemini-2.5-flash — confirm which is intended
+
 # Model registry — one entry per Modelfile
 MODEL_CLASSIFIER = "ck-model"  # ai/Modelfile
 MODEL_EXTRACTOR = "ck-extractor"  # ai/Modelfile.extractor
@@ -56,35 +64,48 @@ def get_llm(
     temperature: float | None = None,
     num_ctx: int | None = None,
     timeout: float | None = None,
-) -> ChatOllama:
-    """Return a ChatOllama instance for the requested model.
+) -> BaseChatModel:
+    """Return a chat model instance based on chosen AI_PROVIDER.
 
     Parameters
     ----------
     model : str
-        Ollama model name. Use the module constants:
-        ``MODEL_CLASSIFIER`` (default) or ``MODEL_EXTRACTOR``.
+        Ollama model name. Not used for Google, which reads
+        ``GOOGLE_MODEL_NAME`` (fallback: ``_DEFAULT_GOOGLE_MODEL``).
     temperature : float, optional
-        Override sampling temperature (model default: 0.1).
+        Override sampling temperature (default: 0.1).
     num_ctx : int, optional
-        Override context-window size.
+        Override context-window size (Ollama only).
     timeout : float, optional
         Override HTTP request timeout (default: 120 s).
 
     Returns
     -------
-    ChatOllama
+    BaseChatModel
         A LangChain chat-model instance ready for ``.invoke()`` /
         ``.ainvoke()`` / agent binding.
     """
     provider = os.getenv("AI_PROVIDER", _DEFAULT_PROVIDER).lower()
 
     if provider == "google":
-        # Future: return ChatGoogleGenerativeAI(...)
-        raise NotImplementedError(
-            "Google AI provider is not yet implemented. "
-            "Set AI_PROVIDER=ollama or leave unset."
+        api_key = os.getenv("GOOGLE_API_KEY")
+        if not api_key:
+            raise ValueError("GOOGLE_API_KEY is required when AI_PROVIDER='google'")
+
+        google_model = os.getenv("GOOGLE_MODEL_NAME", _DEFAULT_GOOGLE_MODEL)
+        temp = temperature if temperature is not None else 0.1
+
+        logger.info(
+            "Initializing ChatGoogleGenerativeAI  model=%s",
+            google_model,
+        )
+
+        g_llm = ChatGoogleGenerativeAI(
+            model=google_model,
+            temperature=temp,
+            google_api_key=api_key,
         )
+        return g_llm
 
     if provider != "ollama":
         raise ValueError(

diff --git a/core/scan_worker.py b/core/scan_worker.py
@@ -0,0 +1,166 @@
+import logging
+from pathlib import Path
+
+from PyQt6.QtCore import QThread, pyqtSignal
+
+from ai.agent_compiler import CompilerAgent
+from ai.agent_extractor import ExtractorAgent
+from ai.agent_decider import DeciderAgent
+from ai.tools import extract_text_from_pdf, extract_text_from_image
+from core.quarantine_db import quarantine_db
+
+logger = logging.getLogger(__name__)
+
+
+class ScanWorker(QThread):
+    """
+    Scan thread that runs the AI agent pipeline:
+    1. Agent 0 (Compiler) compiles the natural-language rule.
+    2. Agent 1 (Extractor) reads the file and produces a technical summary.
+    3. Agent 2 (Decider) applies the rule to the summary to get a routing decision.
+    4. "move" decisions are moved and renamed at once; any other is added to quarantine_db.
+    """
+
+    progress_updated = pyqtSignal(int)
+    log_updated = pyqtSignal(str)
+    scan_finished = pyqtSignal(int)
+
+    def __init__(self, source_dir: str, dest_dir: str, user_rule: str):
+        super().__init__()
+        self.source_dir = Path(source_dir)
+        self.dest_dir = Path(dest_dir)
+        self.user_rule = user_rule
+
+    def run(self):
+        # 1. Inițializăm agenții
+        self.log_updated.emit("🤖 Se încarcă agenții AI...")
+        try:
+            compiler = CompilerAgent()
+            extractor = ExtractorAgent()
+            decider = DeciderAgent()
+        except Exception as e:
+            self.log_updated.emit(f"❌ Eroare la inițializarea agenților: {e}")
+            self.scan_finished.emit(0)
+            return
+
+        # 2. Compilăm regula
+        if not self.user_rule.strip():
+            self.log_updated.emit("⚠️ Regula nu a fost completată!")
+            self.scan_finished.emit(0)
+            return
+
+        self.log_updated.emit(f"🧠 Compilare regulă: '{self.user_rule}'")
+        try:
+            compiled_rule = compiler.compile(self.user_rule)
+            self.log_updated.emit(
+                f"✅ Regulă compilată:\n{compiled_rule.model_dump_json(indent=2)}"
+            )
+        except Exception as e:
+            self.log_updated.emit(f"❌ Eroare la compilarea regulii: {e}")
+            self.scan_finished.emit(0)
+            return
+
+        # 3. Preluăm fișierele din sursă
+        files = [f for f in self.source_dir.rglob("*") if f.is_file()]
+        total = len(files)
+
+        if total == 0:
+            self.log_updated.emit("⚠️ Niciun fișier găsit în folderul sursă.")
+            self.scan_finished.emit(0)
+            return
+
+        self.log_updated.emit(f"🔍 {total} fișiere găsite. Se începe scanarea cu AI...")
+
+        added_count = 0
+        skipped_count = 0
+
+        existing_paths = {r["original_path"] for r in quarantine_db.get_all()}
+
+        for i, file_path in enumerate(files):
+            str_path = str(file_path)
+
+            if str_path in existing_paths:
+                skipped_count += 1
+                self.log_updated.emit(f"⏭️ {file_path.name} — deja în carantină, skip")
+            else:
+                self.log_updated.emit(f"📄 Procesare: {file_path.name}...")
+
+                # a. Extragere text
+                text = ""
+                ext = file_path.suffix.lower()
+                try:
+                    if ext == ".pdf":
+                        text = extract_text_from_pdf(file_path)
+                    elif ext in [".png", ".jpg", ".jpeg", ".bmp", ".tiff"]:
+                        text = extract_text_from_image(file_path)
+                    elif ext in [".txt", ".csv", ".md"]:
+                        text = file_path.read_text(errors="ignore")
+                    else:
+                        text = f"Fișier de tip necunoscut ({ext}). Conținut text nedisponibil."
+                except Exception as e:
+                    logger.warning(
+                        f"Eroare extragere text pentru {file_path.name}: {e}"
+                    )
+                    text = f"Eroare extracție: {e}"
+
+                # b. Agent 1 (Extragere)
+                try:
+                    extraction_result = extractor.extract(
+                        text or "Conținut gol sau necitibil"
+                    )
+                    summary = extraction_result.get_technical_summary()
+                except Exception as e:
+                    logger.error(f"Eroare ExtractorAgent: {e}")
+                    summary = f"Eroare procesare text: {e}"
+
+                # c. Agent 2 (Decizie)
+                try:
+                    decision = decider.decide(summary, file_path.name, compiled_rule)
+
+                    if decision.status == "move":
+                        proposed_folder = str(self.dest_dir / decision.suggested_folder)
+
+                        # Mutăm și redenumim fișierul fizic imediat
+                        try:
+                            from core.file_manager import move_and_rename_file
+
+                            move_and_rename_file(
+                                str_path, proposed_folder, decision.suggested_name
+                            )
+                            added_count += 1
+                            self.log_updated.emit(
+                                f"  ↳ MOVE: Mutat și redenumit cu succes în -> {proposed_folder}/{decision.suggested_name}"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Eroare la mutarea fișierului {file_path.name}: {e}"
+                            )
+                            self.log_updated.emit(f"  ↳ ❌ Eroare la mutare: {e}")
+
+                    else:
+                        proposed_folder = "Quarantine"
+
+                        # Adăugăm în carantină pentru intervenție manuală
+                        quarantine_db.add(
+                            original_path=str_path,
+                            ai_proposed_name=decision.suggested_name,
+                            ai_proposed_folder=proposed_folder,
+                            reason=f"Decizie AI ({decision.status}) bazată pe: {summary[:100]}...",
+                        )
+                        added_count += 1
+                        self.log_updated.emit(
+                            f"  ↳ QUARANTINE: Trimis în carantină. Nume sugerat: {decision.suggested_name}"
+                        )
+                except Exception as e:
+                    logger.error(f"Eroare DeciderAgent: {e}")
+                    self.log_updated.emit(f"  ↳ ❌ Eroare la luarea deciziei: {e}")
+
+            # Actualizăm progresul
+            progress = int((i + 1) / total * 100)
+            self.progress_updated.emit(progress)
+
+        self.log_updated.emit(
+            f"\n✅ Scanare AI completă! {added_count} fișiere noi trimise spre review, "
+            f"{skipped_count} existente ignorate."
+        )
+        self.scan_finished.emit(added_count)
diff --git a/requirements.txt b/requirements.txt
@@ -11,3 +11,5 @@ ruff
 fpdf
 python-docx
 Pillow
+langchain-google-genai
+python-dotenv