FerTeo · Rosca-Teodora · May 24, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -2,14 +2,19 @@ name: CI Pipeline
 
 on:
   push:
-    branches:
-      - master
+    branches: [master, main, develop]
   pull_request:
-    branches:
-      - master
+    branches: [master, main, develop]
+
+# One concurrency group per workflow + ref: a newer push cancels the run still in flight
+concurrency:
+  group: "${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: true
 
 jobs:
-  build-and-test:
+  # ─── Job 1: Lint ────────────────────────────────────────────────────────────
+  lint:
+    name: Lint (Ruff)
     runs-on: ubuntu-latest
 
     steps:
@@ -22,17 +27,106 @@ jobs:
           python-version: "3.13"
           cache: "pip"
 
-      - name: Install dependencies
+      - name: Install Ruff
+        run: pip install ruff
+
+      - name: Run Ruff check
+        run: ruff check --output-format=github .
+
+      - name: Run Ruff format check
+        run: ruff format --check .
+
+  # ─── Job 2: Unit & Integration Tests ────────────────────────────────────────
+  test:
+    name: Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    needs: lint
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12", "3.13"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+
+      # Native packages for the test run: the Tesseract binary (ai/tools.py imports
+      # pytesseract), Xvfb as the virtual display used by xvfb-run in the test step,
+      # and X11/xcb/GL libraries (presumably what PyQt6 loads at import time).
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y --no-install-recommends \
+            libgl1 \
+            libxcb-icccm4 \
+            libxcb-image0 \
+            libxcb-keysyms1 \
+            libxcb-randr0 \
+            libxcb-render-util0 \
+            libxcb-xfixes0 \
+            libxcb-xinerama0 \
+            libxkbcommon-x11-0 \
+            tesseract-ocr \
+            xvfb
+
+      - name: Install Python dependencies
         run: |
           python -m pip install --upgrade pip
-          # Instalăm dependențele din requirements.txt dacă există
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-          # Adăugăm extensia pentru adnotări pytest pe GitHub
-          pip install ruff pytest pytest-github-actions-annotate-failures
+          pip install -r requirements.txt
+          pip install pytest pytest-github-actions-annotate-failures pytest-cov
 
-      - name: Run Ruff Check
-        run: ruff check --output-format=github .
+      # Create a minimal .env so the app doesn't crash on import.
+      # Only a dummy key is ever written, so the real GOOGLE_API_KEY secret is
+      # deliberately NOT exposed to this step (the script never read it).
+      - name: Set up environment variables
+        run: |
+          cp .env.example .env
+          # Override to use a dummy Google key (Ollama not available in CI)
+          sed -i 's/AI_PROVIDER=ollama/AI_PROVIDER=google/' .env
+          sed -i 's/GOOGLE_API_KEY=your_google_api_key_here/GOOGLE_API_KEY=ci-dummy-key/' .env
+
+      # tests/evals and tests/test_agent_decider.py are skipped (presumably the suites
+      # that need a live model); coverage for core/ and ai/ is written to coverage.xml.
+      - name: Run unit tests (core + ai models — no live AI calls)
+        env:
+          QT_QPA_PLATFORM: offscreen
+          PYTHONDONTWRITEBYTECODE: "1"
+          AI_PROVIDER: google
+          GOOGLE_API_KEY: ci-dummy-key
+        run: |
+          xvfb-run --auto-servernum --server-args="-screen 0 1920x1080x24" \
+            pytest tests/ -v --tb=short \
+              --ignore=tests/evals --ignore=tests/test_agent_decider.py \
+              --cov=core --cov=ai \
+              --cov-report=term-missing --cov-report=xml:coverage.xml
+
+      # Only one matrix leg uploads, so the artifact name stays unique within the run
+      - name: Upload coverage report
+        if: matrix.python-version == '3.13'
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage.xml
+          # Fail loudly if pytest did not produce the report (the default is only a warning)
+          if-no-files-found: error
+          retention-days: 7
 
-      - name: Run tests with Pytest
-        if: always() # Rulează testele chiar dacă Ruff a picat, ca să vezi TOATE erorile
-        run: pytest tests/
+  # ─── Job 3: Summary gate (required by branch protection rule) ───────────────
+  build-and-test:
+    name: build-and-test
+    needs:
+      - lint
+      - test
+    # Evaluate the gate even when an upstream job failed or was skipped
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check all jobs passed
+        run: |
+          if [[ "${{ needs.lint.result }}" == "success" && "${{ needs.test.result }}" == "success" ]]; then
+            echo "All checks passed."
+          else
+            echo "One or more required jobs failed."
+            exit 1
+          fi
diff --git a/Quarantine/Saptamana2LaboratorMDS.pdf b/Quarantine/Saptamana2LaboratorMDS.pdf
diff --git a/ai/agent_compiler.py b/ai/agent_compiler.py
@@ -48,21 +48,21 @@ class CompiledRule(BaseModel):
 You are an expert rule translator for the ClutterKill system.
 Your job is to translate a user's natural language instruction about where and how to save files into a structured JSON rule.
 
+{format_instructions}
+
 User instruction: "{user_prompt}"
 
 Extract the category, folder structure, and naming convention.
 If the naming convention is not explicitly stated, use a default placeholder like "{{original_filename}}" or infer a sensible one if the context implies it.
 
-IMPORTANT: You must return ONLY the raw JSON object containing the actual values. Do NOT return a JSON schema, and do NOT wrap your answer in "properties".
+CRITICAL: You must return ONLY the raw JSON object containing the ACTUAL values based on the user instruction. Do NOT return a JSON schema. Do NOT return properties definitions. DO NOT echo back the format instructions.
 
-Example of valid output:
+Example of expected valid output:
 {{
   "category": "factura",
   "folder_structure": "Facturi",
   "naming_convention": "factura_data.pdf"
 }}
-
-{format_instructions}
 """
 
 

diff --git a/ai/agent_decider.py b/ai/agent_decider.py
@@ -60,6 +60,8 @@ def sanitize_filename(cls, v: str) -> str:
 You are an expert decision-making agent for the ClutterKill system.
 Your job is to analyze a document summary and a set of organization rules, and decide if the document should be moved to the correct folder or placed in quarantine.
 
+{format_instructions}
+
 Rule Category: {rule_category}
 Target Folder: {rule_folder}
 Naming Convention: {rule_naming}
@@ -82,9 +84,7 @@ def sanitize_filename(cls, v: str) -> str:
 5. CRITICAL: Do NOT include spaces in the filename. Use underscores (_) instead.
 6. If the status is "quarantine", the folder must be "Quarantine".
 
-IMPORTANT: You must return ONLY the raw JSON object containing the actual values. Do NOT return a JSON schema, and do NOT wrap your answer in markdown fences (like ```json).
-
-{format_instructions}
+CRITICAL: You must return ONLY the raw JSON object containing the ACTUAL values based on your decision. Do NOT return a JSON schema. Do NOT return properties definitions. DO NOT echo back the format instructions.
 """
 
 _REPAIR_PROMPT = ChatPromptTemplate.from_messages(
@@ -215,7 +215,7 @@ def decide(
     test_filename = "doc_scanned_123.pdf"
 
     print(f"\n{'=' * 60}")
-    print("TEST 1: Sanitizare și Retry")
+    print("TEST 1: Sanitizare si Retry")
     try:
         decision1 = agent.decide(test_summary_match, test_filename, test_rule)
         print("Output JSON (observă cum / a fost înlocuit):")

diff --git a/ai/tools.py b/ai/tools.py
@@ -2,7 +2,7 @@
 Extraction Tools — ai/tools.py
 
 Acest modul conține funcții utilitare pentru extragerea textului din
-diferite tipuri de fișiere (PDF, imagini), folosite ulterior de către
+diferite tipuri de fișiere (PDF, imagini, Word), folosite ulterior de către
 agenții AI pentru procesare.
 """
 
@@ -12,6 +12,7 @@
 from pathlib import Path
 from typing import Union
 
+import docx  # python-docx
 import fitz  # PyMuPDF
 import pytesseract
 from PIL import Image
@@ -91,3 +92,29 @@ def extract_text_from_image(path: Union[str, Path]) -> str:
     except Exception as e:
         logger.error(f"Eroare la extragerea textului din imagine ({file_path}): {e}")
         return ""
+
+
+def extract_text_from_docx(path: Union[str, Path]) -> str:
+    """
+    Extrage textul dintr-un fișier Word (.docx) folosind python-docx.
+
+    Parcurge toate paragrafele documentului și le concatenează cu newline.
+
+    Args:
+        path: Calea către fișierul .docx.
+
+    Returns:
+        Textul extras din document ca string. Returnează un string gol în caz de eroare.
+    """
+    file_path = Path(path)
+    if not file_path.exists():
+        logger.error(f"Fișierul Word nu a fost găsit: {file_path}")
+        return ""
+
+    try:
+        doc = docx.Document(str(file_path))
+        paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
+        return "\n".join(paragraphs).strip()
+    except Exception as e:
+        logger.error(f"Eroare la extragerea textului din Word ({file_path}): {e}")
+        return ""
diff --git a/core/quarantine_db.py b/core/quarantine_db.py
@@ -179,9 +179,7 @@ def remove(self, record_id: int) -> bool:
         """
         conn = self._get_connection()
         try:
-            cursor = conn.execute(
-                "DELETE FROM quarantine WHERE id = ?", (record_id,)
-            )
+            cursor = conn.execute("DELETE FROM quarantine WHERE id = ?", (record_id,))
             conn.commit()
             return cursor.rowcount > 0
         finally:

diff --git a/core/scan_worker.py b/core/scan_worker.py
@@ -6,7 +6,11 @@
 from ai.agent_compiler import CompilerAgent
 from ai.agent_extractor import ExtractorAgent
 from ai.agent_decider import DeciderAgent
-from ai.tools import extract_text_from_pdf, extract_text_from_image
+from ai.tools import (
+    extract_text_from_pdf,
+    extract_text_from_image,
+    extract_text_from_docx,
+)
 from core.file_manager import move_and_rename_file
 from core.quarantine_db import quarantine_db
 
@@ -94,6 +98,8 @@ def run(self):
                         text = extract_text_from_pdf(file_path)
                     elif ext in [".png", ".jpg", ".jpeg", ".bmp", ".tiff"]:
                         text = extract_text_from_image(file_path)
+                    elif ext == ".docx":
+                        text = extract_text_from_docx(file_path)
                     elif ext in [".txt", ".csv", ".md"]:
                         text = file_path.read_text(errors="ignore")
                     else:

diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -0,0 +1,13 @@
+{
+  "pythonVersion": "3.13",
+  "pythonPlatform": "Windows",
+  "venvPath": ".",
+  "include": [
+    "."
+  ],
+  "extraPaths": [
+    "."
+  ],
+  "typeCheckingMode": "basic"
+}