FerTeo · dariabulacu · May 24, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/.env.example b/.env.example
@@ -1,12 +1,13 @@
-# Model Configuration
-# Poate fi 'ollama' sau 'google'
+# ─── AI Provider ──────────────────────────────────────────────────────────────
+# Either 'ollama' (local, free) or 'google' (cloud, requires an API key)
 AI_PROVIDER=ollama
 
-# Daca AI_PROVIDER este 'google', introdu aici cheia ta secreta de API
+# ─── Google Gemini (used only when AI_PROVIDER=google) ────────────────────────
 GOOGLE_API_KEY=your_google_api_key_here
+GOOGLE_MODEL_NAME=gemini-2.0-flash
 
-# Modelul implicit daca folosesti google
-GOOGLE_MODEL_NAME=gemini-2.5-flash
-
-# URL-ul de baza pentru containerul local de ollama
+# ─── Ollama (used when AI_PROVIDER=ollama) ────────────────────────────────────
+# For a local run (python main.py directly):
 OLLAMA_BASE_URL=http://localhost:11434
+# Inside Docker the app service gets this value from docker-compose.yml automatically:
+# OLLAMA_BASE_URL=http://ollama:11434
diff --git a/ai/agent_extractor.py b/ai/agent_extractor.py
@@ -26,7 +26,6 @@
 import logging
 from typing import Any
 
-
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field, ValidationError

diff --git a/ai/tools.py b/ai/tools.py
@@ -7,6 +7,8 @@
 """
 
 import logging
+import platform
+import shutil
 from pathlib import Path
 from typing import Union
 
@@ -16,6 +18,20 @@
 
 logger = logging.getLogger(__name__)
 
+# Windows: locate Tesseract when not on PATH (per-user path uses the real home dir).
+if platform.system() == "Windows":
+    _win_paths = [
+        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
+        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
+        str(Path.home() / "AppData/Local/Programs/Tesseract-OCR/tesseract.exe"),
+    ]
+    _tesseract_found = shutil.which("tesseract")
+    if not _tesseract_found:
+        for _p in _win_paths:
+            if Path(_p).exists():
+                pytesseract.pytesseract.tesseract_cmd = _p
+                break
+
 
 def extract_text_from_pdf(path: Union[str, Path], max_pages: int = 10) -> str:
     """

diff --git a/core/scan_worker.py b/core/scan_worker.py
@@ -7,6 +7,7 @@
 from ai.agent_extractor import ExtractorAgent
 from ai.agent_decider import DeciderAgent
 from ai.tools import extract_text_from_pdf, extract_text_from_image
+from core.file_manager import move_and_rename_file
 from core.quarantine_db import quarantine_db
 
 logger = logging.getLogger(__name__)
@@ -122,8 +123,6 @@ def run(self):
 
                         # Mutăm și redenumim fișierul fizic imediat
                         try:
-                            from core.file_manager import move_and_rename_file
-
                             move_and_rename_file(
                                 str_path, proposed_folder, decision.suggested_name
                             )

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -10,18 +10,53 @@ services:
       - ollama_data:/root/.ollama
       - ./ai:/app/ai
     restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+      start_period: 30s
+
+  # One-shot init job: pulls the base model, builds the custom models, then exits.
+  ollama-setup:
+    image: ollama/ollama
+    container_name: clutterkill_ollama_setup
+    depends_on:
+      ollama:
+        condition: service_healthy
+    volumes:
+      - ollama_data:/root/.ollama
+      - ./ai:/app/ai
+    environment:
+      - OLLAMA_HOST=http://ollama:11434
+    entrypoint: >
+      sh -c "
+        echo '=== ClutterKill: Initializing AI models ===' &&
+        ollama pull gemma2:2b &&
+        ollama create ck-model -f /app/ai/Modelfile &&
+        ollama create ck-extractor -f /app/ai/Modelfile.extractor &&
+        echo '=== All models ready ==='
+      "
+    restart: "no"
 
   app:
     build: .
     container_name: clutterkill_app
     depends_on:
-      - ollama
+      ollama:
+        condition: service_healthy
+      # Wait for the init job so the custom models exist before the app runs.
+      ollama-setup:
+        condition: service_completed_successfully
     volumes:
       - .:/app
     working_dir: /app
     env_file:
       - .env
-    command: tail -f /dev/null # Keep-alive container pentru rulare scripturi/teste
+    environment:
+      # Override the URL from .env with the internal Docker hostname
+      - OLLAMA_BASE_URL=http://ollama:11434
+    command: tail -f /dev/null  # keep-alive container for running scripts/tests
 
 volumes:
   ollama_data:
diff --git a/requirements.txt b/requirements.txt
@@ -3,13 +3,13 @@ langchain
 langchain-community
 langchain-ollama
 langchain-core
+langchain-google-genai
 PyMuPDF
 pytesseract
 pydantic
 pytest
 ruff
-fpdf
+fpdf2
 python-docx
 Pillow
-langchain-google-genai
 python-dotenv
diff --git a/scripts/create_test_pdf.py b/scripts/create_test_pdf.py
@@ -1,26 +1,25 @@
-from fpdf import FPDF
 import os
 
+from fpdf import FPDF
+
 
 def create_fake_pdf():
+    """Write a small three-line test PDF to test_data/source/Curs_MDS_Sem2.pdf."""
-    # Ne asigurăm că există directorul (în caz că vrem să organizăm mai târziu)
     os.makedirs("test_data/source", exist_ok=True)
 
     pdf = FPDF()
     pdf.add_page()
-
-    # FPDF default include doar câteva fonturi. Arial e un alias pentru Helvetica.
     pdf.set_font("Helvetica", size=12)
 
-    pdf.cell(200, 10, txt="Universitatea X - Curs MDS", align="C")
+    pdf.cell(200, 10, text="Universitatea X - Curs MDS", align="C")
     pdf.ln(10)
-    pdf.cell(200, 10, txt="Semestrul 2 - Note de Curs", align="C")
+    pdf.cell(200, 10, text="Semestrul 2 - Note de Curs", align="C")
     pdf.ln(10)
-    pdf.cell(200, 10, txt="Acesta este un document generat automat pentru testare.")
+    pdf.cell(200, 10, text="Acesta este un document generat automat pentru testare.")
 
-    file_path = "Curs_MDS_Sem2.pdf"
+    file_path = os.path.join("test_data", "source", "Curs_MDS_Sem2.pdf")
     pdf.output(file_path)
-    print(f"✅ Fișierul PDF de test a fost creat cu succes: {file_path}")
+    print(f"Fisierul PDF de test a fost creat cu succes: {file_path}")
 
 
 if __name__ == "__main__":

diff --git a/setup.bat b/setup.bat
@@ -0,0 +1,73 @@
+@echo off
+REM ClutterKill — First-time setup script (Windows)
+setlocal enabledelayedexpansion
+
+echo === ClutterKill Setup (Windows) ===
+
+REM 1. Python virtualenv (created only when .venv is missing, then activated)
+if not exist ".venv\" (
+    echo [1/5] Creating virtual environment...
+    python -m venv .venv
+)
+call .venv\Scripts\activate.bat
+
+REM 2. Dependencies (on Windows pip must be upgraded through python -m pip)
+echo [2/5] Installing Python dependencies...
+python -m pip install --upgrade pip -q
+python -m pip install -r requirements.txt -q
+
+REM 3. .env (copied from the template only when no .env exists yet)
+if not exist ".env" (
+    echo [3/5] Creating .env from template...
+    copy .env.example .env
+) else (
+    echo [3/5] .env already exists -- skipping.
+)
+
+REM 4. Ollama models (the nested check needs delayed expansion of ERRORLEVEL)
+echo [4/5] Setting up Ollama models...
+where ollama >nul 2>&1
+if %ERRORLEVEL% == 0 (
+    ollama pull gemma2:2b
+    ollama create ck-model -f ai\Modelfile
+    ollama create ck-extractor -f ai\Modelfile.extractor
+    echo   Models created locally.
+) else (
+    echo   Ollama not found. Trying Docker...
+    where docker >nul 2>&1
+    if !ERRORLEVEL! == 0 (
+        docker-compose up -d ollama
+        echo   Waiting for Ollama to start...
+        timeout /t 20 /nobreak >nul
+        docker exec clutterkill_ollama ollama pull gemma2:2b
+        docker exec clutterkill_ollama ollama create ck-model -f /app/ai/Modelfile
+        docker exec clutterkill_ollama ollama create ck-extractor -f /app/ai/Modelfile.extractor
+        echo   Models created inside Docker container.
+    ) else (
+        echo   ERROR: Neither Ollama nor Docker is available.
+        echo   Install Ollama from https://ollama.com
+        echo   Or Docker Desktop from https://docker.com
+        pause
+        exit /b 1
+    )
+)
+
+REM 5. Tesseract check (report only; parentheses inside the block are escaped)
+echo [5/5] Checking Tesseract OCR...
+where tesseract >nul 2>&1
+if %ERRORLEVEL% == 0 (
+    echo   Tesseract found.
+) else (
+    if exist "C:\Program Files\Tesseract-OCR\tesseract.exe" (
+        echo   Tesseract found at default path.
+    ) else (
+        echo   Tesseract not found ^(OCR on images will be disabled^).
+        echo   Download from: https://github.com/UB-Mannheim/tesseract/wiki
+    )
+)
+
+echo.
+echo === Setup complete^^! Run the app with: ===
+echo   .venv\Scripts\activate
+echo   python main.py
+pause
diff --git a/setup.sh b/setup.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# ClutterKill — First-time setup script (Linux / macOS)
+set -e
+
+echo "=== ClutterKill Setup ==="
+
+# 1. Python virtualenv (created only if .venv is missing, then activated)
+if [ ! -d ".venv" ]; then
+  echo "[1/5] Creating virtual environment..."
+  python3 -m venv .venv
+fi
+source .venv/bin/activate
+
+# 2. Dependencies (installed into the activated virtualenv)
+echo "[2/5] Installing Python dependencies..."
+pip install --upgrade pip -q
+pip install -r requirements.txt -q
+
+# 3. .env (copied from the template only when no .env exists yet)
+if [ ! -f ".env" ]; then
+  echo "[3/5] Creating .env from template..."
+  cp .env.example .env
+else
+  echo "[3/5] .env already exists — skipping."
+fi
+
+# 4. Ollama models (local Ollama preferred; otherwise the Docker container, polled until ready)
+echo "[4/5] Setting up Ollama models..."
+if ! command -v ollama &>/dev/null; then
+  echo "  Ollama not found. Trying Docker..."
+  if command -v docker &>/dev/null && docker info &>/dev/null; then
+    docker-compose up -d ollama
+    echo "  Waiting for Ollama to start..."
+    for _ in {1..30}; do docker exec clutterkill_ollama ollama list &>/dev/null && break; sleep 2; done
+    docker exec clutterkill_ollama ollama pull gemma2:2b
+    docker exec clutterkill_ollama ollama create ck-model -f /app/ai/Modelfile
+    docker exec clutterkill_ollama ollama create ck-extractor -f /app/ai/Modelfile.extractor
+    echo "  Models created inside Docker container."
+  else
+    echo "  ERROR: Neither Ollama nor Docker is available."
+    echo "  Install Ollama from https://ollama.com or Docker from https://docker.com"
+    exit 1
+  fi
+else
+  # Ollama is installed locally
+  ollama pull gemma2:2b
+  ollama create ck-model -f ai/Modelfile
+  ollama create ck-extractor -f ai/Modelfile.extractor
+  echo "  Models created locally."
+fi
+
+# 5. Tesseract (optional, for OCR on images): only reported here, with an install hint per OS
+echo "[5/5] Checking Tesseract OCR..."
+if command -v tesseract &>/dev/null; then
+  echo "  Tesseract found: $(tesseract --version 2>&1 | head -1)"
+else
+  echo "  Tesseract not found (OCR on images will be disabled)."
+  if [[ "$OSTYPE" == "darwin"* ]]; then
+    echo "  Install with: brew install tesseract"
+  else
+    echo "  Install with: sudo apt-get install tesseract-ocr"
+  fi
+fi
+
+echo ""
+echo "=== Setup complete! Run the app with: ==="
+echo "  source .venv/bin/activate"
+echo "  python main.py"