Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ This module contains the Python components for the IOC.EAssistant project, inclu
- Python 3.7 or higher
- pip package manager

### Install requiments
### Install requirements

```bash
cd python
Expand Down Expand Up @@ -122,11 +122,11 @@ The system supports two model providers:

```bash
# Start the web server
python web.py
python app.py
```

The server will start on `http://localhost:8000` and provide:
- **Swagger UI**: `http://localhost:8000/apidocs/` for interactive API documentation
The server will start on `http://localhost:8080` and provide:
- **Swagger UI**: `http://localhost:8080/apidocs/` for interactive API documentation
- **RESTful API**: Endpoints for RAG-powered chat with conversation history

#### API Endpoints
Expand All @@ -139,7 +139,7 @@ Check if the service is running and get model information.

**Example:**
```bash
curl http://localhost:8000/health
curl http://localhost:8080/health
```

**Response:**
Expand Down Expand Up @@ -253,7 +253,7 @@ python/
├── rag_agent.py # RAG Agent with LangChain (stateless)
├── utils.py # Utility functions (GPU config, formatting)
├── vectorize_documents.py # Document vectorization script
├── web.py # Flask API server (stateless)
├── app.py # Flask API server (stateless)
├── requirements.txt # Python dependencies
├── README.md # This file
├── data/ # Crawled data storage (JSON files)
Expand Down
43 changes: 35 additions & 8 deletions python/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import os
import sys
import subprocess
import asyncio
from datetime import datetime
import tiktoken

# --- Flask + Swagger setup ---
app = Flask(__name__)
Expand All @@ -32,24 +32,28 @@ def check_and_setup_data():
# If data doesn't exist, run crawler
if not data_exists:
print("crawling data...")
subprocess.run(
result = subprocess.run(
[sys.executable, "crawler.py"],
cwd=os.path.dirname(os.path.abspath(__file__)),
capture_output=True,
text=True,
timeout=600 # 10 minutes timeout
)
if result.returncode != 0:
print(f"Error running crawler.py: {result.stderr}", file=sys.stderr)

# If ChromaDB doesn't exist, run vectorize_documents
if not chroma_db_exists:
print("vectorizing documents...")
subprocess.run(
result = subprocess.run(
[sys.executable, "vectorize_documents.py"],
cwd=os.path.dirname(os.path.abspath(__file__)),
capture_output=True,
text=True,
timeout=600 # 10 minutes timeout
)
if result.returncode != 0:
print(f"Error running vectorize_documents.py: {result.stderr}", file=sys.stderr)

# --- Check and setup data before initializing RAG Agent ---
print("Checking prerequisites...")
Expand Down Expand Up @@ -172,14 +176,30 @@ def chat():
type: string
"""
try:
data = request.get_json(force=True)
if not request.is_json:
return jsonify({"error": "Content-Type must be application/json"}), 400
data = request.get_json()
messages = data.get("messages", [])
model_config = data.get("modelConfig", {})
metadata = data.get("metadata", {})

if not messages:
return jsonify({"error": "messages field required"}), 400

# Input validation: limit conversation history length to prevent abuse
MAX_MESSAGES = 50
MAX_CONTENT_LENGTH = 100000 # 100KB total content

if len(messages) > MAX_MESSAGES:
return jsonify({"error": f"Maximum {MAX_MESSAGES} messages allowed in conversation history"}), 400

# Calculate total content length
total_content = sum(
len(msg.get("question", "")) + len(msg.get("answer", ""))
for msg in messages
)
if total_content > MAX_CONTENT_LENGTH:
return jsonify({"error": f"Total content length exceeds {MAX_CONTENT_LENGTH} characters"}), 400

# Get the last message (current question)
last_message = messages[-1]
current_question = last_message.get("question", "").strip()
Expand Down Expand Up @@ -209,9 +229,16 @@ def chat():
end_time = datetime.now()
processing_time = int((end_time - start_time).total_seconds() * 1000)

# Estimate token usage (rough approximation)
prompt_tokens = sum(len(q.split()) + len(a.split()) for q, a in conversation_history) + len(current_question.split())
completion_tokens = len(answer.split())
# Use tiktoken for accurate token counting
try:
encoding = tiktoken.encoding_for_model("gpt-4")
except KeyError:
encoding = tiktoken.get_encoding("cl100k_base")

# Calculate prompt tokens from conversation history and current question
prompt_text = "".join(q + a for q, a in conversation_history) + current_question
prompt_tokens = len(encoding.encode(prompt_text))
completion_tokens = len(encoding.encode(answer))
total_tokens = prompt_tokens + completion_tokens

# Return response in the expected format
Expand Down
2 changes: 1 addition & 1 deletion python/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def extract_page_content(self, page):
page_data = {
"title": title_text,
"content": content_text,
"type": "noticia" if "latest-news" in page else "general",
"type": "noticia" if "latest-news" in self.page.url else "general",
}

os.makedirs("data", exist_ok=True)
Expand Down
141 changes: 82 additions & 59 deletions python/rag_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ def retrieve_general_context(query: str):
k=k,
filter={"type": "general"},
)
# For some backends higher score is better; if None, keep the doc
# For ChromaDB, lower distance scores are better (0 = identical)
retrieved_docs = [
doc for doc, score in with_scores if (score is None or score >= score_threshold)
doc for doc, score in with_scores if (score is None or score <= score_threshold)
]
except Exception:
# Fallback to basic similarity search
Expand Down Expand Up @@ -189,8 +189,9 @@ def retrieve_noticia_context(query: str):
k=k,
filter={"type": "noticia"},
)
# For ChromaDB, lower distance scores are better (0 = identical)
retrieved_docs = [
doc for doc, score in with_scores if (score is None or score >= score_threshold)
doc for doc, score in with_scores if (score is None or score <= score_threshold)
]
except Exception:
retrieved_docs = vector_store.similarity_search(
Expand Down Expand Up @@ -266,6 +267,77 @@ def _initialize_agent(self) -> None:
print("Agent mode not supported, langchain version may be outdated.")
self.use_agent = False

# ---------------------------- Helper methods ----------------------------
def _fallback_retrieval(self, question: str, verbose: bool = True) -> list:
"""
Fallback retrieval method using diversified retrieval across both document types.

Args:
question: The question to search for
verbose: Whether to print debug information

Returns:
List of retrieved documents
"""
ctx_docs = []
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "general"},
)
except Exception:
ctx_docs += self.vector_store.similarity_search(
question, k=self.k_results
)
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "noticia"},
)
except Exception as e:
if verbose:
print(f"Noticia MMR search failed, skipping: {e}")

return ctx_docs

def _create_llm_with_temperature(self, temperature: float):
"""
Create a new LLM instance with the specified temperature.
This is thread-safe compared to modifying the LLM's temperature attribute.

Args:
temperature: The temperature to use for the new LLM instance

Returns:
A new LLM instance with the specified temperature
"""
if self.provider == "openai":
api_key = os.getenv("OPENAI_API_KEY")
return ChatOpenAI(
model=self.llm.model_name,
temperature=temperature,
openai_api_key=api_key,
)
elif self.provider == "ollama":
try:
import torch
num_gpu_param = -1 if torch.cuda.is_available() else 0
except Exception:
num_gpu_param = 0
return ChatOllama(
model=self.llm.model,
temperature=temperature,
num_gpu=num_gpu_param,
num_ctx=getattr(self.llm, 'num_ctx', 8192),
)
else:
# Fallback to the existing LLM if provider is unknown
return self.llm

# ---------------------------- Query path -------------------------------
def query(self, question: str, verbose: bool = True) -> str:
"""
Expand All @@ -281,28 +353,8 @@ def query(self, question: str, verbose: bool = True) -> str:
if verbose:
print(f"Agent invocation failed, using simple RAG fallback: {e}")

# Simple RAG fallback: attempt diversified retrieval across both types
ctx_docs = []
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "general"},
)
except Exception:
ctx_docs += self.vector_store.similarity_search(
question, k=self.k_results
)
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "noticia"},
)
except Exception as e:
print(f"Noticia MMR search failed, skipping: {e}")
# Use the helper method for fallback retrieval
ctx_docs = self._fallback_retrieval(question, verbose)

context_blob = "\n\n".join(
[f"Source: {d.metadata}\nContent: {d.page_content}" for d in ctx_docs]
Expand Down Expand Up @@ -343,11 +395,8 @@ def query_with_history(
# Current question
messages.append(HumanMessage(content=question))

# Temporarily override temperature if provided
original_temp = None
if temperature is not None:
original_temp = self.llm.temperature
self.llm.temperature = temperature
# Create a new LLM with custom temperature if provided (thread-safe)
llm_to_use = self._create_llm_with_temperature(temperature) if temperature is not None else self.llm

try:
response = self.agent.invoke({"messages": messages})
Expand All @@ -356,42 +405,16 @@ def query_with_history(
if verbose:
print(f"Agent invocation failed, using simple RAG fallback: {e}")

# Simple RAG fallback: attempt diversified retrieval across both types
ctx_docs = []
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "general"},
)
except Exception:
ctx_docs += self.vector_store.similarity_search(
question, k=self.k_results
)
try:
ctx_docs += self.vector_store.max_marginal_relevance_search(
question,
k=self.k_results,
fetch_k=max(20, self.k_results * self.fetch_k_multiplier),
filter={"type": "noticia"},
)
except Exception:
# Failure to retrieve "noticia" type documents is non-fatal;
# fallback will proceed with whatever documents were retrieved.
pass
# Use the helper method for fallback retrieval
ctx_docs = self._fallback_retrieval(question, verbose)

context_blob = "\n\n".join(
[f"Source: {d.metadata}\nContent: {d.page_content}" for d in ctx_docs]
)

# Build simple prompt with context
prompt = f"Context:\n{context_blob}\n\nQuestion: {question}\n\nAnswer:"
response_text = self.llm.invoke(prompt).content
finally:
# Restore original temperature if it was overridden
if original_temp is not None:
self.llm.temperature = original_temp
response_text = llm_to_use.invoke(prompt).content

return response_text

Expand Down
1 change: 1 addition & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ jq===1.10.0
langchain-ollama==1.0.0
langchain-openai==1.0.2
openai==2.7.2
tiktoken==0.7.0
duckduckgo-search==8.1.1
waitress==3.0.2
4 changes: 2 additions & 2 deletions python/vectorize_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ def vectorize_and_persist(

# Use optimized text splitter with separators that respect document structure
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=700,
chunk_overlap=120,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
separators=["\n\n", "\n", ". ", " ", ""],
)

Expand Down