From 11cf5fd04207eea065fd86b6252c8d055b929419 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 28 Nov 2025 21:51:28 +0000 Subject: [PATCH 1/2] Initial plan From 6211be464e21e444a952cb128025d9de4846e526 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 28 Nov 2025 21:56:57 +0000 Subject: [PATCH 2/2] Address code review feedback from copilot-pull-request-reviewer Co-authored-by: xujiongze <31494901+xujiongze@users.noreply.github.com> --- python/README.md | 12 +-- python/app.py | 43 +++++++++-- python/crawler.py | 2 +- python/rag_agent.py | 141 ++++++++++++++++++++-------------- python/requirements.txt | 1 + python/vectorize_documents.py | 4 +- 6 files changed, 127 insertions(+), 76 deletions(-) diff --git a/python/README.md b/python/README.md index 5208693..0c3d883 100644 --- a/python/README.md +++ b/python/README.md @@ -8,7 +8,7 @@ This module contains the Python components for the IOC.EAssistant project, inclu - Python 3.7 or higher - pip package manager -### Install requiments +### Install requirements ```bash cd python @@ -122,11 +122,11 @@ The system supports two model providers: ```bash # Start the web server -python web.py +python app.py ``` -The server will start on `http://localhost:8000` and provide: -- **Swagger UI**: `http://localhost:8000/apidocs/` for interactive API documentation +The server will start on `http://localhost:8080` and provide: +- **Swagger UI**: `http://localhost:8080/apidocs/` for interactive API documentation - **RESTful API**: Endpoints for RAG-powered chat with conversation history #### API Endpoints @@ -139,7 +139,7 @@ Check if the service is running and get model information. **Example:** ```bash -curl http://localhost:8000/health +curl http://localhost:8080/health ``` **Response:** @@ -253,7 +253,7 @@ python/ ├── rag_agent.py # RAG Agent with LangChain (stateless) ├── utils.py # Utility functions (GPU config, formatting) ├── vectorize_documents.py # Document vectorization script -├── web.py # Flask API server (stateless) +├── app.py # Flask API server (stateless) ├── requirements.txt # Python dependencies ├── README.md # This file ├── data/ # Crawled data storage (JSON files) diff --git a/python/app.py b/python/app.py index a8c7180..7d54b78 100644 --- a/python/app.py +++ b/python/app.py @@ -4,8 +4,8 @@ import os import sys import subprocess -import asyncio from datetime import datetime +import tiktoken # --- Flask + Swagger setup --- app = Flask(__name__) @@ -32,24 +32,28 @@ def check_and_setup_data(): # If data doesn't exist, run crawler if not data_exists: print("crawling data...") - subprocess.run( + result = subprocess.run( [sys.executable, "crawler.py"], cwd=os.path.dirname(os.path.abspath(__file__)), capture_output=True, text=True, timeout=600 # 10 minutes timeout ) + if result.returncode != 0: + print(f"Error running crawler.py: {result.stderr}", file=sys.stderr) # If ChromaDB doesn't exist, run vectorize_documents if not chroma_db_exists: print("vectorizing documents...") - subprocess.run( + result = subprocess.run( [sys.executable, "vectorize_documents.py"], cwd=os.path.dirname(os.path.abspath(__file__)), capture_output=True, text=True, timeout=600 # 10 minutes timeout ) + if result.returncode != 0: + print(f"Error running vectorize_documents.py: {result.stderr}", file=sys.stderr) # --- Check and setup data before initializing RAG Agent --- print("Checking prerequisites...") @@ -172,14 +176,30 @@ def chat(): type: string """ try: - data = request.get_json(force=True) + if not request.is_json: + return jsonify({"error": "Content-Type must be application/json"}), 400 + data = request.get_json() messages = data.get("messages", []) model_config = data.get("modelConfig", {}) - metadata = data.get("metadata", {}) if not messages: return jsonify({"error": "messages field required"}), 400 + # Input validation: limit conversation history length to prevent abuse + MAX_MESSAGES = 50 + MAX_CONTENT_LENGTH = 100000 # 100KB total content + + if len(messages) > MAX_MESSAGES: + return jsonify({"error": f"Maximum {MAX_MESSAGES} messages allowed in conversation history"}), 400 + + # Calculate total content length + total_content = sum( + len(msg.get("question", "")) + len(msg.get("answer", "")) + for msg in messages + ) + if total_content > MAX_CONTENT_LENGTH: + return jsonify({"error": f"Total content length exceeds {MAX_CONTENT_LENGTH} characters"}), 400 + # Get the last message (current question) last_message = messages[-1] current_question = last_message.get("question", "").strip() @@ -209,9 +229,16 @@ def chat(): end_time = datetime.now() processing_time = int((end_time - start_time).total_seconds() * 1000) - # Estimate token usage (rough approximation) - prompt_tokens = sum(len(q.split()) + len(a.split()) for q, a in conversation_history) + len(current_question.split()) - completion_tokens = len(answer.split()) + # Use tiktoken for accurate token counting + try: + encoding = tiktoken.encoding_for_model("gpt-4") + except KeyError: + encoding = tiktoken.get_encoding("cl100k_base") + + # Calculate prompt tokens from conversation history and current question + prompt_text = "".join(q + a for q, a in conversation_history) + current_question + prompt_tokens = len(encoding.encode(prompt_text)) + completion_tokens = len(encoding.encode(answer)) total_tokens = prompt_tokens + completion_tokens # Return response in the expected format diff --git a/python/crawler.py b/python/crawler.py index f423518..6044294 100644 --- a/python/crawler.py +++ b/python/crawler.py @@ -174,7 +174,7 @@ async def extract_page_content(self, page): page_data = { "title": title_text, "content": content_text, - "type": "noticia" if "latest-news" in page else "general", + "type": "noticia" if "latest-news" in self.page.url else "general", } os.makedirs("data", exist_ok=True) diff --git a/python/rag_agent.py b/python/rag_agent.py index 4f4c34a..209efd4 100644 --- a/python/rag_agent.py +++ b/python/rag_agent.py @@ -147,9 +147,9 @@ def retrieve_general_context(query: str): k=k, filter={"type": "general"}, ) - # For some backends higher score is better; if None, keep the doc + # For ChromaDB, lower distance scores are better (0 = identical) retrieved_docs = [ - doc for doc, score in with_scores if (score is None or score >= score_threshold) + doc for doc, score in with_scores if (score is None or score <= score_threshold) ] except Exception: # Fallback to basic similarity search @@ -189,8 +189,9 @@ def retrieve_noticia_context(query: str): k=k, filter={"type": "noticia"}, ) + # For ChromaDB, lower distance scores are better (0 = identical) retrieved_docs = [ - doc for doc, score in with_scores if (score is None or score >= score_threshold) + doc for doc, score in with_scores if (score is None or score <= score_threshold) ] except Exception: retrieved_docs = vector_store.similarity_search( @@ -266,6 +267,77 @@ def _initialize_agent(self) -> None: print("Agent mode not supported, langchain version may be outdated.") self.use_agent = False + # ---------------------------- Helper methods ---------------------------- + def _fallback_retrieval(self, question: str, verbose: bool = True) -> list: + """ + Fallback retrieval method using diversified retrieval across both document types. + + Args: + question: The question to search for + verbose: Whether to print debug information + + Returns: + List of retrieved documents + """ + ctx_docs = [] + try: + ctx_docs += self.vector_store.max_marginal_relevance_search( + question, + k=self.k_results, + fetch_k=max(20, self.k_results * self.fetch_k_multiplier), + filter={"type": "general"}, + ) + except Exception: + ctx_docs += self.vector_store.similarity_search( + question, k=self.k_results + ) + try: + ctx_docs += self.vector_store.max_marginal_relevance_search( + question, + k=self.k_results, + fetch_k=max(20, self.k_results * self.fetch_k_multiplier), + filter={"type": "noticia"}, + ) + except Exception as e: + if verbose: + print(f"Noticia MMR search failed, skipping: {e}") + + return ctx_docs + + def _create_llm_with_temperature(self, temperature: float): + """ + Create a new LLM instance with the specified temperature. + This is thread-safe compared to modifying the LLM's temperature attribute. + + Args: + temperature: The temperature to use for the new LLM instance + + Returns: + A new LLM instance with the specified temperature + """ + if self.provider == "openai": + api_key = os.getenv("OPENAI_API_KEY") + return ChatOpenAI( + model=self.llm.model_name, + temperature=temperature, + openai_api_key=api_key, + ) + elif self.provider == "ollama": + try: + import torch + num_gpu_param = -1 if torch.cuda.is_available() else 0 + except Exception: + num_gpu_param = 0 + return ChatOllama( + model=self.llm.model, + temperature=temperature, + num_gpu=num_gpu_param, + num_ctx=getattr(self.llm, 'num_ctx', 8192), + ) + else: + # Fallback to the existing LLM if provider is unknown + return self.llm + # ---------------------------- Query path ------------------------------- def query(self, question: str, verbose: bool = True) -> str: """ @@ -281,28 +353,8 @@ def query(self, question: str, verbose: bool = True) -> str: if verbose: print(f"Agent invocation failed, using simple RAG fallback: {e}") - # Simple RAG fallback: attempt diversified retrieval across both types - ctx_docs = [] - try: - ctx_docs += self.vector_store.max_marginal_relevance_search( - question, - k=self.k_results, - fetch_k=max(20, self.k_results * self.fetch_k_multiplier), - filter={"type": "general"}, - ) - except Exception: - ctx_docs += self.vector_store.similarity_search( - question, k=self.k_results - ) - try: - ctx_docs += self.vector_store.max_marginal_relevance_search( - question, - k=self.k_results, - fetch_k=max(20, self.k_results * self.fetch_k_multiplier), - filter={"type": "noticia"}, - ) - except Exception as e: - print(f"Noticia MMR search failed, skipping: {e}") + # Use the helper method for fallback retrieval + ctx_docs = self._fallback_retrieval(question, verbose) context_blob = "\n\n".join( [f"Source: {d.metadata}\nContent: {d.page_content}" for d in ctx_docs] @@ -343,11 +395,8 @@ def query_with_history( # Current question messages.append(HumanMessage(content=question)) - # Temporarily override temperature if provided - original_temp = None - if temperature is not None: - original_temp = self.llm.temperature - self.llm.temperature = temperature + # Create a new LLM with custom temperature if provided (thread-safe) + llm_to_use = self._create_llm_with_temperature(temperature) if temperature is not None else self.llm try: response = self.agent.invoke({"messages": messages}) @@ -356,30 +405,8 @@ def query_with_history( if verbose: print(f"Agent invocation failed, using simple RAG fallback: {e}") - # Simple RAG fallback: attempt diversified retrieval across both types - ctx_docs = [] - try: - ctx_docs += self.vector_store.max_marginal_relevance_search( - question, - k=self.k_results, - fetch_k=max(20, self.k_results * self.fetch_k_multiplier), - filter={"type": "general"}, - ) - except Exception: - ctx_docs += self.vector_store.similarity_search( - question, k=self.k_results - ) - try: - ctx_docs += self.vector_store.max_marginal_relevance_search( - question, - k=self.k_results, - fetch_k=max(20, self.k_results * self.fetch_k_multiplier), - filter={"type": "noticia"}, - ) - except Exception: - # Failure to retrieve "noticia" type documents is non-fatal; - # fallback will proceed with whatever documents were retrieved. - pass + # Use the helper method for fallback retrieval + ctx_docs = self._fallback_retrieval(question, verbose) context_blob = "\n\n".join( [f"Source: {d.metadata}\nContent: {d.page_content}" for d in ctx_docs] @@ -387,11 +414,7 @@ def query_with_history( # Build simple prompt with context prompt = f"Context:\n{context_blob}\n\nQuestion: {question}\n\nAnswer:" - response_text = self.llm.invoke(prompt).content - finally: - # Restore original temperature if it was overridden - if original_temp is not None: - self.llm.temperature = original_temp + response_text = llm_to_use.invoke(prompt).content return response_text diff --git a/python/requirements.txt b/python/requirements.txt index 347f5a8..4c8a53c 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -16,5 +16,6 @@ jq===1.10.0 langchain-ollama==1.0.0 langchain-openai==1.0.2 openai==2.7.2 +tiktoken==0.7.0 duckduckgo-search==8.1.1 waitress==3.0.2 \ No newline at end of file diff --git a/python/vectorize_documents.py b/python/vectorize_documents.py index 306a753..522375a 100644 --- a/python/vectorize_documents.py +++ b/python/vectorize_documents.py @@ -197,8 +197,8 @@ def vectorize_and_persist( # Use optimized text splitter with separators that respect document structure text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( - chunk_size=700, - chunk_overlap=120, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, separators=["\n\n", "\n", ". ", " ", ""], )