diff --git a/README.md b/README.md index 20beca5..cdd47a0 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,6 @@ async with CZeroEngineClient() as client: response = await client.chat( message="What are the key features?", use_rag=True, - chunk_limit=5, similarity_threshold=0.3 # Lower threshold for better recall ) @@ -99,7 +98,7 @@ async with CZeroEngineClient() as client: use_rag=False ) response_with_rag = await client.chat( - message="Explain semantic search", + message="Explain semantic search", use_rag=True, similarity_threshold=0.3 ) @@ -122,7 +121,7 @@ results = await client.semantic_search( # Use direct client for persona interactions async with CZeroEngineClient() as client: # Chat with default Gestalt persona - response = await client.chat_with_persona( + response = await client.persona_chat( persona_id="gestalt-default", # default persona message="Analyze the implications of AGI" ) @@ -130,7 +129,8 @@ async with CZeroEngineClient() as client: # Or use regular chat (defaults to Gestalt if no persona specified) response = await client.chat( message="What are the key features of CZero Engine?", - use_rag=True + use_rag=True, + workspace_filter="workspace-id" # Optional: Filter to specific workspace ) ``` @@ -187,7 +187,7 @@ from langchain_anthropic import ChatAnthropic # Use multiple LLMs in your workflow cloud_llm = ChatOpenAI(model="gpt-4") # Or Anthropic, Google, etc. -local_llm = CZeroEngineLLM() # Your local CZero Engine +local_llm = CZeroEngineLLM() # Your local CZero Engine # The possibilities are endless! 
šŸš€ ``` @@ -269,7 +269,6 @@ uv run czero version | `/api/health` | GET | System health check | | `/api/chat/send` | POST | LLM chat with optional RAG | | `/api/vector/search/semantic` | POST | Semantic search with hierarchy | -| `/api/vector/search/similarity` | POST | Find similar chunks | | `/api/embeddings/generate` | POST | Generate text embeddings | | `/api/workspaces/create` | POST | Create workspace | | `/api/workspaces/process` | POST | Process documents | diff --git a/examples/03_persona_interactions.py b/examples/03_persona_interactions.py index 455d466..a60835a 100644 --- a/examples/03_persona_interactions.py +++ b/examples/03_persona_interactions.py @@ -161,13 +161,32 @@ async def persona_with_rag(): print("-" * 30) async with CZeroEngineClient() as client: - # Use persona chat with RAG context + # First, list workspaces to find one with documents + workspaces = await client.list_workspaces() + workspace_id = None + + if workspaces.workspaces: + # Use the first available workspace + workspace_id = workspaces.workspaces[0].id + print(f"šŸ“ Using workspace: {workspaces.workspaces[0].name}") + else: + print("āš ļø No workspaces found. 
Creating a sample workspace...") + # Create a sample workspace if none exist + import tempfile + with tempfile.TemporaryDirectory() as temp_dir: + workspace = await client.create_workspace( + name="Sample Workspace", + path=temp_dir + ) + workspace_id = workspace.id + + # Use persona chat with RAG context from workspace print("\nšŸ” Asking Gestalt with document context...\n") - # This would use any processed documents in your workspace response = await client.persona_chat( persona_id="gestalt-default", # Use real persona message="Based on the documents, what are the key features of CZero Engine?", + workspace_filter=workspace_id, # Enable RAG with this workspace max_tokens=100 # Moderate response ) diff --git a/examples/05_langgraph_integration.py b/examples/05_langgraph_integration.py index dc29892..3151f68 100644 --- a/examples/05_langgraph_integration.py +++ b/examples/05_langgraph_integration.py @@ -64,6 +64,7 @@ class CZeroEngineLLM(BaseChatModel): temperature: float = 0.7 base_url: str = "http://localhost:1421" persona_id: str = "gestalt-default" + workspace_id: Optional[str] = None # For RAG context class Config: arbitrary_types_allowed = True @@ -123,7 +124,8 @@ async def _agenerate( message=prompt, system_prompt_template=system_prompt, max_tokens=self.max_tokens, - temperature=self.temperature + temperature=self.temperature, + workspace_filter=self.workspace_id # Add RAG context if available ) else: response = await self.client.chat( @@ -131,7 +133,8 @@ async def _agenerate( use_rag=self.use_rag, system_prompt=system_prompt, max_tokens=self.max_tokens, - temperature=self.temperature + temperature=self.temperature, + workspace_filter=self.workspace_id # Add RAG context if available ) message = AIMessage(content=response.response) diff --git a/src/czero_engine/client.py b/src/czero_engine/client.py index d77d2ee..b745731 100644 --- a/src/czero_engine/client.py +++ b/src/czero_engine/client.py @@ -11,7 +11,6 @@ from .models import ( ChatRequest, 
ChatResponse, SemanticSearchRequest, SemanticSearchResponse, - SimilaritySearchRequest, - RecommendationsRequest, DocumentsResponse, DocumentMetadata, DocumentFullTextResponse, + DocumentsResponse, DocumentMetadata, DocumentFullTextResponse, EmbeddingRequest, EmbeddingResponse, WorkspaceCreateRequest, WorkspaceResponse, WorkspaceListResponse, WorkspaceInfo, @@ -197,72 +196,8 @@ async def semantic_search( response.raise_for_status() return SemanticSearchResponse(**response.json()) - async def find_similar_chunks( - self, - chunk_id: str, - limit: int = 5, - similarity_threshold: float = 0.5 - ) -> SemanticSearchResponse: - """ - Find chunks similar to a given chunk ID. - - Useful for finding related content or duplicates. - - Args: - chunk_id: ID of the reference chunk - limit: Maximum number of results - similarity_threshold: Minimum similarity score - - Returns: - SemanticSearchResponse with similar chunks - """ - request = SimilaritySearchRequest( - chunk_id=chunk_id, - limit=limit, - similarity_threshold=similarity_threshold - ) - - self._log(f"Finding similar to chunk: {chunk_id}") - response = await self.client.post( - f"{self.base_url}/api/vector/search/similarity", - json=request.model_dump() - ) - response.raise_for_status() - return SemanticSearchResponse(**response.json()) - - async def get_recommendations( - self, - positive_chunk_ids: List[str], - negative_chunk_ids: Optional[List[str]] = None, - limit: int = 10 - ) -> SemanticSearchResponse: - """ - Get content recommendations based on positive/negative examples. - - Uses vector math to find content similar to positive examples - and dissimilar to negative examples. 
- - Args: - positive_chunk_ids: Chunk IDs to find similar content to - negative_chunk_ids: Chunk IDs to avoid similarity to - limit: Maximum number of recommendations - - Returns: - SemanticSearchResponse with recommended chunks - """ - request = RecommendationsRequest( - positive_chunk_ids=positive_chunk_ids, - negative_chunk_ids=negative_chunk_ids or [], - limit=limit - ) - - self._log(f"Getting recommendations based on {len(positive_chunk_ids)} positive examples") - response = await self.client.post( - f"{self.base_url}/api/vector/recommendations", - json=request.model_dump() - ) - response.raise_for_status() - return SemanticSearchResponse(**response.json()) + # Note: find_similar_chunks and get_recommendations methods have been deprecated + # Use semantic_search or hierarchical_retrieve for similar functionality # ==================== Document Management ==================== @@ -511,12 +446,14 @@ async def persona_chat( system_prompt_template: Optional[str] = None, conversation_history: Optional[List[Dict[str, str]]] = None, max_tokens: int = 1024, - temperature: float = 0.7 + temperature: float = 0.7, + workspace_filter: Optional[str] = None ) -> PersonaChatResponse: """ Chat with a specific AI persona. Each persona has its own personality, expertise, and interaction style. + Now supports RAG context when workspace_filter is provided. 
Args: persona_id: ID of the persona to chat with @@ -526,6 +463,7 @@ async def persona_chat( conversation_history: Optional conversation history for context max_tokens: Maximum tokens to generate temperature: Temperature for generation + workspace_filter: Optional workspace ID for RAG context Returns: PersonaChatResponse with persona's response @@ -537,7 +475,8 @@ async def persona_chat( system_prompt_template=system_prompt_template, conversation_history=conversation_history, max_tokens=max_tokens, - temperature=temperature + temperature=temperature, + workspace_filter=workspace_filter ) self._log(f"Chatting with persona: {persona_id}") diff --git a/src/czero_engine/models.py b/src/czero_engine/models.py index 5139cd4..0c634ef 100644 --- a/src/czero_engine/models.py +++ b/src/czero_engine/models.py @@ -70,18 +70,8 @@ class SemanticSearchResponse(BaseModel): results: List[SearchResult] -class SimilaritySearchRequest(BaseModel): - """Request model for /api/vector/search/similarity endpoint.""" - chunk_id: str - limit: int = 5 - similarity_threshold: float = 0.5 - - -class RecommendationsRequest(BaseModel): - """Request model for /api/vector/recommendations endpoint.""" - positive_chunk_ids: List[str] - negative_chunk_ids: Optional[List[str]] = Field(default_factory=list) - limit: int = 10 +# Note: SimilaritySearchRequest and RecommendationsRequest have been deprecated +# Use SemanticSearchRequest or HierarchicalRetrievalRequest instead # Document Models @@ -205,6 +195,7 @@ class PersonaChatRequest(BaseModel): conversation_history: Optional[List[ConversationMessage]] = None max_tokens: Optional[int] = 1024 temperature: Optional[float] = 0.7 + workspace_filter: Optional[str] = None # For RAG context class PersonaChatResponse(BaseModel): diff --git a/tests/test_all_endpoints.py b/tests/test_all_endpoints.py index 4c42392..e4fef8b 100644 --- a/tests/test_all_endpoints.py +++ b/tests/test_all_endpoints.py @@ -215,26 +215,8 @@ async def run_all_tests(self): ) # Extract 
chunk_id for similarity search - if search_data and search_data.get("results"): - self.chunk_id = search_data["results"][0]["chunk_id"] - - # Similarity search - await self.test_endpoint( - "Similarity Search", "POST", "/api/vector/search/similarity", - { - "chunk_id": self.chunk_id, - "limit": 3 - } - ) - - # Recommendations - await self.test_endpoint( - "Get Recommendations", "POST", "/api/vector/recommendations", - { - "positive_chunk_ids": [self.chunk_id], - "limit": 5 - } - ) + # Note: Similarity search and recommendations endpoints are deprecated + # The new hierarchical retrieval system replaces these with direct query-based search # 7. Hierarchical Retrieval console.print("\n[bold yellow]═══ Hierarchical Retrieval ═══[/bold yellow]") @@ -274,9 +256,9 @@ async def run_all_tests(self): if persona_data and persona_data.get("persona_id"): self.persona_id = persona_data["persona_id"] - # Chat with persona + # Chat with persona (without RAG) await self.test_endpoint( - "Persona Chat", "POST", "/api/personas/chat", + "Persona Chat (No RAG)", "POST", "/api/personas/chat", { "persona_id": self.persona_id, "message": "Hello, test persona!", @@ -284,6 +266,17 @@ async def run_all_tests(self): } ) + # Chat with persona (with RAG) + await self.test_endpoint( + "Persona Chat (With RAG)", "POST", "/api/personas/chat", + { + "persona_id": self.persona_id, + "message": "Based on the test document, what can you tell me?", + "workspace_filter": self.workspace_id, + "max_tokens": 100 + } + ) + # Delete persona await self.test_endpoint( "Delete Persona", "DELETE", f"/api/personas/{self.persona_id}"