# model.py
import os
import chromadb
from dotenv import load_dotenv
# Import the specific, modern classes from their dedicated packages
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
# Imports for the Azure LLM
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
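
# Assumed runtime prerequisites (not declared in this file): the packages behind the
# imports above (chromadb, langchain-chroma, langchain-huggingface, azure-ai-inference,
# python-dotenv), a Chroma database persisted to ./db by a separate embedding script,
# and a .env file providing GITHUB_TOKEN (see initialize_llm_client below).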

def initialize_vector_store(persist_directory: str, embedding_function) -> Chroma:
    """
    Initializes and loads a persistent Chroma vector store from disk.
    """
    if not os.path.exists(persist_directory):
        raise FileNotFoundError(
            f"The vector database directory was not found at: '{persist_directory}'. "
            "Please ensure you have run the embedding script first to create it."
        )
    print(f"Loading existing vector store from '{persist_directory}'...")
    vector_store = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding_function
    )
    return vector_store
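
# Note: loading an existing store does not re-embed anything; embedding_function is only
# used to embed incoming queries at search time, so it must match the model that built
# the persisted collection (all-MiniLM-L6-v2, per the main block below).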

def perform_semantic_search(query: str, vector_store: Chroma, top_k: int = 5) -> list:
    """
    Performs a similarity search on the vector store to find relevant chunks.
    """
    print(f"\nPerforming semantic search for query: '{query}'")
    return vector_store.similarity_search(query, k=top_k)
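
# similarity_search returns LangChain Document objects; each carries the chunk text in
# .page_content plus any .metadata saved at indexing time. If ranked scores are needed,
# Chroma also exposes similarity_search_with_score(query, k=top_k), which returns
# (Document, distance) pairs; that variant is a possible extension and is not used here.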

def initialize_llm_client():
    """
    Initializes the Azure AI Inference client using environment variables.
    """
    load_dotenv()  # Load environment variables from a .env file
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        raise ValueError(
            "GITHUB_TOKEN not found in environment variables. "
            "Please create a .env file and add it."
        )
    endpoint = "https://models.github.ai/inference"
    client = ChatCompletionsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(token),
    )
    print("LLM client initialized successfully.")
    return client
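
# A minimal .env file for this client would contain a single line (placeholder value):
# GITHUB_TOKEN=<a GitHub personal access token authorized for GitHub Models>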

def create_prompt_with_context(query: str, relevant_chunks: list) -> str:
    """
    Combines the user's query with the content of relevant chunks to create a prompt for the LLM.
    """
    chunk_texts = [chunk.page_content for chunk in relevant_chunks]
    context = "\n\n---\n\n".join(chunk_texts)
    prompt = (
        f"Based on the following document excerpts, please answer the user's query.\n\n"
        f"--- CONTEXT ---\n{context}\n\n--- END CONTEXT ---\n\n"
        f"User Query: {query}"
    )
    return prompt
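
# The "---" separators keep individual excerpts distinguishable inside the CONTEXT block,
# and the user's query is placed last so the model answers it against the excerpts above.
# Rough shape of the assembled string:
#   Based on the following document excerpts, please answer the user's query.
#   --- CONTEXT ---
#   <chunk 1>
#   ---
#   <chunk 2>
#   --- END CONTEXT ---
#   User Query: <query>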

# --- Main Execution Block ---
if __name__ == "__main__":
    try:
        # --- Setup Phase ---
        db_directory = "db"
        model_name = "openai/gpt-4o"  # Model identifier used with the GitHub Models endpoint

        # 1. Initialize the embedding model for searching
        print("Initializing embedding model (all-MiniLM-L6-v2)...")
        embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

        # 2. Load the existing vector store
        vector_db = initialize_vector_store(db_directory, embeddings)

        # 3. Initialize the LLM client
        llm_client = initialize_llm_client()

        # 4. Set up conversation history with a system message
        qa_system_prompt = (
            "You are a specialized assistant for medical and insurance policy-related questions. "
            "Answer questions based *only* on the provided medical or insurance policy document "
            "context, ensuring responses are concise (maximum three sentences) and professional. "
            "If a question is unrelated to the document, not medical/insurance-related, or the "
            "answer is not found in the context, respond with: 'I cannot answer this based on "
            "the provided medical or insurance policy document.'"
        )
        system_prompt = SystemMessage(content=qa_system_prompt)
        conversation_history = [system_prompt]
        print("System prompt set. Ready to chat.")

        # --- Interactive Query Loop ---
        while True:
            user_query = input("\nEnter your query (or type 'quit' to exit): ").strip()
            if user_query.lower() == 'quit':
                print("Exiting program. Goodbye!")
                break
            if not user_query:
                continue

            # 1. Find relevant chunks from the document
            results = perform_semantic_search(user_query, vector_db)

            # 2. Create a detailed prompt for the LLM
            prompt_for_llm = create_prompt_with_context(user_query, results)

            # 3. Add the user's new message to the conversation history
            conversation_history.append(HumanMessage(content=prompt_for_llm))

            # 4. Get the LLM's response. The Azure AI Inference client expects plain
            #    role/content messages (or its own message models), not LangChain message
            #    objects, so convert the history before each call.
            print("\nAI is thinking...")
            role_map = {"system": "system", "human": "user", "ai": "assistant"}
            api_messages = [
                {"role": role_map[message.type], "content": message.content}
                for message in conversation_history
            ]
            response = llm_client.complete(
                messages=api_messages,
                model=model_name,
                temperature=0.7,  # Moderate temperature; lower it for more deterministic answers
            )
            ai_response_text = response.choices[0].message.content

            # 5. Add the AI's response to the history for future context
            conversation_history.append(AIMessage(content=ai_response_text))

            # 6. Display the result
            print(f"\nAI: {ai_response_text}")

    except FileNotFoundError as fnf_error:
        print(f"\nERROR: {fnf_error}")
    except ValueError as val_error:
        print(f"\nERROR: {val_error}")
    except Exception as general_error:
        print(f"\nAn unexpected error occurred: {general_error}")