diff --git a/.gitignore b/.gitignore index 37f1add..0d2c535 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ .venv .env -.store \ No newline at end of file +.store +store +*.pdf +__pycache__ +*.pyc \ No newline at end of file diff --git a/__pycache__/app.cpython-313.pyc b/__pycache__/app.cpython-313.pyc index 7b0a698..39e781c 100644 Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ diff --git a/__pycache__/console.cpython-313.pyc b/__pycache__/console.cpython-313.pyc index d51f772..db4f146 100644 Binary files a/__pycache__/console.cpython-313.pyc and b/__pycache__/console.cpython-313.pyc differ diff --git a/__pycache__/mode.cpython-313.pyc b/__pycache__/mode.cpython-313.pyc index acc2240..945048a 100644 Binary files a/__pycache__/mode.cpython-313.pyc and b/__pycache__/mode.cpython-313.pyc differ diff --git a/coder_proprement.pdf b/coder_proprement.pdf deleted file mode 100755 index 46fbecd..0000000 Binary files a/coder_proprement.pdf and /dev/null differ diff --git a/main.py b/main.py index 29c01d1..84fedff 100755 --- a/main.py +++ b/main.py @@ -9,6 +9,8 @@ from modes.load_book_mode import LoadBookMode from modes.load_haiku_mode import LoadHaikuMode from modes.ask_mode import AskMode +from modes.cv_mode import CVMode +from modes.load_cv_mode import LoadCVMode load_dotenv() @@ -34,5 +36,7 @@ def sigkill_handler(sig, frame): app.use("load-haiku", LoadHaikuMode) app.use("load-book", LoadBookMode) app.use("book", BookMode) + app.use("cv", CVMode) + app.use("load-cv", LoadCVMode) app.run() diff --git a/modes/__pycache__/ask_mode.cpython-313.pyc b/modes/__pycache__/ask_mode.cpython-313.pyc index 0665edd..5896858 100644 Binary files a/modes/__pycache__/ask_mode.cpython-313.pyc and b/modes/__pycache__/ask_mode.cpython-313.pyc differ diff --git a/modes/__pycache__/book_mode.cpython-313.pyc b/modes/__pycache__/book_mode.cpython-313.pyc index 7877e29..19575f1 100644 Binary files a/modes/__pycache__/book_mode.cpython-313.pyc 
and b/modes/__pycache__/book_mode.cpython-313.pyc differ diff --git a/modes/__pycache__/chat_mode.cpython-313.pyc b/modes/__pycache__/chat_mode.cpython-313.pyc index bce648c..69230f1 100644 Binary files a/modes/__pycache__/chat_mode.cpython-313.pyc and b/modes/__pycache__/chat_mode.cpython-313.pyc differ diff --git a/modes/__pycache__/haiku_mode.cpython-313.pyc b/modes/__pycache__/haiku_mode.cpython-313.pyc index bd68c27..0612aef 100644 Binary files a/modes/__pycache__/haiku_mode.cpython-313.pyc and b/modes/__pycache__/haiku_mode.cpython-313.pyc differ diff --git a/modes/__pycache__/load_book_mode.cpython-313.pyc b/modes/__pycache__/load_book_mode.cpython-313.pyc index 767f53b..a3e1872 100644 Binary files a/modes/__pycache__/load_book_mode.cpython-313.pyc and b/modes/__pycache__/load_book_mode.cpython-313.pyc differ diff --git a/modes/__pycache__/load_haiku_mode.cpython-313.pyc b/modes/__pycache__/load_haiku_mode.cpython-313.pyc index 5207854..120cb0a 100644 Binary files a/modes/__pycache__/load_haiku_mode.cpython-313.pyc and b/modes/__pycache__/load_haiku_mode.cpython-313.pyc differ diff --git a/modes/book_mode.py b/modes/book_mode.py index 83d13e2..0029720 100644 --- a/modes/book_mode.py +++ b/modes/book_mode.py @@ -7,7 +7,7 @@ from langchain.chat_models import init_chat_model from langchain_core.messages import HumanMessage, AIMessage, BaseMessage from langchain_chroma import Chroma -from langchain_openai import OpenAIEmbeddings +from langchain_ollama import OllamaEmbeddings class BookMode(Mode): @@ -16,7 +16,7 @@ class BookMode(Mode): def __init__( self, console: Console, - model: str = "llama3.2:3b", + model: str = "llama3.2:1b", system: str = "default", verbose: bool = False): super().__init__(console) @@ -28,7 +28,7 @@ def __init__( @staticmethod def add_subparser(name: str, subparser: _SubParsersAction): chat_subparser = subparser.add_parser(name) - chat_subparser.add_argument("--model", type=str, default="llama3.2:3b") + 
class CVMode(Mode):
    """Generate interview questions from a CV held in the local vector store.

    The mode retrieves chunks from the Chroma store persisted in
    ``./.store``, injects their text into a recruiter-style system prompt
    and streams the reply of an Ollama chat model to the console.
    """

    # Messages exchanged with the model. Only the annotation lives on the
    # class: the list itself is created per instance in ``__init__`` so two
    # CVMode objects never share (and mutate) the same history.
    history: list[BaseMessage]

    def __init__(
            self,
            console: Console,
            model: str = "llama3.2:1b",
            system: str = "default",
            verbose: bool = False):
        """Store the run options.

        Args:
            console: Console used for every user-facing output.
            model: Name of the Ollama chat model to load.
            system: Name of the system prompt; stored on the instance.
                NOTE(review): ``run`` does not read this value — confirm
                whether the option is still needed.
            verbose: When true, print the retrieved documents and the
                model being loaded.
        """
        super().__init__(console)

        self.model = model
        self.system = system
        self.verbose = verbose
        self.history = []

    @staticmethod
    def add_subparser(name: str, subparser: _SubParsersAction) -> None:
        """Register this mode's sub-command and its options on ``subparser``."""
        chat_subparser = subparser.add_parser(name)
        chat_subparser.add_argument("--model", type=str, default="llama3.2:1b")
        chat_subparser.add_argument("--system", type=str, default="default")
        chat_subparser.add_argument("--verbose", "-v", action="store_true")

    def run(self) -> None:
        """Retrieve the CV chunks and stream interview questions about them."""
        # System prompt; ``{documents}`` is filled with the retrieved CV text.
        system_prompt = """
        Tu es un recruteur expérimenté. Sur la base du CV fourni ci-dessous, rédige une série de questions d’entretien pertinentes pour évaluer le candidat.
        Les questions doivent couvrir les aspects suivants :
        - Questions techniques liées au domaine du candidat
        - Questions comportementales (soft skills, travail en équipe, etc.)
        - Questions de motivation ou de culture d’entreprise
        Voici le CV de l'utilisateur :
        {documents}
        """

        # Load the vector store.
        # NOTE(review): no collection_name is passed, so this reads the
        # default collection of ./.store — confirm it only holds CV chunks.
        vector_store = Chroma(
            embedding_function=OllamaEmbeddings(model="mxbai-embed-large:latest"),
            persist_directory="./.store"
        )

        # Query used to steer the similarity search towards relevant chunks.
        query = "Génère des questions d’entretien à partir du CV"
        documents = vector_store.similarity_search(query, k=4)

        # Without any retrieved chunk the model would have no CV to work
        # from, so stop here instead of generating unrelated questions.
        if not documents:
            self.console.info(
                "Aucun document trouvé dans le vector store. "
                "Chargez d'abord un CV avec le mode load-cv."
            )
            return

        # Optionally show what was retrieved.
        if self.verbose:
            self.console.info("Documents chargés :")
            for document in documents:
                self.console.info(document.page_content)

        # Load the chat model.
        if self.verbose:
            self.console.info(f"Chargement du modèle {self.model}...")

        model = init_chat_model(
            self.model,
            model_provider="ollama",
            temperature=0.7,
        )

        # Build the prompt: system instructions followed by the history.
        prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(system_prompt),
            MessagesPlaceholder(variable_name="messages"),
        ])

        chain = prompt | model | StrOutputParser()

        # Simulated user message that triggers the generation.
        user_message = HumanMessage(content="Génère des questions d’entretien adaptées à mon CV.")
        self.history.append(user_message)

        # Hand the template the plain text of the chunks. Passing the
        # Document objects themselves would insert the repr of the list
        # (metadata, escaped newlines) into the prompt.
        cv_text = "\n\n".join(document.page_content for document in documents)

        self.console.bot_start()
        stream = chain.stream({
            "messages": self.history,
            "documents": cv_text,
        })

        # Echo the reply as it arrives and keep the pieces for the history.
        reply_parts = []
        for chunk in stream:
            reply_parts.append(chunk)
            self.console.bot_chunk(chunk)
        self.console.bot_end()

        self.history.append(AIMessage("".join(reply_parts)))
class LoadCVMode(Mode):
    """Index a CV given as a PDF into the Chroma store persisted in ``./.store``."""

    def __init__(
            self,
            console: Console,
            cv: str,
            verbose: bool = False):
        """Store the run options.

        Args:
            console: Console used for every user-facing output.
            cv: Location of the PDF, handed as-is to ``PyPDFLoader``.
            verbose: When true, report how many chunks each page produced.
        """
        super().__init__(console)

        self.cv = cv
        self.verbose = verbose

    @staticmethod
    def add_subparser(name: str, subparser: _SubParsersAction) -> None:
        """Register this mode's sub-command and its options on ``subparser``."""
        load_cv_subparser = subparser.add_parser(name)
        load_cv_subparser.add_argument("cv", type=str, help="The cv to load")
        load_cv_subparser.add_argument("--verbose", "-v", action="store_true", help="Verbose mode")

    def run(self) -> None:
        """Split the PDF page by page and add the chunks to the vector store."""
        self.console.info(f"Loading cv {self.cv}...")

        # One embeddings client serves both the splitter and the store; the
        # two uses are configured identically, so a second one is redundant.
        embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest")

        # Create vector store
        vector_store = Chroma(
            embedding_function=embeddings,
            persist_directory="./.store"
        )

        # Loading
        text_splitter = SemanticChunker(embeddings=embeddings)

        loader = PyPDFLoader(self.cv)
        for page in loader.lazy_load():
            # A page with no extractable text (e.g. a scanned image) has
            # nothing worth embedding, so skip it.
            if not page.page_content.strip():
                continue

            chunks = [
                chunk
                for chunk in text_splitter.split_documents([page])
                if chunk.page_content.strip()
            ]
            if not chunks:
                continue

            if self.verbose:
                # .get() because the "page" metadata key is set by the
                # loader, not by this code.
                page_number = page.metadata.get("page", "?")
                self.console.info(f"Loaded {len(chunks)} chunks from page {page_number}")

            vector_store.add_documents(chunks)
--git a/store/7e6e4430-60d1-424b-a5f1-b998102c8f02/link_lists.bin b/store/7e6e4430-60d1-424b-a5f1-b998102c8f02/link_lists.bin deleted file mode 100644 index e69de29..0000000 diff --git a/store/chroma.sqlite3 b/store/chroma.sqlite3 deleted file mode 100644 index b376f2c..0000000 Binary files a/store/chroma.sqlite3 and /dev/null differ