Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified __pycache__/app.cpython-313.pyc
Binary file not shown.
Binary file modified __pycache__/console.cpython-313.pyc
Binary file not shown.
Binary file not shown.
Binary file modified __pycache__/mode.cpython-313.pyc
Binary file not shown.
71 changes: 71 additions & 0 deletions legal_provisions_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Iterator, Tuple

from re import compile, MULTILINE

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

class LegalProvisionsLoader():
RE_LEVELS = {
"partie": compile(r'^\s*(Partie [^\n]+)', flags=MULTILINE),
"livre": compile(r'^(Livre [^\n]+)', flags=MULTILINE),
"titre": compile(r'^(Titre [^\n]+)', flags=MULTILINE),
"chapitre": compile(r'^(Chapitre [^\n]+)', flags=MULTILINE),
"article": compile(r'^(Article [^\n]+)', flags=MULTILINE)
}

def __init__(self, file_path: str) -> None:
self.file_path = file_path

self.code, self.partie = self._extract_code_partie()

def _extract_code_partie(self) -> Tuple[str, str]:
with open(self.file_path, 'r', encoding='utf-8') as file:
lines = [next(file) for _ in range(3)]

match = self.RE_LEVELS["partie"].match(lines[2])

return lines[0].strip(), match.group(1).strip() if match else None

def lazy_load(self) -> Iterator[Document]:
current = {
"livre": None,
"titre": None,
"chapitre": None,
"article": None
}
buffer = []

def flush():
if current['article'] and buffer:
yield Document(
page_content=''.join(buffer).strip(),
metadata={
"code": self.code,
"partie": self.partie,
**current,
"source": self.file_path
}
)

with open(self.file_path, 'r', encoding='utf-8') as file:
for _ in range(3):
next(file)

for line in file:
for level in ("livre", "titre", "chapitre"):
match = self.RE_LEVELS[level].match(line)
if match:
current[level] = match.group(1).strip()
break
else:
match = self.RE_LEVELS["article"].match(line)
if match:
yield from flush()

current["article"] = match.group(1).strip()
buffer = [line]
else:
buffer.append(line)

yield from flush()
10 changes: 9 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from modes.load_haiku_mode import LoadHaikuMode
from modes.ask_mode import AskMode

from modes.legal_provisions_mode import LegalProvisionsMode
from modes.load_legal_provisions_mode import LoadLegalProvisionsMode

load_dotenv()

if __name__ == "__main__":
Expand All @@ -30,9 +33,14 @@ def sigkill_handler(sig, frame):

app.use("chat", ChatMode)
app.use("ask", AskMode)

app.use("haiku", HaikuMode)
app.use("load-haiku", LoadHaikuMode)
app.use("load-book", LoadBookMode)

app.use("book", BookMode)
app.use("load-book", LoadBookMode)

app.use("legal-provisions", LegalProvisionsMode)
app.use("load-legal-provisions", LoadLegalProvisionsMode)

app.run()
Binary file modified modes/__pycache__/ask_mode.cpython-313.pyc
Binary file not shown.
Binary file modified modes/__pycache__/book_mode.cpython-313.pyc
Binary file not shown.
Binary file modified modes/__pycache__/chat_mode.cpython-313.pyc
Binary file not shown.
Binary file modified modes/__pycache__/haiku_mode.cpython-313.pyc
Binary file not shown.
Binary file not shown.
Binary file modified modes/__pycache__/load_book_mode.cpython-313.pyc
Binary file not shown.
Binary file modified modes/__pycache__/load_haiku_mode.cpython-313.pyc
Binary file not shown.
Binary file not shown.
147 changes: 147 additions & 0 deletions modes/legal_provisions_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from os import getenv

from argparse import _SubParsersAction

from langchain.chat_models import init_chat_model
from langchain.output_parsers import BooleanOutputParser

from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage, BaseMessage

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma

from mode import Mode
from console import Console

class LegalProvisionsMode(Mode):
    """Interactive question-answering loop over the 'legal-provisions' collection.

    For every user message the model is first asked whether the conversation
    history alone is enough to answer. If not, the five most similar
    documents are fetched from the vector store and injected into the system
    prompt. The answer is streamed to the console and appended to the history.
    """

    # Conversation so far (human and AI messages); assigned per instance.
    history: list[BaseMessage]

    def __init__(
        self,
        console: Console,
        model: str = "gpt-4o-mini",
        system: str = "default",
        verbose: bool = False
    ):
        super().__init__(console)

        self.model = model
        # NOTE(review): `system` is stored but run() does not read it.
        self.system = system
        self.verbose = verbose
        # A fresh list per instance: a class-level `[]` would be shared by
        # every instance of this mode.
        self.history = []

    @staticmethod
    def add_subparser(name: str, subparser: _SubParsersAction):
        """Register this mode's command-line options under ``name``."""
        chat_subparser = subparser.add_parser(name)
        chat_subparser.add_argument("--model", type=str, default="gpt-4o-mini")
        chat_subparser.add_argument("--system", type=str, default="default")
        chat_subparser.add_argument("--verbose", "-v", action="store_true")

    def __should_retrieve_from_history_only(
        self,
        model
    ):
        """Ask ``model`` whether the history alone can answer the last question.

        Returns True when the model replies 'oui', False when it replies
        'non', and False as well when the reply cannot be parsed, so the
        caller falls back to querying the vector store.
        """
        message_prompt = """
Compte tenu de l'historique de conversation, réponds par 'oui' ou par 'non' s'il est possible de répondre à la question de l'utilisateur en fonction des informations déjà disponible dans l'historique.\n
Réponds excclusivement soit 'oui', soit 'non' et rien de plus.
"""

        prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(message_prompt),
            MessagesPlaceholder(variable_name="history"),
        ])

        parser = BooleanOutputParser(false_val='non', true_val='oui')
        reply = (prompt | model).invoke({ 'history': self.history })

        try:
            return parser.invoke(reply)
        except ValueError:
            # The parser raises when the reply contains neither or both of
            # 'oui'/'non'; retrieving documents is the safe default.
            return False

    def _stream_answer(self, chain, documents) -> str:
        """Stream the chain's answer to the console and return the full text."""
        self.console.bot_start()
        stream = chain.stream({
            "messages": self.history,
            "documents": documents
        })
        chunks = []
        for chunk in stream:
            chunks.append(chunk)
            self.console.bot_chunk(chunk=chunk)
        self.console.bot_end()

        return "".join(chunks)

    def run(self):
        """Load the model and vector store, then answer questions until interrupted."""
        # Load model
        if self.verbose:
            self.console.info(f"Loading model {self.model}...")

        model = init_chat_model(
            self.model,
            model_provider="openai",
            api_key=getenv('OPENAI_API_KEY')
        )

        # Load vector store
        if self.verbose:
            self.console.info(f"Loading embedding {getenv('EMBEDDING_MODEL')}")

        embeddings = OpenAIEmbeddings(
            model=getenv('EMBEDDING_MODEL'),
            api_key=getenv('OPENAI_API_KEY')
        )

        vector_store = Chroma(
            collection_name='legal-provisions',
            embedding_function=embeddings,
            # Read the same location the loading mode writes to; fall back to
            # the previous hard-coded directory when the variable is unset.
            persist_directory=getenv('VECTOR_STORE_DATA', "./.store")
        )

        # System prompt
        system_prompt = """
Réponds à la question de l’utilisateur en t’appuyant sur le contenue de l'historique de conversation et en t'appuyant sur les extraits du code de l'action sociale et des familles fourni ci-dessous.

Voici les extraits pertinents des articles de loi relatifs à la question de l’utilisateur :

{documents}
"""

        if self.verbose:
            self.console.system_output(system_prompt)

        # Create prompt
        prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(system_prompt),
            MessagesPlaceholder(variable_name="messages"),
        ])

        # Create chain
        chain = prompt | model | StrOutputParser()

        while True:
            user_input = self.console.human_input()
            self.history.append(HumanMessage(user_input))

            history_only = self.__should_retrieve_from_history_only(model=model)
            if self.verbose:
                self.console.info(f"Should retrive from history only : {history_only}")

            # No documents are injected when the history is enough.
            documents = None
            if not history_only:
                documents = vector_store.similarity_search(query=user_input, k=5)

                if self.verbose:
                    for document in documents:
                        self.console.info(f'{document} \n')

            bot_message = self._stream_answer(chain, documents)
            self.history.append(AIMessage(content=bot_message))
46 changes: 46 additions & 0 deletions modes/load_legal_provisions_mode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from os import path, getenv

from argparse import _SubParsersAction

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma

from mode import Mode
from console import Console
from legal_provisions_loader import LegalProvisionsLoader

class LoadLegalProvisionsMode(Mode):
    """Parse a legal code text file and index its articles in the vector store.

    Each article produced by ``LegalProvisionsLoader`` is embedded and added
    to the 'legal-provisions' Chroma collection.
    """

    # Number of documents sent to the vector store per add_documents call,
    # so that articles are embedded in groups rather than one request each.
    BATCH_SIZE = 100

    def __init__(
        self,
        console: Console,
        book: str,
        verbose: bool = False):
        super().__init__(console)

        # Path of the text file to load.
        self.book = book
        self.verbose = verbose

    @staticmethod
    def add_subparser(name: str, subparser: _SubParsersAction):
        """Register this mode's command-line options under ``name``."""
        load_book_subparser = subparser.add_parser(name)
        load_book_subparser.add_argument("book", type=str, help="The book to load")
        load_book_subparser.add_argument("--verbose", "-v", action="store_true", help="Verbose mode")

    def run(self):
        """Load ``self.book`` and add every article to the vector store."""
        self.console.info(f"Loading book {self.book}...")

        loader = LegalProvisionsLoader(self.book)

        embeddings = OpenAIEmbeddings(
            model=getenv('EMBEDDING_MODEL'),
            api_key=getenv('OPENAI_API_KEY')
        )

        vector_store = Chroma(
            collection_name='legal-provisions',
            embedding_function=embeddings,
            # Fall back to the directory the legal-provisions chat mode reads
            # when the variable is unset, instead of passing None.
            persist_directory=getenv('VECTOR_STORE_DATA', "./.store")
        )

        batch = []
        total = 0
        for doc in loader.lazy_load():
            batch.append(doc)
            if len(batch) >= self.BATCH_SIZE:
                vector_store.add_documents(batch)
                total += len(batch)
                batch = []

        # Flush the last, partially filled batch.
        if batch:
            vector_store.add_documents(batch)
            total += len(batch)

        if self.verbose:
            self.console.info(f"Loaded {total} documents from {self.book}")
Binary file not shown.
Binary file removed store/7e6e4430-60d1-424b-a5f1-b998102c8f02/header.bin
Binary file not shown.
Binary file not shown.
Empty file.
Binary file modified store/chroma.sqlite3
Binary file not shown.