From cb58c31f520d4d620895c1c695b67648f723a142 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 20 Nov 2024 01:57:26 +0000 Subject: [PATCH 1/5] fixed the continuous input --- ChatQnA/chatqna.py | 60 +++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index 95318e9613..f409fb54bb 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -64,6 +64,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k next_inputs = {} next_inputs["model"] = LLM_MODEL next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] + print('LLM INPUT -----------------------------', next_inputs['messages']) next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"] next_inputs["top_p"] = llm_parameters_dict["top_p"] next_inputs["stream"] = inputs["streaming"] @@ -155,29 +156,56 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di return next_data - def align_generator(self, gen, **kwargs): + def split_lines(line): + """ + Split line into individual `data:` segments if multiple `data:` sections exist. + """ + parts = line.split("data:") + return [f"data:{part.strip()}\n\n" for part in parts if part.strip()] # openai reaponse format # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n' for line in gen: line = line.decode("utf-8") - start = line.find("{") - end = line.rfind("}") + 1 - - json_str = line[start:end] - try: - # sometimes yield empty chunk, do a fallback here - json_data = json.loads(json_str) - if ( - json_data["choices"][0]["finish_reason"] != "eos_token" - and "content" in json_data["choices"][0]["delta"] - ): - yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" - except Exception as e: - yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + + if line.count("data:") > 1: + split_data = split_lines(line) + + for part in split_data: + print("split_data-------------", part) + start = part.find("{") + end = part.rfind("}") + 1 + + json_str = part[start:end] + try: + # sometimes yield empty chunk, do a fallback here + json_data = json.loads(json_str) + if ( + json_data["choices"][0]["finish_reason"] != "eos_token" + and "content" in json_data["choices"][0]["delta"] + ): + yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" + except Exception as e: + yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + else: + print("line-------------", line) + start = line.find("{") + end = line.rfind("}") + 1 + + json_str = line[start:end] + try: + # sometimes yield empty chunk, do a fallback here + json_data = json.loads(json_str) + if ( + json_data["choices"][0]["finish_reason"] != "eos_token" + and "content" in json_data["choices"][0]["delta"] + ): + yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" + except Exception as e: + yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + yield "data: [DONE]\n\n" - class ChatQnAService: def __init__(self, host="0.0.0.0", port=8000): self.host = host From 5a87bceda23ef0f0160a9a34549d40dd8e565722 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Tue, 19 Nov 2024 18:02:15 -0800 Subject: [PATCH 2/5] added chatqna.py --- ChatQnA/chatqna.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index f409fb54bb..dd5afaa026 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -48,6 +48,7 @@ def generate_rag_prompt(question, documents): LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0") LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80)) LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3") +LLM_PROMPT = os.getenv("LLM_PROMPT", None) def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): @@ -63,8 +64,11 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k # convert TGI/vLLM to unified OpenAI /v1/chat/completions format next_inputs = {} next_inputs["model"] = LLM_MODEL - next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] - print('LLM INPUT -----------------------------', next_inputs['messages']) + if LLM_PROMPT is None: + next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] + else: + next_inputs['messages'] = [{"role": "user", "content": LLM_PROMPT}] + next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"] next_inputs["top_p"] = llm_parameters_dict["top_p"] next_inputs["stream"] = inputs["streaming"] From 64ab7f124a317b21e8a32f7c2de2e2c038bc55bb Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 27 Nov 2024 17:58:09 -0800 Subject: [PATCH 3/5] added RAG FLAG --- ChatQnA/chatqna.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index dd5afaa026..9c5d4e9761 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -49,6 +49,7 @@ def generate_rag_prompt(question, documents): LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80)) LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3") LLM_PROMPT = os.getenv("LLM_PROMPT", None) +RAG = os.getenv("RAG", False) def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): @@ -65,9 +66,24 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k next_inputs = {} next_inputs["model"] = LLM_MODEL if LLM_PROMPT is None: - next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] + if RAG == True: + print(f"LLM_PROMPT is None, RAG == True, INPUTS = {inputs['inputs']} \n\n") + question_index = inputs["inputs"].find("\n\n### Question:") + cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] + print(f"LLM_PROMPT is None, RAG == True, CLEANED_QUERY: {cleaned_query} \n\n") + next_inputs["messages"] = [{"role": "user", "content": cleaned_query}] + else: + next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] + else: - next_inputs['messages'] = [{"role": "user", "content": LLM_PROMPT}] + if RAG == True: + print(f"LLM_PROMPT is not None, RAG == True, INPUTS = {inputs['inputs']} \n\n") + question_index = inputs["inputs"].find("\n\n### Question:") + cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] + print(f"LLM_PROMPT is not None, RAG == True, CLEANED_QUERY: {cleaned_query} \n\n") + next_inputs["messages"] = [{"role": "user", "content": cleaned_query}] + else: + next_inputs['messages'] = [{"role": "user", "content": LLM_PROMPT}] next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"] next_inputs["top_p"] = llm_parameters_dict["top_p"] From c101a51ecb0bf506d4cbba975b03af6cd225637c Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 27 Nov 2024 19:03:09 -0800 Subject: [PATCH 4/5] rename RAG env --- ChatQnA/chatqna.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index 9c5d4e9761..bc5f69e927 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -49,7 +49,7 @@ def generate_rag_prompt(question, documents): LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80)) LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3") LLM_PROMPT = os.getenv("LLM_PROMPT", None) -RAG = os.getenv("RAG", False) +RAG = os.getenv("RAG", None) def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): @@ -66,7 +66,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k next_inputs = {} next_inputs["model"] = LLM_MODEL if LLM_PROMPT is None: - if RAG == True: + if RAG == "enabled": print(f"LLM_PROMPT is None, RAG == True, INPUTS = {inputs['inputs']} \n\n") question_index = inputs["inputs"].find("\n\n### Question:") cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] @@ -76,7 +76,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] else: - if RAG == True: + if RAG == "enabled": print(f"LLM_PROMPT is not None, RAG == True, INPUTS = {inputs['inputs']} \n\n") question_index = inputs["inputs"].find("\n\n### Question:") cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] From 205ba43e9fa19f3e6dbafefb31f28e5bc9f181d8 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 28 Nov 2024 02:28:11 -0800 Subject: [PATCH 5/5] removed ### --- ChatQnA/chatqna.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index bc5f69e927..b90807a869 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -68,8 +68,8 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k if LLM_PROMPT is None: if RAG == "enabled": print(f"LLM_PROMPT is None, RAG == True, INPUTS = {inputs['inputs']} \n\n") - question_index = inputs["inputs"].find("\n\n### Question:") - cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] + question_index = inputs["inputs"].find("### Question:") + cleaned_query = inputs["inputs"][:question_index + len("### Question:")] print(f"LLM_PROMPT is None, RAG == True, CLEANED_QUERY: {cleaned_query} \n\n") next_inputs["messages"] = [{"role": "user", "content": cleaned_query}] else: @@ -78,8 +78,8 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k else: if RAG == "enabled": print(f"LLM_PROMPT is not None, RAG == True, INPUTS = {inputs['inputs']} \n\n") - question_index = inputs["inputs"].find("\n\n### Question:") - cleaned_query = inputs["inputs"][:question_index + len("\n\n### Question:")] + question_index = inputs["inputs"].find("### Question:") + cleaned_query = inputs["inputs"][:question_index + len("### Question:")] print(f"LLM_PROMPT is not None, RAG == True, CLEANED_QUERY: {cleaned_query} \n\n") next_inputs["messages"] = [{"role": "user", "content": cleaned_query}] else: @@ -208,7 +208,6 @@ def split_lines(line): except Exception as e: yield f"data: {repr(json_str.encode('utf-8'))}\n\n" else: - print("line-------------", line) start = line.find("{") end = line.rfind("}") + 1