From 928393b4abd7d69272dd7293cf78a24ec07a139c Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 5 Nov 2025 19:03:04 +0800 Subject: [PATCH 1/3] modify bug --- src/memos/mem_reader/simple_struct.py | 22 +++++++++++-------- src/memos/templates/instruction_completion.py | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 0f74adead..2d5fc0296 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -67,9 +67,17 @@ def detect_lang(text): try: if not text or not isinstance(text, str): return "en" + cleaned_text = text + # remove role and timestamp + cleaned_text = re.sub(r'\b(user|assistant|query|answer)\s*:', '', cleaned_text, flags=re.IGNORECASE) + cleaned_text = re.sub(r'\[[\d\-:\s]+\]', '', cleaned_text) + + # extract chinese characters chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" - chinese_chars = re.findall(chinese_pattern, text) - if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3: + chinese_chars = re.findall(chinese_pattern, cleaned_text) + text_without_special = re.sub(r"[\s\d\W]", "", cleaned_text) + print(text_without_special) + if text_without_special and len(chinese_chars) / len(text_without_special) > 0.3: return "zh" return "en" except Exception: @@ -466,15 +474,11 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]: if type == "chat": for items in scene_data: result = [] - for item in items: - # Convert dictionary to string - if "chat_time" in item: - result.append(item) - else: - result.append(item) + for i, item in enumerate(items): + result.append(item) if len(result) >= 10: results.append(result) - context = copy.deepcopy(result[-2:]) + context = copy.deepcopy(result[-2:]) if i + 1 < len(items) else [] result = context if result: results.append(result) diff --git a/src/memos/templates/instruction_completion.py b/src/memos/templates/instruction_completion.py index 03ae52c77..b84b79ed2 100644 --- a/src/memos/templates/instruction_completion.py +++ b/src/memos/templates/instruction_completion.py @@ -45,7 +45,7 @@ def instruct_completion( "zh": "隐式偏好 > ", "en": "implicit preference > ", } - lang = detect_lang(explicit_pref_str + implicit_pref_str) + lang = detect_lang(explicit_pref_str.replace("Explicit Preference:\n", "") + implicit_pref_str.replace("Implicit Preference:\n", "")) if not explicit_pref_str and not implicit_pref_str: return "", "" From f417cd07b05b1b7c9e8262b54bf7f14bd853c542 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 5 Nov 2025 19:04:47 +0800 Subject: [PATCH 2/3] modify bug --- src/memos/mem_reader/simple_struct.py | 8 +++++--- src/memos/templates/instruction_completion.py | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 2d5fc0296..624353c63 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -69,9 +69,11 @@ def detect_lang(text): return "en" cleaned_text = text # remove role and timestamp - cleaned_text = re.sub(r'\b(user|assistant|query|answer)\s*:', '', cleaned_text, flags=re.IGNORECASE) - cleaned_text = re.sub(r'\[[\d\-:\s]+\]', '', cleaned_text) - + cleaned_text = re.sub( + r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE + ) + cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text) + # extract chinese characters chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" chinese_chars = re.findall(chinese_pattern, cleaned_text) diff --git a/src/memos/templates/instruction_completion.py b/src/memos/templates/instruction_completion.py index b84b79ed2..b88ff474c 100644 --- a/src/memos/templates/instruction_completion.py +++ b/src/memos/templates/instruction_completion.py @@ -45,7 +45,10 @@ def instruct_completion( "zh": "隐式偏好 > ", "en": "implicit preference > ", } - lang = detect_lang(explicit_pref_str.replace("Explicit Preference:\n", "") + implicit_pref_str.replace("Implicit Preference:\n", "")) + lang = detect_lang( + explicit_pref_str.replace("Explicit Preference:\n", "") + + implicit_pref_str.replace("Implicit Preference:\n", "") + ) if not explicit_pref_str and not implicit_pref_str: return "", "" From e0b4d8c63663574bf6dd836dfa3454e1daff7383 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 5 Nov 2025 19:27:03 +0800 Subject: [PATCH 3/3] remove print --- src/memos/mem_reader/simple_struct.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 624353c63..13515c038 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -78,7 +78,6 @@ def detect_lang(text): chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]" chinese_chars = re.findall(chinese_pattern, cleaned_text) text_without_special = re.sub(r"[\s\d\W]", "", cleaned_text) - print(text_without_special) if text_without_special and len(chinese_chars) / len(text_without_special) > 0.3: return "zh" return "en"