From a0dfab482f4cd93605628719c04568e6678ff46d Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 19:41:19 +0800
Subject: [PATCH 1/6] fix: prevent plugin from overwriting manually edited
 jargon meanings

- Add meaning_edited flag to Jargon ORM and dataclass
- Set meaning_edited=True when user edits meaning via dashboard
- _should_infer_meaning skips jargon with meaning_edited=True
- Auto-migration will add the new column on startup
---
 models/jargon.py                           | 2 ++
 models/orm/jargon.py                       | 2 ++
 services/database/facades/jargon_facade.py | 4 ++++
 services/jargon/jargon_miner.py            | 3 +++
 webui/services/jargon_service.py           | 1 +
 5 files changed, 12 insertions(+)

diff --git a/models/jargon.py b/models/jargon.py
index a3272c5f..109b1d0b 100644
--- a/models/jargon.py
+++ b/models/jargon.py
@@ -20,6 +20,7 @@ class Jargon:
     count: int = 1                          # 出现次数
     last_inference_count: int = 0           # 上次推断时的count值
     is_complete: bool = False               # 是否完成所有推断 (count>=100)
+    meaning_edited: bool = False            # 用户是否手动编辑过释义
     is_global: bool = False                 # 是否全局黑话
     chat_id: str = ""                       # 群组ID
     created_at: Optional[datetime] = None   # 创建时间
@@ -36,6 +37,7 @@ def to_dict(self) -> dict:
             'count': self.count,
             'last_inference_count': self.last_inference_count,
             'is_complete': self.is_complete,
+            'meaning_edited': self.meaning_edited,
             'is_global': self.is_global,
             'chat_id': self.chat_id,
             'created_at': self.created_at.isoformat() if self.created_at else None,
diff --git a/models/orm/jargon.py b/models/orm/jargon.py
index b8cf462f..cf313279 100644
--- a/models/orm/jargon.py
+++ b/models/orm/jargon.py
@@ -19,6 +19,7 @@ class Jargon(Base):
     count = Column(Integer, default=1)
     last_inference_count = Column(Integer, default=0)
     is_complete = Column(Boolean, default=False)
+    meaning_edited = Column(Boolean, default=False)
     is_global = Column(Boolean, default=False)
     chat_id = Column(String(255), nullable=False, index=True)
     # 使用 BigInteger 存储 Unix 时间戳（自动迁移会将 DATETIME 转换为 BIGINT）
@@ -46,6 +47,7 @@ def to_dict(self):
             'count': self.count,
             'last_inference_count': self.last_inference_count,
             'is_complete': self.is_complete,
+            'meaning_edited': self.meaning_edited,
             'is_global': self.is_global,
             'chat_id': self.chat_id,
             'created_at': self.created_at,
diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py
index f978fd59..448e1d20 100644
--- a/services/database/facades/jargon_facade.py
+++ b/services/database/facades/jargon_facade.py
@@ -164,6 +164,9 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
                         record.meaning = json.dumps(meaning_val, ensure_ascii=False)
                     else:
                         record.meaning = str(meaning_val) if meaning_val is not None else None
+                    # Only mark meaning_edited when explicitly set (not from inference)
+                    if jargon_data.get('meaning_edited'):
+                        record.meaning_edited = True
                 if 'is_jargon' in jargon_data:
                     record.is_jargon = jargon_data['is_jargon']
                 if 'count' in jargon_data:
@@ -476,6 +479,7 @@ async def search_jargon(
                         'is_jargon': r.is_jargon,
                         'count': r.count or 0,
                         'is_complete': r.is_complete,
+                        'meaning_edited': r.meaning_edited or False,
                         'is_global': r.is_global or False,
                         'chat_id': r.chat_id,
                         'created_at': r.created_at,
diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py
index 97ae33a2..b30002f1 100644
--- a/services/jargon/jargon_miner.py
+++ b/services/jargon/jargon_miner.py
@@ -299,6 +299,8 @@ def _should_infer_meaning(self, jargon: Jargon) -> bool:
         """
         if jargon.is_complete:
             return False
+        if jargon.meaning_edited:
+            return False
 
         count = jargon.count or 0
         last_inference = jargon.last_inference_count or 0
@@ -512,6 +514,7 @@ async def save_or_update_jargon(
                     count=existing_dict.get('count', 1),
                     last_inference_count=existing_dict.get('last_inference_count', 0),
                     is_complete=existing_dict.get('is_complete', False),
+                    meaning_edited=existing_dict.get('meaning_edited', False),
                     is_global=existing_dict.get('is_global', False),
                     chat_id=existing_dict.get('chat_id', ''),
                     created_at=existing_dict.get('created_at'),
diff --git a/webui/services/jargon_service.py b/webui/services/jargon_service.py
index ef4badb9..2f82192e 100644
--- a/webui/services/jargon_service.py
+++ b/webui/services/jargon_service.py
@@ -352,6 +352,7 @@ async def update_jargon(
                 payload["content"] = content
             if meaning is not None:
                 payload["meaning"] = meaning
+                payload["meaning_edited"] = True
 
             if len(payload) <= 1:
                 return False, "没有需要更新的字段", self._format_jargon_for_frontend(current)

From 0194dd411170b5c17afdc1fcc51f3ea3654d694f Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 19:58:42 +0800
Subject: [PATCH 2/6] fix: change jargon count to actual chat occurrence
 frequency

- Add sync_jargon_counts to facade: bulk-update count from statistical
  filter's term frequency table
- mine_jargon syncs filter frequencies to DB before inference
- Remove manual count+1 in save_or_update_jargon (count now managed
  by frequency sync)
- Inference thresholds [3,6,10,20,40,60,100] now reflect actual chat
  occurrences, not LLM validation pass count
---
 services/database/facades/jargon_facade.py    | 46 +++++++++++++++++++
 .../database/sqlalchemy_database_manager.py   |  3 ++
 services/jargon/jargon_miner.py               |  3 +-
 services/learning/message_pipeline.py         | 10 ++++
 4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py
index 448e1d20..cba48273 100644
--- a/services/database/facades/jargon_facade.py
+++ b/services/database/facades/jargon_facade.py
@@ -195,6 +195,52 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
             self._logger.error(f"[JargonFacade] 更新黑话失败: {e}", exc_info=True)
             return False
 
+    # 3b. sync_jargon_counts
+    async def sync_jargon_counts(
+        self, chat_id: str, term_frequencies: Dict[str, int]
+    ) -> int:
+        """Sync occurrence counts for existing jargon in a group.
+
+        Only updates jargon terms that already exist in DB for *chat_id*.
+        Returns the number of records updated.
+
+        Args:
+            chat_id: Group ID.
+            term_frequencies: ``{term: actual_chat_count}`` from the
+                statistical filter.
+        """
+        if not term_frequencies:
+            return 0
+
+        updated = 0
+        try:
+            async with self.get_session() as session:
+                stmt = select(Jargon).where(
+                    Jargon.chat_id == chat_id,
+                    Jargon.content.in_(list(term_frequencies.keys())),
+                )
+                result = await session.execute(stmt)
+                records = result.scalars().all()
+
+                now = int(time.time())
+                for record in records:
+                    new_count = term_frequencies.get(record.content, 0)
+                    if new_count > (record.count or 0):
+                        record.count = new_count
+                        record.updated_at = now
+                        updated += 1
+
+                if updated:
+                    await session.commit()
+                    self._logger.debug(
+                        f"[JargonFacade] 同步黑话出现次数: "
+                        f"chat={chat_id}, updated={updated}"
+                    )
+        except Exception as e:
+            self._logger.error(f"[JargonFacade] 同步黑话出现次数失败: {e}", exc_info=True)
+
+        return updated
+
     # 4. get_jargon_statistics
     async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]:
         """获取黑话学习统计信息
diff --git a/services/database/sqlalchemy_database_manager.py b/services/database/sqlalchemy_database_manager.py
index 82fc5b4a..e13af6bc 100644
--- a/services/database/sqlalchemy_database_manager.py
+++ b/services/database/sqlalchemy_database_manager.py
@@ -960,6 +960,9 @@ async def insert_jargon(self, jargon_data: Dict[str, Any]) -> Optional[int]:
     async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
         return await self._call_jargon("update_jargon", False, jargon_data)
 
+    async def sync_jargon_counts(self, chat_id: str, term_frequencies: Dict[str, int]) -> int:
+        return await self._call_jargon("sync_jargon_counts", 0, chat_id, term_frequencies)
+
     async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]:
         return await self._call_jargon(
             "get_jargon_statistics",
diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py
index b30002f1..589d59d2 100644
--- a/services/jargon/jargon_miner.py
+++ b/services/jargon/jargon_miner.py
@@ -521,8 +521,7 @@ async def save_or_update_jargon(
                     updated_at=existing_dict.get('updated_at')
                 )
 
-                # 更新现有记录
-                existing.count = (existing.count or 0) + 1
+                # 更新现有记录（count 由频率同步管理，此处不递增）
 
                 # 合并 raw_content
                 existing_list = safe_parse_llm_json(existing.raw_content) or []
diff --git a/services/learning/message_pipeline.py b/services/learning/message_pipeline.py
index 6e8be3a2..3be8d547 100644
--- a/services/learning/message_pipeline.py
+++ b/services/learning/message_pipeline.py
@@ -256,6 +256,16 @@ async def mine_jargon(self, group_id: str) -> None:
                 if not statistical_candidates:
                     statistical_candidates = None
 
+                # Sync actual chat occurrence counts to DB
+                term_freq = self._jargon_statistical_filter._group_term_freq.get(group_id)
+                if term_freq:
+                    try:
+                        await self._db_manager.sync_jargon_counts(
+                            group_id, dict(term_freq)
+                        )
+                    except Exception as e:
+                        logger.debug(f"[JargonMining] Frequency sync failed: {e}")
+
             await jargon_miner.run_once(
                 chat_messages,
                 len(recent_messages),

From 4ef77bf7761bf310e2d9ee9f201be93d3181d0bb Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 20:08:37 +0800
Subject: [PATCH 3/6] Revert "fix: change jargon count to actual chat
 occurrence frequency"

This reverts commit 0194dd411170b5c17afdc1fcc51f3ea3654d694f.
---
 services/database/facades/jargon_facade.py    | 46 -------------------
 .../database/sqlalchemy_database_manager.py   |  3 --
 services/jargon/jargon_miner.py               |  3 +-
 services/learning/message_pipeline.py         | 10 ----
 4 files changed, 2 insertions(+), 60 deletions(-)

diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py
index cba48273..448e1d20 100644
--- a/services/database/facades/jargon_facade.py
+++ b/services/database/facades/jargon_facade.py
@@ -195,52 +195,6 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
             self._logger.error(f"[JargonFacade] 更新黑话失败: {e}", exc_info=True)
             return False
 
-    # 3b. sync_jargon_counts
-    async def sync_jargon_counts(
-        self, chat_id: str, term_frequencies: Dict[str, int]
-    ) -> int:
-        """Sync occurrence counts for existing jargon in a group.
-
-        Only updates jargon terms that already exist in DB for *chat_id*.
-        Returns the number of records updated.
-
-        Args:
-            chat_id: Group ID.
-            term_frequencies: ``{term: actual_chat_count}`` from the
-                statistical filter.
-        """
-        if not term_frequencies:
-            return 0
-
-        updated = 0
-        try:
-            async with self.get_session() as session:
-                stmt = select(Jargon).where(
-                    Jargon.chat_id == chat_id,
-                    Jargon.content.in_(list(term_frequencies.keys())),
-                )
-                result = await session.execute(stmt)
-                records = result.scalars().all()
-
-                now = int(time.time())
-                for record in records:
-                    new_count = term_frequencies.get(record.content, 0)
-                    if new_count > (record.count or 0):
-                        record.count = new_count
-                        record.updated_at = now
-                        updated += 1
-
-                if updated:
-                    await session.commit()
-                    self._logger.debug(
-                        f"[JargonFacade] 同步黑话出现次数: "
-                        f"chat={chat_id}, updated={updated}"
-                    )
-        except Exception as e:
-            self._logger.error(f"[JargonFacade] 同步黑话出现次数失败: {e}", exc_info=True)
-
-        return updated
-
     # 4. get_jargon_statistics
     async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]:
         """获取黑话学习统计信息
diff --git a/services/database/sqlalchemy_database_manager.py b/services/database/sqlalchemy_database_manager.py
index e13af6bc..82fc5b4a 100644
--- a/services/database/sqlalchemy_database_manager.py
+++ b/services/database/sqlalchemy_database_manager.py
@@ -960,9 +960,6 @@ async def insert_jargon(self, jargon_data: Dict[str, Any]) -> Optional[int]:
     async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
         return await self._call_jargon("update_jargon", False, jargon_data)
 
-    async def sync_jargon_counts(self, chat_id: str, term_frequencies: Dict[str, int]) -> int:
-        return await self._call_jargon("sync_jargon_counts", 0, chat_id, term_frequencies)
-
     async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]:
         return await self._call_jargon(
             "get_jargon_statistics",
diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py
index 589d59d2..b30002f1 100644
--- a/services/jargon/jargon_miner.py
+++ b/services/jargon/jargon_miner.py
@@ -521,7 +521,8 @@ async def save_or_update_jargon(
                     updated_at=existing_dict.get('updated_at')
                 )
 
-                # 更新现有记录（count 由频率同步管理，此处不递增）
+                # 更新现有记录
+                existing.count = (existing.count or 0) + 1
 
                 # 合并 raw_content
                 existing_list = safe_parse_llm_json(existing.raw_content) or []
diff --git a/services/learning/message_pipeline.py b/services/learning/message_pipeline.py
index 3be8d547..6e8be3a2 100644
--- a/services/learning/message_pipeline.py
+++ b/services/learning/message_pipeline.py
@@ -256,16 +256,6 @@ async def mine_jargon(self, group_id: str) -> None:
                 if not statistical_candidates:
                     statistical_candidates = None
 
-                # Sync actual chat occurrence counts to DB
-                term_freq = self._jargon_statistical_filter._group_term_freq.get(group_id)
-                if term_freq:
-                    try:
-                        await self._db_manager.sync_jargon_counts(
-                            group_id, dict(term_freq)
-                        )
-                    except Exception as e:
-                        logger.debug(f"[JargonMining] Frequency sync failed: {e}")
-
             await jargon_miner.run_once(
                 chat_messages,
                 len(recent_messages),

From 28a9de16ac82ac1ba39d83a8683014400710efc5 Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 20:09:16 +0800
Subject: [PATCH 4/6] fix: remove jargon sort by occurrences (count not
 reliable)

---
 web_res/static/html/dashboard.html | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/web_res/static/html/dashboard.html b/web_res/static/html/dashboard.html
index 71368b6b..011c1c87 100644
--- a/web_res/static/html/dashboard.html
+++ b/web_res/static/html/dashboard.html
@@ -2489,7 +2489,6 @@ <h2>黑话与批次</h2>
                             <option value="newest">最新优先</option>
                             <option value="oldest">最早优先</option>
                             <option value="name">按名称</option>
-                            <option value="occurrences">按出现次数</option>
                         </select>
                         <input id="jargonSearchInput" class="content-search" style="width:160px;height:32px;font-size:12px;" type="search" placeholder="搜索黑话" autocomplete="off">
                     </div>
@@ -6399,8 +6398,6 @@ <h3>${escapeHtml(item.title)}</h3>
                         items.reverse();
                     } else if (state.jargon.sort === 'name') {
                         items.sort((a, b) => (a.term || a.word || '').localeCompare(b.term || b.word || ''));
-                    } else if (state.jargon.sort === 'occurrences') {
-                        items.sort((a, b) => (b.occurrences || 0) - (a.occurrences || 0));
                     }
 
                     state.jargon.items = items;

From 6a9b68f71698b3a8aa5af4642f5bdff96f6dcbdf Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 20:26:39 +0800
Subject: [PATCH 5/6] fix: prioritize real dialogue pairs over LLM-generated
 patterns

Style learning now extracts user->bot pairs from actual chat history
first (chronologically matched), falling back to LLM-generated
expression patterns only when no real pairs are found.
---
 services/core_learning/progressive_learning.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/services/core_learning/progressive_learning.py b/services/core_learning/progressive_learning.py
index c220cb97..0087c013 100644
--- a/services/core_learning/progressive_learning.py
+++ b/services/core_learning/progressive_learning.py
@@ -1206,13 +1206,9 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[
                 logger.debug(f"群组 {group_id} 没有风格分析结果且没有消息，跳过风格学习记录保存")
                 return
 
-            # 1. 保存表达模式到 expression_patterns 表
-            expression_patterns = style_analysis_dict.get('expression_patterns', [])
-            expression_patterns = self._filter_expression_patterns(expression_patterns)
-
-            # 在 fewshot 模式下，style_analysis 可能不包含 expression_patterns。
-            # 此时从数据库获取 bot 消息与用户消息合并，提取 user->bot 对话对。
-            if not expression_patterns and messages:
+            # 1. 优先从真实对话中提取 user->bot 对话对（逻辑连贯）
+            expression_patterns = []
+            if messages:
                 try:
                     merged = await self._merge_bot_messages_for_pairs(group_id, messages)
                     if merged:
@@ -1220,6 +1216,11 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[
                 except Exception as pair_err:
                     logger.debug(f"提取 fewshot 对话对失败: {pair_err}")
 
+            # Fall back to LLM-generated expression patterns only when no real dialogue pairs were extracted
+            if not expression_patterns:
+                expression_patterns = style_analysis_dict.get('expression_patterns', [])
+                expression_patterns = self._filter_expression_patterns(expression_patterns)
+
             if expression_patterns:
                 await self._save_expression_patterns(group_id, expression_patterns)
 

From adaffe25d88bb210ea9deeff9ec348d4591cdf69 Mon Sep 17 00:00:00 2001
From: YumemiDream <1803068130@qq.com>
Date: Thu, 4 Jun 2026 20:39:15 +0800
Subject: [PATCH 6/6] fix: deduplicate style learning dialogue pairs at
 extraction and injection

- Extraction: _extract_fewshot_pairs_from_merged deduplicates by
  (situation, expression) content within a single batch
- Extraction: _extract_style_dialog_pairs deduplicates learned_patterns
  and few-shot pairs
- Injection: _build_style_begin_dialogs checks existing begin_dialogs
  for matching user messages before appending, preventing duplicates
  across multiple approved reviews
---
 .../core_learning/progressive_learning.py     |  7 +++++
 webui/services/persona_review_service.py      | 27 +++++++++++++++----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/services/core_learning/progressive_learning.py b/services/core_learning/progressive_learning.py
index 0087c013..3a551598 100644
--- a/services/core_learning/progressive_learning.py
+++ b/services/core_learning/progressive_learning.py
@@ -1373,8 +1373,10 @@ def _extract_fewshot_pairs_from_merged(
 
         Mirrors the logic of ExpressionPatternLearner._extract_few_shot_pairs
         but operates on plain dicts and returns expression pattern dicts.
+        Deduplicates by (situation, expression) content.
         """
         pairs = []
+        seen = set()
         current_time = time.time()
 
         for i in range(len(merged) - 1):
@@ -1400,6 +1402,11 @@ def _extract_fewshot_pairs_from_merged(
                 if '@' in msg_text or '@' in nxt_text:
                     continue
 
+                key = (msg_text[:50], nxt_text[:100])
+                if key in seen:
+                    continue
+                seen.add(key)
+
                 pairs.append({
                     'situation': msg_text[:50],
                     'expression': nxt_text[:100],
diff --git a/webui/services/persona_review_service.py b/webui/services/persona_review_service.py
index f6f06348..ce8bad5d 100644
--- a/webui/services/persona_review_service.py
+++ b/webui/services/persona_review_service.py
@@ -176,19 +176,27 @@ def _build_change_payload(
 
     @staticmethod
     def _extract_style_dialog_pairs(review: Dict[str, Any]) -> List[Tuple[str, str]]:
-        """Extract style review dialog pairs from structured patterns or few-shot text."""
+        """Extract style review dialog pairs from structured patterns or few-shot text. Deduplicates by content."""
         dialog_pairs = []
+        seen = set()
         learned_patterns = review.get('learned_patterns', [])
         for pattern in learned_patterns:
             situation = pattern.get('situation', '') if isinstance(pattern, dict) else ''
             expression = pattern.get('expression', '') if isinstance(pattern, dict) else ''
             if situation and expression:
-                dialog_pairs.append((str(situation), str(expression)))
+                key = (str(situation), str(expression))
+                if key not in seen:
+                    seen.add(key)
+                    dialog_pairs.append(key)
 
         if not dialog_pairs:
-            dialog_pairs = PersonaReviewService._parse_few_shots_to_pairs(
+            for user_msg, assistant_msg in PersonaReviewService._parse_few_shots_to_pairs(
                 review.get('few_shots_content', '') or ''
-            )
+            ):
+                key = (user_msg, assistant_msg)
+                if key not in seen:
+                    seen.add(key)
+                    dialog_pairs.append(key)
         return dialog_pairs
 
     def _dialog_pairs_for_style_review(self, review: Dict[str, Any]) -> List[Tuple[str, str]]:
@@ -199,10 +207,19 @@ def _build_style_begin_dialogs(
         current_begin_dialogs: List[str],
         dialog_pairs: List[Tuple[str, str]]
     ) -> List[str]:
-        """Append style examples and keep only latest style example pairs."""
+        """Append style examples, skipping any pair whose user message already appears in begin_dialogs."""
         updated_dialogs = PersonaReviewService._normalize_begin_dialogs(current_begin_dialogs)
 
+        # Build set of existing user messages for dedup
+        existing_user_msgs = set()
+        for d in updated_dialogs:
+            if isinstance(d, str) and d.startswith(STYLE_BEGIN_DIALOG_PREFIX):
+                existing_user_msgs.add(d[len(STYLE_BEGIN_DIALOG_PREFIX):].strip())
+
         for user_msg, assistant_msg in dialog_pairs:
+            if user_msg.strip() in existing_user_msgs:
+                continue
+            existing_user_msgs.add(user_msg.strip())
             updated_dialogs.append(f"{STYLE_BEGIN_DIALOG_PREFIX}{user_msg}")
             updated_dialogs.append(str(assistant_msg))