NickCharlie · YumemiDream · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/models/jargon.py b/models/jargon.py
@@ -20,6 +20,7 @@ class Jargon:
     count: int = 1                          # 出现次数
     last_inference_count: int = 0           # 上次推断时的count值
     is_complete: bool = False               # 是否完成所有推断 (count>=100)
+    meaning_edited: bool = False            # 用户是否手动编辑过释义
     is_global: bool = False                 # 是否全局黑话
     chat_id: str = ""                       # 群组ID
     created_at: Optional[datetime] = None   # 创建时间
@@ -36,6 +37,7 @@ def to_dict(self) -> dict:
             'count': self.count,
             'last_inference_count': self.last_inference_count,
             'is_complete': self.is_complete,
+            'meaning_edited': self.meaning_edited,
             'is_global': self.is_global,
             'chat_id': self.chat_id,
             'created_at': self.created_at.isoformat() if self.created_at else None,

diff --git a/models/orm/jargon.py b/models/orm/jargon.py
@@ -19,6 +19,7 @@ class Jargon(Base):
     count = Column(Integer, default=1)
     last_inference_count = Column(Integer, default=0)
     is_complete = Column(Boolean, default=False)
+    meaning_edited = Column(Boolean, default=False)
     is_global = Column(Boolean, default=False)
     chat_id = Column(String(255), nullable=False, index=True)
     # 使用 BigInteger 存储 Unix 时间戳（自动迁移会将 DATETIME 转换为 BIGINT）
@@ -46,6 +47,7 @@ def to_dict(self):
             'count': self.count,
             'last_inference_count': self.last_inference_count,
             'is_complete': self.is_complete,
+            'meaning_edited': self.meaning_edited,
             'is_global': self.is_global,
             'chat_id': self.chat_id,
             'created_at': self.created_at,

diff --git a/services/core_learning/progressive_learning.py b/services/core_learning/progressive_learning.py
@@ -1295,20 +1295,21 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[
                 logger.debug(f"群组 {group_id} 没有风格分析结果且没有消息，跳过风格学习记录保存")
                 return
 
-            # 1. 保存表达模式到 expression_patterns 表
-            expression_patterns = style_analysis_dict.get('expression_patterns', [])
-            expression_patterns = self._filter_expression_patterns(expression_patterns)
-
-            # 在 fewshot 模式下，style_analysis 可能不包含 expression_patterns。
-            # 此时从数据库获取 bot 消息与用户消息合并，提取 user->bot 对话对。
-            if not expression_patterns and messages:
+            # 1. 优先从真实对话中提取 user->bot 对话对（逻辑连贯）
+            expression_patterns = []
+            if messages:
                 try:
                     merged = await self._merge_bot_messages_for_pairs(group_id, messages)
                     if merged:
                         expression_patterns = self._extract_fewshot_pairs_from_merged(merged, group_id)
                 except Exception as pair_err:
                     logger.debug(f"提取 fewshot 对话对失败: {pair_err}")
 
+            # Fall back to the LLM-generated expression patterns only when no real dialog pairs were extracted
+            if not expression_patterns:
+                expression_patterns = style_analysis_dict.get('expression_patterns', [])
+                expression_patterns = self._filter_expression_patterns(expression_patterns)
+
             if expression_patterns:
                 await self._save_expression_patterns(group_id, expression_patterns)
 
@@ -1461,8 +1462,10 @@ def _extract_fewshot_pairs_from_merged(
 
         Mirrors the logic of ExpressionPatternLearner._extract_few_shot_pairs
         but operates on plain dicts and returns expression pattern dicts.
+        Deduplicates by the truncated (situation, expression) pair that is stored (first 50 / first 100 characters).
         """
         pairs = []
+        seen = set()
         current_time = time.time()
 
         for i in range(len(merged) - 1):
@@ -1488,6 +1491,11 @@ def _extract_fewshot_pairs_from_merged(
                 if '@' in msg_text or '@' in nxt_text:
                     continue
 
+                key = (msg_text[:50], nxt_text[:100])
+                if key in seen:
+                    continue
+                seen.add(key)
+
                 pairs.append({
                     'situation': msg_text[:50],
                     'expression': nxt_text[:100],

diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py
@@ -164,6 +164,9 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
                         record.meaning = json.dumps(meaning_val, ensure_ascii=False)
                     else:
                         record.meaning = str(meaning_val) if meaning_val is not None else None
+                    # Only mark meaning_edited when explicitly set (not from inference)
+                    if jargon_data.get('meaning_edited'):
+                        record.meaning_edited = True
                 if 'is_jargon' in jargon_data:
                     record.is_jargon = jargon_data['is_jargon']
                 if 'count' in jargon_data:
@@ -476,6 +479,7 @@ async def search_jargon(
                         'is_jargon': r.is_jargon,
                         'count': r.count or 0,
                         'is_complete': r.is_complete,
+                        'meaning_edited': r.meaning_edited or False,
                         'is_global': r.is_global or False,
                         'chat_id': r.chat_id,
                         'created_at': r.created_at,

diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py
@@ -299,6 +299,8 @@ def _should_infer_meaning(self, jargon: Jargon) -> bool:
         """
         if jargon.is_complete:
             return False
+        if jargon.meaning_edited:
+            return False
 
         count = jargon.count or 0
         last_inference = jargon.last_inference_count or 0
@@ -512,6 +514,7 @@ async def save_or_update_jargon(
                     count=existing_dict.get('count', 1),
                     last_inference_count=existing_dict.get('last_inference_count', 0),
                     is_complete=existing_dict.get('is_complete', False),
+                    meaning_edited=existing_dict.get('meaning_edited', False),
                     is_global=existing_dict.get('is_global', False),
                     chat_id=existing_dict.get('chat_id', ''),
                     created_at=existing_dict.get('created_at'),

diff --git a/web_res/static/html/dashboard.html b/web_res/static/html/dashboard.html
@@ -2530,7 +2530,6 @@ <h2>黑话与批次</h2>
                             <option value="newest">最新优先</option>
                             <option value="oldest">最早优先</option>
                             <option value="name">按名称</option>
-                            <option value="occurrences">按出现次数</option>
                         </select>
                         <input id="jargonSearchInput" class="content-search" style="width:160px;height:32px;font-size:12px;" type="search" placeholder="搜索黑话" autocomplete="off">
                     </div>
@@ -6563,8 +6562,6 @@ <h3>${escapeHtml(item.title)}</h3>
                         items.reverse();
                     } else if (state.jargon.sort === 'name') {
                         items.sort((a, b) => (a.term || a.word || '').localeCompare(b.term || b.word || ''));
-                    } else if (state.jargon.sort === 'occurrences') {
-                        items.sort((a, b) => (b.occurrences || 0) - (a.occurrences || 0));
                     }
 
                     state.jargon.items = items;

diff --git a/webui/services/jargon_service.py b/webui/services/jargon_service.py
@@ -352,6 +352,7 @@ async def update_jargon(
                 payload["content"] = content
             if meaning is not None:
                 payload["meaning"] = meaning
+                payload["meaning_edited"] = True
 
             if len(payload) <= 1:
                 return False, "没有需要更新的字段", self._format_jargon_for_frontend(current)

diff --git a/webui/services/persona_review_service.py b/webui/services/persona_review_service.py
@@ -184,19 +184,27 @@ def _build_change_payload(
 
     @staticmethod
     def _extract_style_dialog_pairs(review: Dict[str, Any]) -> List[Tuple[str, str]]:
-        """Extract style review dialog pairs from structured patterns or few-shot text."""
+        """Extract style review dialog pairs from structured patterns or few-shot text. Deduplicates by content."""
         dialog_pairs = []
+        seen = set()
         learned_patterns = review.get('learned_patterns', [])
         for pattern in learned_patterns:
             situation = pattern.get('situation', '') if isinstance(pattern, dict) else ''
             expression = pattern.get('expression', '') if isinstance(pattern, dict) else ''
             if situation and expression:
-                dialog_pairs.append((str(situation), str(expression)))
+                key = (str(situation), str(expression))
+                if key not in seen:
+                    seen.add(key)
+                    dialog_pairs.append(key)
 
         if not dialog_pairs:
-            dialog_pairs = PersonaReviewService._parse_few_shots_to_pairs(
+            for user_msg, assistant_msg in PersonaReviewService._parse_few_shots_to_pairs(
                 review.get('few_shots_content', '') or ''
-            )
+            ):
+                key = (user_msg, assistant_msg)
+                if key not in seen:
+                    seen.add(key)
+                    dialog_pairs.append(key)
         return dialog_pairs
 
     def _dialog_pairs_for_style_review(self, review: Dict[str, Any]) -> List[Tuple[str, str]]:
@@ -207,10 +215,19 @@ def _build_style_begin_dialogs(
         current_begin_dialogs: List[str],
         dialog_pairs: List[Tuple[str, str]]
     ) -> List[str]:
-        """Append style examples and keep only latest style example pairs."""
+        """Append style examples, skipping any pair whose user message already appears as a style example in begin_dialogs or earlier in dialog_pairs."""
         updated_dialogs = PersonaReviewService._normalize_begin_dialogs(current_begin_dialogs)
 
+        # Build set of existing user messages for dedup
+        existing_user_msgs = set()
+        for d in updated_dialogs:
+            if isinstance(d, str) and d.startswith(STYLE_BEGIN_DIALOG_PREFIX):
+                existing_user_msgs.add(d[len(STYLE_BEGIN_DIALOG_PREFIX):].strip())
+
         for user_msg, assistant_msg in dialog_pairs:
+            if user_msg.strip() in existing_user_msgs:
+                continue
+            existing_user_msgs.add(user_msg.strip())
             updated_dialogs.append(f"{STYLE_BEGIN_DIALOG_PREFIX}{user_msg}")
             updated_dialogs.append(str(assistant_msg))