diff --git a/models/jargon.py b/models/jargon.py index a3272c5f..109b1d0b 100644 --- a/models/jargon.py +++ b/models/jargon.py @@ -20,6 +20,7 @@ class Jargon: count: int = 1 # 出现次数 last_inference_count: int = 0 # 上次推断时的count值 is_complete: bool = False # 是否完成所有推断 (count>=100) + meaning_edited: bool = False # 用户是否手动编辑过释义 is_global: bool = False # 是否全局黑话 chat_id: str = "" # 群组ID created_at: Optional[datetime] = None # 创建时间 @@ -36,6 +37,7 @@ def to_dict(self) -> dict: 'count': self.count, 'last_inference_count': self.last_inference_count, 'is_complete': self.is_complete, + 'meaning_edited': self.meaning_edited, 'is_global': self.is_global, 'chat_id': self.chat_id, 'created_at': self.created_at.isoformat() if self.created_at else None, diff --git a/models/orm/jargon.py b/models/orm/jargon.py index b8cf462f..cf313279 100644 --- a/models/orm/jargon.py +++ b/models/orm/jargon.py @@ -19,6 +19,7 @@ class Jargon(Base): count = Column(Integer, default=1) last_inference_count = Column(Integer, default=0) is_complete = Column(Boolean, default=False) + meaning_edited = Column(Boolean, default=False) is_global = Column(Boolean, default=False) chat_id = Column(String(255), nullable=False, index=True) # 使用 BigInteger 存储 Unix 时间戳(自动迁移会将 DATETIME 转换为 BIGINT) @@ -46,6 +47,7 @@ def to_dict(self): 'count': self.count, 'last_inference_count': self.last_inference_count, 'is_complete': self.is_complete, + 'meaning_edited': self.meaning_edited, 'is_global': self.is_global, 'chat_id': self.chat_id, 'created_at': self.created_at, diff --git a/services/core_learning/progressive_learning.py b/services/core_learning/progressive_learning.py index e88696d3..e5c805f6 100644 --- a/services/core_learning/progressive_learning.py +++ b/services/core_learning/progressive_learning.py @@ -1295,13 +1295,9 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[ logger.debug(f"群组 {group_id} 没有风格分析结果且没有消息,跳过风格学习记录保存") return - # 1. 保存表达模式到 expression_patterns 表 - expression_patterns = style_analysis_dict.get('expression_patterns', []) - expression_patterns = self._filter_expression_patterns(expression_patterns) - - # 在 fewshot 模式下,style_analysis 可能不包含 expression_patterns。 - # 此时从数据库获取 bot 消息与用户消息合并,提取 user->bot 对话对。 - if not expression_patterns and messages: + # 1. 优先从真实对话中提取 user->bot 对话对(逻辑连贯) + expression_patterns = [] + if messages: try: merged = await self._merge_bot_messages_for_pairs(group_id, messages) if merged: @@ -1309,6 +1305,11 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[ except Exception as pair_err: logger.debug(f"提取 fewshot 对话对失败: {pair_err}") + # 真实对话对不足时,回退到 LLM 生成的表达模式 + if not expression_patterns: + expression_patterns = style_analysis_dict.get('expression_patterns', []) + expression_patterns = self._filter_expression_patterns(expression_patterns) + if expression_patterns: await self._save_expression_patterns(group_id, expression_patterns) @@ -1461,8 +1462,10 @@ def _extract_fewshot_pairs_from_merged( Mirrors the logic of ExpressionPatternLearner._extract_few_shot_pairs but operates on plain dicts and returns expression pattern dicts. + Deduplicates by (situation, expression) content. """ pairs = [] + seen = set() current_time = time.time() for i in range(len(merged) - 1): @@ -1488,6 +1491,11 @@ def _extract_fewshot_pairs_from_merged( if '@' in msg_text or '@' in nxt_text: continue + key = (msg_text[:50], nxt_text[:100]) + if key in seen: + continue + seen.add(key) + pairs.append({ 'situation': msg_text[:50], 'expression': nxt_text[:100], diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py index f978fd59..448e1d20 100644 --- a/services/database/facades/jargon_facade.py +++ b/services/database/facades/jargon_facade.py @@ -164,6 +164,9 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: record.meaning = json.dumps(meaning_val, ensure_ascii=False) else: record.meaning = str(meaning_val) if meaning_val is not None else None + # Only mark meaning_edited when explicitly set (not from inference) + if jargon_data.get('meaning_edited'): + record.meaning_edited = True if 'is_jargon' in jargon_data: record.is_jargon = jargon_data['is_jargon'] if 'count' in jargon_data: @@ -476,6 +479,7 @@ async def search_jargon( 'is_jargon': r.is_jargon, 'count': r.count or 0, 'is_complete': r.is_complete, + 'meaning_edited': r.meaning_edited or False, 'is_global': r.is_global or False, 'chat_id': r.chat_id, 'created_at': r.created_at, diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py index 97ae33a2..b30002f1 100644 --- a/services/jargon/jargon_miner.py +++ b/services/jargon/jargon_miner.py @@ -299,6 +299,8 @@ def _should_infer_meaning(self, jargon: Jargon) -> bool: """ if jargon.is_complete: return False + if jargon.meaning_edited: + return False count = jargon.count or 0 last_inference = jargon.last_inference_count or 0 @@ -512,6 +514,7 @@ async def save_or_update_jargon( count=existing_dict.get('count', 1), last_inference_count=existing_dict.get('last_inference_count', 0), is_complete=existing_dict.get('is_complete', False), + meaning_edited=existing_dict.get('meaning_edited', False), is_global=existing_dict.get('is_global', False), chat_id=existing_dict.get('chat_id', ''), created_at=existing_dict.get('created_at'), diff --git a/web_res/static/html/dashboard.html b/web_res/static/html/dashboard.html index d61e32b4..d3fad599 100644 --- a/web_res/static/html/dashboard.html +++ b/web_res/static/html/dashboard.html @@ -2530,7 +2530,6 @@