From a0dfab482f4cd93605628719c04568e6678ff46d Mon Sep 17 00:00:00 2001 From: YumemiDream <1803068130@qq.com> Date: Thu, 4 Jun 2026 19:41:19 +0800 Subject: [PATCH 1/6] fix: prevent plugin from overwriting manually edited jargon meanings - Add meaning_edited flag to Jargon ORM and dataclass - Set meaning_edited=True when user edits meaning via dashboard - _should_infer_meaning skips jargon with meaning_edited=True - Auto-migration will add the new column on startup --- models/jargon.py | 2 ++ models/orm/jargon.py | 2 ++ services/database/facades/jargon_facade.py | 4 ++++ services/jargon/jargon_miner.py | 3 +++ webui/services/jargon_service.py | 1 + 5 files changed, 12 insertions(+) diff --git a/models/jargon.py b/models/jargon.py index a3272c5f..109b1d0b 100644 --- a/models/jargon.py +++ b/models/jargon.py @@ -20,6 +20,7 @@ class Jargon: count: int = 1 # 出现次数 last_inference_count: int = 0 # 上次推断时的count值 is_complete: bool = False # 是否完成所有推断 (count>=100) + meaning_edited: bool = False # 用户是否手动编辑过释义 is_global: bool = False # 是否全局黑话 chat_id: str = "" # 群组ID created_at: Optional[datetime] = None # 创建时间 @@ -36,6 +37,7 @@ def to_dict(self) -> dict: 'count': self.count, 'last_inference_count': self.last_inference_count, 'is_complete': self.is_complete, + 'meaning_edited': self.meaning_edited, 'is_global': self.is_global, 'chat_id': self.chat_id, 'created_at': self.created_at.isoformat() if self.created_at else None, diff --git a/models/orm/jargon.py b/models/orm/jargon.py index b8cf462f..cf313279 100644 --- a/models/orm/jargon.py +++ b/models/orm/jargon.py @@ -19,6 +19,7 @@ class Jargon(Base): count = Column(Integer, default=1) last_inference_count = Column(Integer, default=0) is_complete = Column(Boolean, default=False) + meaning_edited = Column(Boolean, default=False) is_global = Column(Boolean, default=False) chat_id = Column(String(255), nullable=False, index=True) # 使用 BigInteger 存储 Unix 时间戳(自动迁移会将 DATETIME 转换为 BIGINT) @@ -46,6 +47,7 @@ def to_dict(self): 'count': self.count, 'last_inference_count': self.last_inference_count, 'is_complete': self.is_complete, + 'meaning_edited': self.meaning_edited, 'is_global': self.is_global, 'chat_id': self.chat_id, 'created_at': self.created_at, diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py index f978fd59..448e1d20 100644 --- a/services/database/facades/jargon_facade.py +++ b/services/database/facades/jargon_facade.py @@ -164,6 +164,9 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: record.meaning = json.dumps(meaning_val, ensure_ascii=False) else: record.meaning = str(meaning_val) if meaning_val is not None else None + # Only mark meaning_edited when explicitly set (not from inference) + if jargon_data.get('meaning_edited'): + record.meaning_edited = True if 'is_jargon' in jargon_data: record.is_jargon = jargon_data['is_jargon'] if 'count' in jargon_data: @@ -476,6 +479,7 @@ async def search_jargon( 'is_jargon': r.is_jargon, 'count': r.count or 0, 'is_complete': r.is_complete, + 'meaning_edited': r.meaning_edited or False, 'is_global': r.is_global or False, 'chat_id': r.chat_id, 'created_at': r.created_at, diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py index 97ae33a2..b30002f1 100644 --- a/services/jargon/jargon_miner.py +++ b/services/jargon/jargon_miner.py @@ -299,6 +299,8 @@ def _should_infer_meaning(self, jargon: Jargon) -> bool: """ if jargon.is_complete: return False + if jargon.meaning_edited: + return False count = jargon.count or 0 last_inference = jargon.last_inference_count or 0 @@ -512,6 +514,7 @@ async def save_or_update_jargon( count=existing_dict.get('count', 1), last_inference_count=existing_dict.get('last_inference_count', 0), is_complete=existing_dict.get('is_complete', False), + meaning_edited=existing_dict.get('meaning_edited', False), is_global=existing_dict.get('is_global', False), chat_id=existing_dict.get('chat_id', ''), created_at=existing_dict.get('created_at'), diff --git a/webui/services/jargon_service.py b/webui/services/jargon_service.py index ef4badb9..2f82192e 100644 --- a/webui/services/jargon_service.py +++ b/webui/services/jargon_service.py @@ -352,6 +352,7 @@ async def update_jargon( payload["content"] = content if meaning is not None: payload["meaning"] = meaning + payload["meaning_edited"] = True if len(payload) <= 1: return False, "没有需要更新的字段", self._format_jargon_for_frontend(current) From 0194dd411170b5c17afdc1fcc51f3ea3654d694f Mon Sep 17 00:00:00 2001 From: YumemiDream <1803068130@qq.com> Date: Thu, 4 Jun 2026 19:58:42 +0800 Subject: [PATCH 2/6] fix: change jargon count to actual chat occurrence frequency - Add sync_jargon_counts to facade: bulk-update count from statistical filter's term frequency table - mine_jargon syncs filter frequencies to DB before inference - Remove manual count+1 in save_or_update_jargon (count now managed by frequency sync) - Inference thresholds [3,6,10,20,40,60,100] now reflect actual chat occurrences, not LLM validation pass count --- services/database/facades/jargon_facade.py | 46 +++++++++++++++++++ .../database/sqlalchemy_database_manager.py | 3 ++ services/jargon/jargon_miner.py | 3 +- services/learning/message_pipeline.py | 10 ++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py index 448e1d20..cba48273 100644 --- a/services/database/facades/jargon_facade.py +++ b/services/database/facades/jargon_facade.py @@ -195,6 +195,52 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: self._logger.error(f"[JargonFacade] 更新黑话失败: {e}", exc_info=True) return False + # 3b. sync_jargon_counts + async def sync_jargon_counts( + self, chat_id: str, term_frequencies: Dict[str, int] + ) -> int: + """Sync occurrence counts for existing jargon in a group. + + Only updates jargon terms that already exist in DB for *chat_id*. + Returns the number of records updated. + + Args: + chat_id: Group ID. + term_frequencies: ``{term: actual_chat_count}`` from the + statistical filter. + """ + if not term_frequencies: + return 0 + + updated = 0 + try: + async with self.get_session() as session: + stmt = select(Jargon).where( + Jargon.chat_id == chat_id, + Jargon.content.in_(list(term_frequencies.keys())), + ) + result = await session.execute(stmt) + records = result.scalars().all() + + now = int(time.time()) + for record in records: + new_count = term_frequencies.get(record.content, 0) + if new_count > (record.count or 0): + record.count = new_count + record.updated_at = now + updated += 1 + + if updated: + await session.commit() + self._logger.debug( + f"[JargonFacade] 同步黑话出现次数: " + f"chat={chat_id}, updated={updated}" + ) + except Exception as e: + self._logger.error(f"[JargonFacade] 同步黑话出现次数失败: {e}", exc_info=True) + + return updated + # 4. get_jargon_statistics async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]: """获取黑话学习统计信息 diff --git a/services/database/sqlalchemy_database_manager.py b/services/database/sqlalchemy_database_manager.py index 82fc5b4a..e13af6bc 100644 --- a/services/database/sqlalchemy_database_manager.py +++ b/services/database/sqlalchemy_database_manager.py @@ -960,6 +960,9 @@ async def insert_jargon(self, jargon_data: Dict[str, Any]) -> Optional[int]: async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: return await self._call_jargon("update_jargon", False, jargon_data) + async def sync_jargon_counts(self, chat_id: str, term_frequencies: Dict[str, int]) -> int: + return await self._call_jargon("sync_jargon_counts", 0, chat_id, term_frequencies) + async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]: return await self._call_jargon( "get_jargon_statistics", diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py index b30002f1..589d59d2 100644 --- a/services/jargon/jargon_miner.py +++ b/services/jargon/jargon_miner.py @@ -521,8 +521,7 @@ async def save_or_update_jargon( updated_at=existing_dict.get('updated_at') ) - # 更新现有记录 - existing.count = (existing.count or 0) + 1 + # 更新现有记录(count 由频率同步管理,此处不递增) # 合并 raw_content existing_list = safe_parse_llm_json(existing.raw_content) or [] diff --git a/services/learning/message_pipeline.py b/services/learning/message_pipeline.py index 6e8be3a2..3be8d547 100644 --- a/services/learning/message_pipeline.py +++ b/services/learning/message_pipeline.py @@ -256,6 +256,16 @@ async def mine_jargon(self, group_id: str) -> None: if not statistical_candidates: statistical_candidates = None + # Sync actual chat occurrence counts to DB + term_freq = self._jargon_statistical_filter._group_term_freq.get(group_id) + if term_freq: + try: + await self._db_manager.sync_jargon_counts( + group_id, dict(term_freq) + ) + except Exception as e: + logger.debug(f"[JargonMining] Frequency sync failed: {e}") + await jargon_miner.run_once( chat_messages, len(recent_messages), From 4ef77bf7761bf310e2d9ee9f201be93d3181d0bb Mon Sep 17 00:00:00 2001 From: YumemiDream <1803068130@qq.com> Date: Thu, 4 Jun 2026 20:08:37 +0800 Subject: [PATCH 3/6] Revert "fix: change jargon count to actual chat occurrence frequency" This reverts commit 0194dd411170b5c17afdc1fcc51f3ea3654d694f. --- services/database/facades/jargon_facade.py | 46 ------------------- .../database/sqlalchemy_database_manager.py | 3 -- services/jargon/jargon_miner.py | 3 +- services/learning/message_pipeline.py | 10 ---- 4 files changed, 2 insertions(+), 60 deletions(-) diff --git a/services/database/facades/jargon_facade.py b/services/database/facades/jargon_facade.py index cba48273..448e1d20 100644 --- a/services/database/facades/jargon_facade.py +++ b/services/database/facades/jargon_facade.py @@ -195,52 +195,6 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: self._logger.error(f"[JargonFacade] 更新黑话失败: {e}", exc_info=True) return False - # 3b. sync_jargon_counts - async def sync_jargon_counts( - self, chat_id: str, term_frequencies: Dict[str, int] - ) -> int: - """Sync occurrence counts for existing jargon in a group. - - Only updates jargon terms that already exist in DB for *chat_id*. - Returns the number of records updated. - - Args: - chat_id: Group ID. - term_frequencies: ``{term: actual_chat_count}`` from the - statistical filter. - """ - if not term_frequencies: - return 0 - - updated = 0 - try: - async with self.get_session() as session: - stmt = select(Jargon).where( - Jargon.chat_id == chat_id, - Jargon.content.in_(list(term_frequencies.keys())), - ) - result = await session.execute(stmt) - records = result.scalars().all() - - now = int(time.time()) - for record in records: - new_count = term_frequencies.get(record.content, 0) - if new_count > (record.count or 0): - record.count = new_count - record.updated_at = now - updated += 1 - - if updated: - await session.commit() - self._logger.debug( - f"[JargonFacade] 同步黑话出现次数: " - f"chat={chat_id}, updated={updated}" - ) - except Exception as e: - self._logger.error(f"[JargonFacade] 同步黑话出现次数失败: {e}", exc_info=True) - - return updated - # 4. get_jargon_statistics async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]: """获取黑话学习统计信息 diff --git a/services/database/sqlalchemy_database_manager.py b/services/database/sqlalchemy_database_manager.py index e13af6bc..82fc5b4a 100644 --- a/services/database/sqlalchemy_database_manager.py +++ b/services/database/sqlalchemy_database_manager.py @@ -960,9 +960,6 @@ async def insert_jargon(self, jargon_data: Dict[str, Any]) -> Optional[int]: async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool: return await self._call_jargon("update_jargon", False, jargon_data) - async def sync_jargon_counts(self, chat_id: str, term_frequencies: Dict[str, int]) -> int: - return await self._call_jargon("sync_jargon_counts", 0, chat_id, term_frequencies) - async def get_jargon_statistics(self, group_id: str = None) -> Dict[str, Any]: return await self._call_jargon( "get_jargon_statistics", diff --git a/services/jargon/jargon_miner.py b/services/jargon/jargon_miner.py index 589d59d2..b30002f1 100644 --- a/services/jargon/jargon_miner.py +++ b/services/jargon/jargon_miner.py @@ -521,7 +521,8 @@ async def save_or_update_jargon( updated_at=existing_dict.get('updated_at') ) - # 更新现有记录(count 由频率同步管理,此处不递增) + # 更新现有记录 + existing.count = (existing.count or 0) + 1 # 合并 raw_content existing_list = safe_parse_llm_json(existing.raw_content) or [] diff --git a/services/learning/message_pipeline.py b/services/learning/message_pipeline.py index 3be8d547..6e8be3a2 100644 --- a/services/learning/message_pipeline.py +++ b/services/learning/message_pipeline.py @@ -256,16 +256,6 @@ async def mine_jargon(self, group_id: str) -> None: if not statistical_candidates: statistical_candidates = None - # Sync actual chat occurrence counts to DB - term_freq = self._jargon_statistical_filter._group_term_freq.get(group_id) - if term_freq: - try: - await self._db_manager.sync_jargon_counts( - group_id, dict(term_freq) - ) - except Exception as e: - logger.debug(f"[JargonMining] Frequency sync failed: {e}") - await jargon_miner.run_once( chat_messages, len(recent_messages), From 28a9de16ac82ac1ba39d83a8683014400710efc5 Mon Sep 17 00:00:00 2001 From: YumemiDream <1803068130@qq.com> Date: Thu, 4 Jun 2026 20:09:16 +0800 Subject: [PATCH 4/6] fix: remove jargon sort by occurrences (count not reliable) --- web_res/static/html/dashboard.html | 3 --- 1 file changed, 3 deletions(-) diff --git a/web_res/static/html/dashboard.html b/web_res/static/html/dashboard.html index 71368b6b..011c1c87 100644 --- a/web_res/static/html/dashboard.html +++ b/web_res/static/html/dashboard.html @@ -2489,7 +2489,6 @@