Skip to content
2 changes: 2 additions & 0 deletions models/jargon.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Jargon:
count: int = 1 # 出现次数
last_inference_count: int = 0 # 上次推断时的count值
is_complete: bool = False # 是否完成所有推断 (count>=100)
meaning_edited: bool = False # 用户是否手动编辑过释义
is_global: bool = False # 是否全局黑话
chat_id: str = "" # 群组ID
created_at: Optional[datetime] = None # 创建时间
Expand All @@ -36,6 +37,7 @@ def to_dict(self) -> dict:
'count': self.count,
'last_inference_count': self.last_inference_count,
'is_complete': self.is_complete,
'meaning_edited': self.meaning_edited,
'is_global': self.is_global,
'chat_id': self.chat_id,
'created_at': self.created_at.isoformat() if self.created_at else None,
Expand Down
2 changes: 2 additions & 0 deletions models/orm/jargon.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Jargon(Base):
count = Column(Integer, default=1)
last_inference_count = Column(Integer, default=0)
is_complete = Column(Boolean, default=False)
meaning_edited = Column(Boolean, default=False)
is_global = Column(Boolean, default=False)
chat_id = Column(String(255), nullable=False, index=True)
# 使用 BigInteger 存储 Unix 时间戳(自动迁移会将 DATETIME 转换为 BIGINT)
Expand Down Expand Up @@ -46,6 +47,7 @@ def to_dict(self):
'count': self.count,
'last_inference_count': self.last_inference_count,
'is_complete': self.is_complete,
'meaning_edited': self.meaning_edited,
'is_global': self.is_global,
'chat_id': self.chat_id,
'created_at': self.created_at,
Expand Down
22 changes: 15 additions & 7 deletions services/core_learning/progressive_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,20 +1295,21 @@ async def _save_style_learning_record(self, group_id: str, style_analysis: Dict[
logger.debug(f"群组 {group_id} 没有风格分析结果且没有消息,跳过风格学习记录保存")
return

# 1. 保存表达模式到 expression_patterns 表
expression_patterns = style_analysis_dict.get('expression_patterns', [])
expression_patterns = self._filter_expression_patterns(expression_patterns)

# 在 fewshot 模式下,style_analysis 可能不包含 expression_patterns。
# 此时从数据库获取 bot 消息与用户消息合并,提取 user->bot 对话对。
if not expression_patterns and messages:
# 1. 优先从真实对话中提取 user->bot 对话对(逻辑连贯)
expression_patterns = []
if messages:
try:
merged = await self._merge_bot_messages_for_pairs(group_id, messages)
if merged:
expression_patterns = self._extract_fewshot_pairs_from_merged(merged, group_id)
except Exception as pair_err:
logger.debug(f"提取 fewshot 对话对失败: {pair_err}")

# 真实对话对不足时,回退到 LLM 生成的表达模式
if not expression_patterns:
expression_patterns = style_analysis_dict.get('expression_patterns', [])
expression_patterns = self._filter_expression_patterns(expression_patterns)

if expression_patterns:
await self._save_expression_patterns(group_id, expression_patterns)

Expand Down Expand Up @@ -1461,8 +1462,10 @@ def _extract_fewshot_pairs_from_merged(

Mirrors the logic of ExpressionPatternLearner._extract_few_shot_pairs
but operates on plain dicts and returns expression pattern dicts.
Deduplicates by (situation, expression) content.
"""
pairs = []
seen = set()
current_time = time.time()

for i in range(len(merged) - 1):
Expand All @@ -1488,6 +1491,11 @@ def _extract_fewshot_pairs_from_merged(
if '@' in msg_text or '@' in nxt_text:
continue

key = (msg_text[:50], nxt_text[:100])
if key in seen:
continue
seen.add(key)

pairs.append({
'situation': msg_text[:50],
'expression': nxt_text[:100],
Expand Down
4 changes: 4 additions & 0 deletions services/database/facades/jargon_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ async def update_jargon(self, jargon_data: Dict[str, Any]) -> bool:
record.meaning = json.dumps(meaning_val, ensure_ascii=False)
else:
record.meaning = str(meaning_val) if meaning_val is not None else None
# Only mark meaning_edited when explicitly set (not from inference)
if jargon_data.get('meaning_edited'):
record.meaning_edited = True
Comment thread
YumemiDream marked this conversation as resolved.
if 'is_jargon' in jargon_data:
record.is_jargon = jargon_data['is_jargon']
if 'count' in jargon_data:
Expand Down Expand Up @@ -476,6 +479,7 @@ async def search_jargon(
'is_jargon': r.is_jargon,
'count': r.count or 0,
'is_complete': r.is_complete,
'meaning_edited': r.meaning_edited or False,
'is_global': r.is_global or False,
'chat_id': r.chat_id,
'created_at': r.created_at,
Expand Down
3 changes: 3 additions & 0 deletions services/jargon/jargon_miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def _should_infer_meaning(self, jargon: Jargon) -> bool:
"""
if jargon.is_complete:
return False
if jargon.meaning_edited:
return False

count = jargon.count or 0
last_inference = jargon.last_inference_count or 0
Expand Down Expand Up @@ -512,6 +514,7 @@ async def save_or_update_jargon(
count=existing_dict.get('count', 1),
last_inference_count=existing_dict.get('last_inference_count', 0),
is_complete=existing_dict.get('is_complete', False),
meaning_edited=existing_dict.get('meaning_edited', False),
is_global=existing_dict.get('is_global', False),
chat_id=existing_dict.get('chat_id', ''),
created_at=existing_dict.get('created_at'),
Expand Down
3 changes: 0 additions & 3 deletions web_res/static/html/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -2530,7 +2530,6 @@ <h2>黑话与批次</h2>
<option value="newest">最新优先</option>
<option value="oldest">最早优先</option>
<option value="name">按名称</option>
<option value="occurrences">按出现次数</option>
</select>
<input id="jargonSearchInput" class="content-search" style="width:160px;height:32px;font-size:12px;" type="search" placeholder="搜索黑话" autocomplete="off">
</div>
Expand Down Expand Up @@ -6563,8 +6562,6 @@ <h3>${escapeHtml(item.title)}</h3>
items.reverse();
} else if (state.jargon.sort === 'name') {
items.sort((a, b) => (a.term || a.word || '').localeCompare(b.term || b.word || ''));
} else if (state.jargon.sort === 'occurrences') {
items.sort((a, b) => (b.occurrences || 0) - (a.occurrences || 0));
}

state.jargon.items = items;
Expand Down
1 change: 1 addition & 0 deletions webui/services/jargon_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ async def update_jargon(
payload["content"] = content
if meaning is not None:
payload["meaning"] = meaning
payload["meaning_edited"] = True

if len(payload) <= 1:
return False, "没有需要更新的字段", self._format_jargon_for_frontend(current)
Expand Down
27 changes: 22 additions & 5 deletions webui/services/persona_review_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,19 +184,27 @@ def _build_change_payload(

@staticmethod
def _extract_style_dialog_pairs(review: Dict[str, Any]) -> List[Tuple[str, str]]:
    """Extract de-duplicated style review dialog pairs.

    Pairs are read from the structured ``learned_patterns`` entries of
    ``review`` first. Only when those yield nothing is the few-shot text in
    ``few_shots_content`` parsed as a fallback.

    Args:
        review: Review record. Reads ``learned_patterns`` (an iterable of
            dicts carrying ``situation`` and ``expression``) and
            ``few_shots_content`` (few-shot text handed to
            ``_parse_few_shots_to_pairs``).

    Returns:
        ``(user_message, assistant_message)`` tuples in first-seen order,
        with exact duplicate pairs removed.
    """
    dialog_pairs: List[Tuple[str, str]] = []
    seen = set()

    def _add_unique(pair: Tuple[str, str]) -> None:
        # Keep first-seen order while dropping exact duplicate pairs.
        if pair not in seen:
            seen.add(pair)
            dialog_pairs.append(pair)

    # ``or []`` tolerates a record that stores an explicit None here.
    for pattern in review.get('learned_patterns') or []:
        if not isinstance(pattern, dict):
            # Non-dict entries carry no situation/expression; skip them.
            continue
        situation = pattern.get('situation', '')
        expression = pattern.get('expression', '')
        if situation and expression:
            _add_unique((str(situation), str(expression)))

    if not dialog_pairs:
        # No usable structured patterns: fall back to the few-shot text.
        parsed_pairs = PersonaReviewService._parse_few_shots_to_pairs(
            review.get('few_shots_content', '') or ''
        )
        for user_msg, assistant_msg in parsed_pairs:
            _add_unique((user_msg, assistant_msg))
    return dialog_pairs

def _dialog_pairs_for_style_review(self, review: Dict[str, Any]) -> List[Tuple[str, str]]:
Expand All @@ -207,10 +215,19 @@ def _build_style_begin_dialogs(
current_begin_dialogs: List[str],
dialog_pairs: List[Tuple[str, str]]
) -> List[str]:
"""Append style examples and keep only latest style example pairs."""
"""Append style examples, skipping pairs already present in begin_dialogs."""
updated_dialogs = PersonaReviewService._normalize_begin_dialogs(current_begin_dialogs)

# Build set of existing user messages for dedup
existing_user_msgs = set()
for d in updated_dialogs:
if isinstance(d, str) and d.startswith(STYLE_BEGIN_DIALOG_PREFIX):
existing_user_msgs.add(d[len(STYLE_BEGIN_DIALOG_PREFIX):].strip())

for user_msg, assistant_msg in dialog_pairs:
if user_msg.strip() in existing_user_msgs:
continue
existing_user_msgs.add(user_msg.strip())
updated_dialogs.append(f"{STYLE_BEGIN_DIALOG_PREFIX}{user_msg}")
updated_dialogs.append(str(assistant_msg))

Expand Down
Loading