-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebate_engine.py
More file actions
123 lines (91 loc) · 3.16 KB
/
debate_engine.py
File metadata and controls
123 lines (91 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class DebateEngine:
    """
    Multi-agent debate system:
    - agents produce independent outputs
    - optional cross-critique
    - judge scoring via LLM
    - returns ranked reasoning
    """

    def __init__(self, llm_router):
        # Router used only by the judge in score(); agents carry their own models.
        self.llm_router = llm_router

    # =====================================================
    # MAIN DEBATE LOOP
    # =====================================================
    def run(self, agents, prompt, task_type="reasoning"):
        """Run one debate round-trip: independent generation, then cross-critique.

        Args:
            agents: iterable of objects exposing ``name`` and
                ``think(prompt, task_type) -> (model, response)``.
            prompt: task text given verbatim to every agent.
            task_type: forwarded to each agent's round-1 ``think`` call.

        Returns:
            dict with keys ``outputs`` (agent name -> {"model", "response"})
            and ``critiques`` (agent name -> {"model", "critique"}).
        """
        outputs = {}
        # ---------------------------------------------
        # ROUND 1: independent generation
        # ---------------------------------------------
        for agent in agents:
            model, response = agent.think(prompt, task_type)
            outputs[agent.name] = {
                "model": model,
                "response": response
            }
        # ---------------------------------------------
        # ROUND 2: optional critique pass
        # ---------------------------------------------
        critiques = self._critique_round(agents, outputs, prompt)
        return {
            "outputs": outputs,
            "critiques": critiques
        }

    # =====================================================
    # CRITIQUE PHASE (agents evaluate each other)
    # =====================================================
    def _critique_round(self, agents, outputs, prompt):
        """Have each agent critique every OTHER agent's round-1 answer.

        Returns a dict: agent name -> {"model", "critique"}.
        """
        critiques = {}
        for agent in agents:
            # Concatenate everyone else's answers; an agent never sees its own.
            others = "\n\n".join(
                f"{name}: {data['response']}"
                for name, data in outputs.items()
                if name != agent.name
            )
            critique_prompt = f"""
You are {agent.name}.
Original task:
{prompt}
Other agents' answers:
{others}
Critique the reasoning quality.
Be strict, point out errors, contradictions, or improvements.
Return concise critique only.
"""
            model, critique = agent.think(critique_prompt, task_type="critique")
            critiques[agent.name] = {
                "model": model,
                "critique": critique
            }
        return critiques

    # =====================================================
    # SCORING (LLM JUDGE)
    # =====================================================
    def score(self, debate_result, judge_model="gpt"):
        """Score each agent's answer 1-10 with an LLM judge.

        Args:
            debate_result: the dict returned by :meth:`run`.
            judge_model: model key passed to ``self.llm_router.call``.

        Returns:
            dict: agent name -> float score (0.0 when the judge's reply
            contains no number at all).

        The judge is asked for a bare number, but real replies often look
        like ``"Score: 8/10"``; we therefore extract the first numeric
        token instead of requiring the whole reply to be a float, and we
        catch nothing broadly (the original bare ``except:`` also swallowed
        KeyboardInterrupt/SystemExit).
        """
        import re  # local import: no file-level import block is in scope here

        scores = {}
        outputs = debate_result["outputs"]
        critiques = debate_result["critiques"]
        for name, data in outputs.items():
            critique = critiques.get(name, {}).get("critique", "")
            eval_prompt = f"""
You are a strict reasoning evaluator.
Task quality scoring (1-10):
ORIGINAL ANSWER:
{data['response']}
CRITIQUE:
{critique}
Score ONLY a number (1-10).
"""
            raw_score = self.llm_router.call(judge_model, eval_prompt)
            # First numeric token in the reply (handles "7", " 7 ", "8/10",
            # "Score: 9.5"); unparseable replies score 0.0 as before.
            match = re.search(r"-?\d+(?:\.\d+)?", str(raw_score))
            scores[name] = float(match.group()) if match else 0.0
        return scores

    # =====================================================
    # WINNER SELECTION
    # =====================================================
    def winner(self, scores):
        """Return the highest-scoring agent name, or None for empty scores.

        Ties resolve to the first-inserted key, per ``max`` over dict order.
        """
        if not scores:
            return None
        return max(scores, key=scores.get)