Skip to content

Commit 56838e7

Browse files
author
EgonBot
committed
fix: per-archetype PremortemTask decomposition for small-model compatibility
PremortemAnalysis required the LLM to emit a deeply nested schema: 3 AssumptionItem + 3 FailureModeItem, with 11+ required fields per item, including linked cross-reference IDs. Qwen 3.5-35B and other small local models consistently echoed the schema structure back instead of producing values, causing validation errors after exhausting all retries.

Fix (applies the 'LLMs handle narrative; code handles structure' principle):

- New ArchetypeNarrative schema: 5 plain-text fields only (no IDs, no cross-references, no counts). The LLM writes narrative content only.
- Per-archetype decomposition: one independent LLM call per archetype, with up to 5 retries. Failed archetypes are skipped gracefully.
- IDs, indices, and cross-references (assumption_id, failure_mode_index, root_cause_assumption_id) are assigned by code, not the LLM.
- falsifier is derived from the test_now field to avoid hardcoded tautologies.
- _calculate_risk_level_verbose returns 'Not Scored' when likelihood or impact is None (it previously rendered 'Likelihood None/5, Impact None/5').
- Dead code removed: ArchetypeAnalysis class, PREMORTEM_SYSTEM_PROMPT_TEMPLATE.

Validated: PremortemTask PASSED on GLM 4.7 Flash (HVT_minimal run).
1 parent 70918f9 commit 56838e7

1 file changed

Lines changed: 55 additions & 64 deletions

File tree

worker_plan/worker_plan_internal/diagnostics/premortem.py

Lines changed: 55 additions & 64 deletions
Original file line number · Diff line number · Diff line change
@@ -71,6 +71,14 @@ class PremortemAnalysis(BaseModel):
7171
assumptions_to_kill: List[AssumptionItem] = Field(description="A list of 3 new, critical, underlying assumptions to test immediately.")
7272
failure_modes: List[FailureModeItem] = Field(description="A list containing exactly 3 distinct failure failure_modes, one for each archetype.")
7373

74+
class ArchetypeNarrative(BaseModel):
    """Narrative-only payload the LLM produces for a single failure archetype.

    Deliberately flat: five free-text fields and nothing else. All IDs,
    indices, and cross-references are assigned afterwards by the calling
    code, never requested from the model.
    """

    # The critical assumption behind this archetype's failure scenario.
    assumption: str = Field(
        description="One critical assumption the project is making that, if false, would cause this failure."
    )
    # A single immediate, concrete check of that assumption.
    test_now: str = Field(
        description="One concrete action to immediately test if this assumption holds."
    )
    # Headline for the failure scenario.
    failure_title: str = Field(
        description="A short, compelling title for this failure scenario (e.g. 'The Gridlock Gamble')."
    )
    # Long-form narrative of the failure.
    failure_story: str = Field(
        description="A detailed narrative of how this failure unfolds. Explain causes, chain of events, and impact."
    )
    # Early indicators that the scenario is materializing.
    warning_signs: List[str] = Field(
        description="2-4 observable signals that this failure is beginning to occur."
    )
81+
7482
PREMORTEM_SYSTEM_PROMPT = """
7583
Persona: You are a senior project analyst. Your primary goal is to write compelling, detailed, and distinct failure stories that are also operationally actionable.
7684
@@ -119,92 +127,75 @@ def execute(cls, llm_executor: LLMExecutor, speed_vs_detail: SpeedVsDetailEnum,
119127
logger.debug(f"User Prompt:\n{user_prompt}")
120128
system_prompt = PREMORTEM_SYSTEM_PROMPT.strip()
121129

122-
accumulated_chat_message_list = [
123-
ChatMessage(
124-
role=MessageRole.SYSTEM,
125-
content=system_prompt,
126-
)
127-
]
128-
129-
user_prompt_list = [
130-
user_prompt,
131-
"Generate 3 new assumptions that are thematically different from the previous ones. Start assumption_id at A4.",
132-
"Generate 3 new assumptions that are thematically different from the previous ones and covers different archetypes. Start assumption_id at A7.",
133-
]
130+
archetypes = ["Process/Financial", "Technical/Logistical", "Market/Human"]
134131
if speed_vs_detail == SpeedVsDetailEnum.FAST_BUT_SKIP_DETAILS:
135-
user_prompt_list = user_prompt_list[:1]
136-
logger.info("Running in FAST_BUT_SKIP_DETAILS mode. Omitting some assumptions.")
132+
archetypes = archetypes[:1]
133+
logger.info("Running in FAST_BUT_SKIP_DETAILS mode. Processing 1 archetype only.")
137134
else:
138-
logger.info("Running in ALL_DETAILS_BUT_SLOW mode. Processing all assumptions.")
135+
logger.info("Running in ALL_DETAILS_BUT_SLOW mode. Processing all 3 archetypes.")
139136

140-
responses: list[PremortemAnalysis] = []
137+
assumptions_to_kill: list[AssumptionItem] = []
138+
failure_modes: list[FailureModeItem] = []
141139
metadata_list: list[dict] = []
142-
for user_prompt_index, user_prompt_item in enumerate(user_prompt_list):
143-
logger.info(f"Processing user_prompt_index: {user_prompt_index+1} of {len(user_prompt_list)}")
144-
chat_message_list = accumulated_chat_message_list.copy()
145-
chat_message_list.append(
146-
ChatMessage(
147-
role=MessageRole.USER,
148-
content=user_prompt_item,
149-
)
140+
141+
for archetype_index, archetype in enumerate(archetypes):
142+
assumption_id = f"A{archetype_index + 1}"
143+
failure_mode_index = archetype_index + 1
144+
logger.info(f"Processing archetype {archetype_index+1} of {len(archetypes)}: {archetype!r}")
145+
146+
archetype_user_prompt = (
147+
f"{user_prompt}\n\n"
148+
f"Archetype: {archetype}\n"
149+
f"Write one assumption and one failure scenario for this archetype only."
150150
)
151+
chat_message_list = [
152+
ChatMessage(role=MessageRole.SYSTEM, content=system_prompt),
153+
ChatMessage(role=MessageRole.USER, content=archetype_user_prompt),
154+
]
151155

152156
def execute_function(llm: LLM) -> dict:
153-
sllm = llm.as_structured_llm(PremortemAnalysis)
157+
sllm = llm.as_structured_llm(ArchetypeNarrative)
154158
start_time = time.perf_counter()
155-
156159
chat_response = sllm.chat(chat_message_list)
157-
pydantic_response = chat_response.raw
158-
160+
narrative = require_raw(chat_response, ArchetypeNarrative)
159161
end_time = time.perf_counter()
160162
duration = int(ceil(end_time - start_time))
161-
162163
metadata = dict(llm.metadata)
163164
metadata["llm_classname"] = llm.class_name()
164165
metadata["duration"] = duration
165-
166-
return {
167-
"pydantic_response": pydantic_response,
168-
"metadata": metadata,
169-
"duration": duration
170-
}
166+
return {"narrative": narrative, "metadata": metadata}
171167

172168
try:
173169
result = llm_executor.run(execute_function)
174170
except PipelineStopRequested:
175-
# Re-raise PipelineStopRequested without wrapping it
176171
raise
177172
except Exception as e:
178-
logger.debug(f"LLM chat interaction failed: {e}")
179-
logger.error("LLM chat interaction failed.", exc_info=True)
180-
if user_prompt_index == 0:
181-
logger.error("The first user prompt failed. This is a critical error. Please check the system prompt and user prompt.")
182-
raise ValueError("LLM chat interaction failed.") from e
183-
else:
184-
logger.error(f"User prompt {user_prompt_index+1} failed. Continuing with next user prompt.")
185-
continue
186-
187-
assistant_content_raw: dict = result["pydantic_response"].model_dump()
188-
# Compact JSON without newlines and spaces, since it's going to be parsed by the LLM. Pretty printing wastes input tokens for the LLM.
189-
assistant_content: str = json.dumps(assistant_content_raw, separators=(',', ':'))
190-
191-
chat_message_list.append(
192-
ChatMessage(
193-
role=MessageRole.ASSISTANT,
194-
content=assistant_content,
195-
)
196-
)
173+
logger.error(f"Archetype {archetype!r} failed: {e}", exc_info=True)
174+
if archetype_index == 0:
175+
raise ValueError(f"First archetype failed: {e}") from e
176+
logger.warning(f"Skipping archetype {archetype!r} due to failure.")
177+
continue
197178

198-
responses.append(result["pydantic_response"])
179+
narrative: ArchetypeNarrative = result["narrative"]
199180
metadata_list.append(result["metadata"])
200-
accumulated_chat_message_list = chat_message_list.copy()
201181

202-
# Use the last response as the primary result
203-
assumptions_to_kill: list[AssumptionItem] = []
204-
failure_modes: list[FailureModeItem] = []
205-
for response in responses:
206-
assumptions_to_kill.extend(response.assumptions_to_kill)
207-
failure_modes.extend(response.failure_modes)
182+
# Code assigns IDs and cross-references — the LLM only provides narrative text.
183+
assumption = AssumptionItem(
184+
assumption_id=assumption_id,
185+
statement=narrative.assumption,
186+
test_now=narrative.test_now,
187+
falsifier=f"Result of: {narrative.test_now} — reveals the assumption does not hold.",
188+
)
189+
failure_mode = FailureModeItem(
190+
failure_mode_index=failure_mode_index,
191+
root_cause_assumption_id=assumption_id,
192+
failure_mode_archetype=archetype,
193+
failure_mode_title=narrative.failure_title,
194+
risk_analysis=narrative.failure_story,
195+
early_warning_signs=narrative.warning_signs,
196+
)
197+
assumptions_to_kill.append(assumption)
198+
failure_modes.append(failure_mode)
208199

209200
final_response = PremortemAnalysis(
210201
assumptions_to_kill=assumptions_to_kill,
@@ -286,7 +277,7 @@ def _calculate_risk_level_brief(likelihood: Optional[int], impact: Optional[int]
286277
def _calculate_risk_level_verbose(likelihood: Optional[int], impact: Optional[int]) -> str:
287278
"""Calculates a qualitative risk level from likelihood and impact scores."""
288279
if likelihood is None or impact is None:
289-
return f"Likelihood {likelihood}/5, Impact {impact}/5"
280+
return "Not Scored"
290281

291282
score = likelihood * impact
292283
if score >= 15:

0 commit comments

Comments
 (0)