forked from tcmartin/flowrunner
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstrucouttemplate
More file actions
148 lines (127 loc) · 5.58 KB
/
strucouttemplate
File metadata and controls
148 lines (127 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import yaml
import os # Needed for the utils import below
from pocketflow import Node, Flow
from utils import call_llm # Assumes utils.py with call_llm exists
class ResumeParserNode(Node):
    """Node that asks an LLM to turn a resume into validated structured data.

    Reads ``resume_text`` (required) and ``target_skills`` (optional) from the
    shared store, prompts the LLM for a fenced YAML document, validates the
    parsed result, and stores it under ``shared["structured_data"]``.
    """

    def prep(self, shared):
        """Return resume text and target skills from shared state."""
        return {
            "resume_text": shared["resume_text"],
            "target_skills": shared.get("target_skills", []),
        }

    def exec(self, prep_res):
        """Extract structured data from the resume using prompt engineering.

        Requests YAML output with comments and skill indexes as a list.

        Args:
            prep_res: Mapping with ``resume_text`` and ``target_skills`` as
                produced by :meth:`prep`.

        Returns:
            The parsed YAML mapping containing ``name``, ``email``,
            ``experience`` (list) and ``skill_indexes`` (list of ints or None).

        Raises:
            ValueError: If the response has no fenced yaml block or the parsed
                data fails validation.
            yaml.YAMLError: If the fenced text is not valid YAML.
        """
        resume_text = prep_res["resume_text"]
        target_skills = prep_res["target_skills"]

        # Format skills with indexes for the prompt
        skill_list_for_prompt = "\n".join(
            f"{i}: {skill}" for i, skill in enumerate(target_skills)
        )

        # The prompt text stays at column 0 so no stray indentation reaches the
        # LLM; the example YAML is nested so that it is itself valid YAML.
        prompt = f"""
Analyze the resume below. Output ONLY the requested information in YAML format.
**Resume:**
```
{resume_text}
```
**Target Skills (use these indexes):**
```
{skill_list_for_prompt}
```
**YAML Output Requirements:**
- Extract `name` (string).
- Extract `email` (string).
- Extract `experience` (list of objects with `title` and `company`).
- Extract `skill_indexes` (list of integers found from the Target Skills list).
- **Add a YAML comment (`#`) explaining the source BEFORE `name`, `email`, `experience`, each item in `experience`, and `skill_indexes`.**
**Example Format:**
```yaml
# Found name at top
name: Jane Doe
# Found email in contact info
email: jane@example.com
# Experience section analysis
experience:
  # First job listed
  - title: Manager
    company: Corp A
  # Second job listed
  - title: Assistant
    company: Corp B
# Skills identified from the target list based on resume content
skill_indexes:
  # Found 0 at top
  - 0
  # Found 2 in experience
  - 2
```
Generate the YAML output now:
"""
        response = call_llm(prompt)
        yaml_str = self._extract_yaml_block(response)
        structured_result = yaml.safe_load(yaml_str)
        self._validate(structured_result)
        return structured_result

    @staticmethod
    def _extract_yaml_block(response):
        """Return the text inside the first fenced yaml block of *response*."""
        _, fence, tail = response.partition("```yaml")
        if not fence:
            # A missing fence used to surface as an opaque IndexError.
            raise ValueError("Validation Failed: No ```yaml block in LLM response")
        return tail.split("```")[0].strip()

    @staticmethod
    def _validate(data):
        """Raise ValueError unless *data* has the expected resume structure.

        Explicit raises are used instead of ``assert`` so the checks still run
        when Python is started with ``-O``.
        """
        if data is None:
            raise ValueError("Validation Failed: Parsed YAML is None")
        if not isinstance(data, dict):
            # Without this, `"name" in data` would be a substring/element test.
            raise ValueError("Validation Failed: Parsed YAML is not a mapping")

        for key in ("name", "email", "experience"):
            if key not in data:
                raise ValueError(f"Validation Failed: Missing '{key}'")
        if not isinstance(data["experience"], list):
            raise ValueError("Validation Failed: 'experience' is not a list")

        if "skill_indexes" not in data:
            raise ValueError("Validation Failed: Missing 'skill_indexes'")
        skill_indexes = data["skill_indexes"]
        if skill_indexes is None:
            # An empty `skill_indexes:` key parses as None and is accepted.
            return
        if not isinstance(skill_indexes, list):
            raise ValueError(
                "Validation Failed: 'skill_indexes' is not a list or None"
            )
        for index in skill_indexes:
            # bool is a subclass of int; a YAML `true` must not pass as an index.
            if isinstance(index, bool) or not isinstance(index, int):
                raise ValueError(
                    f"Validation Failed: Skill index '{index}' is not an integer"
                )

    def post(self, shared, prep_res, exec_res):
        """Store structured data and print it."""
        shared["structured_data"] = exec_res
        print("\n=== STRUCTURED RESUME DATA (Comments & Skill Index List) ===\n")
        # default_flow_style=None: PyYAML prints leaf collections (e.g. the
        # index list) in compact flow style and nested containers in block style.
        print(
            yaml.dump(
                exec_res,
                sort_keys=False,
                allow_unicode=True,
                default_flow_style=None,
            )
        )
        print("\n============================================================\n")
        print("✅ Extracted resume information.")
# === Main Execution Logic ===
def format_found_skills(found_indexes, target_skills):
    """Return the report lines for the skill indexes found by the parser.

    Args:
        found_indexes: List of integer indexes into *target_skills*; may be
            empty or None.
        target_skills: The list of skill names the indexes refer to.

    Returns:
        One line per index (a warning line for out-of-range indexes), or a
        single "none identified" line when *found_indexes* is empty or None.
    """
    if not found_indexes:
        return ["No target skills identified from the list."]

    lines = []
    for index in found_indexes:
        if 0 <= index < len(target_skills):
            lines.append(f"- {target_skills[index]} (Index: {index})")
        else:
            lines.append(f"- Warning: Found invalid skill index {index}")
    return lines


def main():
    """Parse the resume in ``data.txt`` and print the matched target skills.

    Raises:
        SystemExit: With status 1 when the resume file does not exist.
    """
    print("=== Resume Parser - Structured Output with Indexes & Comments ===\n")

    # --- Configuration ---
    target_skills_to_find = [
        "Team leadership & management",  # 0
        "CRM software",  # 1
        "Project management",  # 2
        "Public speaking",  # 3
        "Microsoft Office",  # 4
        "Python",  # 5
        "Data Analysis",  # 6
    ]
    resume_file = 'data.txt'  # Assumes data.txt contains the resume

    # --- Prepare Shared State ---
    shared = {}
    try:
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(resume_file, 'r', encoding="utf-8") as file:
            shared["resume_text"] = file.read()
    except FileNotFoundError:
        print(f"Error: Resume file '{resume_file}' not found.")
        # SystemExit works even where the site-provided exit() is unavailable.
        raise SystemExit(1)
    shared["target_skills"] = target_skills_to_find

    # --- Define and Run Flow ---
    parser_node = ResumeParserNode(max_retries=3, wait=10)
    flow = Flow(start=parser_node)
    flow.run(shared)  # Execute the parsing node

    # --- Display Found Skills ---
    if "structured_data" in shared and "skill_indexes" in shared["structured_data"]:
        print("\n--- Found Target Skills (from Indexes) ---")
        found_indexes = shared["structured_data"]["skill_indexes"]
        for line in format_found_skills(found_indexes, target_skills_to_find):
            print(line)
        print("----------------------------------------\n")


if __name__ == "__main__":
    main()