-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinspect_data.py
More file actions
105 lines (91 loc) · 4.79 KB
/
inspect_data.py
File metadata and controls
105 lines (91 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sqlite3
import json
def _fetch_count(cursor, sql, params=()):
    """Execute a ``SELECT COUNT(*) as count ...`` query and return the count."""
    cursor.execute(sql, params)
    return cursor.fetchone()['count']


def _inspect_job_descriptions(cursor):
    """Print row counts and one decoded sample from ``job_descriptions``."""
    total_jds = _fetch_count(
        cursor, "SELECT COUNT(*) as count FROM job_descriptions"
    )
    print(f"Total JDs in database: {total_jds}")

    processed_jds = _fetch_count(
        cursor,
        "SELECT COUNT(*) as count FROM job_descriptions WHERE summary_json IS NOT NULL",
    )
    print(f"JDs with processed summaries: {processed_jds}")
    if processed_jds <= 0:
        return

    cursor.execute(
        "SELECT jd_id, title, summary_json FROM job_descriptions "
        "WHERE summary_json IS NOT NULL LIMIT 1"
    )
    jd = cursor.fetchone()
    if jd is None:
        return

    print(f"\nSample JD (ID: {jd['jd_id']}, Title: {jd['title']}):")
    try:
        summary = json.loads(jd['summary_json'])
        print("Summary Data:")
        print(json.dumps(summary, indent=2))
    except json.JSONDecodeError as e:
        print(f"Error decoding summary JSON: {e}")
        # Show only a prefix so a huge malformed payload does not flood the output.
        print("Raw data:", jd['summary_json'][:200])


def _inspect_candidates(cursor):
    """Print row counts and one decoded sample from ``candidates``."""
    total_candidates = _fetch_count(
        cursor, "SELECT COUNT(*) as count FROM candidates"
    )
    print(f"Total candidates in database: {total_candidates}")

    processed_candidates = _fetch_count(
        cursor,
        "SELECT COUNT(*) as count FROM candidates WHERE extracted_data_json IS NOT NULL",
    )
    print(f"Candidates with processed data: {processed_candidates}")
    if processed_candidates <= 0:
        return

    cursor.execute(
        "SELECT candidate_id, cv_filename, extracted_data_json FROM candidates "
        "WHERE extracted_data_json IS NOT NULL LIMIT 1"
    )
    cand = cursor.fetchone()
    if cand is None:
        return

    print(f"\nSample Candidate (ID: {cand['candidate_id']}, File: {cand['cv_filename']}):")
    try:
        data = json.loads(cand['extracted_data_json'])
        print("Extracted Data:")
        print(json.dumps(data, indent=2))
    except json.JSONDecodeError as e:
        print(f"Error decoding extracted data JSON: {e}")
        # Show only a prefix so a huge malformed payload does not flood the output.
        print("Raw data:", cand['extracted_data_json'][:200])


def _inspect_matches(cursor, threshold):
    """Print match counts, the number at or above *threshold*, and the top 3 matches."""
    total_matches = _fetch_count(cursor, "SELECT COUNT(*) as count FROM matches")
    print(f"Total matches in database: {total_matches}")

    # The threshold is bound as a parameter rather than formatted into the SQL.
    shortlisted = _fetch_count(
        cursor,
        "SELECT COUNT(*) as count FROM matches WHERE match_score >= ?",
        (threshold,),
    )
    print(f"Matches above {threshold} threshold: {shortlisted}")
    if total_matches <= 0:
        return

    # The join needs job_descriptions and candidates as well; if either table
    # is missing, SQLite raises and the caller's sqlite3.Error handler reports it.
    cursor.execute("""
        SELECT m.*, j.title as job_title, c.cv_filename
        FROM matches m
        JOIN job_descriptions j ON m.jd_id = j.jd_id
        JOIN candidates c ON m.candidate_id = c.candidate_id
        ORDER BY m.match_score DESC
        LIMIT 3
    """)
    print("\nTop 3 Matches:")
    for match in cursor.fetchall():
        print(f"\nJob: {match['job_title']}")
        print(f"Candidate: {match['cv_filename']}")
        print(f"Score: {match['match_score']}")
        try:
            if match['match_details_json']:
                details = json.loads(match['match_details_json'])
                print("Match Details:")
                print(json.dumps(details, indent=2))
        except json.JSONDecodeError as e:
            print(f"Error decoding match details: {e}")


def inspect_data(db_path, threshold=0.75):
    """Print a human-readable summary of the screening database.

    Lists the tables found, then, for each of ``job_descriptions``,
    ``candidates`` and ``matches`` that exists, prints row counts and sample
    rows with their JSON columns pretty-printed.

    Args:
        db_path: Path of the SQLite database file to inspect. Note that
            ``sqlite3.connect`` creates an empty database file if the path
            does not exist yet.
        threshold: Minimum ``match_score`` for a match to be counted as
            above the threshold. Defaults to 0.75.

    Returns:
        None. All results are written to standard output.

    Errors are reported on standard output rather than raised: SQLite
    failures are printed as ``SQLite error: ...`` and anything else as a
    generic inspection error.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        # Row objects allow access to columns by name (row['count']).
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Check which tables exist so missing ones are skipped, not queried.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = [row['name'] for row in cursor.fetchall()]
        print(f"\nFound tables: {tables}")

        print("\n=== Job Description Data ===")
        if 'job_descriptions' in tables:
            _inspect_job_descriptions(cursor)

        print("\n=== Candidate Data ===")
        if 'candidates' in tables:
            _inspect_candidates(cursor)

        print("\n=== Match Data ===")
        if 'matches' in tables:
            _inspect_matches(cursor, threshold)
    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
    except Exception as e:
        print(f"An error occurred while inspecting the database: {e}")
    finally:
        # Close on every path, including after a sqlite3.Error raised mid-way.
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    # Script entry point: inspect the screening database at its default
    # location, given relative to the current working directory.
    inspect_data("data/screening_database.sqlite")