Skip to content

Commit 93a7244

Browse files
committed
updates
1 parent 6f564aa commit 93a7244

53 files changed

Lines changed: 1004 additions & 211 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ __pycache__/
1515
LogFiles/
1616

1717
# Deployment artifacts
18-
deployments/
18+
deployments/
19+
20+
sample_docs/

app.py

Lines changed: 150 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ def extract_bank_statement_hybrid(text, transactions_payload=None, save_json_pat
573573
Extract the following information in JSON format:
574574
575575
1. account_summary:
576+
- account_holder_name: Full name of the account holder (look for "Account Holder", "Customer Name", "Name", etc.)
576577
- opening_balance
577578
- closing_balance
578579
- total_inflows
@@ -2035,101 +2036,151 @@ def build_prefill_from_insights(qid: int) -> dict:
20352036
except Exception:
20362037
pass
20372038

2039+
# Scan uploads for insurance prefill - need to read insurance_type from metadata
20382040
insurance_prefill = {}
20392041
try:
2040-
sum_val = ins.get("sum_assured_or_insured")
2041-
ins_type = str(ins.get("insurance_type") or "").lower()
2042-
if isinstance(sum_val, (int, float)) and sum_val > 0:
2043-
if "life" in ins_type or "term" in ins_type or "ulip" in ins_type:
2044-
insurance_prefill["life_cover"] = float(sum_val)
2045-
elif "health" in ins_type or "mediclaim" in ins_type:
2046-
insurance_prefill["health_cover"] = float(sum_val)
2047-
else:
2048-
# Unknown type: default to life_cover; user can adjust
2042+
uploads = list_questionnaire_uploads(qid) or []
2043+
for upload in uploads:
2044+
doc_type = (upload["doc_type"] or "").lower()
2045+
if "insurance" in doc_type:
2046+
metadata_json = upload["metadata_json"]
2047+
if metadata_json:
2048+
try:
2049+
metadata = json.loads(metadata_json)
2050+
ins_type = str(metadata.get("insurance_type") or "").lower()
2051+
sum_val = metadata.get("sum_assured_or_insured")
2052+
2053+
# Also check aggregated insights if not in metadata
2054+
if sum_val is None or sum_val == "N/A":
2055+
sum_val = ins.get("sum_assured_or_insured")
2056+
2057+
if isinstance(sum_val, (int, float)) and sum_val > 0:
2058+
if "health" in ins_type or "mediclaim" in ins_type:
2059+
# Add to health cover (may have multiple health policies)
2060+
existing_health = insurance_prefill.get("health_cover", 0.0)
2061+
insurance_prefill["health_cover"] = existing_health + float(sum_val)
2062+
elif "life" in ins_type or "term" in ins_type or "ulip" in ins_type:
2063+
# Add to life cover
2064+
existing_life = insurance_prefill.get("life_cover", 0.0)
2065+
insurance_prefill["life_cover"] = existing_life + float(sum_val)
2066+
else:
2067+
# Unknown type: default to life_cover
2068+
existing_life = insurance_prefill.get("life_cover", 0.0)
2069+
insurance_prefill["life_cover"] = existing_life + float(sum_val)
2070+
except Exception:
2071+
continue
2072+
2073+
# Fallback: if no metadata found, use aggregated insights
2074+
if not insurance_prefill:
2075+
sum_val = ins.get("sum_assured_or_insured")
2076+
if isinstance(sum_val, (int, float)) and sum_val > 0:
2077+
# Default to life_cover when type unknown
20492078
insurance_prefill["life_cover"] = float(sum_val)
20502079
except Exception:
20512080
pass
20522081

2053-
# Personal info extraction from raw document extracts
2082+
# Personal info extraction - priority order:
2083+
# 1. Bank statement account_holder_name (most reliable, directly from user's bank)
2084+
# 2. CAS investor_name
2085+
# 3. Insurance policy_holder
2086+
# 4. ITR assessee_name (least reliable - may pick up father's name field)
20542087
personal_info = {}
20552088
try:
2056-
raw_extracts = di.get("raw_extracts") or []
20572089
uploads = list_questionnaire_uploads(qid) or []
20582090

2059-
# Create a mapping of document_id to doc_type
2060-
doc_type_map = {}
2091+
# First pass: look for bank statement (highest priority for name)
20612092
for upload in uploads:
2062-
doc_id = upload["document_id"]
2063-
if doc_id:
2064-
doc_type_map[doc_id] = upload["doc_type"] or ""
2093+
if personal_info.get("name"):
2094+
break
2095+
doc_type = (upload["doc_type"] or "").lower()
2096+
if "bank" in doc_type:
2097+
metadata_json = upload["metadata_json"]
2098+
if metadata_json:
2099+
try:
2100+
metadata = json.loads(metadata_json)
2101+
account_holder = metadata.get("account_holder_name")
2102+
if account_holder and account_holder != "N/A" and len(str(account_holder)) > 2:
2103+
personal_info["name"] = str(account_holder).strip().title()
2104+
except Exception:
2105+
continue
20652106

2066-
# Scan through raw extracts for personal info
2067-
for extract in raw_extracts:
2068-
if personal_info.get("name") and personal_info.get("age"):
2069-
break # Already have both
2070-
2071-
doc_id = extract.get("document_id")
2072-
summary = extract.get("summary") or {}
2073-
doc_type = doc_type_map.get(doc_id, "").lower()
2074-
2075-
# Additional data might be stored in document-level extraction (not just summary)
2076-
# We need to re-extract from the uploaded documents
2107+
# Second pass: look for CAS investor_name
2108+
if not personal_info.get("name"):
2109+
for upload in uploads:
2110+
if personal_info.get("name"):
2111+
break
2112+
doc_type = (upload["doc_type"] or "").lower()
2113+
if "cas" in doc_type or "mutual fund" in doc_type:
2114+
metadata_json = upload["metadata_json"]
2115+
if metadata_json:
2116+
try:
2117+
metadata = json.loads(metadata_json)
2118+
cas_data = metadata.get("cas_data") or {}
2119+
investor_name = metadata.get("investor_name") or cas_data.get("investor_name")
2120+
if investor_name and investor_name != "N/A" and len(investor_name) > 2:
2121+
personal_info["name"] = investor_name.strip().title()
2122+
except Exception:
2123+
continue
2124+
2125+
# Third pass: look for Insurance policy_holder
2126+
if not personal_info.get("name"):
2127+
for upload in uploads:
2128+
if personal_info.get("name"):
2129+
break
2130+
doc_type = (upload["doc_type"] or "").lower()
2131+
if "insurance" in doc_type:
2132+
metadata_json = upload["metadata_json"]
2133+
if metadata_json:
2134+
try:
2135+
metadata = json.loads(metadata_json)
2136+
policy_holder = metadata.get("policy_holder")
2137+
if policy_holder and policy_holder != "N/A" and len(policy_holder) > 2:
2138+
personal_info["name"] = policy_holder.strip().title()
2139+
except Exception:
2140+
continue
2141+
2142+
# Fourth pass: look for ITR assessee_name (lowest priority)
2143+
if not personal_info.get("name"):
2144+
for upload in uploads:
2145+
if personal_info.get("name"):
2146+
break
2147+
doc_type = (upload["doc_type"] or "").lower()
2148+
if "itr" in doc_type:
2149+
metadata_json = upload["metadata_json"]
2150+
if metadata_json:
2151+
try:
2152+
metadata = json.loads(metadata_json)
2153+
assessee_name = metadata.get("assessee_name")
2154+
if assessee_name and assessee_name != "N/A" and len(assessee_name) > 2:
2155+
personal_info["name"] = assessee_name.strip().title()
2156+
except Exception:
2157+
continue
20772158

2078-
# If not found in raw_extracts, scan uploaded document metadata for personal info
2159+
# Extract age from date_of_birth (from any document)
20792160
for upload in uploads:
2080-
if personal_info.get("name") and personal_info.get("age"):
2161+
if personal_info.get("age"):
20812162
break
2082-
2083-
doc_type = (upload["doc_type"] or "").lower()
20842163
metadata_json = upload["metadata_json"]
2085-
20862164
if metadata_json:
20872165
try:
20882166
metadata = json.loads(metadata_json)
2089-
2090-
# Check CAS data for investor_name
2091-
cas_data = metadata.get("cas_data") or {}
2092-
if not personal_info.get("name"):
2093-
investor_name = metadata.get("investor_name") or cas_data.get("investor_name")
2094-
if investor_name and investor_name != "N/A" and len(investor_name) > 2:
2095-
personal_info["name"] = investor_name.strip().title()
2096-
2097-
# Check for extracted personal details in metadata
2098-
if not personal_info.get("name"):
2099-
# Insurance policy_holder
2100-
policy_holder = metadata.get("policy_holder")
2101-
if policy_holder and policy_holder != "N/A" and len(policy_holder) > 2:
2102-
personal_info["name"] = policy_holder.strip().title()
2103-
2104-
if not personal_info.get("name"):
2105-
# ITR assessee_name
2106-
assessee_name = metadata.get("assessee_name")
2107-
if assessee_name and assessee_name != "N/A" and len(assessee_name) > 2:
2108-
personal_info["name"] = assessee_name.strip().title()
2109-
2110-
# Try to extract age from date_of_birth
2111-
if not personal_info.get("age"):
2112-
dob = metadata.get("date_of_birth")
2113-
if dob and dob != "N/A":
2167+
dob = metadata.get("date_of_birth")
2168+
if dob and dob != "N/A":
2169+
from datetime import datetime
2170+
for fmt in ["%d/%m/%Y", "%d-%m-%Y", "%d/%m/%y", "%d-%m-%y"]:
21142171
try:
2115-
# Parse DOB and calculate age
2116-
from datetime import datetime
2117-
for fmt in ["%d/%m/%Y", "%d-%m-%Y", "%d/%m/%y", "%d-%m-%y"]:
2118-
try:
2119-
birth_date = datetime.strptime(dob, fmt)
2120-
if birth_date.year < 100: # Handle 2-digit years
2121-
birth_date = birth_date.replace(year=birth_date.year + 1900)
2122-
today = datetime.now()
2123-
age = today.year - birth_date.year
2124-
if (today.month, today.day) < (birth_date.month, birth_date.day):
2125-
age -= 1
2126-
if 18 <= age <= 100:
2127-
personal_info["age"] = age
2128-
break
2129-
except ValueError:
2130-
continue
2131-
except Exception:
2132-
pass
2172+
birth_date = datetime.strptime(dob, fmt)
2173+
if birth_date.year < 100: # Handle 2-digit years
2174+
birth_date = birth_date.replace(year=birth_date.year + 1900)
2175+
today = datetime.now()
2176+
age = today.year - birth_date.year
2177+
if (today.month, today.day) < (birth_date.month, birth_date.day):
2178+
age -= 1
2179+
if 18 <= age <= 100:
2180+
personal_info["age"] = age
2181+
break
2182+
except ValueError:
2183+
continue
21332184
except Exception:
21342185
continue
21352186
except Exception:
@@ -2263,6 +2314,7 @@ def upload_document():
22632314
try:
22642315
bank_metadata = {
22652316
"size_bytes": len(file_bytes),
2317+
"account_holder_name": (bank_data.get("account_summary") or {}).get("account_holder_name"),
22662318
"bank_data": {
22672319
"account_summary": bank_data.get("account_summary", {}),
22682320
"recurring_debits": bank_data.get("recurring_debits", []),
@@ -2309,9 +2361,11 @@ def upload_document():
23092361
metadata_update["date_of_birth"] = other_data.get("date_of_birth")
23102362
metadata_update["pan"] = other_data.get("pan")
23112363

2312-
# Insurance metadata - store policy holder
2364+
# Insurance metadata - store policy holder and insurance_type
23132365
elif doc_type == "Insurance document":
23142366
metadata_update["policy_holder"] = other_data.get("policy_holder")
2367+
metadata_update["insurance_type"] = other_data.get("insurance_type")
2368+
metadata_update["sum_assured_or_insured"] = other_data.get("sum_assured_or_insured")
23152369
metadata_update["date_of_birth"] = other_data.get("date_of_birth")
23162370

23172371
update_questionnaire_upload_metadata(upload_link_ids[idx], metadata_update)
@@ -3572,6 +3626,26 @@ def header(canvas, doc):
35723626
def footer(canvas, doc):
35733627
canvas.saveState()
35743628
styles = get_custom_styles()
3629+
3630+
# Educational disclaimer - displayed on every page
3631+
disclaimer_text = (
3632+
"This is an educational analysis tool, not financial advice. "
3633+
"This report is for informational purposes only. "
3634+
"Consult a SEBI-registered Investment Advisor before making decisions. "
3635+
"We do not recommend specific securities or products."
3636+
)
3637+
disclaimer_style = ParagraphStyle(
3638+
'Disclaimer',
3639+
fontSize=6,
3640+
alignment=TA_CENTER,
3641+
textColor=colors.grey,
3642+
leading=8
3643+
)
3644+
disclaimer = Paragraph(disclaimer_text, disclaimer_style)
3645+
dw, dh = disclaimer.wrap(doc.width, doc.bottomMargin)
3646+
disclaimer.drawOn(canvas, doc.leftMargin, dh + 15)
3647+
3648+
# Page number
35753649
p = Paragraph(f"Page {doc.page}", styles['Footer'])
35763650
w, h = p.wrap(doc.width, doc.bottomMargin)
35773651
p.drawOn(canvas, doc.leftMargin, h)

0 commit comments

Comments
 (0)