Skip to content

Commit 22d0156

Browse files
committed
hopeful fix
1 parent 6913505 commit 22d0156

5 files changed

Lines changed: 55 additions & 8 deletions

app.py

Lines changed: 55 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -756,14 +756,39 @@ def extract_bank_statement_hybrid(text, transactions_payload=None, save_json_pat
756756
- average_monthly_balance
757757
758758
2. recurring_credits: Array of objects with description, amount, frequency, dates
759-
3. recurring_debits: Array of objects with description, amount, frequency, dates
759+
760+
3. recurring_debits: Array of objects with:
761+
- description: Transaction description
762+
- amount: Numeric amount (no currency symbols)
763+
- frequency: Monthly/Quarterly/Yearly/Ad-hoc
764+
- dates: Array of dates when this recurring debit occurred
765+
- is_emi: Boolean - true if this is an EMI/loan payment, false otherwise
766+
767+
**EMI IDENTIFICATION RULES (IMPORTANT):**
768+
Mark is_emi=true for transactions that match ANY of these patterns:
769+
- Contains "EMI", "LOAN", "LN", "MORTGAGE", "INSTALMENT", "INSTALLMENT", "REPAYMENT"
770+
- Contains bank-specific loan codes: "ELM" (ICICI EMI), "HDFC LN", "SBI LN", "AXIS LN"
771+
- Contains "NACH" (National Automated Clearing House - often used for auto-debit EMIs/loans)
772+
- Contains "SI/" or "STANDING INSTRUCTION" for loan payments
773+
- Contains "ECS" (Electronic Clearing Service) for recurring loan debits
774+
- Contains "AUTO DEBIT" or "AUTODEBIT" for loan/EMI payments
775+
- Contains "HOME LOAN", "CAR LOAN", "PERSONAL LOAN", "VEHICLE LOAN", "HOUSING LOAN", "EDUCATION LOAN"
776+
- Same amount debited on or around the same date each month (±3 days) - likely an EMI
777+
778+
Common EMI transaction patterns in Indian banks:
779+
- ICICI: "BIL/INFT/ELM...", "NACH/..."
780+
- HDFC: "HDFC LN...", "NACH/HDFC..."
781+
- SBI: "EMI DED/...", "NACH/SBI..."
782+
- Axis: "AXIS LN...", "NACH/AXIS..."
783+
760784
4. high_value_transactions: Array with date, description, type, amount (threshold: 100000)
761785
5. bounce_penalty_charges: Array with date, description, amount
762786
763787
Notes:
764788
- Use the structured JSON transactions as the source of truth when present.
765789
- Use exact numeric values; do not include currency symbols.
766790
- Frequency can be Monthly/Quarterly/Yearly/Ad-hoc.
791+
- For recurring_debits, ALWAYS include the is_emi boolean field.
767792
768793
Structured Transactions JSON (optional):
769794
{tx_json_str if tx_json_str else "<none>"}
@@ -986,7 +1011,7 @@ def extract_insurance_hybrid(text):
9861011
data = {}
9871012

9881013
is_life_insurance = bool(re.search(r"(?i)(life\s+insurance|term\s+plan|endowment|ULIP|whole\s+life)", text))
989-
is_health_insurance = bool(re.search(r"(?i)(health\s+insurance|mediclaim|medical\s+insurance|hospitali[sz]ation|health\s*cover|in-?patient\s+treatment|annual\s+sum\s+insured|sum\s+insured|health\s+advantedge|health\s+plan|health\s+policy)", text))
1014+
is_health_insurance = bool(re.search(r"(?i)(health\s+insurance|mediclaim|medical\s+insurance|hospitali[sz]ation|health\s*cover|in-?patient\s+treatment|annual\s+sum\s+insured|sum\s+insured|health\s+advantedge|health\s+plan|health\s+policy|medicare\s*premier|tata\s*aig\s*medicare)", text))
9901015
is_general_insurance = bool(re.search(r"(?i)(motor\s+insurance|vehicle\s+insurance|property\s+insurance|home\s+insurance|fire\s+insurance)", text))
9911016

9921017
# Debug logging
@@ -1006,7 +1031,8 @@ def extract_insurance_hybrid(text):
10061031
patterns = {
10071032
"policy_number": [
10081033
r"(?i)Policy\s*(?:No\.?|Number)[\s:\-]*([A-Z0-9\-/]{6,25})",
1009-
r"(?i)Policy[\s:\-]*([A-Z0-9\-/]{6,25})"
1034+
r"(?i)Policy[\s:\-]*([A-Z0-9\-/]{6,25})",
1035+
r"(?i)Member\s*(?:ID|No\.?)[\s:\-]*([A-Z0-9]{10,25})" # TATA AIG uses Member ID
10101036
],
10111037
"insurer_name": [
10121038
r"(?i)(?:Insurer|Company|Insurance\s+Company)[\s:\-]*([A-Za-z\s&]+?)(?:\n|Ltd|Limited|Insurance)",
@@ -1086,6 +1112,8 @@ def _parse_indian_amount(num_str, suffix_str=""):
10861112
] if is_life_insurance else [
10871113
# Highest priority: "Annual Sum Insured" - this is the actual policy value
10881114
r"(?i)Annual\s+Sum\s+Insured[\s:\-\|]*(?:Rs\.?|₹)?\s*([\d,]+(?:\.\d+)?)\s*(Lakhs?|Lacs?|Crores?|Cr)?",
1115+
# TATA AIG format: "Sum Insured (₹)#" with value in same or next cell
1116+
r"(?i)Sum\s+Insured\s*\(?₹?\)?[#*]*[\s:\-\|]*([\d,]+(?:\.\d+)?)\s*(Lakhs?|Lacs?|Crores?|Cr)?",
10891117
# Next: general Sum Insured/Assured patterns
10901118
r"(?i)Sum\s*(?:Insured|Assured)[\s:\-]*(?:Rs\.?|₹)?\s*([\d,]+(?:\.\d+)?)\s*(Lakhs?|Lacs?|Crores?|Cr)?",
10911119
r"(?i)Cover(?:age)?\s*Amount[\s:\-]*(?:Rs\.?|₹)?\s*([\d,]+(?:\.\d+)?)\s*(Lakhs?|Lacs?|Crores?|Cr)?",
@@ -2321,7 +2349,21 @@ def build_prefill_from_insights(qid: int) -> dict:
23212349
try:
23222350
uploads = list_questionnaire_uploads(qid) or []
23232351
total_monthly_emi = 0.0
2324-
emi_keywords = ["emi", "loan", "mortgage", "instalment", "installment", "repayment", "home loan", "car loan", "personal loan", "vehicle loan", "housing loan"]
2352+
# Expanded EMI keywords including bank-specific codes
2353+
emi_keywords = [
2354+
"emi", "loan", "ln", "mortgage", "instalment", "installment", "repayment",
2355+
"home loan", "car loan", "personal loan", "vehicle loan", "housing loan", "education loan",
2356+
# Bank-specific codes
2357+
"elm", # ICICI EMI/Loan
2358+
"nach", # National Automated Clearing House (auto-debit EMIs)
2359+
"hdfc ln", "sbi ln", "icici ln", "axis ln", "kotak ln", # Bank loan prefixes
2360+
"si/", # Standing Instruction
2361+
"standing instruction",
2362+
"ecs", # Electronic Clearing Service
2363+
"auto debit", "autodebit",
2364+
"emi ded", # SBI pattern
2365+
"bil/inft/elm", # ICICI pattern
2366+
]
23252367
for upload in uploads:
23262368
if (upload["doc_type"] or "").lower() == "bank statement":
23272369
metadata_json = upload["metadata_json"]
@@ -2334,8 +2376,14 @@ def build_prefill_from_insights(qid: int) -> dict:
23342376
desc = (debit.get("description") or "").lower()
23352377
amount = debit.get("amount")
23362378
freq = (debit.get("frequency") or "").lower()
2337-
# Check if this is an EMI payment
2338-
if any(kw in desc for kw in emi_keywords):
2379+
is_emi = debit.get("is_emi", False)
2380+
2381+
# Check if this is an EMI payment:
2382+
# 1. LLM marked it as EMI (is_emi=True)
2383+
# 2. OR description matches EMI keywords
2384+
is_emi_payment = is_emi or any(kw in desc for kw in emi_keywords)
2385+
2386+
if is_emi_payment:
23392387
if isinstance(amount, (int, float)) and amount > 0:
23402388
# Convert to monthly if not already monthly
23412389
if freq in ("monthly", "month"):
@@ -2351,9 +2399,8 @@ def build_prefill_from_insights(qid: int) -> dict:
23512399
continue
23522400
if total_monthly_emi > 0:
23532401
lifestyle["monthly_emi"] = round(total_monthly_emi, 2)
2354-
logger.debug(f"Prefill extracted monthly_emi: {lifestyle['monthly_emi']}")
23552402
except Exception as e:
2356-
logger.warning(f"Prefill error extracting monthly_emi: {e}")
2403+
pass # Silently handle errors to avoid breaking prefill
23572404

23582405
allocation = {}
23592406
try:
-35.9 KB
Binary file not shown.
-38.4 KB
Binary file not shown.
-36.2 KB
Binary file not shown.
-42.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)