@@ -573,6 +573,7 @@ def extract_bank_statement_hybrid(text, transactions_payload=None, save_json_pat
573573 Extract the following information in JSON format:
574574
575575 1. account_summary:
576+ - account_holder_name: Full name of the account holder (look for "Account Holder", "Customer Name", "Name", etc.)
576577 - opening_balance
577578 - closing_balance
578579 - total_inflows
@@ -2035,101 +2036,151 @@ def build_prefill_from_insights(qid: int) -> dict:
20352036 except Exception :
20362037 pass
20372038
2039+ # Scan uploads for insurance prefill - need to read insurance_type from metadata
20382040 insurance_prefill = {}
20392041 try :
2040- sum_val = ins .get ("sum_assured_or_insured" )
2041- ins_type = str (ins .get ("insurance_type" ) or "" ).lower ()
2042- if isinstance (sum_val , (int , float )) and sum_val > 0 :
2043- if "life" in ins_type or "term" in ins_type or "ulip" in ins_type :
2044- insurance_prefill ["life_cover" ] = float (sum_val )
2045- elif "health" in ins_type or "mediclaim" in ins_type :
2046- insurance_prefill ["health_cover" ] = float (sum_val )
2047- else :
2048- # Unknown type: default to life_cover; user can adjust
2042+ uploads = list_questionnaire_uploads (qid ) or []
2043+ for upload in uploads :
2044+ doc_type = (upload ["doc_type" ] or "" ).lower ()
2045+ if "insurance" in doc_type :
2046+ metadata_json = upload ["metadata_json" ]
2047+ if metadata_json :
2048+ try :
2049+ metadata = json .loads (metadata_json )
2050+ ins_type = str (metadata .get ("insurance_type" ) or "" ).lower ()
2051+ sum_val = metadata .get ("sum_assured_or_insured" )
2052+
2053+ # Also check aggregated insights if not in metadata
2054+ if sum_val is None or sum_val == "N/A" :
2055+ sum_val = ins .get ("sum_assured_or_insured" )
2056+
2057+ if isinstance (sum_val , (int , float )) and sum_val > 0 :
2058+ if "health" in ins_type or "mediclaim" in ins_type :
2059+ # Add to health cover (may have multiple health policies)
2060+ existing_health = insurance_prefill .get ("health_cover" , 0.0 )
2061+ insurance_prefill ["health_cover" ] = existing_health + float (sum_val )
2062+ elif "life" in ins_type or "term" in ins_type or "ulip" in ins_type :
2063+ # Add to life cover
2064+ existing_life = insurance_prefill .get ("life_cover" , 0.0 )
2065+ insurance_prefill ["life_cover" ] = existing_life + float (sum_val )
2066+ else :
2067+ # Unknown type: default to life_cover
2068+ existing_life = insurance_prefill .get ("life_cover" , 0.0 )
2069+ insurance_prefill ["life_cover" ] = existing_life + float (sum_val )
2070+ except Exception :
2071+ continue
2072+
2073+ # Fallback: if no metadata found, use aggregated insights
2074+ if not insurance_prefill :
2075+ sum_val = ins .get ("sum_assured_or_insured" )
2076+ if isinstance (sum_val , (int , float )) and sum_val > 0 :
2077+ # Default to life_cover when type unknown
20492078 insurance_prefill ["life_cover" ] = float (sum_val )
20502079 except Exception :
20512080 pass
20522081
2053- # Personal info extraction from raw document extracts
2082+ # Personal info extraction - priority order:
2083+ # 1. Bank statement account_holder_name (most reliable, directly from user's bank)
2084+ # 2. CAS investor_name
2085+ # 3. Insurance policy_holder
2086+ # 4. ITR assessee_name (least reliable - may pick up father's name field)
20542087 personal_info = {}
20552088 try :
2056- raw_extracts = di .get ("raw_extracts" ) or []
20572089 uploads = list_questionnaire_uploads (qid ) or []
20582090
2059- # Create a mapping of document_id to doc_type
2060- doc_type_map = {}
2091+ # First pass: look for bank statement (highest priority for name)
20612092 for upload in uploads :
2062- doc_id = upload ["document_id" ]
2063- if doc_id :
2064- doc_type_map [doc_id ] = upload ["doc_type" ] or ""
2093+ if personal_info .get ("name" ):
2094+ break
2095+ doc_type = (upload ["doc_type" ] or "" ).lower ()
2096+ if "bank" in doc_type :
2097+ metadata_json = upload ["metadata_json" ]
2098+ if metadata_json :
2099+ try :
2100+ metadata = json .loads (metadata_json )
2101+ account_holder = metadata .get ("account_holder_name" )
2102+ if account_holder and account_holder != "N/A" and len (str (account_holder )) > 2 :
2103+ personal_info ["name" ] = str (account_holder ).strip ().title ()
2104+ except Exception :
2105+ continue
20652106
2066- # Scan through raw extracts for personal info
2067- for extract in raw_extracts :
2068- if personal_info .get ("name" ) and personal_info .get ("age" ):
2069- break # Already have both
2070-
2071- doc_id = extract .get ("document_id" )
2072- summary = extract .get ("summary" ) or {}
2073- doc_type = doc_type_map .get (doc_id , "" ).lower ()
2074-
2075- # Additional data might be stored in document-level extraction (not just summary)
2076- # We need to re-extract from the uploaded documents
2107+ # Second pass: look for CAS investor_name
2108+ if not personal_info .get ("name" ):
2109+ for upload in uploads :
2110+ if personal_info .get ("name" ):
2111+ break
2112+ doc_type = (upload ["doc_type" ] or "" ).lower ()
2113+ if "cas" in doc_type or "mutual fund" in doc_type :
2114+ metadata_json = upload ["metadata_json" ]
2115+ if metadata_json :
2116+ try :
2117+ metadata = json .loads (metadata_json )
2118+ cas_data = metadata .get ("cas_data" ) or {}
2119+ investor_name = metadata .get ("investor_name" ) or cas_data .get ("investor_name" )
2120+ if investor_name and investor_name != "N/A" and len (investor_name ) > 2 :
2121+ personal_info ["name" ] = investor_name .strip ().title ()
2122+ except Exception :
2123+ continue
2124+
2125+ # Third pass: look for Insurance policy_holder
2126+ if not personal_info .get ("name" ):
2127+ for upload in uploads :
2128+ if personal_info .get ("name" ):
2129+ break
2130+ doc_type = (upload ["doc_type" ] or "" ).lower ()
2131+ if "insurance" in doc_type :
2132+ metadata_json = upload ["metadata_json" ]
2133+ if metadata_json :
2134+ try :
2135+ metadata = json .loads (metadata_json )
2136+ policy_holder = metadata .get ("policy_holder" )
2137+ if policy_holder and policy_holder != "N/A" and len (policy_holder ) > 2 :
2138+ personal_info ["name" ] = policy_holder .strip ().title ()
2139+ except Exception :
2140+ continue
2141+
2142+ # Fourth pass: look for ITR assessee_name (lowest priority)
2143+ if not personal_info .get ("name" ):
2144+ for upload in uploads :
2145+ if personal_info .get ("name" ):
2146+ break
2147+ doc_type = (upload ["doc_type" ] or "" ).lower ()
2148+ if "itr" in doc_type :
2149+ metadata_json = upload ["metadata_json" ]
2150+ if metadata_json :
2151+ try :
2152+ metadata = json .loads (metadata_json )
2153+ assessee_name = metadata .get ("assessee_name" )
2154+ if assessee_name and assessee_name != "N/A" and len (assessee_name ) > 2 :
2155+ personal_info ["name" ] = assessee_name .strip ().title ()
2156+ except Exception :
2157+ continue
20772158
2078- # If not found in raw_extracts, scan uploaded document metadata for personal info
2159+ # Extract age from date_of_birth (from any document)
20792160 for upload in uploads :
2080- if personal_info .get ("name" ) and personal_info . get ( " age" ):
2161+ if personal_info .get ("age" ):
20812162 break
2082-
2083- doc_type = (upload ["doc_type" ] or "" ).lower ()
20842163 metadata_json = upload ["metadata_json" ]
2085-
20862164 if metadata_json :
20872165 try :
20882166 metadata = json .loads (metadata_json )
2089-
2090- # Check CAS data for investor_name
2091- cas_data = metadata .get ("cas_data" ) or {}
2092- if not personal_info .get ("name" ):
2093- investor_name = metadata .get ("investor_name" ) or cas_data .get ("investor_name" )
2094- if investor_name and investor_name != "N/A" and len (investor_name ) > 2 :
2095- personal_info ["name" ] = investor_name .strip ().title ()
2096-
2097- # Check for extracted personal details in metadata
2098- if not personal_info .get ("name" ):
2099- # Insurance policy_holder
2100- policy_holder = metadata .get ("policy_holder" )
2101- if policy_holder and policy_holder != "N/A" and len (policy_holder ) > 2 :
2102- personal_info ["name" ] = policy_holder .strip ().title ()
2103-
2104- if not personal_info .get ("name" ):
2105- # ITR assessee_name
2106- assessee_name = metadata .get ("assessee_name" )
2107- if assessee_name and assessee_name != "N/A" and len (assessee_name ) > 2 :
2108- personal_info ["name" ] = assessee_name .strip ().title ()
2109-
2110- # Try to extract age from date_of_birth
2111- if not personal_info .get ("age" ):
2112- dob = metadata .get ("date_of_birth" )
2113- if dob and dob != "N/A" :
2167+ dob = metadata .get ("date_of_birth" )
2168+ if dob and dob != "N/A" :
2169+ from datetime import datetime
2170+ for fmt in ["%d/%m/%Y" , "%d-%m-%Y" , "%d/%m/%y" , "%d-%m-%y" ]:
21142171 try :
2115- # Parse DOB and calculate age
2116- from datetime import datetime
2117- for fmt in ["%d/%m/%Y" , "%d-%m-%Y" , "%d/%m/%y" , "%d-%m-%y" ]:
2118- try :
2119- birth_date = datetime .strptime (dob , fmt )
2120- if birth_date .year < 100 : # Handle 2-digit years
2121- birth_date = birth_date .replace (year = birth_date .year + 1900 )
2122- today = datetime .now ()
2123- age = today .year - birth_date .year
2124- if (today .month , today .day ) < (birth_date .month , birth_date .day ):
2125- age -= 1
2126- if 18 <= age <= 100 :
2127- personal_info ["age" ] = age
2128- break
2129- except ValueError :
2130- continue
2131- except Exception :
2132- pass
2172+ birth_date = datetime .strptime (dob , fmt )
2173+ if birth_date .year < 100 : # Handle 2-digit years
2174+ birth_date = birth_date .replace (year = birth_date .year + 1900 )
2175+ today = datetime .now ()
2176+ age = today .year - birth_date .year
2177+ if (today .month , today .day ) < (birth_date .month , birth_date .day ):
2178+ age -= 1
2179+ if 18 <= age <= 100 :
2180+ personal_info ["age" ] = age
2181+ break
2182+ except ValueError :
2183+ continue
21332184 except Exception :
21342185 continue
21352186 except Exception :
@@ -2263,6 +2314,7 @@ def upload_document():
22632314 try :
22642315 bank_metadata = {
22652316 "size_bytes" : len (file_bytes ),
2317+ "account_holder_name" : (bank_data .get ("account_summary" ) or {}).get ("account_holder_name" ),
22662318 "bank_data" : {
22672319 "account_summary" : bank_data .get ("account_summary" , {}),
22682320 "recurring_debits" : bank_data .get ("recurring_debits" , []),
@@ -2309,9 +2361,11 @@ def upload_document():
23092361 metadata_update ["date_of_birth" ] = other_data .get ("date_of_birth" )
23102362 metadata_update ["pan" ] = other_data .get ("pan" )
23112363
2312- # Insurance metadata - store policy holder
2364+ # Insurance metadata - store policy holder and insurance_type
23132365 elif doc_type == "Insurance document" :
23142366 metadata_update ["policy_holder" ] = other_data .get ("policy_holder" )
2367+ metadata_update ["insurance_type" ] = other_data .get ("insurance_type" )
2368+ metadata_update ["sum_assured_or_insured" ] = other_data .get ("sum_assured_or_insured" )
23152369 metadata_update ["date_of_birth" ] = other_data .get ("date_of_birth" )
23162370
23172371 update_questionnaire_upload_metadata (upload_link_ids [idx ], metadata_update )
@@ -3572,6 +3626,26 @@ def header(canvas, doc):
35723626def footer (canvas , doc ):
35733627 canvas .saveState ()
35743628 styles = get_custom_styles ()
3629+
3630+ # Educational disclaimer - displayed on every page
3631+ disclaimer_text = (
3632+ "This is an educational analysis tool, not financial advice. "
3633+ "This report is for informational purposes only. "
3634+ "Consult a SEBI-registered Investment Advisor before making decisions. "
3635+ "We do not recommend specific securities or products."
3636+ )
3637+ disclaimer_style = ParagraphStyle (
3638+ 'Disclaimer' ,
3639+ fontSize = 6 ,
3640+ alignment = TA_CENTER ,
3641+ textColor = colors .grey ,
3642+ leading = 8
3643+ )
3644+ disclaimer = Paragraph (disclaimer_text , disclaimer_style )
3645+ dw , dh = disclaimer .wrap (doc .width , doc .bottomMargin )
3646+ disclaimer .drawOn (canvas , doc .leftMargin , dh + 15 )
3647+
3648+ # Page number
35753649 p = Paragraph (f"Page { doc .page } " , styles ['Footer' ])
35763650 w , h = p .wrap (doc .width , doc .bottomMargin )
35773651 p .drawOn (canvas , doc .leftMargin , h )
0 commit comments