Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified form-flow-backend/chroma_db/chroma.sqlite3
Binary file not shown.
4 changes: 4 additions & 0 deletions form-flow-backend/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ def cors_origins_list(self) -> list:
# ==========================================================================
# Smart Question Engine Configuration
# ==========================================================================
ENABLE_AI: bool = Field(
default=True,
description="Enable AI features (disable for dev/testing to save quota)"
)
SMART_GROUPING_ENABLED: bool = Field(
default=True,
description="Enable Smart Question Grouping (reduces 159 fields to ~30 groups)"
Expand Down
2 changes: 1 addition & 1 deletion form-flow-backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ psutil
py-cpuinfo
seaborn>=0.11.0
elevenlabs==1.6.1
vosk>=0.3.45
vosk>=0.3.44
sqlalchemy
asyncpg
passlib[bcrypt]
Expand Down
50 changes: 37 additions & 13 deletions form-flow-backend/routers/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class FormSubmitRequest(BaseModel):
form_data: Dict[str, Any]
form_schema: List[Dict[str, Any]]
use_cdp: bool = False # If True, connect to user's browser via Chrome DevTools Protocol
human_like: bool = False # If True, use anti-detection human behavior

class ConversationalFlowRequest(BaseModel):
extracted_fields: Dict[str, str]
Expand Down Expand Up @@ -319,21 +320,25 @@ async def scrape_form(
print(f"⏱️ Scrape + process: {t2 - t1:.2f}s")

# ━━━ MAGIC FILL (non-blocking — runs in background) ━━━
auth_header = request.headers.get('Authorization')
if auth_header and auth_header.startswith('Bearer '):
# Fire-and-forget: run Magic Fill in background so /scrape returns instantly
background_tasks.add_task(
_run_magic_fill_background,
url, auth_header, processed_data['form_schema'], db, gemini_service
)
# ━━━ MAGIC FILL (non-blocking — runs in background) ━━━
if settings.ENABLE_AI:
auth_header = request.headers.get('Authorization')
if auth_header and auth_header.startswith('Bearer '):
# Fire-and-forget: run Magic Fill in background so /scrape returns instantly
background_tasks.add_task(
_run_magic_fill_background,
url, auth_header, processed_data['form_schema'], db, gemini_service
)
else:
print("ℹ️ Magic Fill skipped (ENABLE_AI=False)")

# ━━━ BUILD RESPONSE ━━━
response_data = {
"message": "Form scraped and analyzed successfully",
**processed_data,
"gemini_ready": gemini_service is not None,
"gemini_ready": gemini_service is not None and settings.ENABLE_AI,
"magic_fill_data": None, # Will be available via /magic-fill-result endpoint
"magic_fill_status": "processing" if auth_header and auth_header.startswith('Bearer ') else "skipped"
"magic_fill_status": "processing" if settings.ENABLE_AI and auth_header and auth_header.startswith('Bearer ') else "skipped"
}

# ━━━ CACHE RESULT (30 min TTL) ━━━
Expand Down Expand Up @@ -393,17 +398,21 @@ async def comprehensive_form_setup(

# Step 3: Generate initial conversational flow if requested
conversational_flow = None
if data.auto_generate_flow and gemini_service:
if data.auto_generate_flow and gemini_service and settings.ENABLE_AI:
flow_result = gemini_service.generate_conversational_flow({}, processed_data["form_schema"])
if flow_result["success"]:
conversational_flow = flow_result["conversational_flow"]
elif not settings.ENABLE_AI:
print("ℹ️ Conversational flow generation skipped (ENABLE_AI=False)")

return {
"message": "Form setup completed successfully",
**processed_data,
"conversational_flow": conversational_flow,
"ready_for_interaction": True,
"gemini_ready": gemini_service is not None
"conversational_flow": conversational_flow,
"ready_for_interaction": True,
"gemini_ready": gemini_service is not None and settings.ENABLE_AI
}

except Exception as e:
Expand Down Expand Up @@ -452,6 +461,9 @@ async def generate_conversational_flow(
):
"""Generate conversational flow based on extracted fields using Gemini API."""
try:
if not settings.ENABLE_AI:
raise HTTPException(status_code=400, detail="AI features are disabled")

if not gemini_service:
raise HTTPException(status_code=500, detail="Gemini API not configured")

Expand Down Expand Up @@ -607,6 +619,16 @@ async def magic_fill(
"summary": "Please sign in to use Magic Fill"
}

# Check if AI is enabled
if not settings.ENABLE_AI:
return {
"success": False,
"error": "AI features are disabled",
"filled": {},
"unfilled": [],
"summary": "AI features are currently disabled"
}

# 2. Call Smart Form Filler Chain
if not gemini_service:
raise HTTPException(status_code=500, detail="Gemini service not available")
Expand Down Expand Up @@ -659,14 +681,16 @@ async def submit_form(
url=data.url,
form_data=formatted_data,
form_schema=data.form_schema,
use_cdp=data.use_cdp
use_cdp=data.use_cdp,
human_like=data.human_like
)
else:
result = await form_submitter.submit_form_data(
url=data.url,
form_data=data.form_data,
form_schema=data.form_schema,
use_cdp=data.use_cdp
use_cdp=data.use_cdp,
human_like=data.human_like
)

# --- History Tracking ---
Expand Down
31 changes: 31 additions & 0 deletions form-flow-backend/scripts/clear_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sys
import os
import asyncio
import logging

# Setup path to include backend root so project-local imports (utils.*)
# resolve when this script is run directly from the scripts/ directory.
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, root_dir)

# Configure logging
logging.basicConfig(level=logging.INFO)

from utils.api_cache import invalidate_form_cache

# Default kept for backward compatibility with the original hard-coded target.
DEFAULT_URL = "https://www.zensar.com/contact-us"


async def main(target_url: str = DEFAULT_URL) -> None:
    """Invalidate the cached form schema for ``target_url``.

    Clearing the cache forces the next /scrape request for this URL to
    re-scrape the live page instead of serving the cached schema.

    Args:
        target_url: Form page URL whose cache entries should be dropped.
    """
    print(f"🧹 Clearing cache for: {target_url}")

    # 1. Clear form schema cache.
    # invalidate_form_cache owns the "form_schema:" key-prefix logic, so any
    # related keys (e.g. smart prompts) are handled inside that helper.
    await invalidate_form_cache(target_url)

    print("✅ Cache cleared successfully.")
    print("Please refresh the frontend to re-scrape.")


if __name__ == "__main__":
    # Generalized: allow `python scripts/clear_cache.py <url>`; with no
    # argument the behavior is identical to the original script.
    url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    asyncio.run(main(url))
122 changes: 119 additions & 3 deletions form-flow-backend/services/ai/profile/suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,13 @@ async def get_suggestions(
return await self._tier1_profile_based(profile, field_context, form_context, previous_answers, form_intent)
else:
# STRICT: No profile = No suggestions.
logger.warning("⛔ [Lifecycle] No Profile found. Skipping Tier 3 fallback (returning empty).")
return []
logger.info("🌱 [Lifecycle] No Profile found. Attempting Tier 0: Cold-Start suggestions.")
return await self._tier0_cold_start(field_context, form_context, previous_answers, form_intent)

except Exception as e:
logger.error(f"❌ [Lifecycle] CRITICAL ERROR: {str(e)}", exc_info=True)
return []


async def _tier1_profile_based(
self,
Expand All @@ -121,6 +122,28 @@ async def _tier1_profile_based(
except Exception as e:
logger.error(f"❌ [Lifecycle] Tier 1: LLM Failed ({str(e)})")
return [] # STRICT: Return empty instead of fallback

def _format_profile_for_prompt(self, profile: Any) -> str:
"""Extract and structure profile data for better LLM consumption."""
profile_text = getattr(profile, 'profile_text', None)

if not profile_text:
return str(profile)

try:
parsed = json.loads(profile_text) if isinstance(profile_text, str) else profile_text

# If it's already structured JSON, format it clearly
if isinstance(parsed, dict):
sections = []
for key, value in parsed.items():
label = key.replace("_", " ").title()
sections.append(f"- {label}: {value}")
return "\n".join(sections)
except (json.JSONDecodeError, TypeError):
pass

return str(profile_text)

async def _generate_llm_suggestions(
self,
Expand All @@ -142,7 +165,18 @@ async def _generate_llm_suggestions(
return None

# Extract profile text safely
profile_text = getattr(profile, 'profile_text', str(profile))
profile_text = self._format_profile_for_prompt(profile)

# ADD ↓
form_count = getattr(profile, 'form_count', 1)
try:
metadata = json.loads(getattr(profile, 'metadata_json', '{}') or '{}')
except Exception:
metadata = {}
forms_history = metadata.get('forms_analyzed', [])
history_str = ", ".join(forms_history[-5:]) if forms_history else "None"
maturity_hint = "mature — trust it heavily" if form_count >= 5 else "early stage — use as a hint, stay flexible"


# Context extraction
field_name = field_context.get("name", "unknown")
Expand Down Expand Up @@ -189,6 +223,8 @@ async def _generate_llm_suggestions(
4. **Guardrail:** NEVER describe the user in the third person (e.g., "User exhibits...") unless the form_type is explicitly 'diagnostic_report'.

5. **Output:** Return a JSON object with a list of 1-3 suggestions and your reasoning. The reasoning MUST mention the detected Form Intent.
6. **Profile Maturity:** The user has filled {form_count} forms — profile is {maturity_hint}. Weight suggestions accordingly.
7. **Past Forms:** They've previously filled: {forms_history}. Use this to infer domain or recurring needs.

FORMAT:
{{
Expand All @@ -212,6 +248,9 @@ async def _generate_llm_suggestions(
"field_name": field_name,
"persona": persona,
"previous_answers_context": previous_answers_str,
"form_count": form_count,
"maturity_hint": maturity_hint,
"forms_history": history_str,
})

duration = (datetime.now() - start_time).total_seconds()
Expand Down Expand Up @@ -259,6 +298,83 @@ def _tier3_pattern_only(
# DISABLED as per request
logger.info("🧩 [Lifecycle] Tier 3 requested but DISABLED.")
return []
async def _tier0_cold_start(
    self,
    field_context: Dict[str, Any],
    form_context: Dict[str, Any],
    previous_answers: Dict[str, str],
    form_intent: Optional[FormIntent]
) -> List[IntelligentSuggestion]:
    """
    Tier 0: Cold-start suggestions for users with no profile.
    Uses only form intent + field semantics to generate contextual placeholders.

    Args:
        field_context: Metadata for the field being filled; reads its
            "label" key, falling back to "name".
        form_context: Overall form metadata; its "purpose" key is used
            when no FormIntent was detected.
        previous_answers: Field name -> value pairs answered so far in
            this session (falsy values are omitted from the prompt).
        form_intent: Detected intent/persona of the form, if available.

    Returns:
        A list of IntelligentSuggestion objects; empty when the LLM is
        unavailable, produces no suggestions, or the chain raises.
    """
    # Bail out early when the Gemini service (or its LLM handle) is absent.
    gemini = get_gemini_service()
    if not gemini or not gemini.llm:
        return []

    # Prefer the human-readable label; fall back to the technical name.
    field_label = field_context.get("label", field_context.get("name", "unknown"))
    # Intent/persona come from the detected FormIntent when present,
    # otherwise from the form context / a generic "Customer" default.
    form_purpose = form_intent.intent if form_intent else form_context.get("purpose", "General Form")
    persona = form_intent.persona if form_intent else "Customer"

    # Render previously answered fields as bullet lines for the prompt.
    previous_answers_str = "None"
    if previous_answers:
        previous_answers_str = "\n".join([f"- {k}: {v}" for k, v in previous_answers.items() if v])

    # NOTE: the doubled braces {{ }} in FORMAT are literal-brace escapes for
    # the LangChain template; single-brace tokens are runtime placeholders.
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a smart form-filling assistant helping a first-time user.
You have NO prior information about this user. Generate helpful, realistic example suggestions
for the field based ONLY on the form's purpose and previously filled fields.

CONTEXT:
- Form Intent: {form_intent}
- Persona: {persona}
- Field: "{field_label}"
- Previously Filled Fields:
{previous_answers_context}

INSTRUCTIONS:
1. Generate 2-3 realistic, generic-but-useful example values a typical {persona} would enter.
2. Use the form intent to tailor suggestions (e.g., for "Job Application" + "Skills" field → "Python, FastAPI, SQL").
3. Use previous answers to stay consistent (e.g., if Role = "Designer", suggest design-related skills).
4. Keep suggestions short, realistic, and immediately usable.
5. Do NOT say "example" or "placeholder" - write as if the user would actually submit this.

FORMAT:
{{
"suggestions": ["Value 1", "Value 2"],
"reasoning": "Based on the form intent '{form_intent}', these are typical values a {persona} would provide."
}}
""")
    ])

    # Parse the LLM output against the SuggestionResponse schema.
    parser = JsonOutputParser(pydantic_object=SuggestionResponse)
    chain = prompt | gemini.llm | parser

    try:
        result = await chain.ainvoke({
            "form_intent": form_purpose,
            "persona": persona,
            "field_label": field_label,
            "previous_answers_context": previous_answers_str,
        })

        if result and result.get("suggestions"):
            return [
                IntelligentSuggestion(
                    value=val,
                    confidence=0.55,  # Lower confidence - no profile backing
                    # Reuses PATTERN_ONLY — no dedicated cold-start tier exists.
                    tier=SuggestionTier.PATTERN_ONLY,
                    reasoning=result.get("reasoning", "Cold-start suggestion based on form intent"),
                    behavioral_match="cold_start_intent"
                )
                for val in result["suggestions"]
            ]
    except Exception as e:
        # Cold start is best-effort: log and fall through to an empty list.
        logger.error(f"❌ [Lifecycle] Tier 0 Cold Start Failed: {str(e)}")

    return []



# Singleton instance
Expand Down
30 changes: 21 additions & 9 deletions form-flow-backend/services/ai/session_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,29 @@ async def cleanup_local_cache(self) -> int:

return len(expired)

def _serialize_session(self, session: Dict[str, Any]) -> Dict[str, Any]:
"""Serialize session data for storage."""
def _serialize_session(self, session: Dict[str, Any], _depth: int = 0) -> Dict[str, Any]:
"""Serialize session data for storage with depth limit to prevent infinite recursion."""
MAX_DEPTH = 10

if _depth > MAX_DEPTH:
logger.warning(f"Max serialization depth {MAX_DEPTH} reached, truncating")
return {}

serialized = {}
for key, value in session.items():
if isinstance(value, datetime):
serialized[key] = {'__datetime__': value.isoformat()}
elif hasattr(value, '__dict__'):
# Handle dataclass objects
serialized[key] = self._serialize_session(value.__dict__)
else:
serialized[key] = value
try:
if isinstance(value, datetime):
serialized[key] = {'__datetime__': value.isoformat()}
elif hasattr(value, '__dict__') and not isinstance(value, (str, int, float, bool, type(None))):
# Recursively serialize with depth tracking
serialized[key] = self._serialize_session(value.__dict__, _depth + 1)
else:
serialized[key] = value
except Exception as e:
logger.warning(f"Failed to serialize field '{key}': {e}")
# Fallback to string representation
serialized[key] = str(value)

return serialized

def _deserialize_session(self, data: Dict[str, Any]) -> Dict[str, Any]:
Expand Down
Loading
Loading