-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: app.py
More file actions
140 lines (121 loc) · 6.47 KB
/
app.py
File metadata and controls
140 lines (121 loc) · 6.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import json
import streamlit as st
import pandas as pd
from db import load_db_urls, create_engines, introspect_schema, execute_read_query
from semantic import build_semantic_model
from nlp_sql import generate_sql_from_nl, validate_sql
from agent import verify_sql_query
from insights import summarize_dataframe, narrative_insights_from_summary
from agent_chain import run_agent
from agentic_pipeline import run_agentic_pipeline
# --- App setup & configuration --------------------------------------------
st.set_page_config(page_title="NLP → SQL Agent", layout="wide")
st.title("NLP → SQL Agent (Prototype)")

# Sidebar: collect DB connection URLs (a JSON object mapping a display name
# to a SQLAlchemy URL) and the OpenAI model name, seeded from env vars so a
# pre-configured deployment needs no manual entry.
with st.sidebar.expander("Configuration"):
    db_urls_raw = st.text_area("DB URLs JSON (env DB_URLS)", value=os.environ.get("DB_URLS", "{}"), height=120)
    openai_model = st.text_input("OpenAI Model", value=os.environ.get("OPENAI_MODEL", "gpt-4"))
    if st.button("Save Config to ENV"):
        # NOTE: this persists only for the lifetime of the server process;
        # it does not write to any shell profile or .env file.
        try:
            os.environ["DB_URLS"] = db_urls_raw
            os.environ["OPENAI_MODEL"] = openai_model
            st.success("Saved to environment for this session")
        except Exception as e:
            st.error(str(e))

# Parse the DB URL configuration. Two accepted shapes:
#   1. a JSON object: {"name": "postgresql://...", ...}
#   2. a bare URL string, treated as a single entry named "default".
db_urls = {}
try:
    db_urls = json.loads(db_urls_raw)
except json.JSONDecodeError:
    # json.loads on a str raises only JSONDecodeError; catching the broad
    # Exception here (as before) hid nothing but obscured intent.
    if db_urls_raw.strip():
        db_urls = {"default": db_urls_raw.strip()}
# --- Database selection and main orchestration flow ------------------------
# Build one SQLAlchemy engine per configured URL; "--none--" is a sentinel
# placeholder so the selectbox has a no-op default.
engines = create_engines(db_urls) if db_urls else {}
selected = st.selectbox("Select database", options=["--none--"] + list(engines.keys()))
if selected and selected != "--none--":
    engine = engines[selected]
    st.write("Introspecting schema...")
    schema = introspect_schema(engine)
    st.json(schema)
    # The semantic model is built on demand and cached in session_state so
    # it survives Streamlit's top-to-bottom reruns.
    if st.button("Build semantic model"):
        sem = build_semantic_model(engine)
        st.session_state["semantic"] = sem
        st.success("Semantic model built and stored in session")
    semantic = st.session_state.get("semantic")
    if semantic:
        st.subheader("Ask a question in natural language")
        nl = st.text_area("User query", height=120)
        dialect = st.selectbox("SQL dialect", options=["default", "postgres", "mysql", "sqlite"], index=0)
        max_retries = st.slider("Max SQL generation retries", 1, 10, 5)
        orchestration_mode = st.radio(
            "Orchestration Mode",
            options=["Direct", "LangChain Agent", "Full Agentic Pipeline"],
            index=2,
            help="Direct: simple flow | LangChain: multi-tool agent | Pipeline: full step tracing + reasoning logs"
        )
        if st.button("Generate and Run"):
            if not nl.strip():
                st.error("Please provide a natural language query")
            else:
                if orchestration_mode == "Full Agentic Pipeline":
                    # Full pipeline: one call returns the final result plus
                    # per-step execution traces with reasoning/error info.
                    with st.spinner("Running full agentic pipeline..."):
                        res = run_agentic_pipeline(nl, engine, dialect=dialect)
                    st.subheader("Pipeline Result")
                    if res.get("error"):
                        st.error(f"Pipeline Error: {res['error']}")
                    else:
                        if res.get("final_result"):
                            final = res["final_result"]
                            st.write("**Generated SQL:**")
                            st.code(final.get("sql", ""), language="sql")
                            st.write(f"**Rows Returned:** {final.get('rows_returned', 0)}")
                            st.write("**Summary:**")
                            st.json(final.get("summary", {}))
                            st.write("**Narrative Insights:**")
                            st.write(final.get("narrative", ""))
                    # Show execution traces with collapsible details.
                    # Rendered even on pipeline error so failures are debuggable.
                    with st.expander("📊 Execution Traces"):
                        # The loop index was previously bound via
                        # enumerate(..., 1) but never displayed; dropped.
                        for trace in res.get("traces", []):
                            col1, col2 = st.columns([3, 1])
                            status = "✓" if not trace.get("error") else "✗"
                            col1.write(f"{status} **{trace['step_name']}**")
                            col2.write(f"{trace.get('duration_ms', 0):.1f}ms")
                            if trace.get("reasoning"):
                                st.caption(f"Reasoning: {trace['reasoning']}")
                            if trace.get("error"):
                                st.error(f"Error: {trace['error']}")
                elif orchestration_mode == "LangChain Agent":
                    # Multi-tool agent: returns an opaque result dict that is
                    # rendered as-is.
                    with st.spinner("Running LangChain agent..."):
                        res = run_agent(nl, semantic, engine, dialect=dialect)
                    st.subheader("Agent Result")
                    st.json(res)
                else:
                    # Direct mode: generate SQL, validate, verify policy,
                    # execute, then summarize — each stage gates the next.
                    with st.spinner("Generating SQL..."):
                        out = generate_sql_from_nl(nl, semantic, dialect=dialect, max_retries=max_retries)
                    st.subheader("Generated SQL")
                    st.code(out.get("sql", ""), language="sql")
                    if out.get("error"):
                        st.error(f"Failed to produce valid SQL after {out.get('attempts')} attempts: {out.get('error')}")
                    else:
                        # Execute safely: syntactic validation first.
                        err = validate_sql(out["sql"], dialect=dialect)
                        if err:
                            st.error(f"Validation error before execution: {err}")
                        else:
                            # Verify agent-level policy: only read-only/info
                            # queries are allowed past this point.
                            ok, msg = verify_sql_query(out["sql"], dialect=dialect)
                            if not ok:
                                st.error(f"Query rejected by verifier: {msg}")
                            else:
                                try:
                                    df = execute_read_query(engine, out["sql"], limit=None)
                                    st.success(f"Query returned {len(df)} rows")
                                    st.dataframe(df)
                                    summary = summarize_dataframe(df)
                                    st.subheader("Summary")
                                    st.json(summary)
                                    with st.spinner("Generating narrative insights..."):
                                        narrative = narrative_insights_from_summary(summary, nl)
                                    st.subheader("Narrative Insights")
                                    st.write(narrative)
                                except Exception as e:
                                    # Broad catch is deliberate: any DB/driver
                                    # failure is surfaced to the UI, not raised.
                                    st.error(f"Execution failed: {e}")
else:
    st.info("Configure DB URLs in the sidebar to get started.")