-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
414 lines (339 loc) Β· 15.4 KB
/
app.py
File metadata and controls
414 lines (339 loc) Β· 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
import streamlit as st
import pandas as pd
from text2sql_engine import Text2SQLEngine
from database_manager import DatabaseManager
import plotly.express as px
# Page configuration
st.set_page_config(
page_title="CloudCost Intelligence - Text2SQL",
page_icon="βοΈ",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
padding: 1rem 0;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 2rem;
}
.query-box {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 1rem 0;
}
.metric-card {
background-color: #ffffff;
padding: 1.5rem;
border-radius: 0.5rem;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def get_engine():
"""Initialize and cache the Text2SQL engine"""
return Text2SQLEngine()
def format_currency(value):
"""Format number as currency"""
if pd.isna(value):
return "N/A"
return f"${value:,.2f}"
def display_results(result_data):
"""Display query results with visualizations"""
df = result_data['results']
if df is None or len(df) == 0:
st.warning("No results found for this query.")
return
# Display metrics if aggregated data
if len(df) <= 20 and 'total_cost' in df.columns:
cols = st.columns(3)
with cols[0]:
total = df['total_cost'].sum()
st.metric("Total Cost", format_currency(total))
with cols[1]:
avg = df['total_cost'].mean()
st.metric("Average Cost", format_currency(avg))
with cols[2]:
count = len(df)
st.metric("Number of Items", count)
# Display table
st.subheader("π Query Results")
# Format cost columns
display_df = df.copy()
for col in display_df.columns:
if 'cost' in col.lower() or 'price' in col.lower():
if display_df[col].dtype in ['float64', 'float32', 'int64', 'int32']:
display_df[col] = display_df[col].apply(format_currency)
st.dataframe(display_df, use_container_width=True, height=400)
# Visualizations for aggregated data
if len(df) <= 50 and 'total_cost' in df.columns:
st.subheader("π Visualizations")
viz_col1, viz_col2 = st.columns(2)
with viz_col1:
# Bar chart
if 'servicename' in df.columns:
fig = px.bar(
df.head(15),
x='servicename',
y='total_cost',
title='Cost by Service',
labels={'total_cost': 'Total Cost ($)', 'servicename': 'Service'},
color='total_cost',
color_continuous_scale='Blues'
)
fig.update_layout(xaxis_tickangle=-45)
st.plotly_chart(fig, use_container_width=True)
elif 'regionname' in df.columns:
fig = px.bar(
df.head(15),
x='regionname',
y='total_cost',
title='Cost by Region',
labels={'total_cost': 'Total Cost ($)', 'regionname': 'Region'},
color='total_cost',
color_continuous_scale='Blues'
)
fig.update_layout(xaxis_tickangle=-45)
st.plotly_chart(fig, use_container_width=True)
with viz_col2:
# Pie chart
if 'servicename' in df.columns:
fig = px.pie(
df.head(10),
values='total_cost',
names='servicename',
title='Cost Distribution by Service'
)
st.plotly_chart(fig, use_container_width=True)
elif 'regionname' in df.columns:
fig = px.pie(
df.head(10),
values='total_cost',
names='regionname',
title='Cost Distribution by Region'
)
st.plotly_chart(fig, use_container_width=True)
# Download button
csv = df.to_csv(index=False)
st.download_button(
label="π₯ Download Results as CSV",
data=csv,
file_name="query_results.csv",
mime="text/csv"
)
def main():
"""Main application"""
# Header
st.markdown('<div class="main-header">βοΈ CloudCost Intelligence</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-header">Text-to-SQL Engine with Semantic Metadata</div>', unsafe_allow_html=True)
# Sidebar
with st.sidebar:
st.header("βΉοΈ About")
st.markdown("""
This application converts natural language questions into SQL queries
to analyze cloud cost data from AWS and Azure.
**Features:**
- Natural language to SQL conversion
- Semantic metadata layer
- Interactive visualizations
- Support for AWS and Azure cost data
""")
st.header("π Example Queries")
example_queries = [
"What is the total AWS cost?",
"Show me top 10 services by cost",
"What are the costs by region?",
"Show me Azure costs by service",
"What is the average cost per service?",
"Show me the top 5 most expensive regions",
"What is the total cost for compute services?",
"Show me costs for storage services"
]
selected_example = st.selectbox(
"Select an example query:",
[""] + example_queries
)
if st.button("π Use Example Query") and selected_example:
st.session_state.query_text = selected_example
st.rerun()
# Initialize engine
engine = get_engine()
# Main query input
st.header("π Ask a Question")
natural_query = st.text_input(
"Enter your question in natural language:",
placeholder="e.g., What is the total cost for AWS services?",
key="query_text"
)
col1, col2 = st.columns([1, 4])
with col1:
execute_button = st.button("βΆοΈ Execute Query", type="primary", use_container_width=True)
# Execute query with clarification
if execute_button and natural_query:
# Step 1: Check if clarification is needed
clarification = engine.check_clarification_needed(natural_query)
if clarification['needs_clarification']:
# Show clarification question
st.warning("π€ Your query needs clarification")
st.markdown(f"**{clarification['question']}**")
# Store clarification in session state
st.session_state.clarification_pending = True
st.session_state.clarification_data = clarification
st.session_state.original_query = natural_query
# Clear any previous results
if 'last_result' in st.session_state:
del st.session_state.last_result
else:
# No clarification needed, execute directly
with st.spinner("π Converting to SQL and executing query..."):
try:
result = engine.execute_natural_query(natural_query)
# Store result in session state to persist across reruns
st.session_state.last_result = result
st.session_state.last_query = natural_query
# Store in session state for history
if 'query_history' not in st.session_state:
st.session_state.query_history = []
st.session_state.query_history.append({
'natural': natural_query,
'sql': result['sql_query'],
'method': result['method'],
'rows': result['row_count']
})
except Exception as e:
st.error(f"Error: {str(e)}")
st.exception(e)
# Display last result if it exists (persists across reruns)
if 'last_result' in st.session_state and st.session_state.last_result is not None:
result = st.session_state.last_result
# Display SQL query
st.subheader("π§ Generated SQL Query")
st.code(result['sql_query'], language='sql')
# Show warning if date filter was removed
if result.get('warning'):
st.warning(result['warning'])
# Show semantic metadata used for THIS query
if result.get('query_metadata'):
with st.expander("π§ Semantic Metadata Used for This Query", expanded=False):
st.markdown("""
This shows the semantic understanding that the AI used to generate the SQL query above.
""")
query_metadata = result['query_metadata']
for table_name, table_meta in query_metadata.items():
st.subheader(f"π {table_name}")
st.markdown(f"**Description:** {table_meta['description']}")
st.markdown(f"**Aliases:** {', '.join(table_meta['aliases'])}")
st.markdown("### π Key Columns")
# Show important columns
important_cols = ['billedcost', 'servicename', 'regionname', 'subaccountname']
for col in important_cols:
if col in table_meta['columns']:
col_meta = table_meta['columns'][col]
with st.container():
st.markdown(f"**`{col}`** ({col_meta.get('data_type', 'TEXT')})")
st.markdown(f"_{col_meta.get('description', 'No description')}_")
if 'aliases' in col_meta:
st.caption(f"π‘ Aliases: {', '.join(col_meta['aliases'][:5])}")
st.divider()
col1, col2 = st.columns(2)
with col1:
st.info(f"**Conversion Method:** {result['method']}")
with col2:
st.info(f"**Rows Returned:** {result['row_count']}")
# Display results
if result['results'] is not None:
display_results(result)
else:
st.error("Query execution failed. Please try a different question.")
# Handle clarification response
if st.session_state.get('clarification_pending', False):
clarification_data = st.session_state.clarification_data
original_query = st.session_state.original_query
# Create selectbox for options
selected_option = st.selectbox(
"Please select an option:",
options=[opt['label'] for opt in clarification_data['options']],
key="clarification_select"
)
if st.button("β
Apply Selection and Execute", type="primary"):
# Find selected value
selected_value = next(
opt['value'] for opt in clarification_data['options']
if opt['label'] == selected_option
)
# Apply context to query
enhanced_query = engine.apply_clarification(
original_query,
clarification_data['missing_context'][0],
selected_value
)
# Execute the enhanced query
with st.spinner("π Executing enhanced query..."):
try:
result = engine.execute_natural_query(enhanced_query)
# Store result in session state to persist
st.session_state.last_result = result
st.session_state.last_query = enhanced_query
# Clear clarification state
st.session_state.clarification_pending = False
# Store in history
if 'query_history' not in st.session_state:
st.session_state.query_history = []
st.session_state.query_history.append({
'natural': enhanced_query,
'sql': result['sql_query'],
'method': result['method'],
'rows': result['row_count']
})
# Rerun to show results
st.rerun()
# Clear clarification state
st.session_state.clarification_pending = False
except Exception as e:
st.error(f"Error: {str(e)}")
# Force rerun to clear clarification UI
st.rerun()
# Query history
if 'query_history' in st.session_state and st.session_state.query_history:
st.header("π Query History")
with st.expander("View previous queries"):
for i, query in enumerate(reversed(st.session_state.query_history[-10:])):
st.markdown(f"**Query {len(st.session_state.query_history) - i}:** {query['natural']}")
st.code(query['sql'], language='sql')
st.caption(f"Method: {query['method']} | Rows: {query['rows']}")
st.divider()
# Database statistics
with st.expander("π Database Statistics"):
db = DatabaseManager()
db.connect()
col1, col2 = st.columns(2)
with col1:
st.subheader("AWS Data")
aws_count = db.execute_query("SELECT COUNT(*) as count FROM aws_cost_usage")
if aws_count is not None:
st.metric("Total Records", f"{aws_count['count'][0]:,}")
aws_cost = db.execute_query("SELECT SUM(billedcost) as total FROM aws_cost_usage")
if aws_cost is not None:
st.metric("Total Cost", format_currency(aws_cost['total'][0]))
with col2:
st.subheader("Azure Data")
azure_count = db.execute_query("SELECT COUNT(*) as count FROM azure_cost_usage")
if azure_count is not None:
st.metric("Total Records", f"{azure_count['count'][0]:,}")
azure_cost = db.execute_query("SELECT SUM(billedcost) as total FROM azure_cost_usage")
if azure_cost is not None:
st.metric("Total Cost", format_currency(azure_cost['total'][0]))
db.close()
if __name__ == "__main__":
main()