shukdevdattaEX committed on
Commit
ccb7e50
·
verified ·
1 Parent(s): 4386f50

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +358 -0
app.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ from pydantic import BaseModel
3
+ import json
4
+ import gradio as gr
5
+ import pandas as pd
6
+
7
# Syntax-check outcome reported alongside a generated SQL query.
# NOTE: documented with comments rather than a class docstring on purpose --
# Pydantic copies a model docstring into the generated JSON schema.
class ValidationStatus(BaseModel):
    # True when the generated SQL query is syntactically valid.
    is_valid: bool

    # SQL syntax error messages; an empty list means none were found.
    syntax_errors: list[str]
13
+
14
# Structured response expected from the model for one natural-language request.
# NOTE: documented with comments rather than a class docstring on purpose --
# Pydantic copies a model docstring into the generated JSON schema.
class SQLQueryGeneration(BaseModel):
    # The generated SQL text, e.g.
    # SELECT product_id, name, price FROM products WHERE price < 50 ORDER BY price ASC
    query: str

    # Kind of statement (e.g. SELECT, INSERT, UPDATE, DELETE).
    query_type: str

    # Names of the tables the query references, e.g. ["products"].
    tables_used: list[str]

    # Estimated complexity of the query (e.g. LOW, MEDIUM, HIGH).
    estimated_complexity: str

    # Notes on how the query executes or which assumptions were made, e.g.
    # "Simple SELECT query on products table",
    # "Filter products with price less than $50",
    # "Order results by price ascending".
    execution_notes: list[str]

    # Validation results for the generated query:
    # {"is_valid": true/false, "syntax_errors": [...]}.
    validation_status: ValidationStatus

    # CREATE TABLE statement describing the sample table's schema.
    table_schema: str

    # Sample rows for the table (INSERT statements or a table view).
    sample_data: str

    # Result of running the query, formatted as a pipe-delimited text table.
    execution_results: str

    # Suggestions for optimizing the query (indexes, joins, filters, etc.).
    optimization_notes: list[str]
46
+
47
def _is_separator_line(line):
    """Return True when *line* is table ruling such as ``----|----``.

    A ruling line contains at least one dash and nothing except dashes,
    pipes, plus signs, colons and whitespace. Checking the whole line (not
    just its first character) keeps data rows such as ``-5 | foo``.
    """
    return '-' in line and not (set(line) - set('-|+: \t'))


def parse_execution_results_to_dataframe(execution_results):
    """Convert a pipe-delimited text table into a pandas DataFrame.

    The first line is the header. Ruling lines (``----|----``) and blank
    lines are ignored wherever they appear. Rows whose cell count differs
    from the header are skipped. When the header is framed by outer pipes
    (Markdown style, ``| a | b |``) the same outer pipes are removed from
    each row so no phantom empty columns are produced.

    Args:
        execution_results: Text of the table, one row per line.

    Returns:
        A DataFrame of string cells with the header as column names, or
        ``None`` when the input is not a string, has fewer than three
        lines, or yields no usable data rows.
    """
    if not isinstance(execution_results, str):
        return None

    lines = [raw.strip() for raw in execution_results.strip().split('\n')]

    # A table needs at least three lines (header, separator, data).
    if len(lines) < 3:
        return None

    header_line = lines[0]
    # Only strip outer pipes from rows when the header itself is framed;
    # otherwise a trailing pipe means "last cell is empty" and must be kept.
    framed_left = header_line.startswith('|')
    framed_right = header_line.endswith('|')

    def split_cells(line):
        if framed_left and line.startswith('|'):
            line = line[1:]
        if framed_right and line.endswith('|'):
            line = line[:-1]
        return [cell.strip() for cell in line.split('|')]

    headers = split_cells(header_line)

    data_rows = []
    # Start at the second line: it is usually the ruling, but if the model
    # omitted the ruling it is the first data row and must not be lost.
    for line in lines[1:]:
        if not line or _is_separator_line(line):
            continue
        row = split_cells(line)
        # Only keep rows that match the number of headers.
        if len(row) == len(headers):
            data_rows.append(row)

    if not data_rows:
        return None

    return pd.DataFrame(data_rows, columns=headers)
105
+
106
# System prompt: defines the assistant's role and the required output layout.
_SQL_SYSTEM_PROMPT = """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
After generating the SQL query, you must:
1. Create a sample SQL table schema based on the natural language description, including all necessary columns with appropriate data types
2. Populate the table with realistic sample data that demonstrates the query's functionality
3. Execute the generated SQL query against the sample table
4. Display the SQL table structure and data clearly
5. Show the query execution results in a pipe-delimited table format
IMPORTANT: The execution_results field must contain a properly formatted table with:
- Header row with column names separated by pipes (|)
- A separator row with dashes
- Data rows with values separated by pipes (|)
Example format:
column1 | column2 | column3
--------|---------|--------
value1 | value2 | value3
value4 | value5 | value6
Always present your response in this order:
- Generated SQL query with syntax explanation
- Table schema (CREATE TABLE statement)
- Sample data (INSERT statements or table visualization)
- Query execution results (in pipe-delimited table format)
- Any relevant notes about assumptions made or query optimization suggestions"""


def _error_outputs(message):
    """Return the six-slot output tuple with *message* first and blanks after."""
    return message, "", "", "", None, ""


def _format_validation(status):
    """Render a validation status as 'Valid: ...' plus errors or an all-clear."""
    text = f"Valid: {status.is_valid}\n"
    if status.syntax_errors:
        return text + "Errors:\n" + "\n".join(
            f"- {error}" for error in status.syntax_errors
        )
    return text + "No syntax errors found"


def _format_metadata(result):
    """Render query type, tables, complexity and both note lists as text."""
    return "\n".join([
        f"Query Type: {result.query_type}",
        f"Tables Used: {', '.join(result.tables_used)}",
        f"Complexity: {result.estimated_complexity}",
        "Execution Notes:",
        "\n".join(f"- {note}" for note in result.execution_notes),
        "Optimization Notes:",
        "\n".join(f"- {note}" for note in result.optimization_notes),
    ])


def generate_sql_query(api_key, user_query):
    """Generate a SQL query from natural language using the GROQ API.

    Args:
        api_key: GROQ API key. Surrounding whitespace is ignored.
        user_query: Natural-language description of the desired query.
            Surrounding whitespace is ignored.

    Returns:
        A 6-tuple ``(query, metadata, table_schema, sample_data, results_df,
        validation_text)``. ``results_df`` is whatever
        ``parse_execution_results_to_dataframe`` returns for the model's
        execution results. On any failure -- missing input, API error,
        empty or malformed model output -- the first element is a message
        starting with ``"Error: "``, ``results_df`` is ``None`` and the
        remaining elements are empty strings. This function does not raise.
    """
    try:
        # Normalise first so whitespace-only (or None) input counts as missing
        # instead of being sent to the API.
        api_key = (api_key or "").strip()
        user_query = (user_query or "").strip()

        if not api_key:
            return _error_outputs("Error: Please enter your GROQ API key")
        if not user_query:
            return _error_outputs("Error: Please enter a query description")

        client = Groq(api_key=api_key)

        response = client.chat.completions.create(
            model="moonshotai/kimi-k2-instruct-0905",
            messages=[
                {"role": "system", "content": _SQL_SYSTEM_PROMPT},
                {"role": "user", "content": user_query},
            ],
            # Enforce structured JSON output using the Pydantic-derived schema.
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "sql_query_generation",
                    "schema": SQLQueryGeneration.model_json_schema(),
                },
            },
        )

        content = response.choices[0].message.content
        if not content:
            # Report this explicitly rather than as an opaque json.loads error.
            return _error_outputs("Error: The model returned an empty response")

        sql_query_generation = SQLQueryGeneration.model_validate(
            json.loads(content)
        )

        return (
            sql_query_generation.query,
            _format_metadata(sql_query_generation),
            sql_query_generation.table_schema,
            sql_query_generation.sample_data,
            parse_execution_results_to_dataframe(
                sql_query_generation.execution_results
            ),
            _format_validation(sql_query_generation.validation_status),
        )

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return _error_outputs(f"Error: {str(e)}")
258
+
259
# Build the Gradio UI: inputs on top, generated query and metadata below,
# then schema / sample data side by side, and finally the result grid.
with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
    gr.Markdown(
        """
        # 🗄️ Natural Language to SQL Query Generator
        Convert your natural language descriptions into structured SQL queries with validation and execution results.
        """
    )

    # --- Inputs -----------------------------------------------------------
    with gr.Row():
        with gr.Column():
            api_key_input = gr.Textbox(
                label="GROQ API Key",
                type="password",
                placeholder="Enter your GROQ API key here...",
                info="Your API key is not stored and only used for this session",
            )

            query_input = gr.Textbox(
                label="Natural Language Query",
                placeholder="e.g., Find all the students who scored more than 90 out of 100",
                lines=3,
                value="Find all the students who scored more than 90 out of 100",
            )

            generate_btn = gr.Button("Generate SQL Query", variant="primary", size="lg")

            # One-click sample prompts that fill the query box.
            example_prompts = [
                ["Find all the students who scored more than 90 out of 100"],
                ["Get the top 5 customers by total purchase amount"],
                ["List all employees hired in the last 6 months"],
                ["Find products with price between $50 and $100"],
                ["Show average salary by department"],
            ]
            gr.Examples(
                examples=example_prompts,
                inputs=query_input,
                label="Example Queries",
            )

    # --- Generated query, metadata and validation -------------------------
    with gr.Row():
        with gr.Column():
            sql_output = gr.Code(label="Generated SQL Query", language="sql", lines=5)
            metadata_output = gr.Textbox(label="Query Metadata", lines=8)
            validation_output = gr.Textbox(label="Validation Status", lines=3)

    # --- Sample schema and data, side by side -----------------------------
    with gr.Row():
        with gr.Column():
            schema_output = gr.Code(label="Table Schema", language="sql", lines=8)

        with gr.Column():
            sample_data_output = gr.Code(label="Sample Data", language="sql", lines=8)

    # --- Execution results ------------------------------------------------
    with gr.Row():
        execution_output = gr.Dataframe(
            label="📊 Execution Results",
            headers=None,
            datatype="str",
            row_count=10,
            col_count=None,
            wrap=True,
            interactive=False,
        )

    # Order must match the tuple returned by generate_sql_query:
    # (query, metadata, table_schema, sample_data, results_df, validation_text).
    result_components = [
        sql_output,
        metadata_output,
        schema_output,
        sample_data_output,
        execution_output,
        validation_output,
    ]
    generate_btn.click(
        fn=generate_sql_query,
        inputs=[api_key_input, query_input],
        outputs=result_components,
    )

if __name__ == "__main__":
    demo.launch(share=True)
357
+
358
+