Spaces:

nileshhanotia
/

text2sql

Sleeping

App Files Files Community

nileshhanotia committed on Oct 25

Commit

39e6004

•

1 Parent(s): 17ee535

Create app.py

Browse files

Files changed (1) hide show

app.py +208 -0

app.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from functools import lru_cache
+import json
+import mysql.connector
+from mysql.connector import Error
+import os
+import sys
+from datetime import datetime
+import time
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Database configuration.
# SECURITY: credentials committed to source control must be treated as
# compromised. Every connection setting can be overridden through an
# environment variable (DB_HOST, DB_NAME, DB_USER, DB_PASSWORD, DB_PORT).
# The literals below remain only as fallbacks so an existing deployment keeps
# working; rotate the password and supply the new one via DB_PASSWORD.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'sql12.freemysqlhosting.net'),
    'database': os.environ.get('DB_NAME', 'sql12740625'),
    'user': os.environ.get('DB_USER', 'sql12740625'),
    'password': os.environ.get('DB_PASSWORD', 'QGG9kdrE4g'),
    # Environment values are strings; the connector expects an integer port.
    'port': int(os.environ.get('DB_PORT', '3306')),
    'pool_size': 5,
    'pool_reset_session': True,
}

# Model and tokenizer shared by the whole module; both stay None until
# initialize_model() has run.
GLOBAL_MODEL = None
GLOBAL_TOKENIZER = None
def _load_model_and_tokenizer(model_name):
    """Load the tokenizer and causal language model for *model_name*.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has been moved to the
        module-level ``device`` and switched to evaluation mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # float32 is safe on CPU as well as GPU
    ).to(device)
    # Inference only: evaluation mode disables dropout and similar layers.
    model.eval()
    return tokenizer, model


def initialize_model():
    """Populate ``GLOBAL_MODEL`` and ``GLOBAL_TOKENIZER``.

    Streamlit re-executes the script on every widget interaction, so loading
    the weights unconditionally would repeat the expensive load each time.
    When the installed Streamlit provides ``st.cache_resource`` the loader is
    wrapped with it so the loaded objects are reused across reruns; on older
    versions without that API the model is loaded directly, as before.

    Progress and timing messages are written to the Streamlit page.
    """
    global GLOBAL_MODEL, GLOBAL_TOKENIZER
    st.write("Initializing model and tokenizer...")
    start_time = time.time()
    model_name_sql = "premai-io/prem-1B-SQL"

    loader = _load_model_and_tokenizer
    # Looked up lazily so the module still imports on Streamlit versions
    # that predate cache_resource.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        loader = cache_resource(_load_model_and_tokenizer)

    GLOBAL_TOKENIZER, GLOBAL_MODEL = loader(model_name_sql)
    st.write(f"Model initialization took {time.time() - start_time:.2f} seconds")
def test_db_connection():
    """Check that the configured MySQL database is reachable.

    Connects with ``DB_CONFIG`` and a 10-second connect timeout, then asks
    the server for its version and the currently selected database.

    Returns:
        A ``(success, message)`` tuple: ``(True, <server/database summary>)``
        on success, otherwise ``(False, <reason>)``.
    """
    connection = None
    try:
        connection = mysql.connector.connect(
            **DB_CONFIG,
            connect_timeout=10
        )
        if not connection.is_connected():
            return False, "Unable to establish database connection"

        db_info = connection.get_server_info()
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT DATABASE();")
            db_name = cursor.fetchone()[0]
        finally:
            # Release the cursor even when the probe query fails.
            cursor.close()
        return True, f"Successfully connected to MySQL Server version {db_info}\nDatabase: {db_name}"
    except Error as e:
        return False, f"Error connecting to MySQL database: {e}"
    finally:
        # Always give the connection back, including on the error paths
        # where the original code left it open.
        if connection is not None:
            try:
                connection.close()
            except Error:
                # Best effort: a failure while closing must not replace the
                # result that is already being returned.
                pass
def get_db_connection():
    """Open and return a MySQL connection built from ``DB_CONFIG``."""
    connection = mysql.connector.connect(**DB_CONFIG)
    return connection
def execute_query(query):
    """Run *query* against the configured database and return its rows.

    Args:
        query: SQL text to execute.

    Returns:
        A list of row dictionaries (column name -> value) on success, or a
        string starting with ``"Error executing query:"`` when the connector
        raises ``mysql.connector.Error``.

    NOTE(security): in this application the SQL text is produced by a
    language model from user input and is executed as-is. Connect with a
    read-only database account so a generated statement cannot modify data.
    """
    connection = None
    # Pre-bound so the cleanup below is safe when connection.cursor() itself
    # raises; previously that path failed with UnboundLocalError and hid the
    # real database error.
    cursor = None
    try:
        connection = get_db_connection()
        cursor = connection.cursor(dictionary=True, buffered=True)
        cursor.execute(query)
        return cursor.fetchall()
    except Error as e:
        return f"Error executing query: {e}"
    finally:
        if connection is not None and connection.is_connected():
            if cursor is not None:
                cursor.close()
            connection.close()
def generate_sql(natural_language_query):
    """Translate a natural-language question into a SQL query string.

    Builds a prompt from the fixed ``sales`` table schema and the question,
    runs it through ``GLOBAL_MODEL`` and returns the generated continuation.
    The elapsed time is written to the Streamlit page.

    Args:
        natural_language_query: The user's question in plain English.

    Returns:
        The generated SQL text, or a string starting with
        ``"Error generating SQL query:"`` if anything raises.
    """
    try:
        start_time = time.time()
        schema_info = """
        CREATE TABLE sales (
          pizza_id DECIMAL(8,2) PRIMARY KEY,
          order_id DECIMAL(8,2),
          pizza_name_id VARCHAR(14),
          quantity DECIMAL(4,2),
          order_date DATE,
          order_time VARCHAR(8),
          unit_price DECIMAL(5,2),
          total_price DECIMAL(5,2),
          pizza_size VARCHAR(3),
          pizza_category VARCHAR(7),
          pizza_ingredients VARCHAR(97),
          pizza_name VARCHAR(42)
        );
        """
        prompt = f"""### Task: Generate a SQL query to answer the following question.
        ### Database Schema:
        {schema_info}
        ### Question: {natural_language_query}
        ### SQL Query:"""
        inputs = GLOBAL_TOKENIZER(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
            return_attention_mask=True
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[1]
        with torch.no_grad():
            outputs = GLOBAL_MODEL.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                # max_length would count the prompt tokens as well, leaving
                # little or no budget for the answer once the schema is in
                # the prompt; max_new_tokens bounds only the generated part.
                max_new_tokens=256,
                temperature=0.1,
                do_sample=True,
                top_p=0.95,
                num_return_sequences=1,
                pad_token_id=GLOBAL_TOKENIZER.eos_token_id,
            )
        # The returned sequence starts with the prompt tokens; decode only
        # what the model appended after them.
        new_tokens = outputs[0][prompt_length:]
        sql_query = GLOBAL_TOKENIZER.decode(new_tokens, skip_special_tokens=True).strip()
        st.write(f"SQL generation took {time.time() - start_time:.2f} seconds")
        return sql_query
    except Exception as e:
        return f"Error generating SQL query: {str(e)}"
def format_result(query_result, max_rows=5):
    """Render a query result as human-readable text.

    Args:
        query_result: Either a list of row dictionaries or a message string
            (for example an error description).
        max_rows: Maximum number of rows to print when there are several
            results. Defaults to 5.

    Returns:
        ``"No results found."`` for an empty/falsy result; the string itself
        when *query_result* is a non-empty string; ``key: value`` lines for a
        single row; otherwise a numbered listing of the first *max_rows* rows
        with a trailer when rows were omitted.
    """
    if not query_result:
        return "No results found."
    # Any message string is passed through untouched. Previously only strings
    # containing "Error" were, and other strings crashed on ``.items()``.
    if isinstance(query_result, str):
        return query_result

    total = len(query_result)
    if total == 1:
        return "\n".join(f"{k}: {v}" for k, v in query_result[0].items())

    lines = [f"Found {total} results:\n"]
    for index, row in enumerate(query_result[:max_rows], 1):
        lines.append(f"Result {index}:")
        lines.extend(f"{k}: {v}" for k, v in row.items())
        lines.append("")  # blank separator line between rows
    if total > max_rows:
        lines.append(f"(Showing first {max_rows} of {total} results)")
    return "\n".join(lines)
def main():
    """Render the Streamlit page and handle one question/answer round trip.

    Flow: check the database connection (stopping early when it fails), make
    sure the model is loaded, read the user's question, and on button press
    generate SQL, execute it and show both the raw and the formatted result.
    """
    st.title("Natural Language to SQL Query")
    st.write("Ask questions about pizza sales data in plain English.")
    # Test and display database connection status
    db_success, db_message = test_db_connection()
    st.write(db_message)
    if not db_success:
        st.write("Could not connect to the database. Exiting.")
        return
    # Initialize model
    initialize_model()
    # Input field for natural language query
    natural_language_query = st.text_input("Enter your question", placeholder="e.g., What were the total sales for each pizza category?")
    if not st.button("Generate and Execute Query"):
        return
    if not natural_language_query:
        st.write("Please enter a query.")
        return

    sql_query = generate_sql(natural_language_query)
    # generate_sql reports failures as a message string; show it instead of
    # sending that message to the database as if it were SQL.
    if sql_query.startswith("Error generating SQL query:"):
        st.write(sql_query)
        return
    st.write("Generated SQL Query:", sql_query)

    query_result = execute_query(sql_query)
    formatted_result = format_result(query_result)
    st.write("Query Result:")
    # default=str: the sales schema declares DECIMAL and DATE columns, and
    # values of those types are not JSON serializable without a fallback.
    st.code(json.dumps(query_result, indent=2, default=str))
    st.write("Human-Readable Response:")
    st.text(formatted_result)


if __name__ == "__main__":
    main()