Spaces:

thechaiexperiment
/

TextToSQL

Running

App Files Files Community

thechaiexperiment committed on Feb 10

Commit

4aa996b

verified ·

1 Parent(s): 350e55d

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -25

app.py CHANGED Viewed

@@ -10,26 +10,33 @@ import os
 OPENROUTER_API_KEY = "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"
 OPENROUTER_MODEL = "sophosympatheia/rogue-rose-103b-v0.2:free"
-# Hugging Face Space path
-DB_PATH = "ecommerce.db"
 # Ensure dataset exists
-if not os.path.exists(DB_PATH):
-    os.system("wget https://your-dataset-link.com/ecommerce.db -O ecommerce.db")  # Replace with actual dataset link
 # Initialize OpenAI client
 openai_client = openai.OpenAI(api_key=OPENROUTER_API_KEY, base_url="https://openrouter.ai/api/v1")
-# Few-shot examples for text-to-SQL
 few_shot_examples = [
-    {"input": "Show all customers from São Paulo.", "output": "SELECT * FROM customers WHERE customer_state = 'SP';"},
-    {"input": "Find the total sales per product.", "output": "SELECT product_id, SUM(price) FROM order_items GROUP BY product_id;"},
-    {"input": "List all orders placed in 2017.", "output": "SELECT * FROM orders WHERE order_purchase_timestamp LIKE '2017%';"}
 ]
-# Function: Convert text to SQL
 def text_to_sql(query):
-    prompt = "Convert the following queries into SQL:\n\n"
     for example in few_shot_examples:
         prompt += f"Input: {example['input']}\nOutput: {example['output']}\n\n"
     prompt += f"Input: {query}\nOutput:"
@@ -37,33 +44,31 @@ def text_to_sql(query):
     try:
         response = openai_client.chat.completions.create(
             model=OPENROUTER_MODEL,
-            messages=[{"role": "system", "content": "You are an SQL expert."}, {"role": "user", "content": prompt}]
         )
         sql_query = response.choices[0].message.content.strip()
-        sql_query = sql_query.split("\n")[0]  # Take only the first line if multiple lines exist
-        sql_query = sql_query.replace("mathchar", "").rstrip(";")  # Remove unwanted text
-        return sql_query
     except Exception as e:
         return f"Error: {e}"
-# Function: Execute SQL on SQLite database
 def execute_sql(sql_query):
     try:
-        sql_query = sql_query.strip().rstrip(";")  # Remove trailing semicolons
-        sql_query = sql_query.replace("mathchar", "")  # Remove any bad tokens
-        conn = sqlite3.connect(DB_PATH)
         df = pd.read_sql_query(sql_query, conn)
         conn.close()
         return df
     except Exception as e:
         return f"SQL Execution Error: {e}"
-# Function: Generate Dynamic Visualization
 def visualize_data(df):
     if df.empty or df.shape[1] < 2:
         return None
-    # Detect numeric columns
     numeric_cols = df.select_dtypes(include=['number']).columns
     if len(numeric_cols) < 1:
         return None
@@ -71,17 +76,16 @@ def visualize_data(df):
     plt.figure(figsize=(6, 4))
     sns.set_theme(style="darkgrid")
-    # Choose visualization type dynamically
-    if len(numeric_cols) == 1:  # Single numeric column, assume it's a count metric
         sns.histplot(df[numeric_cols[0]], bins=10, kde=True, color="teal")
         plt.title(f"Distribution of {numeric_cols[0]}")
-    elif len(numeric_cols) == 2:  # Two numeric columns, assume X-Y plot
         sns.scatterplot(x=df[numeric_cols[0]], y=df[numeric_cols[1]], color="blue")
         plt.title(f"{numeric_cols[0]} vs {numeric_cols[1]}")
-    elif df.shape[0] < 10:  # If rows are few, prefer pie chart
         plt.pie(df[numeric_cols[0]], labels=df.iloc[:, 0], autopct='%1.1f%%', colors=sns.color_palette("pastel"))
         plt.title(f"Proportion of {numeric_cols[0]}")
-    else:  # Default: Bar chart for categories + values
         sns.barplot(x=df.iloc[:, 0], y=df[numeric_cols[0]], palette="coolwarm")
         plt.xticks(rotation=45)
         plt.title(f"{df.columns[0]} vs {numeric_cols[0]}")

 # SECURITY: the OpenRouter key used to be hard-coded on this line, which exposes
 # it to anyone who can read the repository. It is now read from the environment
 # (e.g. a Space secret); the previously committed key should be revoked.
 OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
 OPENROUTER_MODEL = "sophosympatheia/rogue-rose-103b-v0.2:free"
 # Same warn-and-continue style as the database check below: the module still
 # loads, but API calls are expected to fail authentication until the key is set.
 if not OPENROUTER_API_KEY:
     print("OPENROUTER_API_KEY is not set! Add it as a secret or environment variable.")
 # Database Path
 db_path = "ecommerce.db"
 # Ensure dataset exists (only warns; nothing is downloaded or created here)
 if not os.path.exists(db_path):
     print("Database file not found! Please upload ecommerce.db.")
 # Initialize OpenAI client, pointed at the OpenRouter endpoint
 openai_client = openai.OpenAI(api_key=OPENROUTER_API_KEY, base_url="https://openrouter.ai/api/v1")
 # Few-shot examples (SQLite-compatible queries) that text_to_sql() prepends to
 # every prompt as "Input:/Output:" pairs.
 few_shot_examples = [
     {"input": "Find the busiest months for orders.",
      "output": "SELECT strftime('%m', order_purchase_timestamp) AS month, COUNT(*) AS order_count FROM orders GROUP BY month ORDER BY order_count DESC;"},
     {"input": "Show all customers from São Paulo.",
      "output": "SELECT * FROM customers WHERE customer_state = 'SP';"},
     {"input": "Find the total sales per product.",
      "output": "SELECT product_id, SUM(price) FROM order_items GROUP BY product_id;"},
     {"input": "List all orders placed in 2017.",
      "output": "SELECT * FROM orders WHERE order_purchase_timestamp LIKE '2017%';"}
 ]
+# Function: Convert Text to SQL
 def text_to_sql(query):
     """Ask the configured chat model to translate *query* into SQLite SQL.

     The prompt is a fixed instruction line, every pair from
     ``few_shot_examples`` rendered as ``Input:/Output:``, and finally the
     caller's query with an open ``Output:`` for the model to complete.

     Returns the model's reply (whitespace-stripped) when it begins with
     "select" (case-insensitive). Otherwise returns a string starting with
     "Error:" describing either the rejected reply or the exception raised
     by the API call.
     """
     shots = "".join(
         f"Input: {shot['input']}\nOutput: {shot['output']}\n\n"
         for shot in few_shot_examples
     )
     prompt = (
         "Convert the following queries into SQLite-compatible SQL:\n\n"
         f"{shots}Input: {query}\nOutput:"
     )
     chat = [
         {"role": "system", "content": "You are an SQLite expert."},
         {"role": "user", "content": prompt},
     ]
     try:
         completion = openai_client.chat.completions.create(
             model=OPENROUTER_MODEL,
             messages=chat,
         )
         generated = completion.choices[0].message.content.strip()
     except Exception as e:
         return f"Error: {e}"
     # Only replies that start with SELECT are passed through as SQL.
     if generated.lower().startswith("select"):
         return generated
     return f"Error: Invalid SQL generated - {generated}"
+# Function: Execute SQL on SQLite Database
 def execute_sql(sql_query):
     """Run *sql_query* against the SQLite database at ``db_path``.

     Returns a pandas DataFrame with the result rows on success. On any
     failure (connecting, executing, or closing) returns a string starting
     with "SQL Execution Error:" instead of raising.

     NOTE(review): the query text is executed as given; callers appear to
     pass model-generated SQL, so confirm it is validated upstream.
     """
     try:
         conn = sqlite3.connect(db_path)
         try:
             return pd.read_sql_query(sql_query, conn)
         finally:
             # Close on the error path too; previously a failing query
             # skipped conn.close() and leaked the connection.
             conn.close()
     except Exception as e:
         return f"SQL Execution Error: {e}"
+# Function: Generate Data Visualization
 def visualize_data(df):
     if df.empty or df.shape[1] < 2:
         return None
     numeric_cols = df.select_dtypes(include=['number']).columns
     if len(numeric_cols) < 1:
         return None
     plt.figure(figsize=(6, 4))
     sns.set_theme(style="darkgrid")
+    if len(numeric_cols) == 1:
         sns.histplot(df[numeric_cols[0]], bins=10, kde=True, color="teal")
         plt.title(f"Distribution of {numeric_cols[0]}")
+    elif len(numeric_cols) == 2:
         sns.scatterplot(x=df[numeric_cols[0]], y=df[numeric_cols[1]], color="blue")
         plt.title(f"{numeric_cols[0]} vs {numeric_cols[1]}")
+    elif df.shape[0] < 10:
         plt.pie(df[numeric_cols[0]], labels=df.iloc[:, 0], autopct='%1.1f%%', colors=sns.color_palette("pastel"))
         plt.title(f"Proportion of {numeric_cols[0]}")
+    else:
         sns.barplot(x=df.iloc[:, 0], y=df[numeric_cols[0]], palette="coolwarm")
         plt.xticks(rotation=45)
         plt.title(f"{df.columns[0]} vs {numeric_cols[0]}")