|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import gradio as gr |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
from groq import Groq |
|
|
import sqlite3 |
|
|
import pandas as pd |
|
|
|
|
|
load_dotenv() |
|
|
api = os.getenv("groq_api_key") |
|
|
|
|
|
|
|
|
def setup_database(): |
|
|
conn = sqlite3.connect("college.db") |
|
|
cursor = conn.cursor() |
|
|
|
|
|
|
|
|
cursor.execute("DROP TABLE IF EXISTS student;") |
|
|
cursor.execute("DROP TABLE IF EXISTS employee;") |
|
|
cursor.execute("DROP TABLE IF EXISTS course_info;") |
|
|
|
|
|
|
|
|
cursor.execute(""" |
|
|
CREATE TABLE student ( |
|
|
student_id INTEGER, |
|
|
first_name TEXT, |
|
|
last_name TEXT, |
|
|
date_of_birth TEXT, |
|
|
email TEXT, |
|
|
phone_number TEXT, |
|
|
major TEXT, |
|
|
year_of_enrollment INTEGER |
|
|
); |
|
|
""") |
|
|
|
|
|
cursor.execute("INSERT INTO student VALUES (1, 'Alice', 'Smith', '2000-05-01', 'alice@example.com', '1234567890', 'Computer Science', 2019);") |
|
|
|
|
|
|
|
|
cursor.execute(""" |
|
|
CREATE TABLE employee ( |
|
|
employee_id INTEGER, |
|
|
first_name TEXT, |
|
|
last_name TEXT, |
|
|
email TEXT, |
|
|
department TEXT, |
|
|
position TEXT, |
|
|
salary REAL, |
|
|
date_of_joining TEXT |
|
|
); |
|
|
""") |
|
|
|
|
|
cursor.execute("INSERT INTO employee VALUES (101, 'John', 'Doe', 'john@college.edu', 'CSE', 'Professor', 80000, '2015-08-20');") |
|
|
|
|
|
|
|
|
cursor.execute(""" |
|
|
CREATE TABLE course_info ( |
|
|
course_id INTEGER, |
|
|
course_name TEXT, |
|
|
course_code TEXT, |
|
|
instructor_id INTEGER, |
|
|
department TEXT, |
|
|
credits INTEGER, |
|
|
semester TEXT |
|
|
); |
|
|
""") |
|
|
|
|
|
cursor.execute("INSERT INTO course_info VALUES (501, 'AI Basics', 'CS501', 101, 'CSE', 4, 'Fall');") |
|
|
|
|
|
conn.commit() |
|
|
conn.close() |
|
|
|
|
|
|
|
|
setup_database() |
|
|
|
|
|
|
|
|
def create_metadata_embeddings(): |
|
|
student = """Table: student...""" |
|
|
employee = """Table: employee...""" |
|
|
course = """Table: course_info...""" |
|
|
metadata_list = [student, employee, course] |
|
|
model = SentenceTransformer('all-MiniLM-L6-v2') |
|
|
embeddings = model.encode(metadata_list) |
|
|
return embeddings, model, student, employee, course |
|
|
|
|
|
def find_best_fit(embeddings, model, user_query, student, employee, course): |
|
|
query_embedding = model.encode([user_query]) |
|
|
similarities = cosine_similarity(query_embedding, embeddings) |
|
|
best_match_table = similarities.argmax() |
|
|
return [student, employee, course][best_match_table] |
|
|
|
|
|
def create_prompt(user_query, table_metadata): |
|
|
system_prompt = """You are a SQL query generator specialized in generating SQL queries for a single table at a time. Your task is to accurately convert natural language queries into SQL statements based on the user's intent and the provided table metadata. |
|
|
|
|
|
Rules: |
|
|
- Multi-Table Queries Allowed: You can generate queries involving multiple tables using appropriate SQL JOIN operations, based on the provided metadata. |
|
|
- Join Logic: Use INNER JOIN, LEFT JOIN, or other appropriate joins based on logical relationships (e.g., foreign keys like `student_id`, `instructor_id`, etc.) inferred from the metadata. |
|
|
- Metadata-Based Validation: Always ensure the generated query matches the table names, columns, and data types as described in the metadata. |
|
|
- User Intent: Accurately capture the user's requirements such as filters, sorting, aggregations, and selections across one or more tables. |
|
|
- SQL Syntax: Use standard SQL syntax that is compatible with most relational database systems. |
|
|
- Output Format: Provide only the SQL query in a single line. Do not include explanations or any extra text. |
|
|
|
|
|
Input Format: |
|
|
User Query: The user's natural language request. |
|
|
Table Metadata: The structure of the relevant table, including the table name, column names, and data types. |
|
|
|
|
|
Output Format: |
|
|
SQL Query: A valid SQL query formatted for readability. |
|
|
Do not output anything else except the SQL query.Not even a single word extra.Ouput the whole query in a single line only. |
|
|
You are ready to generate SQL queries based on the user input and table metadata.""" |
|
|
user_prompt = f"User Query: {user_query}\nTable Metadata: {table_metadata}" |
|
|
return system_prompt, user_prompt |
|
|
|
|
|
def generate_sql(system_prompt, user_prompt): |
|
|
client = Groq(api_key=api) |
|
|
chat_completion = client.chat.completions.create( |
|
|
messages=[ |
|
|
{"role": "system", "content": system_prompt}, |
|
|
{"role": "user", "content": user_prompt}, |
|
|
], |
|
|
model="llama3-70b-8192", |
|
|
) |
|
|
res = chat_completion.choices[0].message.content.strip() |
|
|
if res.lower().startswith("select"): |
|
|
return res |
|
|
else: |
|
|
return None |
|
|
|
|
|
|
|
|
def execute_sql(sql_query): |
|
|
try: |
|
|
conn = sqlite3.connect("college.db") |
|
|
df = pd.read_sql_query(sql_query, conn) |
|
|
conn.close() |
|
|
return df |
|
|
except Exception as e: |
|
|
return str(e) |
|
|
|
|
|
|
|
|
|
|
|
def response(user_query): |
|
|
embeddings, model, student, employee, course = create_metadata_embeddings() |
|
|
table_metadata = find_best_fit(embeddings, model, user_query, student, employee, course) |
|
|
system_prompt, user_prompt = create_prompt(user_query, table_metadata) |
|
|
sql_query = generate_output(system_prompt, user_prompt) |
|
|
|
|
|
|
|
|
try: |
|
|
conn = sqlite3.connect("college.db") |
|
|
cursor = conn.cursor() |
|
|
cursor.execute(sql_query) |
|
|
result = cursor.fetchall() |
|
|
conn.close() |
|
|
|
|
|
return f"SQL Query:\n{sql_query}\n\nQuery Result:\n{result}" |
|
|
except Exception as e: |
|
|
return f"SQL Query:\n{sql_query}\n\nQuery Result:\nError: {str(e)}" |
|
|
|
|
|
|
|
|
|
|
|
desc = """Ask a natural language question about students, employees, or courses. I'll generate and run a SQL query for you.""" |
|
|
|
|
|
demo = gr.Interface( |
|
|
fn=response, |
|
|
inputs=gr.Textbox(label="Your Question"), |
|
|
outputs=gr.Textbox(label="SQL + Result"), |
|
|
title="Natural Language to SQL + Result", |
|
|
description="Ask a natural language question about students, employees, or courses. I'll generate and run a SQL query for you." |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
demo.launch(share=True) |
|
|
|