Nikhil-Murade's picture
Update app.py
33ca36a verified
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
import pandas as pd
from sqlalchemy import create_engine
# load environment variables from .env file
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")
llm_name = "gpt-3.5-turbo"
model = ChatOpenAI(api_key=openai_key, model=llm_name)
# read csv file
df = pd.read_csv("Struct Data_Data Science 100K.csv")
from langchain.agents import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
# Create db from csv file
# Path to your SQLite database file
database_file_path = "data_science_100k.db"
# Create an engine to connect to the SQLite database
# SQLite only requires the path to the database file
engine = create_engine(f"sqlite:///{database_file_path}")
file_url = "./Struct Data_Data Science 100K.csv"
# os.makedirs(os.path.dirname(database_file_path), exist_ok=True)
df = pd.read_csv(file_url)
df.to_sql("DataScience100k", con=engine, if_exists="replace", index=False)
print(f"Database created successfully! {df}")
# db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
# toolkit = SQLDatabaseToolkit(db=db, llm=model)
# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""
# sql_agent = create_sql_agent(
# toolkit=toolkit,
# llm=model,
# verbose=True
# )
# sql_agent.invoke(QUESTION)
# res = sql_agent.invoke(QUESTION)
# # print(res)
# Part 2 : Prepare the sql prompt
MSSQL_AGENT_PREFIX = """
You are an agent designed to interact with a SQL database.
## Instructions:
- Given an input question, create a syntactically correct {dialect} query
to run, then look at the results of the query and return the answer.
- Unless the user specifies a specific number of examples they wish to
obtain, **ALWAYS** limit your query to at most {top_k} results.
- You can order the results by a relevant column to return the most
interesting examples in the database.
- Never query for all the columns from a specific table, only ask for
the relevant columns given the question.
- You have access to tools for interacting with the database.
- You MUST double check your query before executing it.If you get an error
while executing a query,rewrite the query and try again.
- DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.)
to the database.
- DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS
OF THE CALCULATIONS YOU HAVE DONE.
- Your response should be in Markdown. However, **when running a SQL Query
in "Action Input", do not include the markdown backticks**.
Those are only for formatting the response, not for executing the command.
- ALWAYS, as part of your final answer, explain how you got to the answer
on a section that starts with: "Explanation:". Include the SQL query as
part of the explanation section.
- If the question does not seem related to the database, just return
"I don\'t know" as the answer.
- Only use the below tools. Only use the information returned by the
below tools to construct your query and final answer.
- Do not make up table names, only use the tables returned by any of the
tools below.
- as part of your final answer, please include the SQL query you used in json format or code format
## Tools:
"""
MSSQL_AGENT_FORMAT_INSTRUCTIONS = """
## Use the following format:
Question: the input question you must answer.
Thought: you should always think about what to do.
Action: the action to take, should be one of [{tool_names}].
Action Input: the input to the action.
Observation: the result of the action.
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer.
Final Answer: the final answer to the original input question.
Example of Final Answer:
<=== Beginning of example
Action: query_sql_db
Action Input:
SELECT TOP (10) [base_salary], [grade]
FROM salaries_2023
WHERE state = 'Division'
Observation:
[(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)]
Thought:I now know the final answer
Final Answer: There were 27437 workers making 100,000.
Explanation:
I queried the `xyz` table for the `salary` column where the department
is 'IGM' and the date starts with '2020'. The query returned a list of tuples
with the bazse salary for each day in 2020. To answer the question,
I took the sum of all the salaries in the list, which is 27437.
I used the following query
```sql
SELECT [salary] FROM xyztable WHERE department = 'IGM' AND date LIKE '2020%'"
```
===> End of Example
"""
db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
toolkit = SQLDatabaseToolkit(db=db, llm=model)
# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""
sql_agent = create_sql_agent(
prefix=MSSQL_AGENT_PREFIX,
format_instructions=MSSQL_AGENT_FORMAT_INSTRUCTIONS,
toolkit=toolkit,
llm=model,
tok_k=30,
verbose=True
)
# res = sql_agent.invoke(QUESTION)
import streamlit as st
st.title("SQL Query AI Agent")
question = st.text_input("Enter your query:")
if st.button("Run Query"):
if question:
res = sql_agent.invoke(question)
st.markdown(res["output"])
else:
st.error("Please Enter a Query.")