Spaces:
Sleeping
Sleeping
File size: 5,354 Bytes
3d51c0d 553fd88 3d51c0d 553fd88 33ca36a 3d51c0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
import pandas as pd
from sqlalchemy import create_engine
# load environment variables from .env file
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")
llm_name = "gpt-3.5-turbo"
model = ChatOpenAI(api_key=openai_key, model=llm_name)
# read csv file
df = pd.read_csv("Struct Data_Data Science 100K.csv")
from langchain.agents import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
# Create db from csv file
# Path to your SQLite database file
database_file_path = "data_science_100k.db"
# Create an engine to connect to the SQLite database
# SQLite only requires the path to the database file
engine = create_engine(f"sqlite:///{database_file_path}")
file_url = "./Struct Data_Data Science 100K.csv"
# os.makedirs(os.path.dirname(database_file_path), exist_ok=True)
df = pd.read_csv(file_url)
df.to_sql("DataScience100k", con=engine, if_exists="replace", index=False)
print(f"Database created successfully! {df}")
# db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
# toolkit = SQLDatabaseToolkit(db=db, llm=model)
# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""
# sql_agent = create_sql_agent(
# toolkit=toolkit,
# llm=model,
# verbose=True
# )
# sql_agent.invoke(QUESTION)
# res = sql_agent.invoke(QUESTION)
# # print(res)
# Part 2 : Prepare the sql prompt
MSSQL_AGENT_PREFIX = """
You are an agent designed to interact with a SQL database.
## Instructions:
- Given an input question, create a syntactically correct {dialect} query
to run, then look at the results of the query and return the answer.
- Unless the user specifies a specific number of examples they wish to
obtain, **ALWAYS** limit your query to at most {top_k} results.
- You can order the results by a relevant column to return the most
interesting examples in the database.
- Never query for all the columns from a specific table, only ask for
the relevant columns given the question.
- You have access to tools for interacting with the database.
- You MUST double check your query before executing it.If you get an error
while executing a query,rewrite the query and try again.
- DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.)
to the database.
- DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS
OF THE CALCULATIONS YOU HAVE DONE.
- Your response should be in Markdown. However, **when running a SQL Query
in "Action Input", do not include the markdown backticks**.
Those are only for formatting the response, not for executing the command.
- ALWAYS, as part of your final answer, explain how you got to the answer
on a section that starts with: "Explanation:". Include the SQL query as
part of the explanation section.
- If the question does not seem related to the database, just return
"I don\'t know" as the answer.
- Only use the below tools. Only use the information returned by the
below tools to construct your query and final answer.
- Do not make up table names, only use the tables returned by any of the
tools below.
- as part of your final answer, please include the SQL query you used in json format or code format
## Tools:
"""
MSSQL_AGENT_FORMAT_INSTRUCTIONS = """
## Use the following format:
Question: the input question you must answer.
Thought: you should always think about what to do.
Action: the action to take, should be one of [{tool_names}].
Action Input: the input to the action.
Observation: the result of the action.
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer.
Final Answer: the final answer to the original input question.
Example of Final Answer:
<=== Beginning of example
Action: query_sql_db
Action Input:
SELECT TOP (10) [base_salary], [grade]
FROM salaries_2023
WHERE state = 'Division'
Observation:
[(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)]
Thought:I now know the final answer
Final Answer: There were 27437 workers making 100,000.
Explanation:
I queried the `xyz` table for the `salary` column where the department
is 'IGM' and the date starts with '2020'. The query returned a list of tuples
with the bazse salary for each day in 2020. To answer the question,
I took the sum of all the salaries in the list, which is 27437.
I used the following query
```sql
SELECT [salary] FROM xyztable WHERE department = 'IGM' AND date LIKE '2020%'"
```
===> End of Example
"""
db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
toolkit = SQLDatabaseToolkit(db=db, llm=model)
# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""
sql_agent = create_sql_agent(
prefix=MSSQL_AGENT_PREFIX,
format_instructions=MSSQL_AGENT_FORMAT_INSTRUCTIONS,
toolkit=toolkit,
llm=model,
tok_k=30,
verbose=True
)
# res = sql_agent.invoke(QUESTION)
import streamlit as st
st.title("SQL Query AI Agent")
question = st.text_input("Enter your query:")
if st.button("Run Query"):
if question:
res = sql_agent.invoke(question)
st.markdown(res["output"])
else:
st.error("Please Enter a Query.") |