Spaces:

Nikhil-Murade
/

TalentEdge-AI-Agent

Sleeping

App Files Files Community

TalentEdge-AI-Agent / app.py

Nikhil-Murade

Update app.py

33ca36a verified 4 months ago

raw

history blame contribute delete

5.35 kB

	import os
	from dotenv import load_dotenv
	from langchain_openai import ChatOpenAI
	import pandas as pd

	from sqlalchemy import create_engine

	# load environment variables from .env file
	load_dotenv()

	openai_key = os.getenv("OPENAI_API_KEY")

	llm_name = "gpt-3.5-turbo"
	model = ChatOpenAI(api_key=openai_key, model=llm_name)

	# read csv file
	df = pd.read_csv("Struct Data_Data Science 100K.csv")

	from langchain.agents import create_sql_agent
	from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
	from langchain_community.utilities import SQLDatabase

	# Create db from csv file

	# Path to your SQLite database file
	database_file_path = "data_science_100k.db"

	# Create an engine to connect to the SQLite database
	# SQLite only requires the path to the database file
	engine = create_engine(f"sqlite:///{database_file_path}")
	file_url = "./Struct Data_Data Science 100K.csv"
	# os.makedirs(os.path.dirname(database_file_path), exist_ok=True)

	df = pd.read_csv(file_url)
	df.to_sql("DataScience100k", con=engine, if_exists="replace", index=False)

	print(f"Database created successfully! {df}")


	# db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
	# toolkit = SQLDatabaseToolkit(db=db, llm=model)

	# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""

	# sql_agent = create_sql_agent(
	# toolkit=toolkit,
	# llm=model,
	# verbose=True
	# )

	# sql_agent.invoke(QUESTION)

	# res = sql_agent.invoke(QUESTION)

	# # print(res)

	# Part 2 : Prepare the sql prompt
	MSSQL_AGENT_PREFIX = """
	You are an agent designed to interact with a SQL database.
	## Instructions:
	- Given an input question, create a syntactically correct {dialect} query
	to run, then look at the results of the query and return the answer.
	- Unless the user specifies a specific number of examples they wish to
	obtain, ALWAYS limit your query to at most {top_k} results.
	- You can order the results by a relevant column to return the most
	interesting examples in the database.
	- Never query for all the columns from a specific table, only ask for
	the relevant columns given the question.
	- You have access to tools for interacting with the database.
	- You MUST double check your query before executing it.If you get an error
	while executing a query,rewrite the query and try again.
	- DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.)
	to the database.
	- DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS
	OF THE CALCULATIONS YOU HAVE DONE.
	- Your response should be in Markdown. However, **when running a SQL Query
	in "Action Input", do not include the markdown backticks**.
	Those are only for formatting the response, not for executing the command.
	- ALWAYS, as part of your final answer, explain how you got to the answer
	on a section that starts with: "Explanation:". Include the SQL query as
	part of the explanation section.
	- If the question does not seem related to the database, just return
	"I don\'t know" as the answer.
	- Only use the below tools. Only use the information returned by the
	below tools to construct your query and final answer.
	- Do not make up table names, only use the tables returned by any of the
	tools below.
	- as part of your final answer, please include the SQL query you used in json format or code format

	## Tools:

	"""

	MSSQL_AGENT_FORMAT_INSTRUCTIONS = """

	## Use the following format:

	Question: the input question you must answer.
	Thought: you should always think about what to do.
	Action: the action to take, should be one of [{tool_names}].
	Action Input: the input to the action.
	Observation: the result of the action.
	... (this Thought/Action/Action Input/Observation can repeat N times)
	Thought: I now know the final answer.
	Final Answer: the final answer to the original input question.

	Example of Final Answer:
	<=== Beginning of example

	Action: query_sql_db
	Action Input:
	SELECT TOP (10) [base_salary], [grade]
	FROM salaries_2023

	WHERE state = 'Division'

	Observation:
	[(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)]
	Thought:I now know the final answer
	Final Answer: There were 27437 workers making 100,000.

	Explanation:
	I queried the `xyz` table for the `salary` column where the department
	is 'IGM' and the date starts with '2020'. The query returned a list of tuples
	with the bazse salary for each day in 2020. To answer the question,
	I took the sum of all the salaries in the list, which is 27437.
	I used the following query

	```sql
	SELECT [salary] FROM xyztable WHERE department = 'IGM' AND date LIKE '2020%'"
	```
	===> End of Example

	"""

	db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
	toolkit = SQLDatabaseToolkit(db=db, llm=model)

	# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""

	sql_agent = create_sql_agent(
	prefix=MSSQL_AGENT_PREFIX,
	format_instructions=MSSQL_AGENT_FORMAT_INSTRUCTIONS,
	toolkit=toolkit,
	llm=model,
	tok_k=30,
	verbose=True
	)

	# res = sql_agent.invoke(QUESTION)


	import streamlit as st

	st.title("SQL Query AI Agent")

	question = st.text_input("Enter your query:")

	if st.button("Run Query"):
	if question:
	res = sql_agent.invoke(question)

	st.markdown(res["output"])

	else:
	st.error("Please Enter a Query.")