Spaces:
Runtime error
Runtime error
lakshmivairamani
committed on
Create poc_app.py
Browse files- poc_app.py +190 -0
poc_app.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
from langchain.memory import ConversationBufferMemory # Updated import
|
6 |
+
from langchain import OpenAI
|
7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
8 |
+
from langchain_community.utilities import SQLDatabase
|
9 |
+
from langchain_core.output_parsers import StrOutputParser
|
10 |
+
from langchain_core.runnables import RunnablePassthrough
|
11 |
+
from langchain_openai import ChatOpenAI
|
12 |
+
from langchain.agents import create_tool_calling_agent, AgentExecutor, Tool
|
13 |
+
from langchain.vectorstores import FAISS
|
14 |
+
|
15 |
+
from langchain_openai import OpenAIEmbeddings
|
16 |
+
from langchain.text_splitter import CharacterTextSplitter
|
17 |
+
from PyPDF2 import PdfReader
|
18 |
+
|
19 |
+
# --- Runtime configuration ---------------------------------------------------

# Conversation memory handed to the agent executor further down. The dialogue
# is kept as message objects under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# API keys come from the environment; a missing variable raises KeyError as
# soon as the module is loaded.
open_api_key_token = os.environ['OPEN_AI_API']
open_weather_api_key = os.environ['OPEN_WEATHER_API']

# ChatOpenAI / OpenAIEmbeddings are constructed below without an explicit key,
# so the key is re-exported under the variable name they read by default.
os.environ['OPENAI_API_KEY'] = open_api_key_token

# SQLAlchemy connection string, e.g.
#   mysql+mysqlconnector://<user>:<password>@<host>:3306/collegedb
# It is read from the environment so that credentials never live in source
# control.
# NOTE(review): the credentials that used to be hard-coded on this line were
# committed and must be treated as compromised - rotate them.
db_uri = os.environ['DB_URI']

# Database setup
db = SQLDatabase.from_uri(db_uri)

# LLM setup
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
|
38 |
+
#llm = OpenAI(temperature=0, api_key= os.environ['OPEN_AI_API'], model_name='gpt-3.5-turbo')
|
39 |
+
|
40 |
+
# Prompt that turns a natural-language question into SQL. ``{schema}`` and
# ``{question}`` are substituted when the chain is invoked.
template_query_generation = (
    "Based on the table schema below, write a SQL query that would answer the user's question:\n"
    "{schema}\n"
    "Question: {question}\n"
    "SQL Query:"
)
prompt_query_generation = ChatPromptTemplate.from_template(template_query_generation)
|
46 |
+
|
47 |
+
def get_schema(_):
    """Return the table information reported by the module-level ``db``.

    The single positional argument is ignored. It is accepted so the function
    can be used as a chain step (see ``generate_sql_query``), where the
    chain's input is passed to it.
    """
    table_info = db.get_table_info()
    return table_info
|
49 |
+
|
50 |
+
def generate_sql_query(question):
    """Ask the LLM to write a SQL query that answers ``question``.

    Args:
        question: The user's question in natural language.

    Returns:
        The model's reply as a plain string (expected to be a SQL statement).
    """
    # The schema is injected by the chain itself through ``get_schema``;
    # fetching it here as well would only cost an extra database round-trip.
    sql_chain = (
        RunnablePassthrough.assign(schema=get_schema)
        | prompt_query_generation
        | llm.bind(stop="\n SQL Result:")
        | StrOutputParser()
    )
    return sql_chain.invoke({"question": question})
|
59 |
+
|
60 |
+
def run_query(query):
    """Execute ``query`` against the module-level ``db`` and return its result."""
    result = db.run(query)
    return result
|
62 |
+
|
63 |
+
# Define the database query tool
|
64 |
+
def database_tool(question):
    """Answer ``question`` from the database.

    The question is first translated into SQL by ``generate_sql_query`` and
    the generated statement is then executed with ``run_query``.
    """
    return run_query(generate_sql_query(question))
|
67 |
+
|
68 |
+
# Define the weather data retrieval tool
|
69 |
+
def get_weather_data(location="United Arab Emirates"):
    """Return a one-sentence summary of the current weather for ``location``.

    Args:
        location: City or country name. ``None`` or a blank string falls back
            to "United Arab Emirates".

    Returns:
        A sentence with the temperature (metric units) and weather
        description, or a short failure message when the service cannot be
        reached or does not return weather data for the location.
    """
    if location is None or not location.strip():
        location = "United Arab Emirates"

    try:
        # ``params`` lets requests URL-encode the location (spaces, '&', ...)
        # instead of splicing it into the query string by hand. HTTPS keeps
        # the API key off the wire in clear text, and the timeout prevents a
        # stalled connection from hanging the agent.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"q": location, "appid": open_weather_api_key, "units": "metric"},
            timeout=10,
        )
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not JSON.
        return "Weather service is currently unavailable."

    try:
        # Any payload without these sections (unknown location, rejected API
        # key, ...) is treated as "not found" rather than raising KeyError.
        temperature = data["main"]["temp"]
        weather_description = data["weather"][0]["description"]
    except (KeyError, IndexError, TypeError):
        return "Weather data is not found."

    return f"The current temperature in {location} is {temperature}°C with {weather_description}."
|
88 |
+
#get_weather_data("United Arab Emirates")
|
89 |
+
|
90 |
+
# Define the document data tool
|
91 |
+
def load_and_split_pdf(pdf_path):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list of text chunks produced by ``CharacterTextSplitter`` with
        ``chunk_size=1000`` and ``chunk_overlap=200``.
    """
    reader = PdfReader(pdf_path)

    # ``or ""`` guards against a page for which no text could be extracted.
    page_texts = [page.extract_text() or "" for page in reader.pages]

    # Join once instead of growing a string page by page. The blank line
    # between pages keeps the last word of one page from fusing with the
    # first word of the next, and gives the splitter a boundary to cut on
    # (presumably its default separator is a blank line - confirm against the
    # installed version).
    text = "\n\n".join(page_text for page_text in page_texts if page_text)

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_text(text)
|
100 |
+
|
101 |
+
def create_vector_store(texts):
    """Build a FAISS vector store from ``texts`` using OpenAI embeddings."""
    return FAISS.from_texts(texts, OpenAIEmbeddings())
|
105 |
+
|
106 |
+
def query_vector_store(vector_store, query, k=4):
    """Return the text of the documents most similar to ``query``.

    Args:
        vector_store: Object exposing ``similarity_search(query, k=...)`` that
            returns items with a ``page_content`` attribute.
        query: The search text.
        k: Number of documents to retrieve. Defaults to 4, which keeps the
            retrieval depth explicit while letting callers tune it.

    Returns:
        The ``page_content`` of each hit joined by a blank line; an empty
        string when nothing is found.
    """
    docs = vector_store.similarity_search(query, k=k)
    return "\n\n".join(doc.page_content for doc in docs)
|
109 |
+
|
110 |
+
# Build the document index once, when the module is loaded: read the PDF at
# ``pdf_path`` (resolved against the current working directory), split it into
# chunks and embed those chunks into the vector store that
# ``document_data_tool`` searches.
pdf_path = "Inbound.pdf"
texts = load_and_split_pdf(pdf_path)
vector_store = create_vector_store(texts)
|
119 |
+
|
120 |
+
def document_data_tool(query):
    """Search the module-level ``vector_store`` for passages matching ``query``."""
    matching_text = query_vector_store(vector_store, query)
    return matching_text
|
122 |
+
|
123 |
+
# Initialize the agent with the tools
|
124 |
+
# Tools exposed to the agent. ``tool_choice`` is not a ``Tool`` field (it is an
# option of model tool binding), so it is not passed here.
# The DatabaseQuery tool (``database_tool``) is intentionally not registered.
tools = [
    Tool(
        name="WeatherData",
        func=get_weather_data,
        description="Tool to get current weather data for a city or country. Handles questions related to current weather conditions in specific cities or countries.",
    ),
    Tool(
        name="DocumentData",
        func=document_data_tool,
        description="Tool to search and retrieve information from the uploaded document. Provide responses with the maximum of 150 words.",
    ),
]
|
130 |
+
|
131 |
+
# System instructions for the agent. Plain string: it contains no template
# variables, so no f-string or brace escaping is needed.
prompt_template = (
    "You are an assistant that helps with database queries, weather information, and document retrieval.\n"
    "For weather-related questions, if the user specifies a city, provide the weather information for that city. "
    "If the user specifies a country or does not specify a city, provide the weather information for the specified "
    "country or the default location of 'United Arab Emirates'.\n"
    "For document-related questions, search and retrieve information from the uploaded document."
)

# A tool-calling agent needs ``agent_scratchpad`` to be a *messages*
# placeholder so tool calls and tool results reach the model as real messages
# rather than as a stringified list inside one human message. The
# ``chat_history`` placeholder matches the memory's ``memory_key`` so earlier
# turns are actually sent to the model.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", prompt_template),
        ("placeholder", "{chat_history}"),
        ("human", "Question: {input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
|
140 |
+
|
141 |
+
# Initialize the agent with memory.
# Conversation memory is an AgentExecutor concern: the executor loads it before
# each run and saves the turn afterwards. It must not be bound onto the model
# with ``llm.bind(memory=...)`` - that would forward ``memory`` as an argument
# of every model call and hand the agent factory a binding instead of the chat
# model it needs for tool binding.
llm_with_memory = llm  # name kept for backward compatibility
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)
|
147 |
+
|
148 |
+
# Define the interface function
|
149 |
+
# Upper bound on agent attempts per question, and the module-level attempt
# counter (starts at zero).
max_iterations, iterations = 5, 0
|
151 |
+
|
152 |
+
def answer_question(user_question, city="United Arab Emirates"):
    """Run the agent on ``user_question`` and return its answer as one line.

    The agent is invoked up to ``max_iterations`` times; an attempt is
    accepted as soon as its text does not contain the word "invalid"
    (case-insensitive).

    Args:
        user_question: The question typed by the user.
        city: Unused; kept so existing callers that pass it keep working.

    Returns:
        The agent's answer with all whitespace runs collapsed to single
        spaces, or a fixed failure message when no attempt was accepted.
    """
    # A local loop replaces the former module-global counter, so concurrent
    # requests cannot interfere with each other's retry budget.
    for _ in range(max_iterations):
        response = agent_executor.invoke({"input": user_question})
        if isinstance(response, dict):
            response_text = response.get("output", "")
        else:
            response_text = response
        # Guard the string operations below against a non-string payload.
        if not isinstance(response_text, str):
            response_text = str(response_text)
        if "invalid" not in response_text.lower():
            break
    else:
        # Every attempt was rejected (or the budget is zero).
        return "The agent could not generate a valid response within the iteration limit."

    # Print memory buffer for debugging
    print("Memory Buffer:", memory.buffer)
    print("Memory Buffer11:", memory.load_memory_variables({}))

    # Collapse newlines and repeated spaces into single spaces and trim.
    return " ".join(response_text.split())
|
179 |
+
|
180 |
+
# Create the Gradio interface
|
181 |
+
# Single text box in, single text box out; each submission is routed through
# ``answer_question``.
iface = gr.Interface(
    title="Chat with your data",
    description="Ask a question about the database or a document and get a response in natural language, including current weather information.",
    fn=answer_question,
    inputs="text",
    outputs="text",
)
|
188 |
+
|
189 |
+
# Launch the Gradio interface only when this file is executed as a script, so
# that importing the module (for reuse or testing) does not start a web server.
if __name__ == "__main__":
    iface.launch(share=True, debug=True)
|