# from langchain_openai import OpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from dotenv import load_dotenv
import os
import json
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import weaviate
from langchain_community.vectorstores import Weaviate
from weaviate.embedded import EmbeddedOptions
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import gradio as gr
# from langchain_community.llms import ctransformers
# from ctransformers import AutoModelForCausalLM

load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
TEMP_DIR = "../temp"

def agent(filename: str):
    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        # model="gpt-4",
        temperature=0.0,
        # max_tokens=256,
        # top_p=0.5,
    )
    df = pd.read_csv(filename, encoding="unicode_escape")
    # Note: recent langchain_experimental releases also require
    # allow_dangerous_code=True here, since the agent executes generated Python.
    pandas_df_agent = create_pandas_dataframe_agent(llm, df, verbose=True)
    return pandas_df_agent

def get_response(agent, query):
    prompt = (
        """
        For the following query, if it requires drawing a table, reply as follows:
        {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}

        If the query requires creating a bar chart, reply as follows:
        {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}

        If the query requires creating a line chart, reply as follows:
        {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}

        There can only be two types of charts, "bar" and "line".

        If it is just asking a question that requires neither, reply as follows:
        {"answer": "answer"}
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars'."}

        Include supporting numbers in the answer if there are any.
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars' with 1111 sales."}

        If you do not know the answer, reply as follows:
        {"answer": "I do not know."}
        Do not hallucinate or make up data. If the data is not available, reply "I do not know."

        Return all output as a string in double quotes.
        All strings in the "columns" list and the "data" list should be in double quotes.
        For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}

        Let's think step by step.

        Below is the query.
        Query:
        """
        + query
    )
    response = agent.run(prompt)
    return str(response)

def return_response(response: str) -> dict:
    try:
        return json.loads(response)
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}")
        return None

def write_response(response_dict: dict):
    if response_dict is not None:
        if "answer" in response_dict:
            answer = response_dict["answer"]
            # st.write(answer)
            return answer
        if "bar" in response_dict:
            data = response_dict["bar"]
            # Keep "columns" as a regular column so it can serve as the x-axis.
            df = pd.DataFrame(data)
            # st.bar_chart(df)
            return gr.BarPlot(df, x="columns", y="data")
        if "line" in response_dict:
            data = response_dict["line"]
            df = pd.DataFrame(data)
            # st.line_chart(df)
            return gr.LinePlot(df, x="columns", y="data")
        # if "table" in response_dict:
        #     data = response_dict["table"]
        #     df = pd.DataFrame(data["data"], columns=data["columns"])
        #     # st.table(df)
    else:
        answer = "Decoded response is None. Please retry with a better prompt."
        return answer
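
# The prompt above also allows a {"table": ...} reply, but write_response never
# renders it (that branch is commented out). A minimal sketch of how such a
# branch could return a Gradio table, assuming the JSON shape from the prompt;
# this helper is not part of the original app:
def write_table_response(response_dict: dict):
    if response_dict is not None and "table" in response_dict:
        data = response_dict["table"]
        df = pd.DataFrame(data["data"], columns=data["columns"])
        return gr.Dataframe(df)
    return None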

def ques_csv(data, question: str):
    csv_agent = agent(data)
    response = get_response(agent=csv_agent, query=question)
    decoded_response = return_response(response)
    answer = write_response(decoded_response)
    return answer
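
# Example usage (a sketch; "sales.csv" is a placeholder path, not a file
# shipped with this Space):
# answer = ques_csv("sales.csv", "Which product line has the highest sales?")
# print(answer)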

def ques_pdf(data, question: str):
    doc = load_pdf(data)
    chunks = split_pdf(doc)
    retriever = store_retrieve(chunks)
    prompt = write_prompt()
    answer = ques_llm(retriever, prompt, question)
    # st.write(answer)
    return answer

def make_dir():
    if not os.path.exists(TEMP_DIR):
        os.makedirs(TEMP_DIR)

def upload(uploaded_file):
    # Expects a Streamlit-style UploadedFile (with .name and .getvalue());
    # Gradio file inputs typically hand back a temp-file path instead.
    if uploaded_file is not None:
        file_path = os.path.join(TEMP_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        return file_path

def load_pdf(filename: str):
    loader = PyPDFLoader(filename)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    pages = loader.load_and_split(text_splitter=text_splitter)
    return pages

def split_pdf(doc):
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(doc)
    return chunks

def store_retrieve(chunks):
    # Runs Weaviate in embedded mode, so no external vector database is needed.
    client = weaviate.Client(
        embedded_options=EmbeddedOptions()
    )
    vectorstore = Weaviate.from_documents(
        client=client,
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        by_text=False
    )
    retriever = vectorstore.as_retriever()
    return retriever

def write_prompt():
    template = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)
    return prompt

def ques_llm(retriever, prompt, question):
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    # # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q8_0.bin", temperature=0)
    # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q4_0.bin", temperature=0)
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    ans = rag_chain.invoke(question)
    return ans
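
# The module imports gradio but never builds an interface. Below is a minimal
# sketch of how the pieces could be wired together; the extension-based routing
# and the component labels are assumptions, not part of the original app. Note
# that chart queries make write_response return a Gradio plot component, which
# would need a matching plot output instead of the plain Textbox used here.
def answer_question(uploaded_file, question: str):
    # Gradio may hand back either a path string or a temp-file object with a
    # .name attribute, depending on the version and component configuration.
    file_path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name
    if file_path.lower().endswith(".csv"):
        return ques_csv(file_path, question)
    return ques_pdf(file_path, question)

demo = gr.Interface(
    fn=answer_question,
    inputs=[gr.File(label="CSV or PDF file"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
)

if __name__ == "__main__":
    demo.launch()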