# from langchain_openai import OpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from dotenv import load_dotenv
import os
import json
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import weaviate
from langchain_community.vectorstores import Weaviate
from weaviate.embedded import EmbeddedOptions
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import gradio as gr
# from langchain_community.llms import ctransformers
# from ctransformers import AutoModelForCausalLM
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")
TEMP_DIR = "../temp"
# csv
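# Build a LangChain pandas-DataFrame agent over a CSV file; the agent writes
# and runs pandas code against the frame to answer questions about it.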
def agent(filename: str):
    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        # model="gpt-4",
        temperature=0.0,
        # max_tokens=256,
        # top_p=0.5,
    )
    df = pd.read_csv(filename, encoding="unicode_escape")
    pandas_df_agent = create_pandas_dataframe_agent(llm, df, verbose=True)
    return pandas_df_agent
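# Wrap the user's query in a prompt that forces the agent to reply with a
# single JSON object: an "answer" string, or a "bar"/"line" chart spec.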
def get_response(agent, query):
    prompt = (
        """
        For the following query, if it requires drawing a table, reply as follows:
        {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
        If the query requires creating a bar chart, reply as follows:
        {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        If the query requires creating a line chart, reply as follows:
        {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        There can only be two types of charts, "bar" and "line".
        If it is just asking a question that requires neither, reply as follows:
        {"answer": "answer"}
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars.'"}
        Include supporting numbers in the answer if there are any.
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars' with 1111 sales."}
        If you do not know the answer, reply as follows:
        {"answer": "I do not know."}
        Do not hallucinate or make up data. If the data is not available, reply "I do not know."
        Return all output as a string in double quotes.
        All strings in the "columns" list and the "data" list should be in double quotes.
        For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}
        Let's think step by step.
        Below is the query.
        Query:
        """
        + query
    )
    response = agent.run(prompt)
    return str(response)
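# Decode the agent's JSON reply; returns None when the model emitted
# something that is not valid JSON.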
def return_response(response: str) -> dict:
    try:
        return json.loads(response)
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}")
        return None
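# Turn the decoded dict into a displayable result: plain text for "answer",
# or a Gradio plot component for a "bar"/"line" chart spec.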
def write_response(response_dict: dict):
    if response_dict is not None:
        if "answer" in response_dict:
            answer = response_dict["answer"]
            # st.write(answer)
            return answer
        if "bar" in response_dict:
            data = response_dict["bar"]
            df = pd.DataFrame(data)
            # st.bar_chart(df)
            # gr.BarPlot needs the x/y column names, so keep "columns"/"data"
            # as DataFrame columns rather than setting them as the index.
            return gr.BarPlot(df, x="columns", y="data")
        if "line" in response_dict:
            data = response_dict["line"]
            df = pd.DataFrame(data)
            # st.line_chart(df)
            return gr.LinePlot(df, x="columns", y="data")
        # if "table" in response_dict:
        #     data = response_dict["table"]
        #     df = pd.DataFrame(data["data"], columns=data["columns"])
        #     # st.table(df)
    else:
        answer = "Decoded response is None. Please retry with a better prompt."
        return answer
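# Full CSV pipeline: build the agent, query it, decode the JSON, render.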
def ques_csv(data, question: str):
    csv_agent = agent(data)
    response = get_response(agent=csv_agent, query=question)
    decoded_response = return_response(response)
    answer = write_response(decoded_response)
    return answer
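# Example usage (hypothetical file path and question):
#   ques_csv("../temp/sales.csv", "Which product line has the highest sales?")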
# pdf
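# End-to-end RAG over a PDF: load, chunk, embed into Weaviate, then answer.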
def ques_pdf(data, question: str):
    doc = load_pdf(data)
    chunks = split_pdf(doc)
    retriever = store_retrieve(chunks)
    prompt = write_prompt()
    answer = ques_llm(retriever, prompt, question)
    # st.write(answer)
    return answer
def make_dir():
    if not os.path.exists(TEMP_DIR):
        os.makedirs(TEMP_DIR)
def upload(uploaded_file):
    # Note: .getvalue() is Streamlit's UploadedFile API (see the st.* comments
    # elsewhere); Gradio file uploads usually arrive as file paths instead.
    if uploaded_file is not None:
        file_path = os.path.join(TEMP_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        return file_path
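# Load the PDF; load_and_split already chunks the pages once here, and
# split_pdf below re-chunks them more finely for retrieval.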
def load_pdf(filename: str):
    loader = PyPDFLoader(filename)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    pages = loader.load_and_split(text_splitter=text_splitter)
    return pages
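# Re-split into 500-character chunks with 50-character overlap.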
def split_pdf(doc):
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(doc)
    return chunks
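# Embed the chunks with OpenAI embeddings into an embedded (in-process)
# Weaviate instance and expose it as a LangChain retriever.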
def store_retrieve(chunks):
    client = weaviate.Client(
        embedded_options=EmbeddedOptions()
    )
    vectorstore = Weaviate.from_documents(
        client=client,
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        by_text=False,
    )
    retriever = vectorstore.as_retriever()
    return retriever
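# Prompt template for the RAG chain: answer only from the retrieved context.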
def write_prompt():
    template = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)
    return prompt
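# Assemble and run the RAG chain: retriever -> prompt -> LLM -> plain string.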
def ques_llm(retriever, prompt, question):
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    # # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q8_0.bin", temperature=0)
    # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q4_0.bin", temperature=0)
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    ans = rag_chain.invoke(question)
    return ans
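# A minimal launch sketch, not part of the original file (the Space wires
# these functions into its own Gradio UI elsewhere): a text-only front end
# over ques_csv. Chart replies (gr.BarPlot / gr.LinePlot returns) would need
# a plot output component instead of "text".
if __name__ == "__main__":
    demo = gr.Interface(
        fn=ques_csv,
        inputs=[gr.File(type="filepath", label="CSV file"), gr.Textbox(label="Question")],
        outputs="text",
    )
    demo.launch()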