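# Question answering over uploaded files: CSVs are handled by a LangChain
# pandas dataframe agent, PDFs by a RAG chain over an embedded Weaviate store.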
# from langchain_openai import OpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from dotenv import load_dotenv
import os
import json
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import weaviate
from langchain_community.vectorstores import Weaviate
from weaviate.embedded import EmbeddedOptions
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import gradio as gr
# from langchain_community.llms import ctransformers
# from ctransformers import AutoModelForCausalLM

load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")

TEMP_DIR = "../temp"

def agent(filename: str):
    """Build a pandas dataframe agent over the uploaded CSV."""
    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        # model="gpt-4",
        temperature=0.0,
        # max_tokens=256,
        # top_p=0.5,
    )
    # unicode_escape tolerates CSVs with unknown or mixed encodings
    df = pd.read_csv(filename, encoding="unicode_escape")
    # NOTE: recent langchain_experimental releases also require
    # allow_dangerous_code=True here.
    pandas_df_agent = create_pandas_dataframe_agent(llm, df, verbose=True)
    return pandas_df_agent
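
# Usage sketch (hypothetical file name; the agent answers by generating
# and executing pandas code against the dataframe):
# pandas_agent = agent("sales.csv")
# pandas_agent.run("How many rows does the dataset have?")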

def get_response(agent, query):
    """Ask the pandas agent for a machine-readable (JSON) reply."""
    prompt = (
        """
        For the following query, if it requires drawing a table, reply as follows:
        {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
        If the query requires creating a bar chart, reply as follows:
        {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        If the query requires creating a line chart, reply as follows:
        {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        There can only be two types of charts, "bar" and "line".
        If it is just asking a question that requires neither, reply as follows:
        {"answer": "answer"}
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars.'"}
        Include supporting numbers in the answer if there are any.
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars' with 1111 sales."}
        If you do not know the answer, reply as follows:
        {"answer": "I do not know."}
        Do not hallucinate or make up data. If the data is not available, reply "I do not know."
        Return all output as a string in double quotes.
        All strings in the "columns" list and the "data" list should be in double quotes.
        For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}
        Let's think step by step.
        Below is the query.
        Query:
        """
        + query
    )
    response = agent.run(prompt)
    return str(response)

def return_response(response: str) -> dict:
    """Decode the agent's JSON reply; return None if it is not valid JSON."""
    try:
        return json.loads(response)
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}")
        return None
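
# Illustrative round trip of the JSON contract (made-up values):
# return_response('{"bar": {"columns": ["A", "B"], "data": [25, 24]}}')
#   -> {"bar": {"columns": ["A", "B"], "data": [25, 24]}}
# return_response("not json") -> None (and the decode error is printed)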

def write_response(response_dict: dict):
    """Render a decoded reply as plain text or a Gradio plot component."""
    if response_dict is not None:
        if "answer" in response_dict:
            answer = response_dict["answer"]
            # st.write(answer)
            return answer
        if "bar" in response_dict:
            data = response_dict["bar"]
            # {"columns": [...], "data": [...]} -> one row per category;
            # keep "columns" as a regular column so it can serve as the x-axis
            df = pd.DataFrame(data)
            # st.bar_chart(df)
            return gr.BarPlot(df, x="columns", y="data")
        if "line" in response_dict:
            data = response_dict["line"]
            df = pd.DataFrame(data)
            # st.line_chart(df)
            return gr.LinePlot(df, x="columns", y="data")
        # if "table" in response_dict:
        #     data = response_dict["table"]
        #     df = pd.DataFrame(data["data"], columns=data["columns"])
        #     # st.table(df)
    else:
        answer = "Decoded response is None. Please retry with a better prompt."
        return answer

def ques_csv(data, question: str):
    csv_agent = agent(data)
    response = get_response(agent=csv_agent, query=question)
    decoded_response = return_response(response)
    answer = write_response(decoded_response)
    return answer
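
# Usage sketch (hypothetical path and question):
# ques_csv("../temp/sales.csv", "Which product has the highest sales?")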

def ques_pdf(data, question: str):
    doc = load_pdf(data)
    chunks = split_pdf(doc)
    retriever = store_retrieve(chunks)
    prompt = write_prompt()
    answer = ques_llm(retriever, prompt, question)
    # st.write(answer)
    return answer
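
# Usage sketch (hypothetical path and question):
# ques_pdf("../temp/report.pdf", "What does the report conclude?")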

def make_dir():
    if not os.path.exists(TEMP_DIR):
        os.makedirs(TEMP_DIR)

def upload(uploaded_file):
    # Expects a Streamlit-style UploadedFile (has .name and .getvalue());
    # a Gradio gr.File input with type="filepath" already yields a path.
    if uploaded_file is not None:
        file_path = os.path.join(TEMP_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        return file_path

def load_pdf(filename: str):
    loader = PyPDFLoader(filename)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    pages = loader.load_and_split(text_splitter=text_splitter)
    return pages

def split_pdf(doc):
    # Re-split the 1000-character pages from load_pdf into smaller
    # 500-character chunks for retrieval
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(doc)
    return chunks

def store_retrieve(chunks):
    # Embedded Weaviate runs locally in-process; no external server needed
    client = weaviate.Client(
        embedded_options=EmbeddedOptions()
    )
    vectorstore = Weaviate.from_documents(
        client=client,
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        by_text=False,
    )
    retriever = vectorstore.as_retriever()
    return retriever

def write_prompt():
    template = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)
    return prompt

def ques_llm(retriever, prompt, question):
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    # # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q8_0.bin", temperature=0)
    # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q4_0.bin", temperature=0)
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    ans = rag_chain.invoke(question)
    return ans
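
# The Space's UI wiring is not included above (the commented-out st.* calls
# suggest a Streamlit port). A minimal Gradio sketch, assuming questions are
# routed by file extension; chart answers (gr.BarPlot / gr.LinePlot) would
# need a plot output component rather than the plain textbox used here:
if __name__ == "__main__":
    make_dir()

    def answer_file(file_path, question):
        if file_path.lower().endswith(".csv"):
            return ques_csv(file_path, question)
        return ques_pdf(file_path, question)

    demo = gr.Interface(
        fn=answer_file,
        inputs=[
            gr.File(label="CSV or PDF", type="filepath"),
            gr.Textbox(label="Question"),
        ],
        outputs=gr.Textbox(label="Answer"),
    )
    demo.launch()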