Spaces:

adityaagrawal
/

rag-assignment

Runtime error

App Files Files Community

adityaagrawal committed on May 2

Commit

7c0f544

•

1 Parent(s): 536d66f

Create utils.py

Browse files

Files changed (1) hide show

utils.py +200 -0

utils.py ADDED Viewed

	@@ -0,0 +1,200 @@

+# from langchain_openai import OpenAI
+from langchain_experimental.agents import create_pandas_dataframe_agent
+import pandas as pd
+from dotenv import load_dotenv
+import os
+import streamlit as st
+import json
+from langchain_openai import ChatOpenAI
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+import weaviate
+from langchain_community.vectorstores import Weaviate
+from weaviate.embedded import EmbeddedOptions
+from langchain.prompts import ChatPromptTemplate
+from langchain.document_loaders.pdf import PyPDFLoader
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.output_parser import StrOutputParser
+import gradio as gr
+# from langchain_community.llms import ctransformers
+# from ctransformers import AutoModelForCausalLM
+# Load variables from a local .env file into the process environment.
+load_dotenv()
+# OpenAI key read from the environment; os.getenv returns None when it is unset.
+# NOTE(review): API_KEY is not referenced by any function visible in this file.
+API_KEY=os.getenv("OPENAI_API_KEY")
+# Directory used by make_dir() and upload(). The path is relative, so it is
+# resolved against the process's current working directory.
+TEMP_DIR = "../temp"
+# csv
+def agent(filename: str):
+    """Create a LangChain pandas-DataFrame agent for a CSV file.
+
+    Args:
+        filename: Path of the CSV file; it is read with the
+            ``unicode_escape`` encoding.
+
+    Returns:
+        The agent produced by ``create_pandas_dataframe_agent`` over the
+        loaded DataFrame, driven by a ``gpt-3.5-turbo-0125`` chat model at
+        temperature 0.
+    """
+    chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.0)
+    frame = pd.read_csv(filename, encoding="unicode_escape")
+    return create_pandas_dataframe_agent(chat_model, frame, verbose=True)
+def get_response(agent, query):
+    """Ask the agent a question and return its reply as a string.
+
+    The query is appended to a fixed instruction block that tells the model
+    to answer with a JSON object keyed by "table", "bar", "line" or "answer".
+
+    Args:
+        agent: Object exposing ``run(prompt)``, e.g. the value returned by
+            the module-level ``agent()`` factory.
+        query: The user's question as a string.
+
+    Returns:
+        ``str()`` of whatever ``agent.run`` returned.
+    """
+    prompt = (
+        """
+            For the following query, if it requires drawing a table, reply as follows:
+            {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
+            If the query requires creating a bar chart, reply as follows:
+            {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
+            If the query requires creating a line chart, reply as follows:
+            {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
+            There can only be two types of charts, "bar" and "line".
+            If it is just asking a question that requires neither, reply as follows:
+            {"answer": "answer"}
+            Example:
+            {"answer": "The product with the highest sales is 'Classic Cars.'"}
+            Write supportive numbers if there are any in the answer.
+            Example:
+            {"answer": "The product with the highest sales is 'Classic Cars' with 1111 sales."}
+            If you do not know the answer, reply as follows:
+            {"answer": "I do not know."}
+            Do not hallucinate or make up data. If the data is not available, reply "I do not know."
+            Return all output as a string in double quotes.
+            All strings in "columns" list and data list, should be in double quotes,
+            For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}
+            Lets think step by step.
+            Below is the query.
+            Query:
+            """
+        + query
+    )
+    # Use the str() builtin rather than calling the dunder method directly.
+    return str(agent.run(prompt))
+def return_response(response: str) -> "dict | None":
+    """Decode the agent's reply into a dictionary.
+
+    Args:
+        response: JSON text produced by the agent.
+
+    Returns:
+        The decoded JSON object, or ``None`` when the text is not valid
+        JSON, is not a string at all, or decodes to something other than a
+        JSON object (a bare string, list or number). Callers treat ``None``
+        as "could not decode".
+    """
+    try:
+        decoded = json.loads(response)
+    except json.JSONDecodeError as e:
+        print(f"JSONDecodeError: {e}")
+        return None
+    except TypeError as e:
+        # json.loads raises TypeError for non-str/bytes input such as None.
+        print(f"TypeError: {e}")
+        return None
+    if not isinstance(decoded, dict):
+        # Downstream code looks keys up by name, so only a JSON object is usable.
+        print(f"Expected a JSON object, got {type(decoded).__name__}")
+        return None
+    return decoded
+def _chart_frame(spec):
+    """Build a two-column ("columns", "data") frame from a chart payload.
+
+    Returns ``None`` (after printing the reason) when the payload is malformed.
+    """
+    try:
+        # from_dict(orient="index") + transpose pads lists of unequal length
+        # instead of raising, so a slightly ragged reply can still be plotted.
+        frame = pd.DataFrame.from_dict(
+            {"columns": spec["columns"], "data": spec["data"]}, orient="index"
+        ).transpose()
+        # The transpose leaves object dtype; make the plotted values numeric.
+        frame["data"] = pd.to_numeric(frame["data"], errors="coerce")
+    except (KeyError, TypeError, ValueError) as e:
+        print(f"Malformed chart payload: {e}")
+        return None
+    return frame
+
+
+def write_response(response_dict: dict):
+    """Convert a decoded agent reply into something displayable.
+
+    Args:
+        response_dict: Decoded reply, or ``None`` if decoding failed. The
+            recognised keys are "answer", "bar", "line" and "table".
+
+    Returns:
+        * the answer value for an "answer" reply;
+        * a ``gr.BarPlot`` / ``gr.LinePlot`` with the "columns" values on x
+          and the "data" values on y for a chart reply;
+        * a plain-text rendering of the table for a "table" reply;
+        * an explanatory message string when the reply is ``None``,
+          malformed, or has none of the recognised keys.
+    """
+    malformed = "Could not build the requested output from the response."
+    if response_dict is None:
+        answer = "Decoded response is None. Please retry with a better prompt."
+        return answer
+    if "answer" in response_dict:
+        return response_dict["answer"]
+    if "bar" in response_dict:
+        frame = _chart_frame(response_dict["bar"])
+        # Name the x/y columns explicitly and keep both as real columns;
+        # moving "columns" into the index would hide it from the plot.
+        return malformed if frame is None else gr.BarPlot(frame, x="columns", y="data")
+    if "line" in response_dict:
+        frame = _chart_frame(response_dict["line"])
+        return malformed if frame is None else gr.LinePlot(frame, x="columns", y="data")
+    if "table" in response_dict:
+        try:
+            table = response_dict["table"]
+            frame = pd.DataFrame(table["data"], columns=table["columns"])
+        except (KeyError, TypeError, ValueError) as e:
+            print(f"Malformed table payload: {e}")
+            return malformed
+        # Rendered as text so the result matches the "answer" branch's type.
+        return frame.to_string(index=False)
+    return "Response did not contain an answer, chart or table."
+def ques_csv(data, question: str):
+    """Answer ``question`` about the CSV file ``data``.
+
+    Builds a DataFrame agent for the file, sends it the question, decodes
+    the reply and converts it into a displayable result.
+    """
+    raw_reply = get_response(agent=agent(data), query=question)
+    return write_response(return_response(raw_reply))
+# pdf
+def ques_pdf(data, question: str):
+    """Answer ``question`` from the PDF file ``data`` via retrieval.
+
+    Loads the PDF, splits it into chunks, indexes the chunks behind a
+    retriever, and runs the question through the prompt and chat model.
+    """
+    pages = load_pdf(data)
+    retriever = store_retrieve(split_pdf(pages))
+    return ques_llm(retriever, write_prompt(), question)
+def make_dir():
+    """Create ``TEMP_DIR`` (and any missing parents) if it does not exist."""
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    os.makedirs(TEMP_DIR, exist_ok=True)
+def upload(uploaded_file):
+    """Save an uploaded file under ``TEMP_DIR`` and return its path.
+
+    Args:
+        uploaded_file: Object exposing ``.name`` and ``.getvalue()`` (bytes),
+            or ``None`` when nothing was uploaded.
+
+    Returns:
+        The path the bytes were written to, or ``None`` if ``uploaded_file``
+        is ``None``.
+    """
+    if uploaded_file is None:
+        # Previously this fell through to `return file_path` with the name
+        # unbound, raising UnboundLocalError.
+        return None
+    os.makedirs(TEMP_DIR, exist_ok=True)
+    # The name is client-supplied: keep only its final component so it
+    # cannot point outside TEMP_DIR.
+    safe_name = os.path.basename(uploaded_file.name)
+    file_path = os.path.join(TEMP_DIR, safe_name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getvalue())
+    return file_path
+def load_pdf(filename: str):
+    """Load a PDF and return its pages split into documents.
+
+    The split uses a ``CharacterTextSplitter`` with ``chunk_size=1000`` and
+    ``chunk_overlap=20``.
+    """
+    pdf_loader = PyPDFLoader(str(filename))
+    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
+    return pdf_loader.load_and_split(text_splitter=splitter)
+def split_pdf(doc):
+    """Split documents into chunks with ``chunk_size=500`` and ``chunk_overlap=50``."""
+    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    return splitter.split_documents(doc)
+def store_retrieve(chunks):
+    """Index ``chunks`` in an embedded Weaviate store and return a retriever.
+
+    The chunks are embedded with ``OpenAIEmbeddings`` and stored through a
+    client created with ``EmbeddedOptions``.
+    """
+    weaviate_client = weaviate.Client(embedded_options=EmbeddedOptions())
+    store = Weaviate.from_documents(
+        client=weaviate_client,
+        documents=chunks,
+        embedding=OpenAIEmbeddings(),
+        by_text=False,
+    )
+    return store.as_retriever()
+def write_prompt():
+    """Return the chat prompt template used for PDF question answering.
+
+    The template has two placeholders, ``{question}`` and ``{context}``.
+    """
+    qa_template = """You are an assistant for question-answering tasks.
+    Use the following pieces of retrieved context to answer the question.
+    If you don't know the answer, just say that you don't know.
+    Question: {question}
+    Context: {context}
+    Answer:
+    """
+    return ChatPromptTemplate.from_template(qa_template)
+def ques_llm(retriever, prompt, question):
+    """Run ``question`` through a retrieval-augmented chain and return the text.
+
+    The chain feeds the retriever's output in as "context" and the question
+    itself as "question", then applies the prompt, a ``gpt-4`` chat model at
+    temperature 0, and a string output parser.
+    """
+    chat_model = ChatOpenAI(model_name="gpt-4", temperature=0)
+    chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
+    rag_chain = chain_inputs | prompt | chat_model | StrOutputParser()
+    return rag_chain.invoke(question)