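# Question answering over uploaded files: CSVs are handled by a LangChain
# pandas dataframe agent, PDFs by a RAG chain over an embedded Weaviate store.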
# from langchain_openai import OpenAI
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from dotenv import load_dotenv
import os
import json
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import weaviate
from langchain_community.vectorstores import Weaviate
from weaviate.embedded import EmbeddedOptions
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import gradio as gr
# from langchain_community.llms import ctransformers
# from ctransformers import AutoModelForCausalLM

load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")

TEMP_DIR = "../temp"

def agent(filename: str):
    """Build a pandas dataframe agent over the uploaded CSV."""
    llm = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        # model="gpt-4",
        temperature=0.0,
        # max_tokens=256,
        # top_p=0.5,
    )
    # unicode_escape tolerates CSVs with unknown or mixed encodings
    df = pd.read_csv(filename, encoding="unicode_escape")
    # NOTE: recent langchain_experimental releases also require
    # allow_dangerous_code=True here.
    pandas_df_agent = create_pandas_dataframe_agent(llm, df, verbose=True)
    return pandas_df_agent
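
# Usage sketch (hypothetical file name; the agent answers by generating
# and executing pandas code against the dataframe):
# pandas_agent = agent("sales.csv")
# pandas_agent.run("How many rows does the dataset have?")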

def get_response(agent, query):
    """Ask the pandas agent for a machine-readable (JSON) reply."""
    prompt = (
        """
        For the following query, if it requires drawing a table, reply as follows:
        {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
        If the query requires creating a bar chart, reply as follows:
        {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        If the query requires creating a line chart, reply as follows:
        {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
        There can only be two types of charts, "bar" and "line".
        If it is just asking a question that requires neither, reply as follows:
        {"answer": "answer"}
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars.'"}
        Include supporting numbers in the answer if there are any.
        Example:
        {"answer": "The product with the highest sales is 'Classic Cars' with 1111 sales."}
        If you do not know the answer, reply as follows:
        {"answer": "I do not know."}
        Do not hallucinate or make up data. If the data is not available, reply "I do not know."
        Return all output as a string in double quotes.
        All strings in the "columns" list and the "data" list should be in double quotes.
        For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}
        Let's think step by step.
        Below is the query.
        Query:
        """
        + query
    )
    response = agent.run(prompt)
    return str(response)

def return_response(response: str) -> dict:
    """Decode the agent's JSON reply; return None if it is not valid JSON."""
    try:
        return json.loads(response)
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}")
        return None
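
# Illustrative round trip of the JSON contract (made-up values):
# return_response('{"bar": {"columns": ["A", "B"], "data": [25, 24]}}')
#   -> {"bar": {"columns": ["A", "B"], "data": [25, 24]}}
# return_response("not json") -> None (and the decode error is printed)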

def write_response(response_dict: dict):
    """Render a decoded reply as plain text or a Gradio plot component."""
    if response_dict is not None:
        if "answer" in response_dict:
            answer = response_dict["answer"]
            # st.write(answer)
            return answer
        if "bar" in response_dict:
            data = response_dict["bar"]
            # {"columns": [...], "data": [...]} -> one row per category;
            # keep "columns" as a regular column so it can serve as the x-axis
            df = pd.DataFrame(data)
            # st.bar_chart(df)
            return gr.BarPlot(df, x="columns", y="data")
        if "line" in response_dict:
            data = response_dict["line"]
            df = pd.DataFrame(data)
            # st.line_chart(df)
            return gr.LinePlot(df, x="columns", y="data")
        # if "table" in response_dict:
        #     data = response_dict["table"]
        #     df = pd.DataFrame(data["data"], columns=data["columns"])
        #     # st.table(df)
    else:
        answer = "Decoded response is None. Please retry with a better prompt."
        return answer

def ques_csv(data, question: str):
    csv_agent = agent(data)
    response = get_response(agent=csv_agent, query=question)
    decoded_response = return_response(response)
    answer = write_response(decoded_response)
    return answer
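
# Usage sketch (hypothetical path and question):
# ques_csv("../temp/sales.csv", "Which product has the highest sales?")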

def ques_pdf(data, question: str):
    doc = load_pdf(data)
    chunks = split_pdf(doc)
    retriever = store_retrieve(chunks)
    prompt = write_prompt()
    answer = ques_llm(retriever, prompt, question)
    # st.write(answer)
    return answer
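
# Usage sketch (hypothetical path and question):
# ques_pdf("../temp/report.pdf", "What does the report conclude?")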

def make_dir():
    if not os.path.exists(TEMP_DIR):
        os.makedirs(TEMP_DIR)

def upload(uploaded_file):
    # Expects a Streamlit-style UploadedFile (has .name and .getvalue());
    # a Gradio gr.File input with type="filepath" already yields a path.
    if uploaded_file is not None:
        file_path = os.path.join(TEMP_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())
        return file_path

def load_pdf(filename: str):
    loader = PyPDFLoader(filename)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    pages = loader.load_and_split(text_splitter=text_splitter)
    return pages

def split_pdf(doc):
    # Re-split the 1000-character pages from load_pdf into smaller
    # 500-character chunks for retrieval
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(doc)
    return chunks

def store_retrieve(chunks):
    # Embedded Weaviate runs locally in-process; no external server needed
    client = weaviate.Client(
        embedded_options=EmbeddedOptions()
    )
    vectorstore = Weaviate.from_documents(
        client=client,
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        by_text=False,
    )
    retriever = vectorstore.as_retriever()
    return retriever

def write_prompt():
    template = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)
    return prompt

def ques_llm(retriever, prompt, question):
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    # # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q8_0.bin", temperature=0)
    # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q4_0.bin", temperature=0)
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    ans = rag_chain.invoke(question)
    return ans
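
# The Space's UI wiring is not included above (the commented-out st.* calls
# suggest a Streamlit port). A minimal Gradio sketch, assuming questions are
# routed by file extension; chart answers (gr.BarPlot / gr.LinePlot) would
# need a plot output component rather than the plain textbox used here:
if __name__ == "__main__":
    make_dir()

    def answer_file(file_path, question):
        if file_path.lower().endswith(".csv"):
            return ques_csv(file_path, question)
        return ques_pdf(file_path, question)

    demo = gr.Interface(
        fn=answer_file,
        inputs=[
            gr.File(label="CSV or PDF", type="filepath"),
            gr.Textbox(label="Question"),
        ],
        outputs=gr.Textbox(label="Answer"),
    )
    demo.launch()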