Akshayram1 committed on
Commit
9806805
1 Parent(s): 0b3bd96

Update app.py

Files changed (1)
  1. app.py +78 -74
app.py CHANGED
@@ -34,77 +34,81 @@ uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 if uploaded_file is not None:
     llama_parse_documents = load_or_parse_data(uploaded_file)
 
-    # Create data directory if it doesn't exist
-    os.makedirs("data", exist_ok=True)
-
-    # Further processing of the parsed data...
-    # Further processing of the parsed data
-    with open('data/output.md', 'a') as f:
-        for doc in llama_parse_documents:
-            f.write(doc.text + '\n')
-
-    markdown_path = "data/output.md"
-    loader = UnstructuredMarkdownLoader(markdown_path)
-    documents = loader.load()
-
-    # Split loaded documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
-    docs = text_splitter.split_documents(documents)
-
-    # Initialize Embeddings
-    embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-
-    # Create and persist a Chroma vector database from the chunked documents
-    vs = Chroma.from_documents(
-        documents=docs,
-        embedding=embed_model,
-        persist_directory="chroma_db_llamaparse1",
-        collection_name="rag"
-    )
-
-    # Initialize ChatGroq model
-    chat_model = ChatGroq(
-        temperature=0,
-        model_name="mixtral-8x7b-32768",
-        api_key=groq_api_key
-    )
-
-    # Convert retrieved documents into QA format
-    custom_prompt_template = """
-    Use the following pieces of information to answer the user's question.
-    If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-    Context: {context}
-    Question: {question}
-
-    Only return the helpful answer below and nothing else.
-    Helpful answer:
-    """
-    prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])
-
-    # Initialize RetrievalQA
-    qa = RetrievalQA.from_chain_type(
-        llm=chat_model,
-        chain_type="stuff",
-        retriever=vs.as_retriever(search_kwargs={'k': 3}),
-        return_source_documents=True,
-        chain_type_kwargs={"prompt": prompt}
-    )
-
-    # Define function to interactively ask questions and retrieve answers
-    def ask_question(question):
-        response = qa.invoke({"query": question})
-        return response["result"]
-
-    # Example questions
-    example_questions = [
-        "What is the Balance of UBER TECHNOLOGIES, INC. as of December 31, 2021?",
-        "What is the Cash flows from operating activities associated with bad expense specified in the document?",
-        "What is Loss (income) from equity method investments, net?"
-    ]
-
-    # Ask questions and display answers
-    for idx, question in enumerate(example_questions, start=1):
-        st.subheader(f"Question {idx}: {question}")
-        answer = ask_question(question)
-        st.write(f"Answer: {answer}")
+    if llama_parse_documents:
+        # Create data directory if it doesn't exist
+        os.makedirs("data", exist_ok=True)
+
+        # Write the parsed text to a Markdown file for further processing
+        with open('data/output.md', 'a') as f:
+            for doc in llama_parse_documents:
+                f.write(doc.text + '\n')
+
+        markdown_path = "data/output.md"
+        loader = UnstructuredMarkdownLoader(markdown_path)
+        documents = loader.load()
+
+        # Split loaded documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
+        docs = text_splitter.split_documents(documents)
+
+        # Initialize embeddings
+        embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+
+        if docs:
+            # Create and persist a Chroma vector database from the chunked documents
+            vs = Chroma.from_documents(
+                documents=docs,
+                embedding=embed_model,
+                persist_directory="chroma_db_llamaparse1",
+                collection_name="rag"
+            )
+
+            # Initialize ChatGroq model
+            chat_model = ChatGroq(
+                temperature=0,
+                model_name="mixtral-8x7b-32768",
+                api_key=groq_api_key
+            )
+
+            # Prompt template that grounds answers in the retrieved context
+            custom_prompt_template = """
+            Use the following pieces of information to answer the user's question.
+            If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+            Context: {context}
+            Question: {question}
+
+            Only return the helpful answer below and nothing else.
+            Helpful answer:
+            """
+            prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])
+
+            # Initialize RetrievalQA
+            qa = RetrievalQA.from_chain_type(
+                llm=chat_model,
+                chain_type="stuff",
+                retriever=vs.as_retriever(search_kwargs={'k': 3}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": prompt}
+            )
+
+            # Ask a question against the index and return the answer text
+            def ask_question(question):
+                response = qa.invoke({"query": question})
+                return response["result"]
+
+            # Example questions
+            example_questions = [
+                "What is the Balance of UBER TECHNOLOGIES, INC. as of December 31, 2021?",
+                "What is the Cash flows from operating activities associated with bad expense specified in the document?",
+                "What is Loss (income) from equity method investments, net?"
+            ]
+
+            # Ask questions and display answers
+            for idx, question in enumerate(example_questions, start=1):
+                st.subheader(f"Question {idx}: {question}")
+                answer = ask_question(question)
+                st.write(f"Answer: {answer}")
+    else:
+        st.write("No documents were parsed.")