Spaces:
Running
Running
Akshayram1
committed on
Commit
•
9806805
1
Parent(s):
0b3bd96
Update app.py
Browse files
app.py
CHANGED
@@ -34,77 +34,81 @@ uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
|
|
34 |
if uploaded_file is not None:
|
35 |
llama_parse_documents = load_or_parse_data(uploaded_file)
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
if uploaded_file is not None:
    # Parse the uploaded PDF (cached/loaded via helper defined earlier in the file).
    llama_parse_documents = load_or_parse_data(uploaded_file)

    if llama_parse_documents:
        # Create data directory if it doesn't exist
        os.makedirs("data", exist_ok=True)

        # Write the parsed text to a markdown file.
        # NOTE: mode 'w' (not 'a') — Streamlit re-runs this script on every
        # interaction/upload, and append mode would duplicate earlier parse
        # output into the file and hence into the vector store.
        with open('data/output.md', 'w') as f:
            for doc in llama_parse_documents:
                f.write(doc.text + '\n')

        # Load the markdown back as LangChain documents.
        markdown_path = "data/output.md"
        loader = UnstructuredMarkdownLoader(markdown_path)
        documents = loader.load()

        # Split loaded documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
        docs = text_splitter.split_documents(documents)

        # Initialize Embeddings
        embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

        if docs:
            # Create and persist a Chroma vector database from the chunked documents
            vs = Chroma.from_documents(
                documents=docs,
                embedding=embed_model,
                persist_directory="chroma_db_llamaparse1",
                collection_name="rag"
            )

            # Initialize ChatGroq model
            chat_model = ChatGroq(
                temperature=0,
                model_name="mixtral-8x7b-32768",
                api_key=groq_api_key
            )

            # Prompt that constrains the model to the retrieved context.
            custom_prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
            prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])

            # Initialize RetrievalQA over the top-3 retrieved chunks.
            qa = RetrievalQA.from_chain_type(
                llm=chat_model,
                chain_type="stuff",
                retriever=vs.as_retriever(search_kwargs={'k': 3}),
                return_source_documents=True,
                chain_type_kwargs={"prompt": prompt}
            )

            def ask_question(question):
                """Run the RetrievalQA chain for *question* and return the answer text."""
                response = qa.invoke({"query": question})
                return response["result"]

            # Example questions
            example_questions = [
                "What is the Balance of UBER TECHNOLOGIES, INC. as of December 31, 2021?",
                "What is the Cash flows from operating activities associated with bad expense specified in the document?",
                "What is Loss (income) from equity method investments, net?"
            ]

            # Ask questions and display answers
            for idx, question in enumerate(example_questions, start=1):
                st.subheader(f"Question {idx}: {question}")
                answer = ask_question(question)
                st.write(f"Answer: {answer}")
        else:
            # Parsing produced text but splitting yielded no chunks — tell the
            # user instead of silently rendering nothing.
            st.write("No text chunks were produced from the parsed document.")
    else:
        st.write("No documents were parsed.")