Akshayram1 committed on
Commit
9806805
1 Parent(s): 0b3bd96

Update app.py

Files changed (1)
  1. app.py +78 -74
app.py CHANGED
@@ -34,77 +34,81 @@ uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 if uploaded_file is not None:
     llama_parse_documents = load_or_parse_data(uploaded_file)
 
-    # Create data directory if it doesn't exist
-    os.makedirs("data", exist_ok=True)
-
-    # Further processing of the parsed data...
-    # Further processing of the parsed data
-    with open('data/output.md', 'a') as f:
-        for doc in llama_parse_documents:
-            f.write(doc.text + '\n')
-
-    markdown_path = "data/output.md"
-    loader = UnstructuredMarkdownLoader(markdown_path)
-    documents = loader.load()
-
-    # Split loaded documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
-    docs = text_splitter.split_documents(documents)
-
-    # Initialize Embeddings
-    embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-
-    # Create and persist a Chroma vector database from the chunked documents
-    vs = Chroma.from_documents(
-        documents=docs,
-        embedding=embed_model,
-        persist_directory="chroma_db_llamaparse1",
-        collection_name="rag"
-    )
-
-    # Initialize ChatGroq model
-    chat_model = ChatGroq(
-        temperature=0,
-        model_name="mixtral-8x7b-32768",
-        api_key=groq_api_key
-    )
-
-    # Convert retrieved documents into QA format
-    custom_prompt_template = """
-    Use the following pieces of information to answer the user's question.
-    If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-    Context: {context}
-    Question: {question}
-
-    Only return the helpful answer below and nothing else.
-    Helpful answer:
-    """
-    prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])
-
-    # Initialize RetrievalQA
-    qa = RetrievalQA.from_chain_type(
-        llm=chat_model,
-        chain_type="stuff",
-        retriever=vs.as_retriever(search_kwargs={'k': 3}),
-        return_source_documents=True,
-        chain_type_kwargs={"prompt": prompt}
-    )
-
-    # Define function to interactively ask questions and retrieve answers
-    def ask_question(question):
-        response = qa.invoke({"query": question})
-        return response["result"]
-
-    # Example questions
-    example_questions = [
-        "What is the Balance of UBER TECHNOLOGIES, INC. as of December 31, 2021?",
-        "What is the Cash flows from operating activities associated with bad expense specified in the document?",
-        "What is Loss (income) from equity method investments, net?"
-    ]
-
-    # Ask questions and display answers
-    for idx, question in enumerate(example_questions, start=1):
-        st.subheader(f"Question {idx}: {question}")
-        answer = ask_question(question)
-        st.write(f"Answer: {answer}")
+    if llama_parse_documents:
+        # Create data directory if it doesn't exist
+        os.makedirs("data", exist_ok=True)
+
+        # Write the parsed text to a Markdown file for further processing
+        with open('data/output.md', 'a') as f:
+            for doc in llama_parse_documents:
+                f.write(doc.text + '\n')
+
+        markdown_path = "data/output.md"
+        loader = UnstructuredMarkdownLoader(markdown_path)
+        documents = loader.load()
+
+        # Split loaded documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
+        docs = text_splitter.split_documents(documents)
+
+        # Initialize embeddings
+        embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+
+        if docs:
+            # Create and persist a Chroma vector database from the chunked documents
+            vs = Chroma.from_documents(
+                documents=docs,
+                embedding=embed_model,
+                persist_directory="chroma_db_llamaparse1",
+                collection_name="rag"
+            )
+
+            # Initialize ChatGroq model
+            chat_model = ChatGroq(
+                temperature=0,
+                model_name="mixtral-8x7b-32768",
+                api_key=groq_api_key
+            )
+
+            # Prompt template that grounds answers in the retrieved context
+            custom_prompt_template = """
+            Use the following pieces of information to answer the user's question.
+            If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+            Context: {context}
+            Question: {question}
+
+            Only return the helpful answer below and nothing else.
+            Helpful answer:
+            """
+            prompt = PromptTemplate(template=custom_prompt_template, input_variables=['context', 'question'])
+
+            # Initialize RetrievalQA
+            qa = RetrievalQA.from_chain_type(
+                llm=chat_model,
+                chain_type="stuff",
+                retriever=vs.as_retriever(search_kwargs={'k': 3}),
+                return_source_documents=True,
+                chain_type_kwargs={"prompt": prompt}
+            )
+
+            # Ask a question against the index and return the answer text
+            def ask_question(question):
+                response = qa.invoke({"query": question})
+                return response["result"]
+
+            # Example questions
+            example_questions = [
+                "What is the Balance of UBER TECHNOLOGIES, INC. as of December 31, 2021?",
+                "What is the Cash flows from operating activities associated with bad expense specified in the document?",
+                "What is Loss (income) from equity method investments, net?"
+            ]
+
+            # Ask questions and display answers
+            for idx, question in enumerate(example_questions, start=1):
+                st.subheader(f"Question {idx}: {question}")
+                answer = ask_question(question)
+                st.write(f"Answer: {answer}")
+    else:
+        st.write("No documents were parsed.")