OpenRAG128 committed on
Commit
3abdd86
•
1 Parent(s): f0d87c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import uuid

import PyPDF2
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_groq import ChatGroq

# --- GROQ language model configuration --------------------------------------
# The API key is a secret and must never be committed to source control.
# It is read from the GROQ_API_KEY environment variable (on Hugging Face
# Spaces, define it as a Space secret; locally, export it in your shell).
# NOTE(review): a key was previously hard-coded here and is therefore exposed
# in the repository history -- it should be revoked and rotated.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail fast with an actionable message instead of an opaque auth error
    # on the first question the user asks.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or Space secret) before starting the app."
    )

# Chat model used to answer questions about the uploaded PDF. A low
# temperature keeps answers close to the retrieved context.
llm_groq = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="mixtral-8x7b-32768",
    temperature=0.2,
)

# --- Streamlit page setup ---------------------------------------------------
# set_page_config is issued before any other Streamlit command in the script.
st.set_page_config(page_title="DocDynamo", layout="wide")

st.title("DocDynamo🚀")

# The uploader returns None until the user selects a PDF; the rest of the
# script keys off this value.
uploaded_file = st.file_uploader("Please upload a PDF file to begin!", type="pdf")

# --- Sidebar: product blurb, usage notes and contact details ----------------
# The Markdown bodies are kept flush-left on purpose: text indented by four or
# more spaces would be rendered as a code block.
st.sidebar.title("DocDynamo By OpenRAG")

st.sidebar.markdown(
    """
🌟 **Introducing DocDynamo by OpenRAG: Your PDF Companion!** 📚

Welcome, esteemed users, to the groundbreaking release of DocDynamo on May 21, 2024. At OpenRAG, we are committed to pioneering solutions for modern challenges, and DocDynamo is our latest triumph.

"""
)

st.sidebar.markdown(
    """
💡 **How DocDynamo Works**

Simply upload your PDF, and let DocDynamo work its magic. Once processed, you can ask DocDynamo any question pertaining to the content of your PDF. It's like having a personal assistant at your fingertips, ready to provide instant answers.
"""
)

# Contact entries are list items so each one renders on its own line (plain
# consecutive lines are merged into a single paragraph by Markdown). The
# LinkedIn link targets the public company page rather than the admin
# dashboard, which only page administrators can open.
st.sidebar.markdown(
    """
📧 **Get in Touch**

For inquiries or collaboration proposals, please don't hesitate to reach out to us:

- 📩 Email: openrag189@gmail.com
- 🔗 LinkedIn: [OpenRAG](https://www.linkedin.com/company/102036854/)
- 📸 Instagram: [OpenRAG](https://www.instagram.com/open.rag?igsh=MnFwMHd5cjU1OGFj)

Experience the future of PDF interaction with DocDynamo. Welcome to a new era of efficiency and productivity. OpenRAG: Empowering You Through Innovation. 🚀
"""
)

# --- PDF question-answering pipeline ----------------------------------------


def _extract_pdf_text(file_obj):
    """Return the concatenated text of every page of the uploaded PDF.

    Pages for which PyPDF2 yields no text (e.g. scanned images) contribute an
    empty string instead of raising on ``None``.
    """
    reader = PyPDF2.PdfReader(file_obj)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _build_chain(pdf_text):
    """Index ``pdf_text`` in Chroma and return a conversational retrieval chain.

    Returns ``None`` when the text yields no chunks (nothing to index), so the
    caller can show a warning instead of failing inside the vector store.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=50)
    chunks = splitter.split_text(pdf_text)
    if not chunks:
        return None

    # One metadata record per chunk, tagged with the chunk's position.
    metadatas = [{"source": f"{i}-pl"} for i in range(len(chunks))]

    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    # A unique collection per document keeps chunks from previously uploaded
    # PDFs out of this document's retrieval results.
    docsearch = Chroma.from_texts(
        chunks,
        embeddings,
        metadatas=metadatas,
        collection_name=f"docdynamo-{uuid.uuid4().hex}",
    )

    # Buffer memory carries earlier turns into follow-up questions.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm_groq,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )


if uploaded_file:
    # Streamlit re-executes this script on every interaction. Cache the chain
    # in session state, keyed by the file's identity, so the PDF is embedded
    # once and the conversation memory survives across questions.
    doc_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("doc_key") != doc_key:
        with st.spinner(f"Processing `{uploaded_file.name}`..."):
            built_chain = _build_chain(_extract_pdf_text(uploaded_file))
        st.session_state["chain"] = built_chain
        st.session_state["doc_key"] = doc_key

    chain = st.session_state["chain"]

    if chain is None:
        st.warning(
            f"No extractable text was found in `{uploaded_file.name}`. "
            "Scanned or image-only PDFs cannot be processed."
        )
    else:
        st.success(f"Processing `{uploaded_file.name}` done. You can now ask questions!")

        user_input = st.text_input("Ask a question about the PDF:")

        if user_input:
            # Ask the chain; chat history is supplied by its memory.
            res = chain.invoke({"question": user_input})
            answer = res["answer"]
            source_documents = res.get("source_documents") or []

            # Text of each retrieved chunk, shown below the answer.
            text_elements = [doc.page_content for doc in source_documents]
            source_names = [f"source_{idx}" for idx in range(len(text_elements))]

            # Blank line before the footer so Markdown renders it as its own
            # paragraph rather than appending it to the answer's last line.
            if source_names:
                answer += f"\n\nSources: {', '.join(source_names)}"
            else:
                answer += "\n\nNo sources found"

            # Display the results
            st.markdown(f"**Answer:** {answer}")

            for idx, element in enumerate(text_elements):
                with st.expander(f"Source {idx}"):
                    st.write(element)