Spaces:
Runtime error
Runtime error
Updated code
Browse files
Updated chunk split size, updated the logic to load and split the data, added MultiQueryRetriever, and added thumbs-up and thumbs-down emoji in the UI.
- Database/PDF_HTML_CHROMA_DB/chroma.sqlite3 +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/data_level0.bin +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/header.bin +1 -1
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/index_metadata.pickle +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/length.bin +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/link_lists.bin +2 -2
- Database/text_chunks_html_pdf.pkl +2 -2
- OPM_Files/OPM_Retirement_backup-20230902T130906Z-001.zip +0 -3
- app.py +1 -0
- utils.py +11 -6
Database/PDF_HTML_CHROMA_DB/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e31d552a7a4981d60910ac3e293b5d53d0ba9503a95933ca21ab3a20b64ebc8
|
3 |
+
size 330657792
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/data_level0.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dbd22f72c4c63507f549d3fe1d8350c50ba0bc9d64cc20f1d136119fb9a892e
|
3 |
+
size 85476000
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/header.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f2080adbc1c9cd3e086e238928ed1f139b21a0ebad87348b410770e6a45b37e
|
3 |
size 100
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/index_metadata.pickle
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a37d72452ff59cb80ed779d0ff9ed91f9d6fe7c12adf909845168311e578c06b
|
3 |
+
size 2956679
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/length.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a50b6a6ebd0528902d0cbaa4d5d1c60af3a3fdc95a0738162eef134668c4d735
|
3 |
+
size 204000
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/link_lists.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b00b245cd31e8691cd94191f3afefc59417c252bdabc5ec443aa58cf84328d4
|
3 |
+
size 426496
|
Database/text_chunks_html_pdf.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8248c4c27db5e0950be6c4bf560164990dbd348cddb2b419d6f6764011a5a605
|
3 |
+
size 22550517
|
OPM_Files/OPM_Retirement_backup-20230902T130906Z-001.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:51bf0b1f9298ed989624d7f19d7f59e12fcb89e2ba087a2a0ae91204728523b4
|
3 |
-
size 168746379
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -238,6 +238,7 @@ if st.session_state["vector_db"] and st.session_state["llm"]:
|
|
238 |
st.write("---") # Add a separator between entries
|
239 |
message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
|
240 |
st.session_state.messages.append(message)
|
|
|
241 |
# else:
|
242 |
# with st.expander("source"):
|
243 |
# message = {"role": "assistant", "content": full_response, "Source":""}
|
|
|
238 |
st.write("---") # Add a separator between entries
|
239 |
message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
|
240 |
st.session_state.messages.append(message)
|
241 |
+
st.markdown("👍 👎 Create Ticket")
|
242 |
# else:
|
243 |
# with st.expander("source"):
|
244 |
# message = {"role": "assistant", "content": full_response, "Source":""}
|
utils.py
CHANGED
@@ -16,7 +16,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
|
|
16 |
from langchain.document_loaders import PyPDFLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
|
19 |
-
from langchain.memory import ConversationBufferMemory
|
20 |
from langchain.chains import ConversationalRetrievalChain
|
21 |
from langchain.prompts.prompt import PromptTemplate
|
22 |
from langchain.vectorstores import Chroma
|
@@ -33,6 +33,8 @@ from langchain.agents import load_tools
|
|
33 |
from langchain.chat_models import ChatOpenAI
|
34 |
from langchain.retrievers.multi_query import MultiQueryRetriever
|
35 |
from langchain.chains import RetrievalQA
|
|
|
|
|
36 |
|
37 |
load_dotenv()
|
38 |
|
@@ -254,11 +256,14 @@ def load_text_chunks(text_chunks_pkl_dir):
|
|
254 |
def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
|
255 |
"""Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
|
256 |
bm25_retriever = BM25Retriever.from_documents(text_chunks)
|
257 |
-
bm25_retriever.k =
|
258 |
-
chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k":
|
259 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
262 |
|
263 |
|
264 |
def load_conversational_retrievel_chain(retriever, llm):
|
@@ -310,7 +315,7 @@ def load_conversational_retrievel_chain(retriever, llm):
|
|
310 |
Helpful Answer:"""
|
311 |
|
312 |
prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
|
313 |
-
memory =
|
314 |
|
315 |
qa = RetrievalQA.from_chain_type(
|
316 |
llm=llm,
|
|
|
16 |
from langchain.document_loaders import PyPDFLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
|
19 |
+
from langchain.memory import ConversationBufferMemory
|
20 |
from langchain.chains import ConversationalRetrievalChain
|
21 |
from langchain.prompts.prompt import PromptTemplate
|
22 |
from langchain.vectorstores import Chroma
|
|
|
33 |
from langchain.chat_models import ChatOpenAI
|
34 |
from langchain.retrievers.multi_query import MultiQueryRetriever
|
35 |
from langchain.chains import RetrievalQA
|
36 |
+
import logging
|
37 |
+
|
38 |
|
39 |
load_dotenv()
|
40 |
|
|
|
256 |
def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
|
257 |
"""Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
|
258 |
bm25_retriever = BM25Retriever.from_documents(text_chunks)
|
259 |
+
bm25_retriever.k = 2
|
260 |
+
chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k": 3})
|
261 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
|
262 |
+
logging.basicConfig()
|
263 |
+
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
|
264 |
+
retriever_from_llm = MultiQueryRetriever.from_llm(retriever=ensemble_retriever,
|
265 |
+
llm=ChatOpenAI(temperature=0))
|
266 |
+
return retriever_from_llm
|
267 |
|
268 |
|
269 |
def load_conversational_retrievel_chain(retriever, llm):
|
|
|
315 |
Helpful Answer:"""
|
316 |
|
317 |
prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
|
318 |
+
memory = ConversationBufferMemory(input_key="question", memory_key="history")
|
319 |
|
320 |
qa = RetrievalQA.from_chain_type(
|
321 |
llm=llm,
|