# Import required libraries
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.llms import HuggingFaceHub | |
from langchain.vectorstores import Chroma | |
from langchain_community.vectorstores import Chroma | |
from langchain.chains import ConversationalRetrievalChain | |
#from langchain.text_splitter import NLTKTextSplitter | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import streamlit as st | |
import sys,yaml,Utilities as ut | |
import os | |
# Report only whether the Hugging Face token is configured. The token is a
# secret, so its value must never be written to stdout or captured logs.
print('HF_TOKEN set:', bool(os.getenv('HF_TOKEN')))
def get_data(query, chat_history=None):
    """Answer ``query`` against the persisted Chroma index with a retrieval chain.

    The embedding model id, Chroma directory and LLM repo id are read from
    the dict returned by ``ut.get_tokens()``; the Hugging Face API token is
    read from the ``HF_TOKEN`` environment variable.

    Args:
        query: The user's question.
        chat_history: Optional list of ``(question, answer)`` tuples from
            earlier turns. When supplied it is passed to the chain and the
            new turn is appended to it in place, so the caller can keep a
            conversation going. When omitted, the call is stateless.

    Returns:
        The ``'answer'`` entry of the chain result.
    """
    if chat_history is None:
        chat_history = []

    settings = ut.get_tokens()
    hf_token = os.getenv('HF_TOKEN')
    embedding_model_id = settings["embedding_model"]
    chromadbpath = settings["chatPDF_chroma_db"]
    llm_repo_id = settings["llm_repoid"]

    # We will use HuggingFace embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_id)

    # Load the vector store from disk and retrieve with MMR, 2 documents per query
    db = Chroma(persist_directory=chromadbpath, embedding_function=embeddings)
    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 2})

    llm = HuggingFaceHub(
        huggingfacehub_api_token=hf_token,
        repo_id=llm_repo_id,
        model_kwargs={"temperature": 0.2, "max_new_tokens": 50},
    )

    # Create the Conversational Retrieval Chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever, return_source_documents=True
    )
    result = qa_chain({'question': query, 'chat_history': chat_history})
    answer = result['answer']

    # Record the turn as a (question, answer) tuple; the previous code
    # appended the whole result dict, which is not a chat-history entry.
    chat_history.append((query, answer))

    # Debug output on the server console
    print('Answer: ' + answer + '\n')
    print(result)
    return answer
st.title("PatentGuru Document Reader")

# Main chat form
with st.form("chat_form"):
    query = st.text_input("Chat with PDF: ")
    clear_history = st.checkbox('Clear Chat History')
    submit_button = st.form_submit_button("Send")

if submit_button:
    if clear_history:
        # NOTE: get_data() is called here without a history, so every
        # question is already answered statelessly; this is informational.
        st.write("Cleared previous chat history")

    raw_response = get_data(query)

    # Keep only the text after the first "Answer: " marker when the model
    # output contains one. Without the marker, keep the full text: taking
    # partition(...)[-1] unconditionally would yield an empty string.
    _, marker, tail = raw_response.partition("Answer: ")
    response = tail if marker else raw_response
    if not response:
        response = "No results"

    # write results
    st.write(response)