import streamlit as st
from langchain.vectorstores import Chroma
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from run_llama import load_models
from load_documents import load_documents_fn
from variables import EMBEDDING_MODEL_NAME, MODEL_ID, MODEL_BASENAME

# Sanity checks for GPU availability (uncomment when debugging CUDA setups):
# import torch
# print(f"Is CUDA available: {torch.cuda.is_available()}")
# print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")  # e.g. Tesla T4


def model_memory():
    """Build the prompt template for the retrieval QA chain.

    Conversational memory (ConversationBufferMemory) was planned here but is
    currently not enabled; only the prompt is returned.
    """
    template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""

    prompt = PromptTemplate(input_variables=["context", "question"], template=template)
    return prompt


with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader(
        "Upload your documents here and click on 'Process'",
        accept_multiple_files=True,
        type=["pdf", "docx", "csv", "xlsx", "html"],
    )
    if st.button("Process"):
        with st.spinner("Processing"):
            if not docs:
                st.error("Error while loading the documents! Upload at least one file and try again.")
                st.stop()

            loaded_documents = load_documents_fn(docs)

            if "EMBEDDINGS" not in st.session_state:
                st.session_state.EMBEDDINGS = SentenceTransformerEmbeddings(
                    model_name=EMBEDDING_MODEL_NAME
                )

            # Build (or rebuild) the Chroma vector store from the freshly loaded documents.
            DB = Chroma.from_documents(
                loaded_documents, st.session_state.EMBEDDINGS, persist_directory="db"
            )
            st.session_state.DB = DB
            st.session_state.RETRIEVER = DB.as_retriever()

            if "LLM" not in st.session_state:
                st.session_state.LLM = load_models(
                    model_id=MODEL_ID, model_basename=MODEL_BASENAME
                )

            if "QA" not in st.session_state:
                qa_prompt = model_memory()
                st.session_state.QA = RetrievalQA.from_chain_type(
                    llm=st.session_state.LLM,
                    chain_type="stuff",
                    retriever=st.session_state.RETRIEVER,
                    return_source_documents=True,
                    chain_type_kwargs={"prompt": qa_prompt},
                )
            st.success("LLM initialized! You can now chat with your documents.")

st.title("Chat With Your Documents")
prompt = st.text_input("Input your prompt here")

# Ignore input until documents have been uploaded and processed.
if not docs or "QA" not in st.session_state:
    prompt = ""

# If the user hits enter...
if prompt:
    # ...pass the prompt to the retrieval QA chain...
    response = st.session_state.QA(prompt)
    answer, source_documents = response["result"], response["source_documents"]
    # ...and write the answer out to the screen.
    st.write(answer)
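# Usage sketch (assumptions: this file is saved as app.py, and run_llama.py,
# load_documents.py, and variables.py sit alongside it, defining load_models,
# load_documents_fn, and the EMBEDDING_MODEL_NAME / MODEL_ID / MODEL_BASENAME
# constants). Launch the app with:
#
#   streamlit run app.py
#
# Then upload one or more documents in the sidebar, click "Process" to build the
# Chroma index and load the LLM, and ask questions in the main text box.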