# Swap the stdlib sqlite3 module for pysqlite3 *before* anything imports
# chromadb, which needs a newer SQLite than many hosts (e.g. Streamlit
# Community Cloud) ship with.
import sys

__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import os

import streamlit as st
from langchain.chains import RetrievalQA

from data import create_retriever
from model import initialize_llmchain

st.set_page_config(page_title="πŸ€—ChatπŸ’¬")

# Display name -> HuggingFace model id for the embedding models.
embed_model_dict = {
    "MiniLM-L6": "nreimers/MiniLM-L6-H384-uncased",
    "Mpnet-Base": "sentence-transformers/all-mpnet-base-v2",
}

# Display name -> HuggingFace model id for the LLMs. All but the first
# are gated and require a HuggingFace access token.
llm_model_dict = {
    "Llama-2 7B (Free)": "daryl149/llama-2-7b-chat-hf",
    "Gemma 7B": "google/gemma-7b",
    "Gemma 2B": "google/gemma-2b",
    "Gemma 7B-it": "google/gemma-7b-it",
    "Gemma 2B-it": "google/gemma-2b-it",
    "Llama-2 7B Chat HF": "meta-llama/Llama-2-7b-chat-hf",
    "Llama-2 70B Chat HF": "meta-llama/Llama-2-70b-chat-hf",
    "Llama-2 13B Chat HF": "meta-llama/Llama-2-13b-chat-hf",
    "Llama-2 70B": "meta-llama/Llama-2-70b",
    "Llama-2 13B": "meta-llama/Llama-2-13b",
    "Llama-2 7B": "meta-llama/Llama-2-7b",
}


def save_uploadedfile(uploadedfile):
    """Write an uploaded PDF into ./tempfolder so the retriever can index it."""
    if not os.path.exists("./tempfolder"):
        os.makedirs("./tempfolder")
    full_path = os.path.join("tempfolder", uploadedfile.name)
    with open(full_path, "wb") as f:
        f.write(uploadedfile.getbuffer())
    st.success(f"Saved {uploadedfile.name}")


# Streamlit reruns this script from the top on every interaction, so the
# retriever and LLM must live in session_state to survive across chat turns.
if "retriever" not in st.session_state:
    st.session_state.retriever = None
if "llm" not in st.session_state:
    st.session_state.llm = None

with st.sidebar:
    st.header("Choose and Configure your Embedding Model", divider="rainbow")
    uploaded_files = st.file_uploader(
        "Choose a file", type=["pdf"], accept_multiple_files=True
    )
    embed_model = embed_model_dict[
        st.selectbox("Select Embedding Model", ("MiniLM-L6", "Mpnet-Base"))
    ]
    for file in uploaded_files:
        save_uploadedfile(file)
    chunk_size = st.slider("Chunk Size", 256, 1024, 400, 10)
    # Keep the overlap smaller than the chunk size, or chunks will mostly repeat.
    chunk_overlap = st.slider("Chunk Overlap", 100, 500, 300, 10)

    st.header("Choose and Configure your LLM Model", divider="rainbow")
    llm_model = llm_model_dict[
        st.selectbox("Select LLM Model", list(llm_model_dict))
    ]
    access_token = st.text_input("Enter HuggingFace Access Token", type="password")
    temperature = st.slider("Temperature", 0.0, 1.0, 0.7, 0.05)
    max_tokens = st.slider("Max Tokens", 256, 1024, 400, 10)
    top_k = st.slider("Top K", 1, 100, 50, 1)

    if st.button("Submit"):
        # Hypothetical sketches of these two helpers are included at the
        # bottom of this file for reference.
        with st.spinner("Processing PDFs..."):
            st.session_state.retriever = create_retriever(
                pdf_directory="./tempfolder",
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                embedding_model_name=embed_model,
            )
        with st.spinner("Loading LLM model..."):
            st.session_state.llm = initialize_llmchain(
                llm_model=llm_model,
                temperature=temperature,
                max_tokens=max_tokens,
                top_k=top_k,
                access_token=access_token,
            )

st.title("πŸ’¬ Chat With PDFs")
st.markdown("- Choose πŸš€ and configure your Embedding Model.")
st.markdown("- Choose πŸš€ and configure your LLM Model.")
st.markdown(
    "- Enter your HuggingFace Token ❗️ (only Llama-2 7B (Free) works without one)."
)
st.markdown(
    """

It will take some time ⏳ to download and load the models.

Once the download is complete, you can start chatting!

""", unsafe_allow_html=True, ) st.markdown(''' ''', unsafe_allow_html=True) if llm != None and retriever!=None: qa_pdf = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff", retriever=retriever.as_retriever(), return_source_documents=True ) if "messages" not in st.session_state: st.session_state.messages = [] for message in st.session_state.messages: with st.chat_message(message["role"]): st.markdown(message["content"]) if prompt := st.chat_input("What is up?", key="user_input"): st.session_state.messages.append({"role": "user", "content": prompt}) with st.chat_message("user"): st.markdown(prompt) with st.chat_message("assistant"): response = qa_pdf({'query':message})['result'] st.session_state.messages.append({"role": "assistant", "content": response}) st.markdown(response)