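"""Streamlit app for chatting with an uploaded PDF.

The PDF text is split into sentence-level chunks, embedded, and stored in a
FAISS vector store. An LLM first extracts the top entities from the document,
and a ConversationalRetrievalChain then answers questions restricted to that
entity scope.
"""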
import streamlit as st
from PyPDF2 import PdfReader
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain, ConversationalRetrievalChain
from utils import (get_hf_embeddings,
                  get_openAI_chat_model,
                  get_hf_model,
                  get_local_gpt4_model,
                  set_LangChain_tracking,
                  check_password)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.docstore.document import Document

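# Initialise the embedding model and chat models (HuggingFace embeddings, OpenAI chat, Falcon-40B via the HF Hub)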
embeddings = get_hf_embeddings()
openai_chat_model = get_openAI_chat_model()
#local_model = get_local_gpt4_model(model = "GPT4All-13B-snoozy.ggmlv3.q4_0.bin")
hf_chat_model = get_hf_model(repo_id = "tiiuae/falcon-40b")

## Preparing Prompt
from langchain.prompts import PromptTemplate
entity_extraction_template = """
Extract the top 10 most important entities from the following context \
and return them as a Python list \
{input_text} \
List of entities:"""
ENTITY_EXTRACTION_PROMPT = PromptTemplate.from_template(entity_extraction_template)

def get_qa_prompt(list_of_entities):
    """Build a QA prompt that restricts answers to the extracted entity scope."""
    qa_template = """
    Use the following pieces of context to answer the question at the end. \
    Use the following list of entities as your working scope. \
    If the question is outside the given list of entities, just say that the question \
    is out of scope and give the user the list of entities as your working scope. \
    If you don't know the answer, just say that you don't know and tell \
    the user to search the web for more information; don't try to make up \
    an answer. Use three sentences maximum and keep the answer as \
    concise as possible. \
    list of entities: \
    """ + str(list_of_entities) + """ \
    context: {context} \
    Question: {question} \
    Helpful Answer:"""
    print(qa_template)
    QA_CHAIN_PROMPT = PromptTemplate.from_template(qa_template)

    return QA_CHAIN_PROMPT

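# Main app: only rendered once the password check passes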
if check_password():
    st.title("Chat with your PDF ")
    st.session_state.file_tracking = "new_run"
    with st.expander("Upload your PDF : ", expanded=True):
        st.session_state.lc_tracking = st.text_input("Please give a name to your session?")
        input_file = st.file_uploader(label="Upload a file",
                                      accept_multiple_files=False,
                                      type=["pdf"],
                                      )
        if st.button("Process the file"):
            st.session_state.file_tracking = "req_to_process"
            try:
                set_LangChain_tracking(project=str(st.session_state.lc_tracking))
            except Exception:
                # Fall back to a default LangChain tracking project if the session name is unusable
                set_LangChain_tracking(project="default")
        if st.session_state.file_tracking == "req_to_process" and input_file is not None: 
            # Load Text Data
            input_text = ''
            bytes_data = PdfReader(input_file)
            for page in bytes_data.pages:
                input_text += page.extract_text()

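            # Extract the key entities from the PDF text; these define the chat scope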
            st.session_state.ner_chain = LLMChain(llm=hf_chat_model, prompt=ENTITY_EXTRACTION_PROMPT, verbose=True)
            st.session_state.ners = st.session_state.ner_chain.run(input_text=input_text)

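            # Split the text into sentence-level Document chunks for embedding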
            input_text = input_text.replace('\n', '')
            text_doc_chunks = [Document(page_content=x, metadata={}) for x in input_text.split('.')]

            # Embed and VectorStore
            vector_store = FAISS.from_documents(text_doc_chunks, embeddings)
            st.session_state.chat_history = []
            st.session_state.formatted_prompt = get_qa_prompt(st.session_state.ners)
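            # Conversational retrieval chain over the FAISS retriever, using the entity-scoped prompt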
            st.session_state.chat_chain = ConversationalRetrievalChain.from_llm(
                                                                    hf_chat_model,
                                                                    chain_type="stuff", # "stuff", "map_reduce", "refine", "map_rerank"
                                                                    verbose=True,
                                                                    retriever=vector_store.as_retriever(), 
                                                                    # search_type="mmr"
                                                                    # search_kwargs={"k": 1}
                                                                    # search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5}
                                                                    combine_docs_chain_kwargs={"prompt": st.session_state.formatted_prompt},
                                                                    )
        if "chat_chain" in st.session_state:
            st.header("We are ready to start chat with your pdf")
            st.subheader("The scope of your PDF is: ")
            st.markdown(st.session_state.ners)
        else:
            st.header("Upload and Process your file first")

    
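    # Chat UI: send the question plus prior history to the chain, then replay the conversation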
    if "chat_chain" in st.session_state and st.session_state.chat_history is not None:
        if question := st.chat_input("Please type something here"):
            response = st.session_state.chat_chain({"question": question, "chat_history": st.session_state.chat_history})
            st.session_state.chat_history.append((question, response["answer"]))
        
        # Display chat messages from history on app rerun
        for message in st.session_state.chat_history:
            with st.chat_message("user"):
                st.markdown(message[0]) 
            with st.chat_message("assistant"):
                st.markdown(message[1])