# Spaces:
# Runtime error
# Runtime error
import streamlit as st | |
from PyPDF2 import PdfReader | |
from langchain.vectorstores import FAISS | |
from langchain.chains import LLMChain, ConversationalRetrievalChain | |
from utils import (get_hf_embeddings, | |
get_openAI_chat_model, | |
get_hf_model, | |
get_local_gpt4_model, | |
set_LangChain_tracking, | |
check_password) | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.memory import ConversationBufferMemory | |
from langchain.docstore.document import Document | |
# Shared handles, built once at module import time (Streamlit re-executes the
# script on every interaction, so these helpers are expected to cache/reuse).
embeddings = get_hf_embeddings()
openai_chat_model = get_openAI_chat_model()
# local_model = get_local_gpt4_model(model="GPT4All-13B-snoozy.ggmlv3.q4_0.bin")
hf_chat_model = get_hf_model(repo_id="tiiuae/falcon-40b")
## Preparing Prompt
from langchain.prompts import PromptTemplate

# Ask the LLM for the ten most important entities in the uploaded document.
# (Original wording preserved verbatim, including the "entites" spelling.)
entity_extraction_template = (
    "\n"
    "Extract all top 10 important entites from the following context "
    "return as python list "
    "{input_text} "
    "List of entities:"
)
ENTITY_EXTRACTION_PROMPT = PromptTemplate.from_template(entity_extraction_template)
def get_qa_prompt(List_of_entities):
    """Build the retrieval-QA prompt, scoping answers to the extracted entities.

    Args:
        List_of_entities: the entity list produced by the extraction chain;
            it is stringified with ``str()`` and embedded verbatim in the prompt.

    Returns:
        A ``PromptTemplate`` with ``context`` and ``question`` input variables,
        suitable for ``combine_docs_chain_kwargs={"prompt": ...}``.
    """
    # Explicit adjacent-string concatenation (instead of a triple-quoted
    # string with backslash continuations) so no incidental indentation
    # leaks into the prompt text.
    qa_template = (
        "\n"
        "Use the following pieces of context to answer the question at the end. "
        "Use the following list of entities as your working scope. "
        "If the question is out of given list of entities, just say that your question "
        "is out of scope and give them the list of entities as your working scope "
        "If you dont know the answer, just say that you don't know and tell "
        "the user to seach web for more information, don't try to make up "
        "an answer. Use three sentences maximum and keep the answer as "
        "concise as possible."
        "list of entities: "
        + str(List_of_entities)
        + " context: {context} "
        "Question: {question} "
        "Helpful Answer:"
    )
    # Fix: removed leftover debug print(qa_template); the rendered prompt is
    # visible through LangChain verbose/tracing instead of stdout.
    return PromptTemplate.from_template(qa_template)
# Main app: gate everything behind the password check. Streamlit reruns this
# whole script on every user interaction, so all cross-run state lives in
# st.session_state. (Indentation reconstructed from control flow; the source
# paste was whitespace-mangled — confirm nesting against the deployed app.)
if check_password():
    st.title("Chat with your PDF ")
    # Every rerun starts "new_run"; only the Process button flips it so the
    # expensive PDF/embedding work happens once per explicit request.
    st.session_state.file_tracking = "new_run"

    with st.expander("Upload your PDF : ", expanded=True):
        st.session_state.lc_tracking = st.text_input("Please give a name to your session?")
        input_file = st.file_uploader(label="Upload a file",
                                      accept_multiple_files=False,
                                      type=["pdf"],
                                      )
        if st.button("Process the file"):
            st.session_state.file_tracking = "req_to_process"
            try:
                set_LangChain_tracking(project=str(st.session_state.lc_tracking))
            # Fix: was a bare `except:`, which also swallows SystemExit /
            # KeyboardInterrupt; narrow to Exception for the fallback.
            except Exception:
                set_LangChain_tracking(project="default")

    if st.session_state.file_tracking == "req_to_process" and input_file is not None:
        # Load Text Data
        input_text = ''
        bytes_data = PdfReader(input_file)
        for page in bytes_data.pages:
            # Fix: extract_text() returns None for image-only pages, which
            # would raise TypeError on concatenation — coerce to "".
            input_text += page.extract_text() or ""

        # Extract the entity "scope" for this document, then show it later.
        st.session_state.ner_chain = LLMChain(llm=hf_chat_model, prompt=ENTITY_EXTRACTION_PROMPT)
        st.session_state.ners = st.session_state.ner_chain.run(input_text=input_text, verbose=True)

        # Chunk naively on sentence boundaries; Embed and VectorStore.
        input_text = input_text.replace('\n', '')
        text_doc_chunks = [Document(page_content=x, metadata={}) for x in input_text.split('.')]
        vector_store = FAISS.from_documents(text_doc_chunks, embeddings)

        st.session_state.chat_history = []
        st.session_state.formatted_prompt = get_qa_prompt(st.session_state.ners)
        st.session_state.chat_chain = ConversationalRetrievalChain.from_llm(
            hf_chat_model,
            chain_type="stuff",  # "stuff", "map_reduce", "refine", "map_rerank"
            verbose=True,
            retriever=vector_store.as_retriever(),
            # search_type="mmr"
            # search_kwargs={"k": 1}
            # search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5}
            combine_docs_chain_kwargs={"prompt": st.session_state.formatted_prompt},
        )

    if "chat_chain" in st.session_state:
        st.header("We are ready to start chat with your pdf")
        st.subheader("The scope of your PDF is: ")
        st.markdown(st.session_state.ners)
    else:
        st.header("Upload and Process your file first")

    if "chat_chain" in st.session_state and st.session_state.chat_history is not None:
        if question := st.chat_input("Please type some thing here?"):
            response = st.session_state.chat_chain({"question": question, "chat_history": st.session_state.chat_history})
            st.session_state.chat_history.append((question, response["answer"]))
        # Display chat messages from history on app rerun
        for message in st.session_state.chat_history:
            with st.chat_message("user"):
                st.markdown(message[0])
            with st.chat_message("assistant"):
                st.markdown(message[1])