import streamlit as st
from PyPDF2 import PdfReader
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from utils import (get_hf_embeddings,
                   get_openAI_chat_model,
                   get_hf_model,
                   get_local_gpt4_model,
                   set_LangChain_tracking,
                   check_password)

embeddings = get_hf_embeddings()
openai_chat_model = get_openAI_chat_model()
# local_model = get_local_gpt4_model(model="GPT4All-13B-snoozy.ggmlv3.q4_0.bin")
hf_chat_model = get_hf_model(repo_id="tiiuae/falcon-40b")
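# Note: RecursiveCharacterTextSplitter and ConversationBufferMemory are imported
# but not used below; chat history is tracked by hand in st.session_state and
# the PDF text is split on sentences instead.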

## Preparing the prompts
entity_extraction_template = """
Extract the top 10 most important entities from the following context \
and return them as a Python list. \
{input_text} \
List of entities:"""
ENTITY_EXTRACTION_PROMPT = PromptTemplate.from_template(entity_extraction_template)
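# {input_text} is the only template variable here; the model is asked for a
# Python-style list, which comes back as plain text.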


def get_qa_prompt(list_of_entities):
    qa_template = """
Use the following pieces of context to answer the question at the end. \
Use the following list of entities as your working scope. \
If the question is outside the given list of entities, just say that the question \
is out of scope and give the list of entities as your working scope. \
If you don't know the answer, just say that you don't know and tell \
the user to search the web for more information; don't try to make up \
an answer. Use three sentences maximum and keep the answer as \
concise as possible. \
List of entities: \
""" + str(list_of_entities) + """ \
Context: {context} \
Question: {question} \
Helpful Answer:"""
    return PromptTemplate.from_template(qa_template)
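# The entity list is baked directly into the template string, so the returned
# prompt keeps only {context} and {question} as input variables.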


if check_password():
    st.title("Chat with your PDF")
    st.session_state.file_tracking = "new_run"
    with st.expander("Upload your PDF:", expanded=True):
        st.session_state.lc_tracking = st.text_input("Please give a name to your session")
        input_file = st.file_uploader(label="Upload a file",
                                      accept_multiple_files=False,
                                      type=["pdf"])
        if st.button("Process the file"):
            st.session_state.file_tracking = "req_to_process"
            # Name the LangChain tracking project after the session; fall back
            # to "default" if the name cannot be used.
            try:
                set_LangChain_tracking(project=str(st.session_state.lc_tracking))
            except Exception:
                set_LangChain_tracking(project="default")

    if st.session_state.file_tracking == "req_to_process" and input_file is not None:
        # Load the raw text from every page of the uploaded PDF
        input_text = ''
        bytes_data = PdfReader(input_file)
        for page in bytes_data.pages:
            input_text += page.extract_text()
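        # Note: extract_text() only reads the embedded text layer; for scanned,
        # image-only PDFs it typically returns an empty string.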

        # Extract the entity list that defines the chat's working scope
        st.session_state.ner_chain = LLMChain(llm=hf_chat_model,
                                              prompt=ENTITY_EXTRACTION_PROMPT,
                                              verbose=True)
        st.session_state.ners = st.session_state.ner_chain.run(input_text=input_text)

        # Split the text into one Document per sentence (naive split on '.')
        input_text = input_text.replace('\n', '')
        text_doc_chunks = [Document(page_content=x, metadata={}) for x in input_text.split('.')]
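        # A sketch of an alternative, using the RecursiveCharacterTextSplitter
        # imported above (the chunk sizes are illustrative, not tuned):
        # splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        # text_doc_chunks = splitter.create_documents([input_text])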

        # Embed the chunks and index them in a FAISS vector store
        vector_store = FAISS.from_documents(text_doc_chunks, embeddings)
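        # The index lives in memory for this session only; vector_store.save_local()
        # could persist it if reprocessing on every upload becomes too slow.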

        st.session_state.chat_history = []
        st.session_state.formatted_prompt = get_qa_prompt(st.session_state.ners)
        st.session_state.chat_chain = ConversationalRetrievalChain.from_llm(
            hf_chat_model,
            chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
            verbose=True,
            retriever=vector_store.as_retriever(),
            # Possible retriever variants:
            # search_type="mmr"
            # search_kwargs={"k": 1}
            # search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5}
            combine_docs_chain_kwargs={"prompt": st.session_state.formatted_prompt},
        )
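        # ConversationalRetrievalChain condenses the new question plus the chat
        # history into a standalone query, retrieves matching chunks, and then
        # "stuffs" them all into the QA prompt built by get_qa_prompt().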
if "chat_chain" in st.session_state:
st.header("We are ready to start chat with your pdf")
st.subheader("The scope of your PDF is: ")
st.markdown(st.session_state.ners)
else:
st.header("Upload and Process your file first")
if "chat_chain" in st.session_state and st.session_state.chat_history is not None:
if question := st.chat_input("Please type some thing here?"):
response = st.session_state.chat_chain({"question": question, "chat_history": st.session_state.chat_history})
st.session_state.chat_history.append((question, response["answer"]))
        # Display chat messages from history on app rerun
        for message in st.session_state.chat_history:
            with st.chat_message("user"):
                st.markdown(message[0])
            with st.chat_message("assistant"):
                st.markdown(message[1])