Spaces:

dineshabeysinghe
/

Chat_with_document

Running

App Files Files Community

Chat_with_document / app.py

dineshabeysinghe

Create app.py

2057158 verified 3 months ago

raw

history blame

No virus

5.14 kB

	import streamlit as st
	import os
	from PyPDF2 import PdfReader
	import openpyxl
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import GooglePalmEmbeddings
	from langchain.llms import GooglePalm
	from langchain.vectorstores import FAISS
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory

	# SECURITY: API credentials must never be committed to source control.
	# The key is expected to be supplied through the process environment (for
	# example as a secret configured on the hosting platform) under the name
	# GOOGLE_API_KEY. Any key that was previously hard-coded at this spot should
	# be treated as leaked and rotated.
	if not os.environ.get('GOOGLE_API_KEY'):
	    import warnings

	    warnings.warn(
	        "GOOGLE_API_KEY is not set; Google PaLM embedding and LLM calls "
	        "are expected to fail until it is provided via the environment.",
	        RuntimeWarning,
	    )

	def get_pdf_text(pdf_docs):
	    """Return the extracted text of every page of every given PDF.

	    Args:
	        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
	            (``main`` passes the uploaded file objects). ``None`` or an
	            empty iterable is accepted.

	    Returns:
	        The page texts concatenated in document order, then page order,
	        with no separator added between them; ``""`` when there is
	        nothing to read.
	    """
	    return "".join(
	        # Guard against a falsy result (e.g. a page with no text layer) so
	        # the join never receives a non-string.
	        page.extract_text() or ""
	        for pdf in (pdf_docs or [])
	        for page in PdfReader(pdf).pages
	    )

	def get_excel_text(excel_docs):
	    """Flatten the cell values of the given Excel workbooks into one string.

	    Every worksheet of every workbook is walked row by row; each non-empty
	    cell value is converted with ``str`` and the results are joined with a
	    single space.

	    Args:
	        excel_docs: Iterable of workbook sources accepted by
	            ``openpyxl.load_workbook`` (``main`` passes the uploaded file
	            objects). ``None`` or an empty iterable is accepted.

	    Returns:
	        The space-separated cell values with surrounding whitespace
	        stripped; ``""`` when there is nothing to read.
	    """
	    values = []
	    for excel_doc in excel_docs or []:
	        workbook = openpyxl.load_workbook(filename=excel_doc)
	        for sheet in workbook:
	            for row in sheet.iter_rows(values_only=True):
	                # Empty cells come back as None; skipping them keeps the
	                # literal word "None" out of the indexed text.
	                values.extend(str(value) for value in row if value is not None)
	    return " ".join(values).strip()

	def get_text_chunks(text, chunk_size=1000, chunk_overlap=20):
	    """Split ``text`` into overlapping chunks for embedding.

	    Args:
	        text: The full document text to split.
	        chunk_size: Maximum chunk size passed to the splitter. Defaults
	            to 1000, the value this app has always used.
	        chunk_overlap: Overlap between consecutive chunks passed to the
	            splitter. Defaults to 20, the value this app has always used.

	    Returns:
	        The result of ``RecursiveCharacterTextSplitter.split_text(text)``.
	    """
	    splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	    )
	    return splitter.split_text(text)

	def get_vector_store(text_chunks):
	    """Embed the text chunks and index them in a FAISS vector store.

	    Args:
	        text_chunks: Non-empty list of strings to embed.

	    Returns:
	        The FAISS vector store built from ``text_chunks`` using
	        ``GooglePalmEmbeddings``.

	    Raises:
	        ValueError: If ``text_chunks`` is empty. An index cannot be built
	            from nothing, and failing here gives a clearer message than
	            an error raised from inside the FAISS constructor.
	    """
	    if not text_chunks:
	        raise ValueError("Cannot build a vector store: no text chunks were provided.")
	    embeddings = GooglePalmEmbeddings()
	    return FAISS.from_texts(text_chunks, embedding=embeddings)

	def get_conversational_chain(vector_store):
	    """Build a retrieval chat chain backed by ``vector_store``.

	    The chain combines a ``GooglePalm`` LLM, the store's default retriever
	    and a ``ConversationBufferMemory`` stored under the ``"chat_history"``
	    key with messages returned as message objects.

	    Args:
	        vector_store: Vector store exposing ``as_retriever()``.

	    Returns:
	        The ``ConversationalRetrievalChain`` created by ``from_llm``.
	    """
	    chat_model = GooglePalm()
	    history = ConversationBufferMemory(
	        memory_key="chat_history",
	        return_messages=True,
	    )
	    doc_retriever = vector_store.as_retriever()
	    return ConversationalRetrievalChain.from_llm(
	        llm=chat_model,
	        retriever=doc_retriever,
	        memory=history,
	    )

	def get_user_input(user_question):
	    """Ask the active conversation chain a question and render the dialogue.

	    The chain stored in ``st.session_state.conversation`` is called with
	    the question, the returned ``chat_history`` is saved to
	    ``st.session_state.chatHistory``, and every message is drawn as a
	    styled HTML bubble.

	    Args:
	        user_question: The text the user typed into the question box.

	    Returns:
	        None. Output is rendered through Streamlit. If no conversation
	        chain exists yet, a warning is shown and nothing else happens.
	    """
	    import html  # function-scope import; used only to escape bubble text

	    chain = st.session_state.get("conversation")
	    if chain is None:
	        # No document has been processed yet, so there is nothing to call.
	        st.warning("Please upload and process a document before asking a question.")
	        return

	    with st.container():
	        response = chain({'question': user_question})
	        st.session_state.chatHistory = response['chat_history']
	        # The (2, 1) split keeps the transcript in the left two thirds of the
	        # page; the right column is currently left empty.
	        left, _ = st.columns((2, 1))
	        with left:
	            for i, message in enumerate(st.session_state.chatHistory):
	                # The bubbles are rendered with unsafe_allow_html=True, so
	                # user- and document-derived text must be escaped first.
	                safe_text = html.escape(str(message.content))
	                # Assumes the history alternates user / bot, starting with
	                # the user -- TODO confirm against the memory implementation.
	                if i % 2 == 0:
	                    st.markdown(f'<div style="background-color: rgb(30 24 17 / 77%); border-radius: 10px; padding: 10px; margin-bottom: 5px; text-align: end;"><span style="text-align: end;">User:</span> {safe_text}</div>', unsafe_allow_html=True)
	                else:
	                    st.markdown(f'<div style="background-color: rgb(145 74 1 / 25%); border-radius: 10px; padding: 10px; margin-bottom: 5px; ">Bot: {safe_text}</div>', unsafe_allow_html=True)

	def _build_conversation(raw_text):
	    """Index ``raw_text`` and store a new conversation chain in session state.

	    Returns True when a chain was created, False when ``raw_text`` holds no
	    text to index (in which case session state is left untouched).
	    """
	    if not raw_text or not raw_text.strip():
	        return False
	    text_chunks = get_text_chunks(raw_text)
	    if not text_chunks:
	        return False
	    vector_store = get_vector_store(text_chunks)
	    st.session_state.conversation = get_conversational_chain(vector_store)
	    return True


	def main():
	    """Render the DocChat page: header, upload sidebar and the Q&A box.

	    The sidebar lets the user upload PDF or Excel files and process them
	    into a conversation chain kept in ``st.session_state.conversation``.
	    The main area takes a question and hands it to ``get_user_input`` once
	    a chain exists.
	    """
	    st.set_page_config("DocChat")
	    # Define Streamlit app layout
	    st.markdown("<h3 style='color: orange;'>🧾 DocChat - Chat with multiple documents</h3>", unsafe_allow_html=True)
	    # "\\|" keeps the literal backslash-pipe of the original text without
	    # relying on an invalid "\|" escape sequence.
	    st.caption("🚀 Chat bot developed By :- [Dinesh Abeysinghe](https://www.linkedin.com/in/dinesh-abeysinghe-bb773293) \\| [GitHub Source Code](https://github.com/dineshabey/AI-TypeTalkChat.git) \\| [About model](https://arxiv.org/abs/2004.13637) ")
	    st.markdown("<div style= 'text-align: center;'>First need to upload PDF file or Excel file. Then you can start chat with document related things <span style='color: orange;'>Please click like button</span>❤️ and support me and enjoy it.</div>", unsafe_allow_html=True)
	    st.write("---")

	    # Initialise session state up front so every later read is safe.
	    if "conversation" not in st.session_state:
	        st.session_state.conversation = None
	    if "chatHistory" not in st.session_state:
	        st.session_state.chatHistory = None

	    with st.container():
	        with st.sidebar:
	            st.title("Settings")
	            st.subheader("Upload Documents")
	            st.markdown("PDF files:")
	            pdf_docs = st.file_uploader("Upload PDF Files", type=["pdf"], accept_multiple_files=True)
	            if st.button("Process PDF file"):
	                if not pdf_docs:
	                    st.warning("Please upload at least one PDF file first.")
	                else:
	                    with st.spinner("Processing PDFs..."):
	                        processed = _build_conversation(get_pdf_text(pdf_docs))
	                    if processed:
	                        st.success("PDF processed successfully!")
	                    else:
	                        st.warning("No extractable text was found in the uploaded PDF files.")

	            st.markdown("Excel files:")
	            # Extensions limited to the workbook formats openpyxl can load.
	            excel_docs = st.file_uploader("Upload Excel Files", type=["xlsx", "xlsm", "xltx", "xltm"], accept_multiple_files=True)
	            if st.button("Process Excel file"):
	                if not excel_docs:
	                    st.warning("Please upload at least one Excel file first.")
	                else:
	                    with st.spinner("Processing Excel files..."):
	                        processed = _build_conversation(get_excel_text(excel_docs))
	                    if processed:
	                        st.success("Excel file processed successfully!")
	                    else:
	                        st.warning("No cell values were found in the uploaded Excel files.")

	    with st.container():
	        st.subheader("Document Q&A")
	        user_question = st.text_input("Ask a Question from the document")
	        if user_question:
	            if st.session_state.conversation is None:
	                # Asking before any document is processed would otherwise
	                # try to call None.
	                st.info("Please upload and process a PDF or Excel file before asking a question.")
	            else:
	                get_user_input(user_question)

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()