Spaces:

AbuZayDin
/

chat_your_pdfs

Configuration error

App Files Files Community

chat_your_pdfs / app.py

AbuZayDin

Upload 7 files

8f3f965 verified 2 months ago

raw

history blame

No virus

7.98 kB

	from dotenv import load_dotenv
	load_dotenv()

	import streamlit as st
	import json
	import os
	from datetime import datetime, timedelta

	import google.generativeai as genai
	from langchain_community.vectorstores import Chroma
	from langchain.chains.question_answering import load_qa_chain
	from langchain import PromptTemplate
	from langchain_community.embeddings import SentenceTransformerEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain.prompts import ChatPromptTemplate
	from langchain_community.document_loaders import PyPDFDirectoryLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_google_genai import GoogleGenerativeAIEmbeddings
	from transformers import pipeline
	from PyPDF2 import PdfReader
	import docx2txt

	# Configure the Gemini SDK with the key read from the environment.
	# configure() is called for its side effect only, so its return value is not
	# printed (the original printed a bare "None" on every script run).
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

	# NOTE(review): `model` is rebound to the LangChain chat model later in this
	# file and `chat` is not referenced in the visible code; both are kept so any
	# code outside this view that relies on them keeps working.
	model = genai.GenerativeModel("gemini-pro")
	chat = model.start_chat(history=[])

	# Seed per-session state only when the key is missing, so values already
	# stored in st.session_state are never overwritten.
	if 'chat_history' not in st.session_state:
	    # (role, text, timestamp) tuples shown in the sidebar history.
	    st.session_state['chat_history'] = []

	if "messages" not in st.session_state:
	    # {"role": ..., "content": ...} dicts rendered in the main chat area.
	    st.session_state["messages"] = []

	if 'level' not in st.session_state:
	    st.session_state['level'] = 'Beginner'

	# Add initial assistant message if chat history is empty
	if not st.session_state["messages"]:
	    st.session_state["messages"].append(
	        {"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}
	    )










	# Prompt used by the question-answering chain. It has two placeholders:
	# {context} (the retrieved document text) and {question} (the user's query).
	prompt_template = """
	Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
	provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
	Context:\n {context}?\n
	Question: \n{question}\n

	Answer:
	"""

	# LangChain wrapper around the Gemini chat model (rebinds the module-level
	# name `model`).
	model = ChatGoogleGenerativeAI(
	    model="gemini-pro",
	    temperature=0.3,
	)

	# Bind the template to its two input variables, then build a "stuff" chain
	# that passes the supplied documents to the model together with the prompt.
	prompt = PromptTemplate(
	    template=prompt_template,
	    input_variables=["context", "question"],
	)
	chain = load_qa_chain(
	    model,
	    chain_type="stuff",
	    prompt=prompt,
	)





	def get_file_text(files):
	    """Extract text from uploaded PDF, DOCX and plain-text files.

	    Args:
	        files: Iterable of uploaded file-like objects, each exposing a
	            ``type`` attribute holding its MIME type. ``None`` is treated
	            as "no files".

	    Returns:
	        A list of strings in input order: one entry per PDF page, one per
	        DOCX file and one per text file. Files of any other type add
	        nothing to the list and are reported through ``st.error``.
	    """
	    text = []
	    for file in files or []:
	        if file.type == "application/pdf":  # Handle PDF files
	            pdf_reader = PdfReader(file)
	            # A page may yield no text; store "" rather than None so the
	            # caller never ends up with the literal string "None".
	            text.extend(page.extract_text() or "" for page in pdf_reader.pages)
	        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":  # Handle DOCX files
	            text.append(docx2txt.process(file))
	        elif file.type == "text/plain":  # Handle text files
	            # Uploaded files are in-memory buffers without an ``open``
	            # method, so read the payload directly. getvalue() is preferred
	            # because it does not depend on the current read position.
	            raw = file.getvalue() if hasattr(file, "getvalue") else file.read()
	            if isinstance(raw, bytes):
	                # Undecodable bytes are replaced instead of aborting the upload.
	                raw = raw.decode("utf-8", errors="replace")
	            text.append(raw)
	        else:  # Handle unsupported file types
	            st.error(f"Unsupported file type: {file.type}")
	    return text


	def format_timestamp(timestamp):
	    """Return a short human-readable label for ``timestamp``.

	    Yields ``"today"`` when the timestamp falls on the current local date,
	    ``"yesterday"`` when it falls on the previous date, and otherwise the
	    timestamp formatted as ``YYYY-MM-DD HH:MM:SS``.
	    """
	    relative_labels = {0: "today", 1: "yesterday"}
	    # Whole calendar days between the current date and the timestamp's date.
	    days_ago = (datetime.now().date() - timestamp.date()).days
	    if days_ago in relative_labels:
	        return relative_labels[days_ago]
	    return timestamp.strftime('%Y-%m-%d %H:%M:%S')


	# document1=st.sidebar.file_uploader("Document 1 (question)",accept_multiple_files=True,key="document1")
	# document=get_file_text(document1)


	# text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
	# context= ", ".join(map(str,document))



	# text_chunks= text_splitter.split_text(context)





	def _render_bubble(index, content, background):
	    """Render one chat message as a styled HTML bubble with element id ``index``."""
	    import html  # local import keeps this block self-contained

	    # The text comes from the user or the model and is injected into raw
	    # HTML, so it must be escaped to prevent markup/script injection.
	    safe_content = html.escape(str(content))
	    st.markdown(
	        f"<div id='{index}' style='text-align: left; color: black; "
	        f"background-color: {background}; padding: 10px; border-radius: 10px; "
	        f"margin: 10px 0;'>{safe_content}</div>",
	        unsafe_allow_html=True,
	    )


	def _build_vector_store(uploaded_files, embeddings):
	    """Index the uploaded files' text in FAISS; return None when there is no text."""
	    document = get_file_text(uploaded_files)
	    context = ", ".join(map(str, document))
	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)
	    text_chunks = text_splitter.split_text(context)
	    if not text_chunks:
	        return None
	    # Build the index once from all chunks (the original rebuilt and saved
	    # the complete index once per chunk).
	    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
	    # Still persisted to the same path as before, in case anything else
	    # reads it; queries in this run use the in-memory store.
	    vector_store.save_local("faiss_index")
	    return vector_store


	def _rerun():
	    """Restart the script run using whichever rerun API this Streamlit provides."""
	    rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    rerun()


	def main():
	    """Render the chat page: header, message history, upload sidebar and input box.

	    On each run the stored messages are drawn, the uploaded documents are
	    indexed, and, if the user submitted a question, the most similar
	    chunks are passed to the QA chain. The question and answer are then
	    appended to ``st.session_state`` and the script is rerun so they show up
	    in the history.
	    """
	    c1, c2, _ = st.columns([1, 2, 1])
	    c1.image("Logo.jpg", width=130)
	    c2.title("Chat Your Pdfs")

	    chat_container = st.container()
	    input_container = st.container()

	    # Draw the stored conversation; user and assistant bubbles differ only in
	    # background colour.
	    with chat_container:
	        for i, msg in enumerate(st.session_state.messages):
	            background = "#dae1e0" if msg["role"] == "user" else "#f4f4f4"
	            _render_bubble(i, msg["content"], background)

	    document1 = st.sidebar.file_uploader(
	        "Document 1 (question)", accept_multiple_files=True, key="document1"
	    )
	    embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')
	    vector_store = _build_vector_store(document1, embeddings)

	    c2.markdown('##')

	    if prompt := st.chat_input():
	        if vector_store is None:
	            # Nothing was indexed in this run. Do not fall back to an index
	            # left on disk by an earlier run: it may be missing or belong to
	            # different documents.
	            input_container.warning(
	                "Please upload at least one PDF, DOCX or TXT file before asking a question."
	            )
	        else:
	            # Show the pending question while the model is working (the
	            # original re-rendered every earlier user message here instead).
	            with chat_container:
	                _render_bubble(len(st.session_state.messages), prompt, "#dae1e0")

	            docs = vector_store.similarity_search(prompt)
	            response_text = chain(
	                {"input_documents": docs, "question": prompt},
	                return_only_outputs=False,
	            )

	            timestamp = datetime.now()
	            st.session_state['chat_history'].append(("You", prompt, timestamp))
	            st.session_state['chat_history'].append(("Bot", response_text['output_text'], timestamp))

	            st.session_state["messages"].append({"role": "user", "content": prompt})
	            st.session_state["messages"].append({"role": "assistant", "content": response_text["output_text"]})

	            # Rerun so the new exchange is drawn from session state.
	            _rerun()

	    # Sidebar: list the user's past questions with a relative timestamp.
	    st.sidebar.title("Chat History")
	    for role, content, timestamp in st.session_state['chat_history']:
	        if role == "You":
	            st.sidebar.write(f"{content} ({format_timestamp(timestamp)})")

	    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)



	def clear_chat_history():
	    """Reset the conversation to its initial state.

	    Passed as the ``on_click`` callback of the 'Clear Chat History' button:
	    it restores the single greeting message and empties the sidebar history.
	    """
	    st.session_state.messages = [{"role": "assistant", "content": "Ask Me Anything About The Uploaded Pdfs"}]
	    st.session_state['chat_history'] = []
	    # No explicit rerun here: Streamlit reruns the script after a widget
	    # callback returns, and requesting a rerun from inside a callback is a
	    # no-op (and st.experimental_rerun is unavailable on newer releases).