# importing necessary libraries
import os
import time
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# load the environment variables into the python script
load_dotenv()

# fetching the openai_api_key environment variable
openai_api_key = os.getenv("OPENAI_API_KEY")
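# A minimal .env file for this script only needs the key fetched above; the
# langchain OpenAI classes also pick OPENAI_API_KEY up from the environment
# automatically (the placeholder value is illustrative):
#
#   OPENAI_API_KEY=sk-...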
# Initialize session states
if "vectorDB" not in st.session_state:
    st.session_state.vectorDB = None
if "messages" not in st.session_state:
    st.session_state.messages = []
if "bot_name" not in st.session_state:
    st.session_state.bot_name = ""
if "chain" not in st.session_state:
    st.session_state.chain = None
def process_paragraph(paragraph):
    """This function returns the text of a paragraph from the DOCX file"""
    return paragraph.text
def process_table(table):
    """This function extracts the text from a table inside the DOCX file"""
    text = ""
    for row in table.rows:
        for cell in row.cells:
            # separate cell texts so adjacent cells do not run together
            text += cell.text + " "
    return text
def read_docx(file_path):
    """This function extracts the text from the DOCX file"""
    doc = Document(file_path)
    text = []
    # iterate over paragraphs and tables in document order
    for element in doc.iter_inner_content():
        if isinstance(element, Paragraph):
            text.append(process_paragraph(element))
        elif isinstance(element, Table):
            text.append(process_table(element))
    return " ".join(text)
def read_text_file(text_file):
    """This function extracts the text from the TXT file"""
    try:
        text = text_file.read().decode("utf-8")
        return text
    except Exception as e:
        st.error(f"Error while reading {text_file.name} file: **{e}**")
        return None
def get_pdf_text(pdf):
    """This function extracts the text from the PDF file"""
    try:
        text = []
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without extractable text
            text.append(page.extract_text() or "")
        return " ".join(text)
    except Exception as e:
        st.error(f"Error while reading {pdf.name} file: **{e}**")
        return None
def get_vectorstore(text_chunks):
    """This function creates a FAISS vector database and stores the embeddings of the text chunks in it"""
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore
def get_text_chunks(text: str):
    """This function splits the text into smaller chunks"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_text(text)
    return chunks
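# As a rough sketch of the behaviour (an illustration, not from the original
# code): a 2,500-character string with no separators would come back as about
# three chunks of at most 1,000 characters each, with consecutive chunks
# sharing up to a 50-character overlap:
#
#   chunks = get_text_chunks("a" * 2500)
#   # len(chunks) == 3; all(len(c) <= 1000 for c in chunks)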
def processing(files):
    """This function reads the uploaded files, splits their text into chunks, and stores the chunk embeddings in a vector database"""
    data = []
    for file in files:
        if file.name.endswith(".docx"):
            text = read_docx(file)
        elif file.name.endswith(".pdf"):
            text = get_pdf_text(file)
        else:
            text = read_text_file(file)
        # skip files that failed to read (their reader returned None)
        if text:
            data.append(text)
    raw_text = " ".join(data)
    # dividing the raw text into smaller chunks
    text_chunks = get_text_chunks(raw_text)
    # Creating and storing the chunks in the vector database
    vectorDB = get_vectorstore(text_chunks)
    return vectorDB
def get_response(query: str):
    """This generator streams the answer to the user query word by word"""
    # getting the context from the database that is similar to the user query
    query_context = st.session_state.vectorDB.similarity_search(query=query)
    # calling the chain to get the output from the LLM
    response = st.session_state.chain.invoke(
        {
            "human_input": query,
            "context": query_context[0].page_content if query_context else "",
            "name": st.session_state.bot_name,
        }
    )["text"]
    # yield the response word by word, pausing 50 ms between words to create
    # a typing effect when consumed by st.write_stream
    for word in response.split():
        yield word + " "
        time.sleep(0.05)
def get_conversation_chain(vectorDB):
    """This function creates and returns an LLM chain"""
    # using the OpenAI chat model
    llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo-16k")
    # creating a template to pass into the LLM
    template = """You are a friendly customer support chatbot named {name} for the company, aiming to enhance the customer experience by providing tailored assistance and information.
Answer the question as detailed as possible and to the point from the context: {context}\n\n
If the answer is not in the provided context, just say, "answer is not available in the context"; do not provide a wrong answer.\n\n
{chat_history}
Human: {human_input}
AI: """
    # creating a prompt that is used to format the input of the user
    prompt = PromptTemplate(
        template=template,
        input_variables=["chat_history", "human_input", "name", "context"],
    )
    # creating a memory that stores the chat history (last 5 turns) between the chatbot and the user
    memory = ConversationBufferWindowMemory(
        memory_key="chat_history", input_key="human_input", k=5
    )
    chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=True)
    return chain
if __name__ == "__main__":
    # setting the config of the WebPage
    st.set_page_config(page_title="Personalized ChatBot", page_icon="🤖")
    st.header("Personalized Customer Support Chatbot 🤖", divider="rainbow")
    # taking the inputs (bot name and files) from the user
    with st.sidebar:
        st.caption("Please enter the **Bot Name** and upload **PDF/TXT/DOCX** files!")
        bot_name = st.text_input(
            label="Bot Name", placeholder="Enter the bot name here...", key="bot_name"
        )
        files = st.file_uploader(
            label="Upload Files!",
            type=["pdf", "txt", "docx"],
            accept_multiple_files=True,
        )
        # moving forward only when both inputs are given by the user
        if files and bot_name:
            # the Process Files button processes the uploaded files and saves the chunks into the vector database
            if st.button("Process Files"):
                # if there is existing chat history, delete it
                if st.session_state.messages:
                    st.session_state.messages = []
                with st.spinner("Processing....."):
                    st.session_state["vectorDB"] = processing(files)
                    st.session_state["chain"] = get_conversation_chain(
                        st.session_state["vectorDB"]
                    )
                    st.success("File Processed", icon="✅")
    # show the chatbot interface only when the vector database is ready to use
    if st.session_state.vectorDB:
        # Display chat messages from history on app rerun
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.write(message["content"])
        # taking the input, i.e. the query from the user (walrus operator)
        if prompt := st.chat_input(f"Message {st.session_state.bot_name}"):
            # Add user message to chat history
            st.session_state.messages.append({"role": "user", "content": prompt})
            # Display user message in chat message container
            with st.chat_message("user"):
                st.write(prompt)
            # Display assistant response in chat message container
            with st.chat_message("assistant"):
                response = st.write_stream(get_response(prompt))
            # Add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})