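"""Gradio app: a RAG chatbot that answers questions over uploaded PDFs.

Uploaded files are chunked and embedded into a Chroma index; predict() then
retrieves relevant chunks and answers with GPT-4o, citing its sources.
OPENAI_API_KEY must be set in the environment for the chat model and embeddings.
"""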
import os
import shutil

import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
def predict(message, history):
    llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
    template = """You are a general-purpose chatbot. Be friendly and kind. Help people answer their questions. Use the context below to answer the questions. Even if you don't know the exact answer, feel free to give an approximate answer. Answer as if you are answering in an exam. The answer should be to the point, nothing extra.
{context}
Question: {question}
Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
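    # Open the persisted Chroma index that upload_file() builds below.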
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    db = Chroma(
        persist_directory="./chroma",
        embedding_function=embeddings,
        collection_name="Autism1",
    )
    # k must be passed via search_kwargs; a bare k=10 keyword is silently
    # ignored and the retriever falls back to its default of 4 documents.
    retriever = db.as_retriever(search_kwargs={"k": 10})
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
    def contextualized_question(input: dict):
        if input.get("chat_history"):
            return contextualize_q_chain
        else:
            return input["question"]
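    # LCEL pipeline: rewrite the question into standalone form (when history
    # exists), retrieve context for it, fill the QA prompt, and call the model.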
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | retriever
        )
        | QA_CHAIN_PROMPT
        | llm
    )
    # Gradio's ChatInterface passes history as (user, bot) pairs; convert them
    # to LangChain messages so the contextualize chain can rewrite follow-ups.
    chat_history = []
    for user_msg, bot_msg in history:
        chat_history.append(HumanMessage(content=user_msg))
        chat_history.append(AIMessage(content=bot_msg))
    ai_msg = rag_chain.invoke({"question": message, "chat_history": chat_history})
    bot_response = ai_msg.content.strip()
    # Append the two closest source chunks as citations.
    docs = db.similarity_search(message, k=2)
    extra = "\n" * 2 + "*" * 100 + "\n" + "Source:\n" + "*" * 100 + "\n"
    for d in docs:
        citation = d.metadata["source"] + " pg." + str(d.metadata["page"])
        extra += citation + "\n" + d.page_content + "\n" + "*" * 100 + "\n"
    return bot_response + " " + extra
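# Ingestion: load the uploaded PDFs, split them into overlapping chunks, embed
# them, and persist a fresh Chroma index for predict() to query.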
def upload_file(files):
    gr.Info("Processing files; please wait before querying")
    file_paths = [file.name for file in files]
    loaders = []
    for file_path in file_paths:
        if file_path.endswith(".pdf"):
            print(file_path)
            loaders.append(PyPDFLoader(file_path))
    documents = []
    for loader in loaders:
        documents.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=16)
    docs = text_splitter.split_documents(documents)
    collection_name = "Autism1"
    persist_directory = "./chroma"
    print(len(docs))
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    # Rebuild the vector store from scratch so stale chunks never survive an upload.
    if os.path.isdir(persist_directory):
        shutil.rmtree(persist_directory)
    db = Chroma.from_documents(
        docs,
        embeddings,
        collection_name=collection_name,
        persist_directory=persist_directory,
    )
    print("Done processing, you can query")
    gr.Info("You can query now")
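# UI: the upload button feeds the ingestion pipeline; ChatInterface drives predict().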
with gr.Blocks() as demo:
    with gr.Column():
        upload_button = gr.UploadButton(
            "Click to Upload a File", file_types=["file"], file_count="multiple"
        )
        upload_button.upload(upload_file, upload_button)
        chatbot = gr.Chatbot(height=500)
        gr.ChatInterface(fn=predict, chatbot=chatbot)

demo.launch(debug=True)