Spaces:

hohieu
/

sheet_rag_chatbot

Runtime error

App Files Files Community

sheet_rag_chatbot / app.py

hohieu

change prompt

4ddd366 over 1 year ago

raw

history blame contribute delete

4.52 kB

	import html
	import logging
	import os
	import time

	import pymongo
	import streamlit as st
	from langchain.prompts import ChatPromptTemplate
	from langchain_openai import ChatOpenAI
	from pyvi.ViTokenizer import tokenize

	from src.indexing import indexData, SHEET_ID, SHEET_NAME
	from src.services.generate_embedding import generate_embedding

	# Connect DB
	#
	# NOTE(security): this connection string embeds a credential that has been
	# committed to source control. It is kept only as a fallback so existing
	# deployments keep working; rotate the password and supply the URI through
	# the MONGODB_URI environment variable instead.
	_LEGACY_MONGODB_URI = (
	    "mongodb+srv://rag:p9vojYc9fafYwxE9@rag.xswi7nq.mongodb.net/?retryWrites=true&w=majority&appName=RAG"
	)


	@st.cache_resource
	def _get_mongo_client():
	    """Return the MongoDB client, created once and reused across reruns.

	    Streamlit re-executes this script on every interaction; caching the
	    client avoids opening a new connection pool each time. The URI is read
	    from the MONGODB_URI environment variable when it is set.
	    """
	    return pymongo.MongoClient(os.environ.get("MONGODB_URI", _LEGACY_MONGODB_URI))


	client = _get_mongo_client()
	db = client.rag
	collection = db.questionAndAnswers

	# Markup for the link to the spreadsheet that holds the question/answer pairs.
	_dataset_link_html = """
	<div style="display:flex; gap: 16px; align-items: center">
	<a style="font-size: 14px"
	href="https://docs.google.com/spreadsheets/d/1MKB6MHgL_lrPB1I69fj2VcVrgmSAMLVNZR1EwSyTSeA/edit#gid=0">Link
	question & answers</a>
	</div>
	"""

	# Collapsible "Dataset" panel: spreadsheet link on the left, re-index button
	# on the right.
	with st.expander("Dataset"):
	    link_column, retrain_column = st.columns(2)

	    link_column.markdown(_dataset_link_html, unsafe_allow_html=True)

	    with retrain_column:
	        retrain_clicked = st.button("Re-train")
	        if retrain_clicked:
	            # A single placeholder slot is reused for the progress messages.
	            status_slot = st.empty()
	            status_slot.empty()
	            status_slot.write("Training ...")
	            indexData(SHEET_ID, SHEET_NAME)
	            status_slot.write("Completed")



	def generateAnswer(
	    context: str,
	    question: str,
	    *,
	    model: str = "gpt-3.5-turbo-0125",
	    temperature: float = 0.8,
	):
	    """Ask the chat model to answer *question* using only *context*.

	    Args:
	        context: Retrieved question/answer pairs, formatted with the
	            ``<enter/>`` and ``<space/>`` separators the prompt describes.
	        question: The user's question, inserted verbatim into the prompt.
	        model: Name of the OpenAI chat model to call.
	        temperature: Sampling temperature passed to the model.

	    Returns:
	        The ``content`` of the message returned by the chat model.
	    """
	    template = ChatPromptTemplate.from_messages(
	        [
	            (
	                "user",
	                """Trả lời câu hỏi dựa trên thông tin trong thẻ <context>. Mỗi cặp câu hỏi và trả lời được ngăn cách bằng dấu <enter/>. Câu hỏi và trả lời được phân tách bằng dấu <space/>.
	Nếu không có thông tin liên quan trong context, chỉ trả lời "Tôi không biết".
	Câu trả lời phải đầy đủ thông tin, nhấn mạnh vào những điểm chính từ thông tin trong context.
	<context>{context}</context> Câu hỏi: {question}""",
	            ),
	        ]
	    )
	    messages = template.invoke({"context": context, "question": question})

	    # Debug-level logging instead of print(): the rendered prompt contains
	    # user input and the retrieved data, so it should not go to stdout on
	    # every request.
	    logging.getLogger(__name__).debug("Prompt messages: %s", messages)

	    chat = ChatOpenAI(model=model, temperature=temperature)
	    return chat.invoke(messages).content


	def stream_response(answer: str, delay: float = 0.03):
	    """Yield *answer* piece by piece to simulate a typing effect.

	    The text is split on single spaces and each piece is yielded with a
	    trailing space, so an empty *answer* yields one ``" "``.

	    Args:
	        answer: The full text to stream.
	        delay: Seconds to pause after each piece. Values <= 0 disable the
	            pause entirely.

	    Yields:
	        Each space-separated piece of *answer* followed by a space.
	    """
	    for word in answer.split(" "):
	        yield word + " "
	        if delay > 0:
	            time.sleep(delay)


	# Initialize chat history
	if "messages" not in st.session_state:
	    st.session_state.messages = []

	# Display chat messages from history on app rerun.
	# Assistant messages are HTML assembled by this app, so they need raw-HTML
	# rendering. User messages are typed text and are rendered as plain Markdown,
	# the same way they are shown when first submitted.
	for message in st.session_state.messages:
	    with st.chat_message(message["role"]):
	        st.markdown(
	            message["content"],
	            unsafe_allow_html=message["role"] == "assistant",
	        )

	def _format_context(documents):
	    """Join retrieved Q&A documents into the context string for the prompt.

	    Each pair is prefixed with ``<enter/>`` and the question is separated
	    from its answer by ``<space/> ``, matching the prompt's description.
	    """
	    return "".join(
	        f"<enter/>{doc['question']}<space/> {doc['answer']}" for doc in documents
	    )


	def _format_related_questions(documents):
	    """Render the retrieved questions as an HTML list ("" when none exist).

	    Question text is HTML-escaped because the result is displayed with
	    ``unsafe_allow_html=True``.
	    """
	    if not documents:
	        return ""
	    items = "".join(
	        f"<li>{html.escape(str(doc['question']))}</li>" for doc in documents
	    )
	    return f"""<ol> <p style="font-weight: 600">Câu hỏi liên quan: </p> {items}</ol>"""


	# React to user input
	if prompt := st.chat_input(""):
	    tokenized_prompt = tokenize(prompt)

	    # Add user message to chat history
	    # NOTE(review): the tokenized text (not the raw prompt) is what gets
	    # stored and displayed -- confirm this is intended.
	    st.session_state.messages.append({"role": "user", "content": tokenized_prompt})

	    # Display user message in chat message container
	    with st.chat_message("user"):
	        st.markdown(tokenized_prompt)

	    # Retrieve the stored questions closest to the prompt's embedding.
	    embedding = generate_embedding(tokenized_prompt)
	    results = collection.aggregate(
	        [
	            {
	                "$vectorSearch": {
	                    "queryVector": embedding,
	                    "path": "question_embedding",
	                    "numCandidates": 10,
	                    "limit": 10,
	                    "index": "vector_index",
	                }
	            }
	        ]
	    )
	    # The cursor can only be consumed once; materialize it for reuse below.
	    documents = list(results)

	    context = _format_context(documents)
	    related_questions_html = _format_related_questions(documents)

	    # The model is asked with the raw prompt, not the tokenized one.
	    answer = generateAnswer(context, prompt)

	    # The answer is model output influenced by user input, so it is escaped
	    # before being embedded in HTML rendered with unsafe_allow_html=True.
	    response = f"""<p>{html.escape(str(answer))}</p>
	{related_questions_html}
	"""

	    # Display assistant response in chat message container
	    with st.chat_message("assistant"):
	        st.markdown(response, unsafe_allow_html=True)

	    # Add assistant response to chat history
	    st.session_state.messages.append({"role": "assistant", "content": response})