import streamlit as st
from pyvi.ViTokenizer import tokenize
from src.services.generate_embedding import generate_embedding
import pymongo
import time
from src.indexing import indexData, SHEET_ID, SHEET_NAME
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import os
# Connect to MongoDB Atlas. The connection string is read from the
# environment rather than hard-coded, so credentials stay out of source
# control (set MONGODB_URI before running the app).
client = pymongo.MongoClient(os.environ["MONGODB_URI"])
db = client.rag
collection = db.questionAndAnswers
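# The $vectorSearch stage further down assumes an Atlas Vector Search index
# named "vector_index" on this collection. A minimal sketch of such an index
# definition follows; the similarity metric and dimension count are
# assumptions and must match the output of generate_embedding:
#
#   {
#     "fields": [
#       {
#         "type": "vector",
#         "path": "question_embedding",
#         "numDimensions": 768,
#         "similarity": "cosine"
#       }
#     ]
#   }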
with st.expander('Dataset'):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown(
            """
            """,
            unsafe_allow_html=True,
        )
    with col2:
        # Rebuild the vector index from the Google Sheet on demand.
        if st.button('Re-train'):
            placeholder = st.empty()
            placeholder.write('Training ...')
            indexData(SHEET_ID, SHEET_NAME)
            placeholder.write('Completed')
def generateAnswer(context: str, question: str):
    # Vietnamese RAG prompt: answer only from the retrieved question/answer
    # pairs in the context (one pair per line); otherwise reply
    # "Tôi không biết" ("I don't know").
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "user",
                """Trả lời câu hỏi dựa trên thông tin trong context dưới đây. Mỗi cặp câu hỏi và trả lời nằm trên một dòng riêng.
Nếu không có thông tin liên quan trong context, chỉ trả lời "Tôi không biết".
Câu trả lời phải đầy đủ thông tin, nhấn mạnh vào những điểm chính từ thông tin trong context.
Context: {context}
Câu hỏi: {question}""",
            ),
        ]
    )
    messages = prompt.invoke({"context": context, "question": question})
    print(messages)  # Debug: log the fully rendered prompt
    chat = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.8)
    response = chat.invoke(messages)
    return response.content
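# Usage sketch (the strings here are made-up examples, not app data):
#   generateAnswer(
#       "Thủ đô của Việt Nam là gì? Thủ đô của Việt Nam là Hà Nội.",
#       "Thủ đô của Việt Nam là gì?",
#   )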
def stream_response(answer: str):
    # Yield the answer word by word so Streamlit can render it incrementally.
    for word in answer.split(" "):
        yield word + " "
        time.sleep(0.03)
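# stream_response pairs with st.write_stream (available in Streamlit >= 1.31)
# to render the answer incrementally, e.g.:
#   with st.chat_message("assistant"):
#       st.write_stream(stream_response(answer))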
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"], unsafe_allow_html=True)
# React to user input
if prompt := st.chat_input(""):
    # Word-segment the Vietnamese input: the embedding model expects
    # pyvi-style tokens, but the raw prompt is what the user should see.
    tokenized_prompt = tokenize(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    embedding = generate_embedding(tokenized_prompt)
    # Retrieve the most similar stored questions via Atlas Vector Search.
    # numCandidates should comfortably exceed limit for good recall.
    results = collection.aggregate(
        [
            {
                "$vectorSearch": {
                    "queryVector": embedding,
                    "path": "question_embedding",
                    "numCandidates": 100,
                    "limit": 10,
                    "index": "vector_index",
                }
            }
        ]
    )
    # Collect the retrieved pairs: the questions double as "related question"
    # suggestions, and question + answer pairs form the LLM context.
    possible_questions = []
    context_parts = []
    for document in results:
        possible_questions.append(f"- {document['question']}")
        context_parts.append(f"{document['question']} {document['answer']}")
    context = "\n".join(context_parts)
    # "Câu hỏi liên quan" = "Related questions"
    related = "Câu hỏi liên quan:\n" + "\n".join(possible_questions)
    answer = generateAnswer(context, prompt)
    response = f"""{answer}

{related}
"""
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response, unsafe_allow_html=True)
# st.markdown(f"""Question: {question}
""", unsafe_allow_html=True)
# st.write_stream(stream_response(answer))
# st.markdown(posibleQuestions, unsafe_allow_html=True)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})