File size: 4,304 Bytes
205b451
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
from pyvi.ViTokenizer import tokenize
from src.services.generate_embedding import generate_embedding
import pymongo
import time
from src.indexing import indexData, SHEET_ID, SHEET_NAME
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import os

# Connect DB
# SECURITY: the connection string (with credentials) was hard-coded here and
# has been committed to source control — rotate that password. Prefer the
# MONGODB_URI environment variable; fall back to the original value so
# existing deployments keep working. MongoClient connects lazily, so this
# does not hit the network at import time.
client = pymongo.MongoClient(
    os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://rag:p9vojYc9fafYwxE9@rag.xswi7nq.mongodb.net/?retryWrites=true&w=majority&appName=RAG",
    )
)
db = client.rag                      # database: rag
collection = db.questionAndAnswers   # Q/A documents with question embeddings

# Dataset panel: link to the source spreadsheet plus a manual re-index button.
with st.expander('Dataset'):
    col1, col2 = st.columns(2)
    with col1:
        # Link to the Google Sheet holding the question/answer pairs.
        st.markdown(
            """
            <div style="display:flex; gap: 16px; align-items: center">
                <a style="font-size: 14px"
                    href="https://docs.google.com/spreadsheets/d/1MKB6MHgL_lrPB1I69fj2VcVrgmSAMLVNZR1EwSyTSeA/edit#gid=0">Link
                    question & answers</a>
            </div>
            """,
            unsafe_allow_html=True,
        )

    with col2:
        # Re-embed and re-index the sheet contents on demand.
        if st.button('Re-train'):
            # st.empty() already yields an empty slot; the original extra
            # placeholder.empty() call was redundant and has been dropped.
            placeholder = st.empty()
            placeholder.write('Training ...')
            indexData(SHEET_ID, SHEET_NAME)
            placeholder.write('Completed')


def generateAnswer(context: str, question: str) -> str:
    """Answer *question* from the retrieved *context* using GPT-3.5.

    The (Vietnamese) prompt instructs the model to answer strictly from the
    <context> block and to reply "Tôi không biết" ("I don't know") when the
    context does not contain relevant information.

    Args:
        context: Concatenated "question: answer" pairs retrieved from MongoDB.
        question: The user's original, un-tokenized question.

    Returns:
        The model's answer as plain text.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "user","""Trả lời câu hỏi của người dùng dựa vào thông tin có trong thẻ <context> </context> được cho bên dưới. Nếu context không chứa những thông tin liên quan tới câu hỏi, thì đừng trả lời và chỉ trả lời là "Tôi không biết". <context> {context} </context> Câu hỏi: {question}""",
            ),
        ]
    )
    # Fixed: removed leftover debug print(messages) and the stray semicolon.
    messages = prompt.invoke({"context": context, "question": question})
    # temperature=0.8 allows some phrasing variety; lower it for more
    # deterministic answers.
    chat = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.8)
    response = chat.invoke(messages)
    return response.content


def stream_response(answer: str):
    """Generator that emits *answer* word by word for a typing effect.

    Splits on single spaces and re-appends a trailing space to every chunk
    (so the consumer can concatenate them directly), pausing 30 ms after
    each chunk so st.write_stream animates the output.
    """
    for token in answer.split(" "):
        yield f"{token} "
        time.sleep(0.03)


# ---------------------------------------------------------------------------
# Chat UI
# ---------------------------------------------------------------------------

# Initialize chat history once per browser session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the stored conversation on every Streamlit rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"], unsafe_allow_html=True)

# Handle a new user message.
if prompt := st.chat_input(""):
    # pyvi word segmentation (joins Vietnamese compounds with "_"); the stored
    # question embeddings were built from tokenized text, so the query must be
    # tokenized the same way before embedding.
    tokenized_prompt = tokenize(prompt)

    # NOTE(review): the tokenized form (with "_" separators) is what gets
    # displayed and persisted in history; show `prompt` instead if that is
    # unintended — confirm with the original author.
    st.session_state.messages.append({"role": "user", "content": tokenized_prompt})
    with st.chat_message("user"):
        st.markdown(tokenized_prompt)

    # Semantic retrieval via MongoDB Atlas Vector Search over the question
    # embeddings.
    embedding = generate_embedding(tokenized_prompt)
    results = collection.aggregate(
        [
            {
                "$vectorSearch": {
                    "queryVector": embedding,
                    "path": "question_embedding",
                    # Atlas recommends numCandidates well above `limit`
                    # (10-20x) for better recall; kept at 10 to preserve
                    # current behavior.
                    "numCandidates": 10,
                    "limit": 10,
                    "index": "vector_index",
                }
            }
        ]
    )

    # Materialize the cursor once, then derive both the LLM context and the
    # "related questions" HTML in single passes (also fine for zero results).
    # Replaces the original quadratic string concatenation and manual index
    # counter; the `question` local was only used by commented-out debug code
    # and has been removed together with that dead code.
    documents = list(results)
    possibleQuestions = "".join(f"<li>{doc['question']}</li>" for doc in documents)
    context = "".join(f"\n\n{doc['question']}: {doc['answer']}" for doc in documents)
    possibleQuestions = f"""<ol> <p style="font-weight: 600">Câu hỏi liên quan: </p> {possibleQuestions}</ol>"""

    # Answer from the retrieved context using the ORIGINAL prompt (the LLM
    # does not need the tokenized form).
    answer = generateAnswer(context, prompt)
    response = f"""<p>{answer}</p>
                    {possibleQuestions}
                    """

    # Display the assistant response and persist it in the session history.
    with st.chat_message("assistant"):
        st.markdown(response, unsafe_allow_html=True)

    st.session_state.messages.append({"role": "assistant", "content": response})