File size: 3,649 Bytes
ef1adf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a13fc7
 
ef1adf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a13fc7
ef1adf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ece7757
 
 
 
8b8be5b
ece7757
2d5ca71
 
 
ef1adf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dbd639
66aca7d
ef1adf7
 
 
 
 
ece7757
ef1adf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import nltk
from transformers.models.roberta.modeling_roberta import *
from transformers import RobertaForQuestionAnswering
from nltk import word_tokenize
import json
import pandas as pd
# import re
import base64

# Debug separator written to stdout whenever this script's top level executes.
print("===================================================================")
@st.cache_data
def download_nltk_punkt():
    """Fetch the NLTK 'punkt' resource through ``nltk.download``.

    The call is wrapped in ``st.cache_data``; the function itself returns
    nothing.
    """
    nltk.download("punkt")

# Cache loading PhoBert model and tokenizer
@st.cache_resource
def load_phoBert():
    """Load the fine-tuned PhoBERT classifier together with its tokenizer.

    The loader is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: the return value is a model/tokenizer pair, which
    should be kept as one shared in-memory object instead of being
    serialized and copied the way cached data is.

    Returns:
        tuple: ``(model, tokenizer)`` loaded from the
        ``minhdang14902/Phobert_Law`` checkpoint.
    """
    # Both objects come from the same checkpoint, so name it once.
    checkpoint = 'minhdang14902/Phobert_Law'
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer



# Run the cached setup helpers defined above.
download_nltk_punkt()
phoBert_model, phoBert_tokenizer = load_phoBert()

# Wrap the loaded model and tokenizer in a "sentiment-analysis" pipeline.
chatbot_pipeline = pipeline(
    "sentiment-analysis",
    tokenizer=phoBert_tokenizer,
    model=phoBert_model,
)

# Load spaCy Vietnamese model
# nlp = spacy.load('vi_core_news_lg')

# Load intents from json file
def load_json_file(filename):
    """Read a JSON document from disk and return the parsed object.

    The file is opened explicitly as UTF-8 so that non-ASCII text (the
    intents data is Vietnamese) is decoded the same way on every platform
    instead of depending on the locale's default encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, encoding="utf-8") as f:
        return json.load(f)

# Path of the intents JSON file, given relative to the current working directory.
filename = './Law_2907.json'
# Parsed intents document; later code reads its 'intents' list.
intents = load_json_file(filename)

def create_df():
    """Build an empty DataFrame with a 'Pattern' and a 'Tag' column.

    Returns:
        pandas.DataFrame: A frame with the two columns and no rows.
    """
    empty_columns = {name: [] for name in ('Pattern', 'Tag')}
    return pd.DataFrame(empty_columns)

# Start from the empty Pattern/Tag frame; rows are added from the intents data further down.
df = create_df()

def extract_json_info(json_file, df):
    """Append one (pattern, tag) row to ``df`` for every pattern of every intent.

    Args:
        json_file: Mapping with an 'intents' list; each intent provides
            'patterns' (an iterable) and 'tag'.
        df: DataFrame that receives the rows; it is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Lazy stream of pairs, so each intent is read only when its rows are written.
    pattern_tag_pairs = (
        (pattern, intent['tag'])
        for intent in json_file['intents']
        for pattern in intent['patterns']
    )
    for pattern, tag in pattern_tag_pairs:
        # The new row's label is the current number of rows in the frame.
        df.loc[len(df.index)] = [pattern, tag]
    return df

# Fill the frame from the intents data and keep a separate working copy.
df = extract_json_info(intents, df)
df2 = df.copy()

# Distinct tags from the 'Tag' column, with surrounding whitespace removed.
labels = [tag.strip() for tag in df2['Tag'].unique().tolist()]
num_labels = len(labels)

# Two-way lookup between list positions and tag names.
id2label = dict(enumerate(labels))
label2id = {name: index for index, name in enumerate(labels)}

def chatPhobert(text):
    """Classify ``text`` and return the first canned response for the predicted tag.

    Args:
        text: Input passed to ``chatbot_pipeline``.

    Returns:
        The first entry of the selected intent's 'responses' list.

    Raises:
        KeyError: If the predicted label is not a key of ``label2id``.
    """
    top_prediction = chatbot_pipeline(text)[0]
    intent_index = label2id[top_prediction['label']]
    # NOTE(review): the label's position in `labels` is used as an index into
    # intents['intents']; this presumably relies on every intent having a
    # unique tag and at least one pattern — confirm against the data file.
    responses = intents['intents'][intent_index]['responses']
    first_response = responses[0]
    print(first_response)
    return first_response

# Page heading and introductory message.
st.title("Chatbot Phobert Law")
st.write("Hi! Tôi là trợ lý của bạn trong việc trả lời các câu hỏi về pháp luật. Nếu câu trả lời trống trơn, đừng lo, chỉ là hệ thống không thấy câu trả lời phù hợp!!")
# Text box for the user's message, registered under the widget key "input".
text = st.text_input("User: ", key="input")

# Create the conversation log in session state the first time it is needed.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []

# prompt = st.chat_input("Hãy chat gì đó!")
# if prompt:
#     result = chatRoberta(text)
#     st.write(f"HUIT Chatbot: {result[0]['answer']}")

# if st.button("Chat!"):
#     if text:
#         result = chatRoberta(text)
#         st.write(f"Chatbot: {result[0]['answer']}")
#     else:
#         st.write("Hãy chat gì đó!")

def get_response(text):
    """Show ``text`` on the page and return the chatbot's reply to it.

    Args:
        text: Message forwarded to ``chatPhobert``.

    Returns:
        Whatever ``chatPhobert(text)`` returns.
    """
    # NOTE(review): the heading reads "The Answer is:" but the value written
    # under it is the incoming text, not the reply — confirm this is intended.
    st.subheader("The Answer is:")
    st.write(text)
    return chatPhobert(text)

# Handle a click on the "Chat!" button: record the user's message, obtain the
# reply, display it, and record the reply as well.
if st.button("Chat!"):
    if text and text.strip():
        st.session_state['chat_history'].append(("User", text))

        response = get_response(text)

        st.subheader("The Response is:")
        # Placeholder that is overwritten as the reply is built up.
        message = st.empty()
        result = ""
        for chunk in response:
            result += chunk
            # Trailing block character acts as a cursor while text is added.
            message.markdown(result + "❚ ")
        # Final render without the cursor.
        message.markdown(result)
        st.session_state['chat_history'].append(("Bot", result))
    else:
        # Blank input: ask for a message instead of sending an empty string
        # to the classifier and logging a meaningless exchange.
        st.write("Hãy chat gì đó!")

# Render every stored exchange as a text area; the position in the history
# keeps each widget key unique.
for i, (sender, message) in enumerate(st.session_state['chat_history']):
    is_user = sender == "User"
    st.text_area(
        "User:" if is_user else "Bot:",
        value=message,
        height=100,
        max_chars=None,
        key=f"user_{i}" if is_user else f"bot_{i}",
    )