Hoang Vu Minh committed on
Commit
2c7e9b4
1 Parent(s): 87b1021

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
3
+ import tensorflow as tf
4
+ import numpy as np
5
+
6
def convert_label_to_title(label):
    """Return the Vietnamese category title for a class index.

    Args:
        label: Class index used as the lookup key (0 through 7).

    Returns:
        The upper-case category title associated with ``label``.

    Raises:
        KeyError: If ``label`` is not one of the eight known indices.
    """
    # Position in this tuple is the class index the title belongs to.
    titles = (
        "SỨC KHỎE",
        "GIÁO DỤC",
        "THỂ THAO",
        "PHÁP LUẬT",
        "KHOA HỌC",
        "DU LỊCH",
        "GIẢI TRÍ",
        "KINH DOANH",
    )
    # A dict (rather than direct tuple indexing) keeps lookup semantics strict:
    # negative or out-of-range indices raise KeyError instead of wrapping.
    title_by_index = dict(enumerate(titles))
    return title_by_index[label]
18
+
19
def predict_sentence(model, tokenizer, sentence):
    """Classify ``sentence`` and report the predicted title with probabilities.

    Args:
        model: Callable sequence-classification model; it is invoked with the
            token ids and an ``attention_mask`` keyword and must expose
            ``.logits`` on its output.
        tokenizer: Callable tokenizer returning a mapping that contains
            ``'input_ids'`` and ``'attention_mask'``.
        sentence: Text to classify.

    Returns:
        A tuple ``(title, probabilities, highest_probability)`` where
        ``title`` is the category title of the arg-max class of the first
        row, ``probabilities`` is the softmax of the logits as a NumPy array,
        and ``highest_probability`` is the probability of that class.
    """
    encoded = tokenizer(sentence, return_tensors='tf', padding=True, truncation=True)
    outputs = model(encoded['input_ids'], attention_mask=encoded['attention_mask'])
    logits = outputs.logits

    # Only the first row is inspected; presumably the batch holds a single
    # sentence -- confirm if callers ever pass a list of sentences.
    best_index = tf.argmax(logits, axis=1).numpy()[0]

    class_probs = tf.nn.softmax(logits, axis=1).numpy()
    best_prob = class_probs[0, best_index]

    return convert_label_to_title(best_index), class_probs, best_prob
27
+
28
def load_model(checkpoint, num_class):
    """Load a sequence-classification model and its tokenizer.

    Args:
        checkpoint: Name or path passed to ``from_pretrained`` for both the
            model and the tokenizer.
        num_class: Number of output labels, forwarded as ``num_labels``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = TFAutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_class,
    )
    return classifier, AutoTokenizer.from_pretrained(checkpoint)
32
+
33
checkpoint = 'distilbert-base-multilingual-cased'

# Streamlit re-executes this script on each widget interaction, so an uncached
# load would rebuild the model and re-read the weights file for every
# submitted title. ``st.cache_resource`` keeps one loaded instance alive; on
# Streamlit releases that lack it, fall back to the original uncached load.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_classifier(checkpoint_name, num_class, weights_path):
    """Build the model/tokenizer pair and apply the fine-tuned weights.

    Args:
        checkpoint_name: Checkpoint forwarded to ``load_model``.
        num_class: Number of output labels forwarded to ``load_model``.
        weights_path: Path of the weights file passed to ``load_weights``.

    Returns:
        A ``(model, tokenizer)`` tuple with the weights already loaded.
    """
    loaded_model, loaded_tokenizer = load_model(checkpoint_name, num_class)
    loaded_model.load_weights(weights_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_classifier(checkpoint, 8, 'best_model_weights.h5')

text = st.text_area('Nhập tiêu đề vào đây')

# An empty text area yields a falsy string, so nothing is predicted until the
# user has typed something.
if text:
    # ``highest`` (probability of the predicted class) is returned but not
    # displayed; the output keeps the original 'title' / 'prob' keys.
    title, probabilities, highest = predict_sentence(model, tokenizer, text)
    out = {
        'title': title,
        # Convert the NumPy array to nested lists: a raw ndarray is not JSON
        # serialisable, so st.json would otherwise not render the numbers.
        'prob': probabilities.tolist(),
    }
    st.json(out)
46
+
47
+