Hoang Vu Minh
Create app.py
2c7e9b4 verified
raw
history blame
1.52 kB
import streamlit as st
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf
import numpy as np
def convert_label_to_title(label):
    """Return the Vietnamese category title for a numeric class label.

    The eight known labels are the integers 0 through 7. Any other label
    raises ``KeyError``, because the lookup goes through a dict.
    """
    # Position in this tuple is the class id.
    titles = (
        "SỨC KHỎE",    # health
        "GIÁO DỤC",    # education
        "THỂ THAO",    # sports
        "PHÁP LUẬT",   # law
        "KHOA HỌC",    # science
        "DU LỊCH",     # travel
        "GIẢI TRÍ",    # entertainment
        "KINH DOANH",  # business
    )
    # A dict (not tuple indexing) keeps KeyError for unknown or negative ids.
    return dict(enumerate(titles))[label]
def predict_sentence(model, tokenizer, sentence):
    """Classify ``sentence`` and report the predicted category.

    The sentence is tokenized into TensorFlow tensors (with padding and
    truncation enabled), passed through ``model``, and the logits are
    softmax-normalised along axis 1.

    Returns:
        A 3-tuple ``(title, probabilities, highest_probability)``:
        the title from ``convert_label_to_title`` for the arg-max class of
        the first row, the full softmax output as a NumPy array, and the
        probability of that arg-max class.
    """
    encoded = tokenizer(
        sentence,
        return_tensors='tf',
        padding=True,
        truncation=True,
    )
    outputs = model(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
    )
    scores = outputs.logits

    # Convert to NumPy once; the same array serves both return values.
    probs = tf.nn.softmax(scores, axis=1).numpy()

    # Only the first row of the batch is used to pick the class.
    best_class = tf.argmax(scores, axis=1).numpy()[0]

    return convert_label_to_title(best_class), probs, probs[0, best_class]
def load_model(checkpoint, num_class):
    """Load a sequence-classification model and its tokenizer.

    Both are created with ``from_pretrained(checkpoint)``; the model is
    configured with ``num_labels=num_class``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = TFAutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_class,
    )
    # The model is loaded before the tokenizer, as in the original order.
    return classifier, AutoTokenizer.from_pretrained(checkpoint)
checkpoint = 'distilbert-base-multilingual-cased'
num_classes = 8  # number of categories known to convert_label_to_title

# Streamlit re-executes this script on every widget interaction. Without
# caching, the model, tokenizer and weight file would be reloaded each time
# the user edits the text box. Use st.cache_resource when the installed
# Streamlit provides it; otherwise fall back to plain (uncached) loading.
_cache_resource = getattr(st, 'cache_resource', lambda func: func)


@_cache_resource
def _load_classifier():
    """Build the model and tokenizer and apply the fine-tuned weights."""
    loaded_model, loaded_tokenizer = load_model(checkpoint, num_classes)
    loaded_model.load_weights('best_model_weights.h5')
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_classifier()

text = st.text_area('Nhập tiêu đề vào đây')

# Skip empty and whitespace-only input: there is nothing to classify.
if text and text.strip():
    title, probabilities, _highest = predict_sentence(model, tokenizer, text)

    # `probabilities` is a NumPy array, which is not JSON-serializable.
    # Convert the first (only) row to plain floats keyed by category title
    # so st.json can display real numbers next to readable labels.
    out = {
        'title': title,
        'prob': {
            convert_label_to_title(index): round(float(prob), 4)
            for index, prob in enumerate(probabilities[0])
        },
    }
    st.json(out)