Spaces:

minnehwg
/

vnexpress-title-classification

Runtime error

App Files Files Community

Hoang Vu Minh committed on Feb 26, 2024

Commit

2c7e9b4

verified ·

1 Parent(s): 87b1021

Create app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import streamlit as st
+from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
+import tensorflow as tf
+import numpy as np
# Class index -> Vietnamese category name. Built once at import time instead
# of on every call.
_LABEL_TITLES = {
    0: "SỨC KHỎE",
    1: "GIÁO DỤC",
    2: "THỂ THAO",
    3: "PHÁP LUẬT",
    4: "KHOA HỌC",
    5: "DU LỊCH",
    6: "GIẢI TRÍ",
    7: "KINH DOANH",
}


def convert_label_to_title(label):
    """Return the category title for a predicted class index.

    Args:
        label: Class index in the range 0-7. Anything that hashes and
            compares equal to one of those integers (for example a NumPy
            integer scalar) is accepted.

    Returns:
        The upper-case Vietnamese category name mapped to ``label``.

    Raises:
        KeyError: If ``label`` is not one of the eight known class indices.
    """
    return _LABEL_TITLES[label]
def predict_sentence(model, tokenizer, sentence):
    """Classify ``sentence`` and return its category title and probabilities.

    The sentence is tokenized into TensorFlow tensors (with padding and
    truncation enabled), passed through ``model``, and the resulting logits
    are converted to probabilities with a softmax over axis 1. The predicted
    class is the arg-max of the first row of logits.

    Args:
        model: Callable that accepts ``input_ids`` and ``attention_mask`` and
            returns an object exposing ``.logits``.
        tokenizer: Callable that returns a mapping containing ``input_ids``
            and ``attention_mask``.
        sentence: Text to classify.

    Returns:
        A tuple ``(title, probabilities, highest_probability)``: the title
        from ``convert_label_to_title`` for the predicted class, the softmax
        output as a NumPy array, and the probability of the predicted class.
    """
    encoded = tokenizer(sentence, return_tensors='tf', padding=True, truncation=True)
    outputs = model(encoded['input_ids'], attention_mask=encoded['attention_mask'])
    class_probs = tf.nn.softmax(outputs.logits, axis=1).numpy()
    # Only the first row is inspected when picking the predicted class.
    best_class = tf.argmax(outputs.logits, axis=1).numpy()[0]
    return convert_label_to_title(best_class), class_probs, class_probs[0, best_class]
def load_model(checkpoint, num_class):
    """Load a sequence-classification model and its tokenizer.

    Args:
        checkpoint: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.
        num_class: Forwarded to the model loader as ``num_labels``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = TFAutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=num_class,
    )
    return classifier, AutoTokenizer.from_pretrained(checkpoint)
checkpoint = 'distilbert-base-multilingual-cased'


@st.cache_resource
def _load_classifier(checkpoint_name, num_class, weights_path):
    """Load the model, tokenizer and fine-tuned weights once per process.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-instantiating the model and re-reading the
    weights file on each rerun.
    """
    loaded_model, loaded_tokenizer = load_model(checkpoint_name, num_class)
    loaded_model.load_weights(weights_path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_classifier(checkpoint, 8, 'best_model_weights.h5')

text = st.text_area('Nhập tiêu đề vào đây')
if text:
    # The third return value (probability of the predicted class) is not shown.
    title, probabilities, _ = predict_sentence(model, tokenizer, text)
    # Convert the NumPy array to nested lists so st.json receives
    # JSON-serializable data.
    st.json({
        'title': title,
        'prob': probabilities.tolist(),
    })