from transformers import BertTokenizer, AlbertForSequenceClassification

model_path = "clhuang/albert-news-classification"
model = AlbertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")

# Return the predicted news category and its probability for a piece of text
def get_category_proba(text):
    max_length = 250
    # Prepare the text as a tokenized, padded/truncated tensor sequence
    inputs = tokenizer([text], padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    # Run the model
    outputs = model(**inputs)
    # Turn the output logits into probabilities with softmax
    probs = outputs[0].softmax(1)

    # argmax gives the index of the most likely label; .item() converts the tensor to an int
    label_index = probs.argmax(dim=1)[0].item()

    # Map the label index to its category name (idx2cate is described in the note below)
    label = idx2cate[label_index]

    # Convert the probability to a plain Python float and round it
    proba = round(float(probs.tolist()[0][label_index]), 2)

    response = {'label': label, 'proba': proba}

    return response

# Classify a Chinese news excerpt about Finland preparing to apply for NATO membership
get_category_proba('俄羅斯2月24日入侵烏克蘭至今不到3個月,芬蘭已準備好扭轉奉行了75年的軍事不結盟政策,申請加入北約。芬蘭總理馬林昨天表示,「希望我們下星期能與瑞典一起提出申請」。')
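
The snippet above assumes an `idx2cate` dictionary that maps the model's output indices to news category names; it is not defined in this README. A minimal sketch, assuming the mapping can be built from the config's `id2label` field (the names stored there may be generic placeholders rather than the real categories):

# Hypothetical helper: derive idx2cate from the model config.
# If id2label only contains generic names such as 'LABEL_0', replace the values
# with the actual Chinese news category names the model was trained on.
idx2cate = {idx: name for idx, name in model.config.id2label.items()}

# get_category_proba then returns a dict of the form
# {'label': <predicted category>, 'proba': <probability rounded to 2 decimals>}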