clhuang committed on
Commit
1920585
1 Parent(s): a7fa3c6

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +30 -0
README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from transformers import BertTokenizer, AlbertForSequenceClassification

# Hub id of the fine-tuned ALBERT sequence-classification checkpoint.
model_path = "clhuang/albert-news-classification"
model = AlbertForSequenceClassification.from_pretrained(model_path)
# The tokenizer is loaded from the "bert-base-chinese" vocabulary, not from model_path.
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")

# get_category_proba() looks labels up in `idx2cate`, which this snippet never
# defined (NameError on first call). Use the index -> label mapping stored in
# the model config.
# NOTE(review): assumes the checkpoint's config carries the real category
# names; if it only holds default "LABEL_n" entries, replace this with the
# explicit mapping used at training time.
idx2cate = model.config.id2label
5
+
6
+ # get category probability
def get_category_proba(text):
    """Classify one text and return its top category with the probability.

    Args:
        text: The string to classify. It is tokenized as a single-item batch
            and truncated to at most 250 tokens.

    Returns:
        A dict ``{'label': ..., 'proba': ...}`` where ``label`` is
        ``idx2cate[i]`` for the highest-scoring class index ``i`` and
        ``proba`` is that class's softmax probability, rounded to two
        decimal places (a plain Python float).
    """
    # Local import: the snippet only imports `transformers` at module level.
    import torch

    max_length = 250

    # Turn the text into a padded/truncated batch of PyTorch tensors.
    inputs = tokenizer(
        [text],
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # outputs[0] is taken as the logits; softmax over dim 1 gives per-class
    # probabilities for each batch row.
    probs = outputs[0].softmax(1)

    # The batch holds a single row, so take its argmax as a Python int.
    label_index = probs.argmax(dim=1)[0].item()
    label = idx2cate[label_index]

    # `probs` is a torch tensor; .item() yields a Python float for rounding.
    proba = round(probs[0][label_index].item(), 2)

    return {'label': label, 'proba': proba}
29
+
# Example: classify a short news paragraph (Traditional Chinese).
sample_text = '俄羅斯2月24日入侵烏克蘭至今不到3個月,芬蘭已準備好扭轉奉行了75年的軍事不結盟政策,申請加入北約。芬蘭總理馬林昨天表示,「希望我們下星期能與瑞典一起提出申請」。'
get_category_proba(sample_text)