from transformers import BertTokenizer, AlbertForSequenceClassification

model_path = "clhuang/albert-news-classification"
model = AlbertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")

# Return the predicted news category and its probability for a piece of text
def get_category_proba(text):
    max_length = 250
    # Prepare the text as a tokenized, padded/truncated tensor sequence
    inputs = tokenizer([text], padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    # Run the model
    outputs = model(**inputs)
    # Turn the output logits into probabilities with softmax
    probs = outputs[0].softmax(1)

    # argmax gives the index of the most likely label; .item() converts the tensor to an int
    label_index = probs.argmax(dim=1)[0].item()

    # Map the label index to its category name (idx2cate is described in the note below)
    label = idx2cate[label_index]

    # Convert the probability to a plain Python float and round it
    proba = round(float(probs.tolist()[0][label_index]), 2)

    response = {'label': label, 'proba': proba}

    return response

# Classify a Chinese news excerpt about Finland preparing to apply for NATO membership
get_category_proba('俄羅斯2月24日入侵烏克蘭至今不到3個月,芬蘭已準備好扭轉奉行了75年的軍事不結盟政策,申請加入北約。芬蘭總理馬林昨天表示,「希望我們下星期能與瑞典一起提出申請」。')
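
The snippet above assumes an `idx2cate` dictionary that maps the model's output indices to news category names; it is not defined in this README. A minimal sketch, assuming the mapping can be built from the config's `id2label` field (the names stored there may be generic placeholders rather than the real categories):

# Hypothetical helper: derive idx2cate from the model config.
# If id2label only contains generic names such as 'LABEL_0', replace the values
# with the actual Chinese news category names the model was trained on.
idx2cate = {idx: name for idx, name in model.config.id2label.items()}

# get_category_proba then returns a dict of the form
# {'label': <predicted category>, 'proba': <probability rounded to 2 decimals>}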