NusaBERT / model.py
w11wo's picture
Initial Prototype
76d290c
raw
history blame
4.66 kB
from utils import text_analysis_interface, token_classification_interface, text_interface
from transformers import pipeline
import os
# Hub access token, read from the TOKEN_FROM_SECRET environment variable.
# When the variable is unset or empty the value falls back to True
# (presumably telling the Hub client to use locally stored credentials --
# confirm against the transformers version in use).
auth_token = os.environ.get("TOKEN_FROM_SECRET")
if not auth_token:
    auth_token = True
def _load_pipeline(model_id):
    """Build a transformers pipeline for the Hub checkpoint ``model_id``.

    The module-level ``auth_token`` is forwarded on every call.
    """
    # NOTE(review): recent transformers releases document this argument as
    # `token` (formerly `use_auth_token`); confirm that `auth_token` is
    # honoured by the transformers version this project pins.
    return pipeline(model=model_id, auth_token=auth_token)


# Each checkpoint is loaded exactly once and the resulting pipeline object is
# shared between the combined "Text Analysis" entry and its single-task entry,
# so no model is instantiated twice at import time.
# The sentiment pipeline uses the SmSA checkpoint, matching the
# "Sentiment Analysis" label and description (it previously pointed at the
# EmoT emotion checkpoint).
_sentiment_pipe = _load_pipeline("LazarusNLP/NusaBERT-base-SmSA")
_emotion_pipe = _load_pipeline("LazarusNLP/NusaBERT-base-EmoT")
_pos_pipe = _load_pipeline("LazarusNLP/NusaBERT-base-POSP")
_ner_pipe = _load_pipeline("LazarusNLP/NusaBERT-base-NERP")

# Registry of demo tasks, keyed by task name. Every entry has the same keys:
#   "title"        -- heading string for the task
#   "examples"     -- sample input sentences
#   "output_label" -- output label (a list of labels for "Text Analysis")
#   "desc"         -- description text
#   "interface"    -- callable imported from utils (presumably builds the UI
#                     for the task -- confirm in utils)
#   "pipe"         -- transformers pipeline (a list for "Text Analysis")
models = {
    "Text Analysis": {
        "title": "Text Analysis",
        "examples": [
            "Allianz adalah persuhaan asuransi yang di dirikan pada tanggal February 5, 1890 di Berlin, Jerman.",
            "Restaurant ini sangat tidak enak. Enakan Pizza Hut.",
            "Kacau lu ngerusakin rumah orang. Nih rumah yang punya Pak Presiden Jokowi.",
        ],
        "output_label": [
            "Sentiment Analysis",
            "Emotion Classifier",
            "POS Tagging",
            "NER Tagging",
        ],
        "desc": "A tool to showcase the full capabilities of text analysis NusaBERT fine-tuning has to offer.",
        "interface": text_analysis_interface,
        # Kept in the same order as "output_label" above (presumably the
        # interface pairs them positionally -- confirm in utils).
        "pipe": [
            _sentiment_pipe,
            _emotion_pipe,
            _pos_pipe,
            _ner_pipe,
        ],
    },
    "Sentiment Analysis": {
        "title": "Sentiment Analysis",
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "output_label": "Sentiment Analysis",
        "desc": "A sentiment-text-classification model based on the BERT model. The model was originally the pre-trained NusaBERT Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
        "interface": text_interface,
        "pipe": _sentiment_pipe,
    },
    "Emotion Detection": {
        "title": "Emotion Classifier",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "Emotion Classifier",
        "desc": "An emotion classifier based on the BERT model. The model was originally the pre-trained NusaBERT Base model, which is then fine-tuned on indonlu's EmoT dataset",
        "interface": text_interface,
        "pipe": _emotion_pipe,
    },
    "POS Tagging": {
        "title": "POS Tagging",
        "examples": [
            "iya semoga itu karya terbaik mu adalah skripsi mu dan lucua2n mu tapi harapan aku dari kamu adalah kesembuhanmu nold",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "output_label": "POS Tagging",
        "desc": "A part-of-speech token-classification model based on the BERT model. The model was originally the pre-trained NusaBERT Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _pos_pipe,
    },
    "NER Tagging": {
        "title": "NER Tagging",
        "examples": [
            "Paris adalah ibukota dari negara Prancis.",
            "Kuasa hukum teamster berasal dari Edmonton.",
            "Jakarta, Indonesia akan menjadi bagian salah satu tempat yang akan didatangi.",
        ],
        "output_label": "NER Tagging",
        "desc": "A NER Tagging token-classification model based on the BERT model. The model was originally the pre-trained NusaBERT Base model, which is then fine-tuned on indonlu's NERP dataset consisting of tag-labelled news.",
        "interface": token_classification_interface,
        "pipe": _ner_pipe,
    },
}