tyqiangz committed on
Commit
cfe845b
1 Parent(s): fe70db6

Adding indobert-lite-large-p2 model fine-tuned on IndoNLU (SmSA)

README.md ADDED
@@ -0,0 +1,24 @@
+ ---
+ language: id
+ tags:
+ - indobert
+ - indobenchmark
+ - indonlu
+ license: mit
+ inference: false
+ datasets:
+ - Indo4B
+ - IndoNLU (SmSA)
+ ---
+
+ # IndoBERT-Lite Large Model (phase2 - uncased) Finetuned on IndoNLU SmSA dataset
+
+ ## How to use
+
+ ### Load model and tokenizer
+
+ ```python
+ from transformers import BertTokenizer, AutoModel
+ tokenizer = BertTokenizer.from_pretrained("tyqiangz/indobert-lite-large-p2-smsa")
+ model = AutoModel.from_pretrained("tyqiangz/indobert-lite-large-p2-smsa")
+ ```
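The snippet above loads only the base encoder. Since config.json below declares `AlbertForSequenceClassification` with three output labels, a minimal inference sketch could instead keep the fine-tuned head via `AutoModelForSequenceClassification`; this usage is not part of the original card, the label names are the generic `LABEL_0`/`LABEL_1`/`LABEL_2` from the config, and the example sentence is illustrative only.

```python
import torch
from transformers import AutoModelForSequenceClassification, BertTokenizer

# Load the tokenizer and the fine-tuned classification model (3 labels per config.json).
tokenizer = BertTokenizer.from_pretrained("tyqiangz/indobert-lite-large-p2-smsa")
model = AutoModelForSequenceClassification.from_pretrained("tyqiangz/indobert-lite-large-p2-smsa")
model.eval()

# Illustrative Indonesian sentence ("I really like this product").
text = "Saya sangat suka produk ini"
inputs = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

predicted_id = logits.argmax(dim=-1).item()
# id2label comes straight from config.json (LABEL_0, LABEL_1, LABEL_2).
print(model.config.id2label[predicted_id])
```

Using `AutoModelForSequenceClassification` preserves the classification head that a plain `AutoModel` load would discard.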
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+ "_name_or_path": "indobenchmark/indobert-lite-large-p2",
+ "_num_labels": 5,
+ "architectures": [
+ "AlbertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0,
+ "bos_token_id": 2,
+ "classifier_dropout_prob": 0.1,
+ "down_scale_factor": 1,
+ "embedding_size": 128,
+ "eos_token_id": 3,
+ "gap_size": 0,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "initializer_range": 0.02,
+ "inner_group_num": 1,
+ "intermediate_size": 4096,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "albert",
+ "net_structure_type": 0,
+ "num_attention_heads": 16,
+ "num_hidden_groups": 1,
+ "num_hidden_layers": 24,
+ "num_memory_blocks": 0,
+ "output_past": true,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.11.2",
+ "type_vocab_size": 2,
+ "vocab_size": 30000
+ }
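The architecture and label mapping recorded in this config can also be checked programmatically with the standard `transformers` config API; a minimal sketch, assuming the same repository id as in the README:

```python
from transformers import AutoConfig

# Fetch the config.json shown above and print the fields relevant for inference.
config = AutoConfig.from_pretrained("tyqiangz/indobert-lite-large-p2-smsa")
print(config.architectures)       # ['AlbertForSequenceClassification']
print(config.id2label)            # {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}
print(config.num_hidden_layers, config.hidden_size)  # 24 1024
```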
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05fb85c6421835bd1922fd0cc16d66e6450953ae5379fc2067858db559f84f1b
+ size 70763217
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/root/.cache/huggingface/transformers/079b012b9db3ee4852c05e54afd77ca52783f176d619068fe301bdc3fdce319a.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_file": null, "name_or_path": "indobenchmark/indobert-lite-large-p2", "tokenizer_class": "BertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render.