Kdogs committed on
Commit 0659300
1 Parent(s): bb262a8

klue/bert-base, split O, sports data, batch 16

config.json CHANGED
@@ -1,29 +1,25 @@
  {
- "_name_or_path": "klue/roberta-base",
+ "_name_or_path": "klue/bert-base",
  "architectures": [
  "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
- "bos_token_id": 0,
  "classifier_dropout": null,
- "eos_token_id": 2,
- "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
- "layer_norm_eps": 1e-05,
- "max_position_embeddings": 514,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
- "pad_token_id": 1,
+ "pad_token_id": 0,
  "position_embedding_type": "absolute",
- "tokenizer_class": "BertTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
- "type_vocab_size": 1,
+ "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 32000
  }
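This commit swaps the base checkpoint from klue/roberta-base to klue/bert-base, so config.json now carries the stock BERT values (layer_norm_eps 1e-12, max_position_embeddings 512, pad_token_id 0, type_vocab_size 2) and drops the RoBERTa-only keys. A minimal sketch of how the updated config could be sanity-checked, assuming a local clone of this repo at this commit (the directory name below is a placeholder):

```python
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

# Placeholder: path to a local clone of this repo checked out at commit 0659300.
checkpoint = "./local-clone-of-this-repo"

config = AutoConfig.from_pretrained(checkpoint)
assert config.model_type == "bert"
assert config.max_position_embeddings == 512  # was 514 under the RoBERTa-style config
assert config.pad_token_id == 0               # was 1
assert config.type_vocab_size == 2            # was 1

# Weights and tokenizer load from the same identifier.
model = AutoModelForMaskedLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
print(model.num_parameters(), tokenizer.pad_token_id)
```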
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b1f9b76b6636e9cda315948a2b09326d45814fd55c090a6a1af325eeb4ae24f2
- size 442678315
+ oid sha256:ece410cdd22316c89dac5619c177f41bbe8c2e069bef037f1b23ba27e3938a26
+ size 442676985
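pytorch_model.bin is stored as a Git LFS pointer, so only its oid and size change here. If needed, a downloaded copy of the weights can be checked against the pointer by hashing the file; a short sketch (the local filename is an assumption):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so a ~442 MB checkpoint never sits fully in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Placeholder path to the downloaded weights from this commit.
expected = "ece410cdd22316c89dac5619c177f41bbe8c2e069bef037f1b23ba27e3938a26"
print(sha256_of("pytorch_model.bin") == expected)
```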
special_tokens_map.json CHANGED
@@ -1,7 +1,5 @@
  {
- "bos_token": "[CLS]",
  "cls_token": "[CLS]",
- "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
tokenizer.json CHANGED
@@ -12,14 +12,14 @@
  },
  "direction": "Right",
  "pad_to_multiple_of": null,
- "pad_id": 1,
+ "pad_id": 0,
  "pad_type_id": 0,
  "pad_token": "[PAD]"
  },
  "added_tokens": [
  {
  "id": 0,
- "content": "[CLS]",
+ "content": "[PAD]",
  "single_word": false,
  "lstrip": false,
  "rstrip": false,
@@ -28,7 +28,7 @@
  },
  {
  "id": 1,
- "content": "[PAD]",
+ "content": "[UNK]",
  "single_word": false,
  "lstrip": false,
  "rstrip": false,
@@ -37,7 +37,7 @@
  },
  {
  "id": 2,
- "content": "[SEP]",
+ "content": "[CLS]",
  "single_word": false,
  "lstrip": false,
  "rstrip": false,
@@ -46,7 +46,7 @@
  },
  {
  "id": 3,
- "content": "[UNK]",
+ "content": "[SEP]",
  "single_word": false,
  "lstrip": false,
  "rstrip": false,
@@ -117,13 +117,13 @@
  {
  "Sequence": {
  "id": "B",
- "type_id": 0
+ "type_id": 1
  }
  },
  {
  "SpecialToken": {
  "id": "[SEP]",
- "type_id": 0
+ "type_id": 1
  }
  }
  ],
@@ -131,7 +131,7 @@
  "[CLS]": {
  "id": "[CLS]",
  "ids": [
- 0
+ 2
  ],
  "tokens": [
  "[CLS]"
@@ -140,7 +140,7 @@
  "[SEP]": {
  "id": "[SEP]",
  "ids": [
- 2
+ 3
  ],
  "tokens": [
  "[SEP]"
@@ -159,10 +159,10 @@
  "continuing_subword_prefix": "##",
  "max_input_chars_per_word": 100,
  "vocab": {
- "[CLS]": 0,
- "[PAD]": 1,
- "[SEP]": 2,
- "[UNK]": 3,
+ "[PAD]": 0,
+ "[UNK]": 1,
+ "[CLS]": 2,
+ "[SEP]": 3,
  "[MASK]": 4,
  "!": 5,
  "\"": 6,
tokenizer_config.json CHANGED
@@ -1,16 +1,14 @@
  {
- "bos_token": "[CLS]",
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": false,
- "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "model_max_length": 512,
- "name_or_path": "klue/roberta-base",
+ "name_or_path": "klue/bert-base",
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
- "special_tokens_map_file": "/home/sifter/.cache/huggingface/hub/models--klue--roberta-base/snapshots/67dd433d36ebc66a42c9aaa85abcf8d2620e41d9/special_tokens_map.json",
+ "special_tokens_map_file": "/data1/hanyang/.cache/huggingface/hub/models--klue--bert-base/snapshots/34b965303f98bc5214daca7f76b7fb82d2dc6183/special_tokens_map.json",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
vocab.txt CHANGED
@@ -1,7 +1,7 @@
- [CLS]
  [PAD]
- [SEP]
  [UNK]
+ [CLS]
+ [SEP]
  [MASK]
  !
  "