Update model and tokenizer files

Browse files

Files changed (5) hide show

config.json +8 -6
pytorch_model.bin +2 -2
special_tokens_map.json +5 -1
tokenizer.json +101 -14
tokenizer_config.json +5 -0

config.json CHANGED Viewed

@@ -1,8 +1,9 @@
 {
-  "_name_or_path": "/home/kibrq/sampling-from-normal-closure/results/commutator-translation/4-free-group/bert-bert-250/checkpoint-50000",
   "architectures": [
     "EncoderDecoderModel"
   ],
   "decoder": {
     "_name_or_path": "",
     "add_cross_attention": true,
@@ -53,7 +54,7 @@
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
-    "pad_token_id": 12,
     "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
@@ -78,7 +79,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 13
   },
   "decoder_start_token_id": 11,
   "encoder": {
@@ -131,7 +132,7 @@
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
-    "pad_token_id": 12,
     "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
@@ -156,11 +157,12 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
-    "vocab_size": 13
   },
   "is_encoder_decoder": true,
   "model_type": "encoder-decoder",
-  "pad_token_id": 12,
   "torch_dtype": "float32",
   "transformers_version": null
 }

 {
+  "_name_or_path": "/home/kibrq/draft/commutator-translator/bert-bert-512/checkpoint-419000",
   "architectures": [
     "EncoderDecoderModel"
   ],
+  "bos_token_id": 11,
   "decoder": {
     "_name_or_path": "",
     "add_cross_attention": true,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
+    "pad_token_id": 13,
     "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 14
   },
   "decoder_start_token_id": 11,
   "encoder": {
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
+    "pad_token_id": 13,
     "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_cache": true,
+    "vocab_size": 14
   },
+  "eos_token_id": 12,
   "is_encoder_decoder": true,
   "model_type": "encoder-decoder",
+  "pad_token_id": 13,
   "torch_dtype": "float32",
   "transformers_version": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d22f6c52b973dd2e02be4e76967ed8ceab03c46731d8f144d1a405e76a60aab0
-size 23610219

 version https://git-lfs.github.com/spec/v1
+oid sha256:e02c5c9098359dd6f2cb71eb401ca5889935b907919d2fbde3564fa71e894817
+size 23603883

special_tokens_map.json CHANGED Viewed

@@ -1 +1,5 @@
-{}

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>"
+}

tokenizer.json CHANGED Viewed

@@ -3,9 +3,81 @@
   "truncation": null,
   "padding": null,
   "added_tokens": [
     {
       "id": 8,
-      "content": ",",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -14,7 +86,7 @@
     },
     {
       "id": 9,
-      "content": "[",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -23,7 +95,7 @@
     },
     {
       "id": 10,
-      "content": "]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -41,6 +113,15 @@
     },
     {
       "id": 12,
       "content": "<pad>",
       "single_word": false,
       "lstrip": false,
@@ -61,6 +142,12 @@
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
       {
         "Sequence": {
           "id": "A",
@@ -69,7 +156,7 @@
       },
       {
         "SpecialToken": {
-          "id": "<s>",
           "type_id": 0
         }
       }
@@ -89,6 +176,15 @@
       }
     ],
     "special_tokens": {
       "<s>": {
         "id": "<s>",
         "ids": [
@@ -103,16 +199,7 @@
   "decoder": null,
   "model": {
     "type": "WordLevel",
-    "vocab": {
-      "-4": 0,
-      "-3": 1,
-      "-2": 2,
-      "-1": 3,
-      "1": 4,
-      "2": 5,
-      "3": 6,
-      "4": 7
-    },
     "unk_token": "<unk>"
   }
 }

   "truncation": null,
   "padding": null,
   "added_tokens": [
+    {
+      "id": 0,
+      "content": "1",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 1,
+      "content": "-1",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 2,
+      "content": "2",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 3,
+      "content": "-2",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 4,
+      "content": "3",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 5,
+      "content": "-3",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 6,
+      "content": "4",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 7,
+      "content": "-4",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
     {
       "id": 8,
+      "content": "[",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 9,
+      "content": "]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 10,
+      "content": ",",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 12,
+      "content": "</s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 13,
       "content": "<pad>",
       "single_word": false,
       "lstrip": false,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
+      {
+        "SpecialToken": {
+          "id": "<s>",
+          "type_id": 0
+        }
+      },
       {
         "Sequence": {
           "id": "A",
       },
       {
         "SpecialToken": {
+          "id": "</s>",
           "type_id": 0
         }
       }
       }
     ],
     "special_tokens": {
+      "</s>": {
+        "id": "</s>",
+        "ids": [
+          12
+        ],
+        "tokens": [
+          "</s>"
+        ]
+      },
       "<s>": {
         "id": "<s>",
         "ids": [
   "decoder": null,
   "model": {
     "type": "WordLevel",
+    "vocab": {},
     "unk_token": "<unk>"
   }
 }

tokenizer_config.json CHANGED Viewed

@@ -1,3 +1,8 @@
 {
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

 {
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "name_or_path": "/home/kibrq/draft/tokenizer/word-level-tokenizer-4",
+  "pad_token": "<pad>",
+  "special_tokens_map_file": "/home/kibrq/draft/tokenizer/word-level-tokenizer-4/special_tokens_map.json",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }