princeton-nlp
/

Llama-3-Instruct-8B-SimPO

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

xiamengzhou committed on Jun 17, 2024

Commit

b3d3584

·

1 Parent(s): 0c33894

update

Files changed (3) hide show

special_tokens_map.json +7 -0
tokenizer.json +4 -64
tokenizer_config.json +2 -1

special_tokens_map.json CHANGED Viewed

@@ -12,5 +12,12 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
   }
 }

     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
   }
 }

tokenizer.json CHANGED Viewed

@@ -2329,69 +2329,10 @@
     ]
   },
   "post_processor": {
-    "type": "Sequence",
-    "processors": [
-      {
-        "type": "ByteLevel",
-        "add_prefix_space": true,
-        "trim_offsets": false,
-        "use_regex": true
-      },
-      {
-        "type": "TemplateProcessing",
-        "single": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          }
-        ],
-        "pair": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          },
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 1
-            }
-          },
-          {
-            "Sequence": {
-              "id": "B",
-              "type_id": 1
-            }
-          }
-        ],
-        "special_tokens": {
-          "<|begin_of_text|>": {
-            "id": "<|begin_of_text|>",
-            "ids": [
-              128000
-            ],
-            "tokens": [
-              "<|begin_of_text|>"
-            ]
-          }
-        }
-      }
-    ]
   },
   "decoder": {
     "type": "ByteLevel",
@@ -2407,7 +2348,6 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
-    "ignore_merges": true,
     "vocab": {
       "!": 0,
       "\"": 1,

     ]
   },
   "post_processor": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
       "!": 0,
       "\"": 1,

tokenizer_config.json CHANGED Viewed

@@ -2057,6 +2057,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 1000000000000000019884624838656,
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

     "input_ids",
     "attention_mask"
   ],
+  "model_max_length": 2048,
+  "pad_token": "<|end_of_text|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }