allenai
/

llama-3-tulu-2-dpo-8b

Text Generation

text-generation-inference

Inference Endpoints

Model card Files and versions Community

hamishivi committed on Aug 5, 2024

Commit

5c45008

·

verified ·

1 Parent(s): 4053f74

Upload 3 files

Files changed (3) hide show

special_tokens_map.json +21 -2
tokenizer.json +3 -2
tokenizer_config.json +4 -3

special_tokens_map.json CHANGED Viewed

@@ -1,4 +1,23 @@
 {
-  "bos_token": "<|begin_of_text|>",
-  "eos_token": "<|end_of_text|>"
 }

 {
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

@@ -2300,7 +2300,7 @@
     },
     {
       "id": 128255,
-      "content": "<|reserved_special_token_250|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -2348,6 +2348,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
       "!": 0,
       "\"": 1,
@@ -410500,4 +410501,4 @@
       "éĶ ¦"
     ]
   }
-}

     },
     {
       "id": 128255,
+      "content": "<pad>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
+    "ignore_merges": false,
     "vocab": {
       "!": 0,
       "\"": 1,
       "éĶ ¦"
     ]
   }
+}

tokenizer_config.json CHANGED Viewed

@@ -2041,7 +2041,7 @@
       "special": true
     },
     "128255": {
-      "content": "<|reserved_special_token_250|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -2050,6 +2050,7 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "model_input_names": [
@@ -2057,6 +2058,6 @@
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
-  "tokenizer_class": "PreTrainedTokenizerFast",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
 }

       "special": true
     },
     "128255": {
+      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
     }
   },
   "bos_token": "<|begin_of_text|>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "model_input_names": [
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "tokenizer_class": "PreTrainedTokenizerFast"
 }