danielhanchen committed on
Commit
6b87fd9
1 Parent(s): 3df376e

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -0
  2. tokenizer_config.json +5 -2
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -29,9 +32,9 @@
29
  "clean_up_tokenization_spaces": false,
30
  "eos_token": "</s>",
31
  "legacy": false,
32
- "model_max_length": 2048,
33
  "pad_token": "<unk>",
34
- "padding_side": "right",
35
  "sp_model_kwargs": {},
36
  "tokenizer_class": "LlamaTokenizer",
37
  "unk_token": "<unk>",
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
  "legacy": false,
35
+ "model_max_length": 1000000000000000019884624838656,
36
  "pad_token": "<unk>",
37
+ "padding_side": "left",
38
  "sp_model_kwargs": {},
39
  "tokenizer_class": "LlamaTokenizer",
40
  "unk_token": "<unk>",