RonanMcGovern committed on
Commit
8568264
1 Parent(s): f569ce3

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -7,5 +7,5 @@
7
  "single_word": false
8
  },
9
  "eos_token": "<|eot_id|>",
10
- "pad_token": "<|pad|>"
11
  }
 
7
  "single_word": false
8
  },
9
  "eos_token": "<|eot_id|>",
10
+ "pad_token": "<|eot_id|>"
11
  }
tokenizer.json CHANGED
@@ -2306,15 +2306,6 @@
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
2309
- },
2310
- {
2311
- "id": 128256,
2312
- "content": "<|pad|>",
2313
- "single_word": false,
2314
- "lstrip": false,
2315
- "rstrip": false,
2316
- "normalized": false,
2317
- "special": true
2318
  }
2319
  ],
2320
  "normalizer": null,
 
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
 
 
 
 
 
 
 
 
 
2309
  }
2310
  ],
2311
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -2047,14 +2047,6 @@
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
2050
- },
2051
- "128256": {
2052
- "content": "<|pad|>",
2053
- "lstrip": false,
2054
- "normalized": false,
2055
- "rstrip": false,
2056
- "single_word": false,
2057
- "special": true
2058
  }
2059
  },
2060
  "bos_token": "<|begin_of_text|>",
@@ -2065,8 +2057,7 @@
2065
  "input_ids",
2066
  "attention_mask"
2067
  ],
2068
- "model_max_length": 8192,
2069
- "pad_token": "<|pad|>",
2070
- "padding_side": "left",
2071
  "tokenizer_class": "PreTrainedTokenizerFast"
2072
  }
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
 
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
+ "model_max_length": 1000000000000000019884624838656,
2061
+ "pad_token": "<|eot_id|>",
 
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }