SummerSigh committed on
Commit
121c1be
1 Parent(s): b0966d6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +17 -2
  2. tokenizer_config.json +2 -3
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -293,6 +307,7 @@
293
  "continuing_subword_prefix": null,
294
  "end_of_word_suffix": null,
295
  "fuse_unk": false,
 
296
  "vocab": {
297
  "<|endoftext|>": 0,
298
  "<|padding|>": 1,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 300,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 300
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 50280,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
307
  "continuing_subword_prefix": null,
308
  "end_of_word_suffix": null,
309
  "fuse_unk": false,
310
+ "byte_fallback": false,
311
  "vocab": {
312
  "<|endoftext|>": 0,
313
  "<|padding|>": 1,
tokenizer_config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<|endoftext|>",
 
4
  "eos_token": "<|endoftext|>",
5
- "model_max_length": 1000000000000000019884624838656,
6
- "name_or_path": "EleutherAI/pythia-410m",
7
- "special_tokens_map_file": "/admin/home-hailey/.cache/huggingface/hub/models--EleutherAI--gpt-neox-20b/snapshots/4e49eadb5d14bd22f314ec3f45b69a87b88c7691/special_tokens_map.json",
8
  "tokenizer_class": "GPTNeoXTokenizer",
9
  "unk_token": "<|endoftext|>"
10
  }
 
1
  {
2
  "add_prefix_space": false,
3
  "bos_token": "<|endoftext|>",
4
+ "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",
6
+ "model_max_length": 300,
 
 
7
  "tokenizer_class": "GPTNeoXTokenizer",
8
  "unk_token": "<|endoftext|>"
9
  }