Locutusque committed on
Commit
2d1692a
1 Parent(s): 2e8c387

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +7 -0
tokenizer_config.json CHANGED
@@ -317,8 +317,15 @@
317
  "bos_token": "<|endoftext|>",
318
  "clean_up_tokenization_spaces": true,
319
  "eos_token": "<|endoftext|>",
 
320
  "model_max_length": 2048,
 
321
  "pad_token": "<|endoftext|>",
 
 
 
322
  "tokenizer_class": "CodeGenTokenizer",
 
 
323
  "unk_token": "<|endoftext|>"
324
  }
 
317
  "bos_token": "<|endoftext|>",
318
  "clean_up_tokenization_spaces": true,
319
  "eos_token": "<|endoftext|>",
320
+ "max_length": 512,
321
  "model_max_length": 2048,
322
+ "pad_to_multiple_of": null,
323
  "pad_token": "<|endoftext|>",
324
+ "pad_token_type_id": 0,
325
+ "padding_side": "right",
326
+ "stride": 0,
327
  "tokenizer_class": "CodeGenTokenizer",
328
+ "truncation_side": "right",
329
+ "truncation_strategy": "longest_first",
330
  "unk_token": "<|endoftext|>"
331
  }