MichelNivard committed on
Commit
cad288d
1 Parent(s): d3e26d6

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 32768
3
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "<|endoftext|>",
3
+ "pad_token": "[PAD]",
4
+ "unk_token": "<|unk|>"
5
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e1ba8b7df0701723d2d901c7a42182fe77bf0045173f2cdb474ca6ea3eb1c02
3
+ size 707660
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoTokenizer": [
4
+ "replit/replit-code-v1-3b--replit_lm_tokenizer.ReplitLMTokenizer",
5
+ null
6
+ ]
7
+ },
8
+ "bos_token": null,
9
+ "clean_up_tokenization_spaces": false,
10
+ "eos_token": "<|endoftext|>",
11
+ "model_max_length": 2048,
12
+ "pad_token": "[PAD]",
13
+ "padding_side": "right",
14
+ "sep_token": null,
15
+ "sp_model_kwargs": {},
16
+ "tokenizer_class": "ReplitLMTokenizer",
17
+ "unk_token": "<|unk|>"
18
+ }