aidystark committed on
Commit
b9ab9e7
1 Parent(s): ecaf05d

Upload tokenizer

Browse files
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [],
6
+ "normalizer": {
7
+ "type": "NFKC"
8
+ },
9
+ "pre_tokenizer": {
10
+ "type": "Metaspace",
11
+ "replacement": "▁",
12
+ "prepend_scheme": "always",
13
+ "split": true
14
+ },
15
+ "post_processor": null,
16
+ "decoder": {
17
+ "type": "Metaspace",
18
+ "replacement": "▁",
19
+ "prepend_scheme": "always",
20
+ "split": true
21
+ },
22
+ "model": {
23
+ "type": "BPE",
24
+ "dropout": null,
25
+ "unk_token": "<unk>",
26
+ "continuing_subword_prefix": null,
27
+ "end_of_word_suffix": null,
28
+ "fuse_unk": false,
29
+ "byte_fallback": false,
30
+ "ignore_merges": false,
31
+ "vocab": {},
32
+ "merges": []
33
+ }
34
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {},
3
+ "clean_up_tokenization_spaces": true,
4
+ "model_max_length": 1000000000000000019884624838656,
5
+ "tokenizer_class": "PreTrainedTokenizerFast"
6
+ }