bigmorning committed on
Commit
ff84a6d
1 Parent(s): b00a02a

add tokenizer

Browse files
.gitattributes CHANGED
@@ -29,3 +29,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
33
+ vocab.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 27671
3
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "<|endoftext|>",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4028d2189159ae52d62a38d4b0cd2d572e497c904b0ad364f228c6cdffc2197
3
+ size 635247
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": true,
6
+ "eos_token": "<|endoftext|>",
7
+ "mask_token": "[MASK]",
8
+ "name_or_path": "bigmorning/distilgpt_new2_0060",
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_token": [
12
+ "[PAD]",
13
+ "[UNK]",
14
+ "[CLS]",
15
+ "[SEP]",
16
+ "[MASK]"
17
+ ],
18
+ "special_tokens_map_file": "/root/.cache/huggingface/transformers/f9dd91ec01fc25f5ae3f4dd46e36ac7fc9a7c2e42c82a107dca01bc6b97764ec.7da70648c6cb9951e284c9685f9ba7ae083dd59ed1d6d84bdfc0584a4ea94b6d",
19
+ "strip_accents": null,
20
+ "tokenize_chinese_chars": true,
21
+ "tokenizer_class": "GPT2Tokenizer",
22
+ "unk_token": "[UNK]"
23
+ }
vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1389526b69c5a65ba6c9d3b7fe2ad76192337ca89162d6f448e80994e5889fe
3
+ size 412436