bigmorning committed on
Commit
67e3413
1 Parent(s): 2414ade

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 27671
3
+ }
special_tokens_map.json CHANGED
@@ -1,5 +1,7 @@
1
  {
 
2
  "cls_token": "[CLS]",
 
3
  "mask_token": "[MASK]",
4
  "pad_token": "[PAD]",
5
  "sep_token": "[SEP]",
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
  "cls_token": "[CLS]",
4
+ "eos_token": "<|endoftext|>",
5
  "mask_token": "[MASK]",
6
  "pad_token": "[PAD]",
7
  "sep_token": "[SEP]",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de5777e998e5c48ab0a00c26fc35218b989d44fe0f9dc7073c40f0da39505630
3
- size 634170
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4028d2189159ae52d62a38d4b0cd2d572e497c904b0ad364f228c6cdffc2197
3
+ size 635247
tokenizer_config.json CHANGED
@@ -1,7 +1,11 @@
1
  {
 
 
2
  "cls_token": "[CLS]",
3
  "do_lower_case": true,
 
4
  "mask_token": "[MASK]",
 
5
  "pad_token": "[PAD]",
6
  "sep_token": "[SEP]",
7
  "special_token": [
@@ -11,8 +15,9 @@
11
  "[SEP]",
12
  "[MASK]"
13
  ],
 
14
  "strip_accents": null,
15
  "tokenize_chinese_chars": true,
16
- "tokenizer_class": "DistilBertTokenizer",
17
  "unk_token": "[UNK]"
18
  }
 
1
  {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
  "cls_token": "[CLS]",
5
  "do_lower_case": true,
6
+ "eos_token": "<|endoftext|>",
7
  "mask_token": "[MASK]",
8
+ "name_or_path": "bigmorning/distilgpt_new2_0060",
9
  "pad_token": "[PAD]",
10
  "sep_token": "[SEP]",
11
  "special_token": [
 
15
  "[SEP]",
16
  "[MASK]"
17
  ],
18
+ "special_tokens_map_file": "/root/.cache/huggingface/transformers/f9dd91ec01fc25f5ae3f4dd46e36ac7fc9a7c2e42c82a107dca01bc6b97764ec.7da70648c6cb9951e284c9685f9ba7ae083dd59ed1d6d84bdfc0584a4ea94b6d",
19
  "strip_accents": null,
20
  "tokenize_chinese_chars": true,
21
+ "tokenizer_class": "GPT2Tokenizer",
22
  "unk_token": "[UNK]"
23
  }