RyanJT committed on
Commit
fabb051
1 Parent(s): 26c69bf

Upload folder using huggingface_hub

Browse files
added_tokens.json CHANGED
@@ -1,3 +1,7 @@
1
  {
2
- "[PAD]": 50257
 
 
 
 
3
  }
 
1
  {
2
+ "[BOS]": 50258,
3
+ "[EOS]": 50259,
4
+ "[PAD]": 50257,
5
+ "[SPECIAL1]": 50260,
6
+ "[SPECIAL2]": 50261
7
  }
config.json CHANGED
@@ -5,9 +5,9 @@
5
  "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
- "bos_token_id": 50256,
9
  "embd_pdrop": 0.1,
10
- "eos_token_id": 50256,
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
@@ -35,5 +35,5 @@
35
  "torch_dtype": "float32",
36
  "transformers_version": "4.42.0.dev0",
37
  "use_cache": true,
38
- "vocab_size": 50258
39
  }
 
5
  "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
+ "bos_token_id": 50258,
9
  "embd_pdrop": 0.1,
10
+ "eos_token_id": 50259,
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
 
35
  "torch_dtype": "float32",
36
  "transformers_version": "4.42.0.dev0",
37
  "use_cache": true,
38
+ "vocab_size": 50262
39
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d71ac2acc0503279083b0b107a139cdc31b18b5cfd651c905fba22e0d84d53b
3
- size 31352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f089e8a1ea43139ea3294744b1f5c083de93c571f2695b08a8e54753bb11a619
3
+ size 656965030
special_tokens_map.json CHANGED
@@ -1,15 +1,31 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
- "content": "<|endoftext|>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|endoftext|>",
11
  "lstrip": false,
12
- "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[SPECIAL1]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "[SPECIAL2]",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
  "bos_token": {
19
+ "content": "[BOS]",
20
  "lstrip": false,
21
+ "normalized": false,
22
  "rstrip": false,
23
  "single_word": false
24
  },
25
  "eos_token": {
26
+ "content": "[EOS]",
27
  "lstrip": false,
28
+ "normalized": false,
29
  "rstrip": false,
30
  "single_word": false
31
  },
tokenizer_config.json CHANGED
@@ -17,11 +17,47 @@
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
  },
22
- "bos_token": "<|endoftext|>",
 
 
 
 
23
  "clean_up_tokenization_spaces": true,
24
- "eos_token": "<|endoftext|>",
25
  "errors": "replace",
26
  "model_max_length": 1024,
27
  "pad_token": "[PAD]",
 
17
  "rstrip": false,
18
  "single_word": false,
19
  "special": true
20
+ },
21
+ "50258": {
22
+ "content": "[BOS]",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "50259": {
30
+ "content": "[EOS]",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "50260": {
38
+ "content": "[SPECIAL1]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "50261": {
46
+ "content": "[SPECIAL2]",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
  }
53
  },
54
+ "additional_special_tokens": [
55
+ "[SPECIAL1]",
56
+ "[SPECIAL2]"
57
+ ],
58
+ "bos_token": "[BOS]",
59
  "clean_up_tokenization_spaces": true,
60
+ "eos_token": "[EOS]",
61
  "errors": "replace",
62
  "model_max_length": 1024,
63
  "pad_token": "[PAD]",