mary905el committed on
Commit
c7de8d4
1 Parent(s): c3708d6
Files changed (1) hide show
  1. config.json +11 -1
config.json CHANGED
@@ -1,10 +1,12 @@
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
  "GPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
  "bos_token_id": 50256,
 
8
  "embd_pdrop": 0.1,
9
  "eos_token_id": 50261,
10
  "gradient_checkpointing": false,
@@ -17,11 +19,19 @@
17
  "n_inner": null,
18
  "n_layer": 24,
19
  "n_positions": 2048,
 
 
20
  "resid_pdrop": 0.1,
 
 
 
21
  "summary_activation": null,
22
  "summary_first_dropout": 0.1,
23
  "summary_proj_to_labels": true,
24
  "summary_type": "cls_index",
25
  "summary_use_proj": true,
 
 
 
26
  "vocab_size": 50265
27
- }
 
1
  {
2
+ "_name_or_path": "test_trainer/checkpoint-6800",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
  "bos_token_id": 50256,
9
+ "decoder_start_token_id": 50263,
10
  "embd_pdrop": 0.1,
11
  "eos_token_id": 50261,
12
  "gradient_checkpointing": false,
 
19
  "n_inner": null,
20
  "n_layer": 24,
21
  "n_positions": 2048,
22
+ "pad_token_id": 50262,
23
+ "reorder_and_upcast_attn": false,
24
  "resid_pdrop": 0.1,
25
+ "scale_attn_by_inverse_layer_idx": false,
26
+ "scale_attn_weights": true,
27
+ "sep_token_id": 50261,
28
  "summary_activation": null,
29
  "summary_first_dropout": 0.1,
30
  "summary_proj_to_labels": true,
31
  "summary_type": "cls_index",
32
  "summary_use_proj": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.17.0",
35
+ "use_cache": true,
36
  "vocab_size": 50265
37
+ }