J38 committed on
Commit
b0357d6
1 Parent(s): e090839

checkpoint-225000 contents

Browse files
Files changed (4) hide show
  1. config.json +6 -2
  2. latest +1 -1
  3. pytorch_model.bin +1 -1
  4. trainer_state.json +2 -2
config.json CHANGED
@@ -1,7 +1,8 @@
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
- "MistralGPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
  "bos_token_id": 50256,
@@ -19,7 +20,10 @@
19
  "n_positions": 1024,
20
  "n_special": 0,
21
  "predict_special_tokens": true,
 
22
  "resid_pdrop": 0.1,
 
 
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
25
  "summary_proj_to_labels": true,
@@ -31,7 +35,7 @@
31
  "max_length": 50
32
  }
33
  },
34
- "transformers_version": "4.5.0",
35
  "use_cache": false,
36
  "vocab_size": 50257
37
  }
1
  {
2
+ "_name_or_path": "stanford-crfm/celebrimbor-gpt2-medium-x81",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
+ "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
  "bos_token_id": 50256,
20
  "n_positions": 1024,
21
  "n_special": 0,
22
  "predict_special_tokens": true,
23
+ "reorder_and_upcast_attn": true,
24
  "resid_pdrop": 0.1,
25
+ "scale_attn_by_inverse_layer_idx": true,
26
+ "scale_attn_weights": true,
27
  "summary_activation": null,
28
  "summary_first_dropout": 0.1,
29
  "summary_proj_to_labels": true,
35
  "max_length": 50
36
  }
37
  },
38
+ "transformers_version": "4.12.5",
39
  "use_cache": false,
40
  "vocab_size": 50257
41
  }
latest CHANGED
@@ -1 +1 @@
1
- global_step224011
1
+ global_step225011
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb1387226a204beb240f551b88f183d99903f7fca6a85e1a0fb95b7fdaf5a242
3
  size 734885928
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eedc682dfbf68e284ce780a6e74b8ee35de86847a6d60f007c8294ffa85a40a5
3
  size 734885928
trainer_state.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab15f7b14dd53416bc7ad71fa88e82e50a944cde9da707484f962e51dba0c31d
3
- size 15668929
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c493251f1ae70afa04ce3ebce39ddc567b0cdee84e6ef527112bba84652d5361
3
+ size 15738950