J38 committed on
Commit
f66a8a7
1 Parent(s): 936e987

checkpoint-134000 contents

Browse files
config.json CHANGED
@@ -1,7 +1,8 @@
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
- "MistralGPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
7
  "bos_token_id": 50256,
@@ -19,7 +20,10 @@
19
  "n_positions": 1024,
20
  "n_special": 0,
21
  "predict_special_tokens": true,
 
22
  "resid_pdrop": 0.1,
 
 
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
25
  "summary_proj_to_labels": true,
@@ -31,7 +35,7 @@
31
  "max_length": 50
32
  }
33
  },
34
- "transformers_version": "4.5.0",
35
  "use_cache": false,
36
  "vocab_size": 50257
37
  }
 
1
  {
2
+ "_name_or_path": "stanford-crfm/beren-gpt2-medium-x49",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
+ "GPT2LMHeadModel"
6
  ],
7
  "attn_pdrop": 0.1,
8
  "bos_token_id": 50256,
 
20
  "n_positions": 1024,
21
  "n_special": 0,
22
  "predict_special_tokens": true,
23
+ "reorder_and_upcast_attn": true,
24
  "resid_pdrop": 0.1,
25
+ "scale_attn_by_inverse_layer_idx": true,
26
+ "scale_attn_weights": true,
27
  "summary_activation": null,
28
  "summary_first_dropout": 0.1,
29
  "summary_proj_to_labels": true,
 
35
  "max_length": 50
36
  }
37
  },
38
+ "transformers_version": "4.12.5",
39
  "use_cache": false,
40
  "vocab_size": 50257
41
  }
global_step134002/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad38830e67ec25987fda03d24ec4555bf7c7873d86fdd333b92ba5ff643c3e03
3
+ size 734884708
global_step134002/zero_pp_rank_0_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e792a70c4f0163cfa475767455c11d81bcc8a5bc48126457e23596decce43a6
3
+ size 266119230
global_step134002/zero_pp_rank_10_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a1a392d6d11c2732da21a90da7ed86668ff5b513cd711a5713c3fcb5cbdda9
3
+ size 266119230
global_step134002/zero_pp_rank_11_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22436b5a129bd494ef9f7d2e4b1486faf719ac2e2cf1eee288376a4536b6204
3
+ size 266119230
global_step134002/zero_pp_rank_12_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76a8dbfed7e3c96f71578865890d6edbfc4a81ca0b34879ff3efdb32f7c714d3
3
+ size 266119230
global_step134002/zero_pp_rank_13_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71485daa15beb34d98b61b470d2bb26a3e54ea6a3f94f19808002433908865af
3
+ size 266119230
global_step134002/zero_pp_rank_14_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffede73946d49177b055f819e4a9ebf2f84ef4922233a092ed0f5d2f9a659b45
3
+ size 266119230
global_step134002/zero_pp_rank_15_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad252bbde1844e3a1c3d919654faceac5bc268ea49dd11490aefc40a883c570
3
+ size 266119230
global_step134002/zero_pp_rank_1_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a8df09b13ac54bdf67cc4c3f1f58b1d870fc15673f49ef582945fd64badd11
3
+ size 266119230
global_step134002/zero_pp_rank_2_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21d333e6c8d8a8b7300d75db7c7c99e8e3fd9c4325286a400d651c25db3b6d9
3
+ size 266119230
global_step134002/zero_pp_rank_3_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0137bda59c3e1762d083b5f771bad13e500955aafba3e8576e0fe07320cbc1b4
3
+ size 266119230
global_step134002/zero_pp_rank_4_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a4e801925c9896c80815dc7a2904a70fde570480877e2f0c48dace7f3ac6066
3
+ size 266119230
global_step134002/zero_pp_rank_5_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd76d2d911c0e4badb9cbc1e7c47fa9be75a364d29b2a41ca1217edbfbedc7de
3
+ size 266119230
global_step134002/zero_pp_rank_6_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1763ed3e9b54a098ce5bf51304e1f0804cf1dc1e2e32084badf8465147c011
3
+ size 266119230
global_step134002/zero_pp_rank_7_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57cfac681c2c794fcf74abea99f85c894af611396a5bdf2d3994b48f28af6c47
3
+ size 266119230
global_step134002/zero_pp_rank_8_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcfeb106b11f3c0043a24028cc0498633106c7315ed491da0c972e088d2ccc9
3
+ size 266119230
global_step134002/zero_pp_rank_9_mp_rank_00optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc00ca2e7e097e17744d949a68c14753048d71bd158928becc74cb366c2dcb93
3
+ size 266119230
latest CHANGED
@@ -1 +1 @@
1
- global_step9000
 
1
+ global_step134002
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e763c64bfe4d20aab8d7c2d03732d5105a4fbe190b8993fa7036ccf3fe9c044
3
- size 734883945
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f5b739b03d29ce988229216fbffd3b5eabe4216a8cf56d25800ef7b93811a12
3
+ size 734884388
trainer_state.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c298942206ad7652b047e60577afae423eeaf21ab73fa497b9e155dc5a27f823
3
- size 633606
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:120315ab7795345090636c1196460db4a68b16e526dec383b033f09eb9088abb
3
+ size 9374094