mattbonnell commited on
Commit
32ad5e6
·
verified ·
1 Parent(s): a7e43a2

Training in progress, step 15000, checkpoint

Browse files
last-checkpoint/global_step15000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fab0a63b3ac16d6f94d5f0f35a8502fdc5bcc8b548d6aad2c7d44d818ba8b498
3
+ size 197282509
last-checkpoint/global_step15000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:419156b9b803fc2e093681e99198c30c3374b4ab2c00b50d36abe443fd2e571d
3
+ size 180416968
last-checkpoint/global_step15000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0d28cbb974efed0531d71b009d0cce220c7a463f2932322c26fdd886eb76612
3
+ size 180416776
last-checkpoint/global_step15000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14969eec15404f14580812284af2f9dfdaf3d876b031afdb33a017e8ea88f83
3
+ size 180416776
last-checkpoint/global_step15000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bec6a62d1338f558c492ba7f375dca5db9d73f85ce67c1225eb6414af7b4c19
3
+ size 180416904
last-checkpoint/global_step15000/zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba49ad34558a56e4299a444243b9bfd826b1ab2e1f2cfe0b7d60837554b2cc3f
3
+ size 180416712
last-checkpoint/global_step15000/zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05de279c3af47689f94d74717c0d293c73f5846394ddb9c7de03c96cb220e28f
3
+ size 180417096
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step14500
 
1
+ global_step15000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdfe5d80b8466f67df68d58c1f025b2a9248b3623b683f12c5053bf268eeaecf
3
  size 188836816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7751a0b7a8cc9049584600df855e963a7058189eba36e4a9a6a6c3bf0dcd0a5a
3
  size 188836816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f8e41b22f51ba8429f45cf70fdc7db78f1bf8e6f36ab058a9a566a8813e2f5c
3
- size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6999d7fefb97e6a9a099cd30916802bc69ef7a40283d48060dd44d4594d50081
3
+ size 15536
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf42681b58e346c56679ddfd584610ba1aaf67c13a9383e2eda2adf9f37b06de
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcab6b00011077d0bd66c568639f31df0fa198017171e68b52aea1079faea821
3
  size 15536
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d188a9ea6ce32f82a365fe7cfe87b7a82a5e648b2675220f89e4c888ca309928
3
  size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b0ae0abb6b790ed8ac27b65cea139e3676b521cf203d1e2285b653dd58c7fb
3
  size 15472
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0979f7770405cf2858c7a61eea82710064015ee25dbeb2024c5c8b7cb1c700b7
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a4137a91687576c9e4b6b3a040c0affa99daa815a899287efc0849b18471f66
3
  size 15536
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a56e5d2d62658862b7f732d1f40c379802471cffbd4263f7d988598ef0553ae
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047b316b318f4373add1f5f3e4149c3d9b4a629a8627239189aa5dd5bb6920b3
3
  size 15536
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c26b8af937481fdefd1349b522c921efe27435606e855a5251653f860a6693d
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9297cb3deb97e247f257c18cfcbeb0473ce2a067efd2926a19f8087aa5515f2c
3
+ size 15472
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17364c7ad99e967fab84faa6f13827b1965ab3b2872701cc727ec583c0ae20ba
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf09ca0b8297e8f1a255617385be78112ae10fd31037fd3bd56d97071a010fe5
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 391.8918918918919,
5
  "eval_steps": 1500,
6
- "global_step": 14500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2109,6 +2109,85 @@
2109
  "learning_rate": 0.0001,
2110
  "loss": 0.0138,
2111
  "step": 14500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2112
  }
2113
  ],
2114
  "logging_steps": 50,
@@ -2128,7 +2207,7 @@
2128
  "attributes": {}
2129
  }
2130
  },
2131
- "total_flos": 4.070280500572114e+20,
2132
  "train_batch_size": 64,
2133
  "trial_name": null,
2134
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 405.4054054054054,
5
  "eval_steps": 1500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2109
  "learning_rate": 0.0001,
2110
  "loss": 0.0138,
2111
  "step": 14500
2112
+ },
2113
+ {
2114
+ "epoch": 393.2432432432432,
2115
+ "grad_norm": 0.23838981986045837,
2116
+ "learning_rate": 0.0001,
2117
+ "loss": 0.0137,
2118
+ "step": 14550
2119
+ },
2120
+ {
2121
+ "epoch": 394.5945945945946,
2122
+ "grad_norm": 0.2961066663265228,
2123
+ "learning_rate": 0.0001,
2124
+ "loss": 0.0136,
2125
+ "step": 14600
2126
+ },
2127
+ {
2128
+ "epoch": 395.94594594594594,
2129
+ "grad_norm": 0.24311979115009308,
2130
+ "learning_rate": 0.0001,
2131
+ "loss": 0.0133,
2132
+ "step": 14650
2133
+ },
2134
+ {
2135
+ "epoch": 397.2972972972973,
2136
+ "grad_norm": 0.3343033492565155,
2137
+ "learning_rate": 0.0001,
2138
+ "loss": 0.0138,
2139
+ "step": 14700
2140
+ },
2141
+ {
2142
+ "epoch": 398.64864864864865,
2143
+ "grad_norm": 0.23256798088550568,
2144
+ "learning_rate": 0.0001,
2145
+ "loss": 0.0133,
2146
+ "step": 14750
2147
+ },
2148
+ {
2149
+ "epoch": 400.0,
2150
+ "grad_norm": 0.31679514050483704,
2151
+ "learning_rate": 0.0001,
2152
+ "loss": 0.0131,
2153
+ "step": 14800
2154
+ },
2155
+ {
2156
+ "epoch": 401.35135135135135,
2157
+ "grad_norm": 0.24046526849269867,
2158
+ "learning_rate": 0.0001,
2159
+ "loss": 0.0115,
2160
+ "step": 14850
2161
+ },
2162
+ {
2163
+ "epoch": 402.7027027027027,
2164
+ "grad_norm": 0.2563251852989197,
2165
+ "learning_rate": 0.0001,
2166
+ "loss": 0.0121,
2167
+ "step": 14900
2168
+ },
2169
+ {
2170
+ "epoch": 404.05405405405406,
2171
+ "grad_norm": 0.18860304355621338,
2172
+ "learning_rate": 0.0001,
2173
+ "loss": 0.0118,
2174
+ "step": 14950
2175
+ },
2176
+ {
2177
+ "epoch": 405.4054054054054,
2178
+ "grad_norm": 0.27949538826942444,
2179
+ "learning_rate": 0.0001,
2180
+ "loss": 0.0117,
2181
+ "step": 15000
2182
+ },
2183
+ {
2184
+ "epoch": 405.4054054054054,
2185
+ "eval_loss": 0.7209838628768921,
2186
+ "eval_runtime": 55.6252,
2187
+ "eval_samples_per_second": 28.171,
2188
+ "eval_steps_per_second": 0.09,
2189
+ "eval_wer": 0.19634897049458713,
2190
+ "step": 15000
2191
  }
2192
  ],
2193
  "logging_steps": 50,
 
2207
  "attributes": {}
2208
  }
2209
  },
2210
+ "total_flos": 4.210588042772464e+20,
2211
  "train_batch_size": 64,
2212
  "trial_name": null,
2213
  "trial_params": null