stefania-radu commited on
Commit
c8065c4
1 Parent(s): 5791552

Training in progress, step 300000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86d436dc49f5f51ea77f6836f11593647aadd3d2d015e3b7016978a45cdea4be
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e419578f524e1611c3d5902d97ccf8efc92603d3b33f13949516891bb5476e00
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30e23f392129c2b6012431f5749433f649dc915f8d436528a227eb4caa64cc8
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d397fa0d192b129facc18c0551253ed4f60480e5eab4c8aacf636029150284
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42cb907bbd858ffd2b5bae4767d672ae3b753bd7ac85cb51990a0992d4df69a
3
+ size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e51ba9fab0cc2d23119085e21856e86faf00d221719139ebcc44bc3aa4c220a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b25c62270eb67709cff9418808f83a4a7710e7ce508a964ce593dde6417e23
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.29,
5
- "global_step": 290000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2152,11 +2152,85 @@
2152
  "eval_samples_per_second": 103.835,
2153
  "eval_steps_per_second": 12.979,
2154
  "step": 290000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2155
  }
2156
  ],
2157
  "max_steps": 1000000,
2158
  "num_train_epochs": 9223372036854775807,
2159
- "total_flos": 2.56783695023702e+21,
2160
  "trial_name": null,
2161
  "trial_params": null
2162
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3,
5
+ "global_step": 300000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2152
  "eval_samples_per_second": 103.835,
2153
  "eval_steps_per_second": 12.979,
2154
  "step": 290000
2155
+ },
2156
+ {
2157
+ "epoch": 0.29,
2158
+ "learning_rate": 1.7432579625275767e-05,
2159
+ "loss": 0.3557,
2160
+ "step": 291000
2161
+ },
2162
+ {
2163
+ "epoch": 0.29,
2164
+ "learning_rate": 1.7422214882158484e-05,
2165
+ "loss": 0.3569,
2166
+ "step": 292000
2167
+ },
2168
+ {
2169
+ "epoch": 0.29,
2170
+ "learning_rate": 1.7411816815187455e-05,
2171
+ "loss": 0.3564,
2172
+ "step": 293000
2173
+ },
2174
+ {
2175
+ "epoch": 0.29,
2176
+ "learning_rate": 1.7401385538074276e-05,
2177
+ "loss": 0.3574,
2178
+ "step": 294000
2179
+ },
2180
+ {
2181
+ "epoch": 0.29,
2182
+ "learning_rate": 1.7390921164893724e-05,
2183
+ "loss": 0.3573,
2184
+ "step": 295000
2185
+ },
2186
+ {
2187
+ "epoch": 0.29,
2188
+ "eval_runtime": 3244.7523,
2189
+ "eval_samples_per_second": 104.677,
2190
+ "eval_steps_per_second": 13.085,
2191
+ "step": 295000
2192
+ },
2193
+ {
2194
+ "epoch": 0.3,
2195
+ "learning_rate": 1.7380423810082507e-05,
2196
+ "loss": 0.353,
2197
+ "step": 296000
2198
+ },
2199
+ {
2200
+ "epoch": 0.3,
2201
+ "learning_rate": 1.7369893588438012e-05,
2202
+ "loss": 0.3568,
2203
+ "step": 297000
2204
+ },
2205
+ {
2206
+ "epoch": 0.3,
2207
+ "learning_rate": 1.7359330615117058e-05,
2208
+ "loss": 0.3537,
2209
+ "step": 298000
2210
+ },
2211
+ {
2212
+ "epoch": 0.3,
2213
+ "learning_rate": 1.734873500563463e-05,
2214
+ "loss": 0.3576,
2215
+ "step": 299000
2216
+ },
2217
+ {
2218
+ "epoch": 0.3,
2219
+ "learning_rate": 1.7338106875862617e-05,
2220
+ "loss": 0.3575,
2221
+ "step": 300000
2222
+ },
2223
+ {
2224
+ "epoch": 0.3,
2225
+ "eval_runtime": 3130.8127,
2226
+ "eval_samples_per_second": 108.487,
2227
+ "eval_steps_per_second": 13.561,
2228
+ "step": 300000
2229
  }
2230
  ],
2231
  "max_steps": 1000000,
2232
  "num_train_epochs": 9223372036854775807,
2233
+ "total_flos": 2.656383051969331e+21,
2234
  "trial_name": null,
2235
  "trial_params": null
2236
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d30e23f392129c2b6012431f5749433f649dc915f8d436528a227eb4caa64cc8
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3926e99c54fb7c961027b62d11498ced043c43266a692f441538601c7a5f10f7
3
  size 454197066