k4black commited on
Commit
104d921
1 Parent(s): d2f4eba

Training in progress, step 6800

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19a0167071d6077d8f26fcfb7304a29ea809d034ec769f851d56de0c8bea89c8
3
  size 2843228158
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43b746406019bd322df48fd0f5832641bf2c23f192d1ba2c1bcf2cc6ec4661ff
3
  size 2843228158
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64bf1490035f1c45fec685049f0d6a1e6e790b80c14856fab49fcd0b7114687
3
  size 1421588461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70a2b61096d5fd16d601664f84ccd8cb72633596aa2bd5105a81863e667c6b3
3
  size 1421588461
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:493e658da36031d3825842e705fcbf542f533c87882496ee53fda9fcd47858eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11b41a45679a6cc286e868bec1372008710d641cbd28a995ea530dff638bd50
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda94d2ae823993ee852e2514dee8cef8ce45edf13d5b4966c68949e7923c4ab
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad34206adf67443db75af07189b4384fe36c9a4922d3cd2b4375c9ae9591e609
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49b37a86f5ceab52bac4fb464c21074a6c79d3fab0788b80ac9b85da08906513
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce44e4c1c23e749cd9382e42f9e5375956ba931527c483dc2fade7c594fb4a25
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.9206360860900841,
3
- "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-base/checkpoint-5200",
4
- "epoch": 0.6057781919850885,
5
- "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -214,11 +214,75 @@
214
  "eval_samples_per_second": 974.901,
215
  "eval_steps_per_second": 15.254,
216
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  }
218
  ],
219
  "max_steps": 25752,
220
  "num_train_epochs": 3,
221
- "total_flos": 3.2222917166779776e+16,
222
  "trial_name": null,
223
  "trial_params": null
224
  }
 
1
  {
2
+ "best_metric": 0.9274853061519247,
3
+ "best_model_checkpoint": "/home2/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli-base/checkpoint-6000",
4
+ "epoch": 0.7921714818266542,
5
+ "global_step": 6800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
214
  "eval_samples_per_second": 974.901,
215
  "eval_steps_per_second": 15.254,
216
  "step": 5200
217
+ },
218
+ {
219
+ "epoch": 0.65,
220
+ "learning_rate": 8.23945389143231e-06,
221
+ "loss": 0.288,
222
+ "step": 5600
223
+ },
224
+ {
225
+ "epoch": 0.65,
226
+ "eval_accuracy": 0.9222718959561065,
227
+ "eval_f1": 0.9220252130752401,
228
+ "eval_loss": 0.23495520651340485,
229
+ "eval_runtime": 9.9904,
230
+ "eval_samples_per_second": 985.146,
231
+ "eval_steps_per_second": 15.415,
232
+ "step": 5600
233
+ },
234
+ {
235
+ "epoch": 0.7,
236
+ "learning_rate": 8.07635709614127e-06,
237
+ "loss": 0.2746,
238
+ "step": 6000
239
+ },
240
+ {
241
+ "epoch": 0.7,
242
+ "eval_accuracy": 0.9277585856533225,
243
+ "eval_f1": 0.9274853061519247,
244
+ "eval_loss": 0.22077496349811554,
245
+ "eval_runtime": 9.9415,
246
+ "eval_samples_per_second": 989.988,
247
+ "eval_steps_per_second": 15.491,
248
+ "step": 6000
249
+ },
250
+ {
251
+ "epoch": 0.75,
252
+ "learning_rate": 7.912851536952257e-06,
253
+ "loss": 0.2756,
254
+ "step": 6400
255
+ },
256
+ {
257
+ "epoch": 0.75,
258
+ "eval_accuracy": 0.9215606584027637,
259
+ "eval_f1": 0.9208559714907353,
260
+ "eval_loss": 0.23040013015270233,
261
+ "eval_runtime": 9.916,
262
+ "eval_samples_per_second": 992.538,
263
+ "eval_steps_per_second": 15.53,
264
+ "step": 6400
265
+ },
266
+ {
267
+ "epoch": 0.79,
268
+ "learning_rate": 7.749345977763243e-06,
269
+ "loss": 0.272,
270
+ "step": 6800
271
+ },
272
+ {
273
+ "epoch": 0.79,
274
+ "eval_accuracy": 0.9237959764275554,
275
+ "eval_f1": 0.9236808650336354,
276
+ "eval_loss": 0.2243068963289261,
277
+ "eval_runtime": 11.5455,
278
+ "eval_samples_per_second": 852.451,
279
+ "eval_steps_per_second": 13.338,
280
+ "step": 6800
281
  }
282
  ],
283
  "max_steps": 25752,
284
  "num_train_epochs": 3,
285
+ "total_flos": 4.208639555132851e+16,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64bf1490035f1c45fec685049f0d6a1e6e790b80c14856fab49fcd0b7114687
3
  size 1421588461
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70a2b61096d5fd16d601664f84ccd8cb72633596aa2bd5105a81863e667c6b3
3
  size 1421588461