bobox committed on
Commit
6956e20
1 Parent(s): 8f0b1cf

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -908,6 +908,16 @@ You can finetune this model on your own dataset.
908
  | 0.8 | 14056 | 0.5737 | 0.4942 | 0.5019 | 0.1500 | - |
909
  | 0.9 | 15813 | 0.5896 | 0.4757 | 0.4804 | 0.1465 | - |
910
  | 1.0 | 17570 | 0.5174 | 0.5253 | 0.4587 | 0.0534 | - |
 
 
 
 
 
 
 
 
 
 
911
 
912
 
913
  ### Framework Versions
 
908
  | 0.8 | 14056 | 0.5737 | 0.4942 | 0.5019 | 0.1500 | - |
909
  | 0.9 | 15813 | 0.5896 | 0.4757 | 0.4804 | 0.1465 | - |
910
  | 1.0 | 17570 | 0.5174 | 0.5253 | 0.4587 | 0.0534 | - |
911
+ | 1.1 | 19327 | 0.5059 | 0.5493 | 0.4587 | 0.0278 | - |
912
+ | 1.2 | 21084 | 0.4654 | 0.4850 | 0.4415 | 0.0517 | - |
913
+ | 1.3 | 22841 | 0.4224 | 0.4292 | 0.3957 | 0.0938 | - |
914
+ | 1.4 | 24598 | 0.4125 | 0.4624 | 0.3794 | 0.0839 | - |
915
+ | 1.5 | 26355 | 0.4072 | 0.4481 | 0.3878 | 0.0681 | - |
916
+ | 1.6 | 28112 | 0.3572 | 0.4953 | 0.3716 | 0.0674 | - |
917
+ | 1.7 | 29869 | 0.371 | 0.4767 | 0.3622 | 0.0600 | - |
918
+ | 1.8 | 31626 | 0.3332 | 0.4659 | 0.3600 | 0.0561 | - |
919
+ | 1.9 | 33383 | 0.3695 | 0.4604 | 0.3567 | 0.0614 | - |
920
+ | 2.0 | 35140 | 0.3315 | 0.4712 | 0.3597 | 0.0540 | - |
921
 
922
 
923
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6e30cd1973ffd581d9841a4142304c15933bd2e35a8c8da4f5748dff2786807
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caaeeb3bee414e6382503e0e4566778971da5035d9883fa6c4013148b09c280c
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f56c48926de377ce9d4614f603c07171ebd1e50e5dd83e538cfc95f815b9f5c
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7823fbc9158972fe4a2cec91802737230154657abd47b1f0f2065b9a223127
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35737710db5d432ce9a1d2a028fb95e0f070338de7e2dd89045bd562c011764
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d04c63c792c38c2033263ef312d4b227d8d109c687d4efdc325699d09450706d
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6909db50d7352964f2947bab64e77e4b2204326b328911ba1924aee34ca6ed39
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292a922960da915a02f1436dd2a4aea0ac1e9584691974a2c6e06cc13c6d0a3b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 1757,
6
- "global_step": 17570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -317,6 +317,316 @@
317
  "eval_qnli-contrastive_samples_per_second": 352.263,
318
  "eval_qnli-contrastive_steps_per_second": 22.053,
319
  "step": 17570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  }
321
  ],
322
  "logging_steps": 1757,
@@ -331,7 +641,7 @@
331
  "should_evaluate": false,
332
  "should_log": false,
333
  "should_save": true,
334
- "should_training_stop": false
335
  },
336
  "attributes": {}
337
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 1757,
6
+ "global_step": 35140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
317
  "eval_qnli-contrastive_samples_per_second": 352.263,
318
  "eval_qnli-contrastive_steps_per_second": 22.053,
319
  "step": 17570
320
+ },
321
+ {
322
+ "epoch": 1.1,
323
+ "grad_norm": 18.839372634887695,
324
+ "learning_rate": 9.75831232890717e-06,
325
+ "loss": 0.5059,
326
+ "step": 19327
327
+ },
328
+ {
329
+ "epoch": 1.1,
330
+ "eval_nli-pairs_loss": 0.45871272683143616,
331
+ "eval_nli-pairs_runtime": 22.8984,
332
+ "eval_nli-pairs_samples_per_second": 297.313,
333
+ "eval_nli-pairs_steps_per_second": 18.604,
334
+ "step": 19327
335
+ },
336
+ {
337
+ "epoch": 1.1,
338
+ "eval_scitail-pairs-pos_loss": 0.5492986440658569,
339
+ "eval_scitail-pairs-pos_runtime": 5.1782,
340
+ "eval_scitail-pairs-pos_samples_per_second": 251.824,
341
+ "eval_scitail-pairs-pos_steps_per_second": 15.836,
342
+ "step": 19327
343
+ },
344
+ {
345
+ "epoch": 1.1,
346
+ "eval_qnli-contrastive_loss": 0.027841920033097267,
347
+ "eval_qnli-contrastive_runtime": 15.522,
348
+ "eval_qnli-contrastive_samples_per_second": 351.952,
349
+ "eval_qnli-contrastive_steps_per_second": 22.033,
350
+ "step": 19327
351
+ },
352
+ {
353
+ "epoch": 1.2,
354
+ "grad_norm": 6.800241947174072,
355
+ "learning_rate": 9.051905444616243e-06,
356
+ "loss": 0.4654,
357
+ "step": 21084
358
+ },
359
+ {
360
+ "epoch": 1.2,
361
+ "eval_nli-pairs_loss": 0.44151321053504944,
362
+ "eval_nli-pairs_runtime": 23.1311,
363
+ "eval_nli-pairs_samples_per_second": 294.323,
364
+ "eval_nli-pairs_steps_per_second": 18.417,
365
+ "step": 21084
366
+ },
367
+ {
368
+ "epoch": 1.2,
369
+ "eval_scitail-pairs-pos_loss": 0.4850437045097351,
370
+ "eval_scitail-pairs-pos_runtime": 5.2939,
371
+ "eval_scitail-pairs-pos_samples_per_second": 246.321,
372
+ "eval_scitail-pairs-pos_steps_per_second": 15.49,
373
+ "step": 21084
374
+ },
375
+ {
376
+ "epoch": 1.2,
377
+ "eval_qnli-contrastive_loss": 0.05170624330639839,
378
+ "eval_qnli-contrastive_runtime": 15.7737,
379
+ "eval_qnli-contrastive_samples_per_second": 346.336,
380
+ "eval_qnli-contrastive_steps_per_second": 21.682,
381
+ "step": 21084
382
+ },
383
+ {
384
+ "epoch": 1.3,
385
+ "grad_norm": 0.41899746656417847,
386
+ "learning_rate": 7.948320938272786e-06,
387
+ "loss": 0.4224,
388
+ "step": 22841
389
+ },
390
+ {
391
+ "epoch": 1.3,
392
+ "eval_nli-pairs_loss": 0.39569494128227234,
393
+ "eval_nli-pairs_runtime": 23.2638,
394
+ "eval_nli-pairs_samples_per_second": 292.643,
395
+ "eval_nli-pairs_steps_per_second": 18.312,
396
+ "step": 22841
397
+ },
398
+ {
399
+ "epoch": 1.3,
400
+ "eval_scitail-pairs-pos_loss": 0.42922988533973694,
401
+ "eval_scitail-pairs-pos_runtime": 5.2769,
402
+ "eval_scitail-pairs-pos_samples_per_second": 247.114,
403
+ "eval_scitail-pairs-pos_steps_per_second": 15.539,
404
+ "step": 22841
405
+ },
406
+ {
407
+ "epoch": 1.3,
408
+ "eval_qnli-contrastive_loss": 0.0938122496008873,
409
+ "eval_qnli-contrastive_runtime": 15.6681,
410
+ "eval_qnli-contrastive_samples_per_second": 348.67,
411
+ "eval_qnli-contrastive_steps_per_second": 21.828,
412
+ "step": 22841
413
+ },
414
+ {
415
+ "epoch": 1.4,
416
+ "grad_norm": 3.0029168128967285,
417
+ "learning_rate": 6.556983832253587e-06,
418
+ "loss": 0.4125,
419
+ "step": 24598
420
+ },
421
+ {
422
+ "epoch": 1.4,
423
+ "eval_nli-pairs_loss": 0.3794442415237427,
424
+ "eval_nli-pairs_runtime": 23.2107,
425
+ "eval_nli-pairs_samples_per_second": 293.313,
426
+ "eval_nli-pairs_steps_per_second": 18.354,
427
+ "step": 24598
428
+ },
429
+ {
430
+ "epoch": 1.4,
431
+ "eval_scitail-pairs-pos_loss": 0.4623956084251404,
432
+ "eval_scitail-pairs-pos_runtime": 5.2884,
433
+ "eval_scitail-pairs-pos_samples_per_second": 246.577,
434
+ "eval_scitail-pairs-pos_steps_per_second": 15.506,
435
+ "step": 24598
436
+ },
437
+ {
438
+ "epoch": 1.4,
439
+ "eval_qnli-contrastive_loss": 0.0838843286037445,
440
+ "eval_qnli-contrastive_runtime": 15.7017,
441
+ "eval_qnli-contrastive_samples_per_second": 347.924,
442
+ "eval_qnli-contrastive_steps_per_second": 21.781,
443
+ "step": 24598
444
+ },
445
+ {
446
+ "epoch": 1.5,
447
+ "grad_norm": 10.91913890838623,
448
+ "learning_rate": 5.012516292320938e-06,
449
+ "loss": 0.4072,
450
+ "step": 26355
451
+ },
452
+ {
453
+ "epoch": 1.5,
454
+ "eval_nli-pairs_loss": 0.3877629041671753,
455
+ "eval_nli-pairs_runtime": 23.1072,
456
+ "eval_nli-pairs_samples_per_second": 294.627,
457
+ "eval_nli-pairs_steps_per_second": 18.436,
458
+ "step": 26355
459
+ },
460
+ {
461
+ "epoch": 1.5,
462
+ "eval_scitail-pairs-pos_loss": 0.4480924606323242,
463
+ "eval_scitail-pairs-pos_runtime": 5.2741,
464
+ "eval_scitail-pairs-pos_samples_per_second": 247.244,
465
+ "eval_scitail-pairs-pos_steps_per_second": 15.548,
466
+ "step": 26355
467
+ },
468
+ {
469
+ "epoch": 1.5,
470
+ "eval_qnli-contrastive_loss": 0.06811495870351791,
471
+ "eval_qnli-contrastive_runtime": 15.7641,
472
+ "eval_qnli-contrastive_samples_per_second": 346.546,
473
+ "eval_qnli-contrastive_steps_per_second": 21.695,
474
+ "step": 26355
475
+ },
476
+ {
477
+ "epoch": 1.6,
478
+ "grad_norm": 3.676146984100342,
479
+ "learning_rate": 3.4668235704897813e-06,
480
+ "loss": 0.3572,
481
+ "step": 28112
482
+ },
483
+ {
484
+ "epoch": 1.6,
485
+ "eval_nli-pairs_loss": 0.3715905547142029,
486
+ "eval_nli-pairs_runtime": 23.1744,
487
+ "eval_nli-pairs_samples_per_second": 293.773,
488
+ "eval_nli-pairs_steps_per_second": 18.382,
489
+ "step": 28112
490
+ },
491
+ {
492
+ "epoch": 1.6,
493
+ "eval_scitail-pairs-pos_loss": 0.49534013867378235,
494
+ "eval_scitail-pairs-pos_runtime": 5.2856,
495
+ "eval_scitail-pairs-pos_samples_per_second": 246.708,
496
+ "eval_scitail-pairs-pos_steps_per_second": 15.514,
497
+ "step": 28112
498
+ },
499
+ {
500
+ "epoch": 1.6,
501
+ "eval_qnli-contrastive_loss": 0.06735851615667343,
502
+ "eval_qnli-contrastive_runtime": 15.7308,
503
+ "eval_qnli-contrastive_samples_per_second": 347.281,
504
+ "eval_qnli-contrastive_steps_per_second": 21.741,
505
+ "step": 28112
506
+ },
507
+ {
508
+ "epoch": 1.7,
509
+ "grad_norm": 229.6580047607422,
510
+ "learning_rate": 2.072658211127134e-06,
511
+ "loss": 0.371,
512
+ "step": 29869
513
+ },
514
+ {
515
+ "epoch": 1.7,
516
+ "eval_nli-pairs_loss": 0.36217835545539856,
517
+ "eval_nli-pairs_runtime": 23.1495,
518
+ "eval_nli-pairs_samples_per_second": 294.089,
519
+ "eval_nli-pairs_steps_per_second": 18.402,
520
+ "step": 29869
521
+ },
522
+ {
523
+ "epoch": 1.7,
524
+ "eval_scitail-pairs-pos_loss": 0.47673526406288147,
525
+ "eval_scitail-pairs-pos_runtime": 5.2158,
526
+ "eval_scitail-pairs-pos_samples_per_second": 250.008,
527
+ "eval_scitail-pairs-pos_steps_per_second": 15.721,
528
+ "step": 29869
529
+ },
530
+ {
531
+ "epoch": 1.7,
532
+ "eval_qnli-contrastive_loss": 0.06000087782740593,
533
+ "eval_qnli-contrastive_runtime": 15.6328,
534
+ "eval_qnli-contrastive_samples_per_second": 349.458,
535
+ "eval_qnli-contrastive_steps_per_second": 21.877,
536
+ "step": 29869
537
+ },
538
+ {
539
+ "epoch": 1.8,
540
+ "grad_norm": 0.6022229194641113,
541
+ "learning_rate": 9.638670801112644e-07,
542
+ "loss": 0.3332,
543
+ "step": 31626
544
+ },
545
+ {
546
+ "epoch": 1.8,
547
+ "eval_nli-pairs_loss": 0.3600439131259918,
548
+ "eval_nli-pairs_runtime": 23.0874,
549
+ "eval_nli-pairs_samples_per_second": 294.879,
550
+ "eval_nli-pairs_steps_per_second": 18.452,
551
+ "step": 31626
552
+ },
553
+ {
554
+ "epoch": 1.8,
555
+ "eval_scitail-pairs-pos_loss": 0.465911865234375,
556
+ "eval_scitail-pairs-pos_runtime": 5.3369,
557
+ "eval_scitail-pairs-pos_samples_per_second": 244.338,
558
+ "eval_scitail-pairs-pos_steps_per_second": 15.365,
559
+ "step": 31626
560
+ },
561
+ {
562
+ "epoch": 1.8,
563
+ "eval_qnli-contrastive_loss": 0.05613844096660614,
564
+ "eval_qnli-contrastive_runtime": 15.7089,
565
+ "eval_qnli-contrastive_samples_per_second": 347.764,
566
+ "eval_qnli-contrastive_steps_per_second": 21.771,
567
+ "step": 31626
568
+ },
569
+ {
570
+ "epoch": 1.9,
571
+ "grad_norm": 0.23106251657009125,
572
+ "learning_rate": 2.4943593464921476e-07,
573
+ "loss": 0.3695,
574
+ "step": 33383
575
+ },
576
+ {
577
+ "epoch": 1.9,
578
+ "eval_nli-pairs_loss": 0.35667526721954346,
579
+ "eval_nli-pairs_runtime": 23.1588,
580
+ "eval_nli-pairs_samples_per_second": 293.971,
581
+ "eval_nli-pairs_steps_per_second": 18.395,
582
+ "step": 33383
583
+ },
584
+ {
585
+ "epoch": 1.9,
586
+ "eval_scitail-pairs-pos_loss": 0.4603894352912903,
587
+ "eval_scitail-pairs-pos_runtime": 5.248,
588
+ "eval_scitail-pairs-pos_samples_per_second": 248.476,
589
+ "eval_scitail-pairs-pos_steps_per_second": 15.625,
590
+ "step": 33383
591
+ },
592
+ {
593
+ "epoch": 1.9,
594
+ "eval_qnli-contrastive_loss": 0.06141861155629158,
595
+ "eval_qnli-contrastive_runtime": 15.6709,
596
+ "eval_qnli-contrastive_samples_per_second": 348.608,
597
+ "eval_qnli-contrastive_steps_per_second": 21.824,
598
+ "step": 33383
599
+ },
600
+ {
601
+ "epoch": 2.0,
602
+ "grad_norm": Infinity,
603
+ "learning_rate": 2.5896487759191624e-11,
604
+ "loss": 0.3315,
605
+ "step": 35140
606
+ },
607
+ {
608
+ "epoch": 2.0,
609
+ "eval_nli-pairs_loss": 0.3597075045108795,
610
+ "eval_nli-pairs_runtime": 23.1058,
611
+ "eval_nli-pairs_samples_per_second": 294.645,
612
+ "eval_nli-pairs_steps_per_second": 18.437,
613
+ "step": 35140
614
+ },
615
+ {
616
+ "epoch": 2.0,
617
+ "eval_scitail-pairs-pos_loss": 0.47120198607444763,
618
+ "eval_scitail-pairs-pos_runtime": 5.2532,
619
+ "eval_scitail-pairs-pos_samples_per_second": 248.23,
620
+ "eval_scitail-pairs-pos_steps_per_second": 15.61,
621
+ "step": 35140
622
+ },
623
+ {
624
+ "epoch": 2.0,
625
+ "eval_qnli-contrastive_loss": 0.05398999899625778,
626
+ "eval_qnli-contrastive_runtime": 15.7099,
627
+ "eval_qnli-contrastive_samples_per_second": 347.743,
628
+ "eval_qnli-contrastive_steps_per_second": 21.77,
629
+ "step": 35140
630
  }
631
  ],
632
  "logging_steps": 1757,
 
641
  "should_evaluate": false,
642
  "should_log": false,
643
  "should_save": true,
644
+ "should_training_stop": true
645
  },
646
  "attributes": {}
647
  }