NairaRahim commited on
Commit
0c5ac52
·
verified ·
1 Parent(s): e96d7de

Training in progress, epoch 26, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebf6b113805e8d5c18f20cc3a7f743cea1ac029ed8f4448a7b46de82a6c516e9
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e71eff0d61a84e366db8484360ee95668b8ea2394397d8e4e593ed6e3506b9
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7534969a274bb3fa8023a906c28ee9fb96fa28e85e22f56fbd7e7b549d41dd80
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a7fb9eb6216192ce22490824fdf15cbc77c71775a288290e6a20a495b35fff2
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f7a8d60a2f79f9fed2cea73d23dd3dfda5f5e479acfb4213a6f2e863cb76904
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30464a0259a27646e9e215f48107081e6b2f053cf9eb8cd5707cfedf93581ee
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cbd4af8c4ab3cb75893cf7c4c12466d6c795077167416da697449ce4a12b474
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68265dc14a2d04db70bc11c529ede2fb6197ce4bf18435eda07a0317b0f9a6b4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.53865432739258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
4
- "epoch": 25.0,
5
  "eval_steps": 500,
6
- "global_step": 32625,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2489,6 +2489,105 @@
2489
  "eval_samples_per_second": 26.448,
2490
  "eval_steps_per_second": 3.324,
2491
  "step": 32625
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2492
  }
2493
  ],
2494
  "logging_steps": 100,
@@ -2503,7 +2602,7 @@
2503
  "early_stopping_threshold": 0.0
2504
  },
2505
  "attributes": {
2506
- "early_stopping_patience_counter": 3
2507
  }
2508
  },
2509
  "TrainerControl": {
@@ -2517,7 +2616,7 @@
2517
  "attributes": {}
2518
  }
2519
  },
2520
- "total_flos": 3.51827725761792e+16,
2521
  "train_batch_size": 8,
2522
  "trial_name": null,
2523
  "trial_params": null
 
1
  {
2
  "best_metric": 34.53865432739258,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
4
+ "epoch": 26.0,
5
  "eval_steps": 500,
6
+ "global_step": 33930,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2489
  "eval_samples_per_second": 26.448,
2490
  "eval_steps_per_second": 3.324,
2491
  "step": 32625
2492
+ },
2493
+ {
2494
+ "epoch": 25.057471264367816,
2495
+ "grad_norm": 2.94795823097229,
2496
+ "learning_rate": 3.43462643678161e-05,
2497
+ "loss": 33.1012,
2498
+ "step": 32700
2499
+ },
2500
+ {
2501
+ "epoch": 25.134099616858236,
2502
+ "grad_norm": 2.3455259799957275,
2503
+ "learning_rate": 3.4298371647509584e-05,
2504
+ "loss": 33.1345,
2505
+ "step": 32800
2506
+ },
2507
+ {
2508
+ "epoch": 25.21072796934866,
2509
+ "grad_norm": 2.678739547729492,
2510
+ "learning_rate": 3.4250478927203064e-05,
2511
+ "loss": 33.2271,
2512
+ "step": 32900
2513
+ },
2514
+ {
2515
+ "epoch": 25.28735632183908,
2516
+ "grad_norm": 4.3170952796936035,
2517
+ "learning_rate": 3.420258620689655e-05,
2518
+ "loss": 33.0392,
2519
+ "step": 33000
2520
+ },
2521
+ {
2522
+ "epoch": 25.3639846743295,
2523
+ "grad_norm": 3.8895034790039062,
2524
+ "learning_rate": 3.415469348659004e-05,
2525
+ "loss": 33.2535,
2526
+ "step": 33100
2527
+ },
2528
+ {
2529
+ "epoch": 25.440613026819925,
2530
+ "grad_norm": 3.693235158920288,
2531
+ "learning_rate": 3.4106800766283525e-05,
2532
+ "loss": 33.4471,
2533
+ "step": 33200
2534
+ },
2535
+ {
2536
+ "epoch": 25.517241379310345,
2537
+ "grad_norm": 5.521793365478516,
2538
+ "learning_rate": 3.405890804597701e-05,
2539
+ "loss": 34.2142,
2540
+ "step": 33300
2541
+ },
2542
+ {
2543
+ "epoch": 25.593869731800766,
2544
+ "grad_norm": 2.8983964920043945,
2545
+ "learning_rate": 3.40110153256705e-05,
2546
+ "loss": 34.362,
2547
+ "step": 33400
2548
+ },
2549
+ {
2550
+ "epoch": 25.67049808429119,
2551
+ "grad_norm": 3.329155206680298,
2552
+ "learning_rate": 3.396360153256705e-05,
2553
+ "loss": 32.373,
2554
+ "step": 33500
2555
+ },
2556
+ {
2557
+ "epoch": 25.74712643678161,
2558
+ "grad_norm": 2.6269519329071045,
2559
+ "learning_rate": 3.391570881226054e-05,
2560
+ "loss": 33.1401,
2561
+ "step": 33600
2562
+ },
2563
+ {
2564
+ "epoch": 25.82375478927203,
2565
+ "grad_norm": 3.1628787517547607,
2566
+ "learning_rate": 3.3867816091954024e-05,
2567
+ "loss": 33.2718,
2568
+ "step": 33700
2569
+ },
2570
+ {
2571
+ "epoch": 25.900383141762454,
2572
+ "grad_norm": 3.0653462409973145,
2573
+ "learning_rate": 3.381992337164751e-05,
2574
+ "loss": 33.481,
2575
+ "step": 33800
2576
+ },
2577
+ {
2578
+ "epoch": 25.977011494252874,
2579
+ "grad_norm": 2.5874106884002686,
2580
+ "learning_rate": 3.377250957854406e-05,
2581
+ "loss": 33.2467,
2582
+ "step": 33900
2583
+ },
2584
+ {
2585
+ "epoch": 26.0,
2586
+ "eval_loss": 34.54924392700195,
2587
+ "eval_runtime": 49.3,
2588
+ "eval_samples_per_second": 26.471,
2589
+ "eval_steps_per_second": 3.327,
2590
+ "step": 33930
2591
  }
2592
  ],
2593
  "logging_steps": 100,
 
2602
  "early_stopping_threshold": 0.0
2603
  },
2604
  "attributes": {
2605
+ "early_stopping_patience_counter": 4
2606
  }
2607
  },
2608
  "TrainerControl": {
 
2616
  "attributes": {}
2617
  }
2618
  },
2619
+ "total_flos": 3.659008347922637e+16,
2620
  "train_batch_size": 8,
2621
  "trial_name": null,
2622
  "trial_params": null