besimray committed
Commit ed89e8e · verified · 1 Parent(s): cae436c

Training in progress, step 70, checkpoint
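The diffs below update a standard Hugging Face transformers Trainer checkpoint directory: adapter weights (adapter_model.safetensors), optimizer, scheduler, and RNG state, plus trainer_state.json. As a hedged sketch only, this is how such a checkpoint is typically resumed; the model, tokenizer, and datasets are placeholders, since the training script itself is not part of this commit:

```python
# Sketch: resuming from the saved checkpoint directory.
# `model`, `train_dataset`, and `eval_dataset` are placeholders, not part of this commit.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",       # matches the checkpoint prefix in trainer_state.json
    per_device_train_batch_size=1,  # matches "train_batch_size": 1
    evaluation_strategy="steps",
    eval_steps=10,                  # matches "eval_steps": 10
    logging_steps=1,                # matches "logging_steps": 1
)

trainer = Trainer(
    model=model,                    # placeholder: the adapter-wrapped model
    args=args,
    train_dataset=train_dataset,    # placeholder
    eval_dataset=eval_dataset,      # placeholder
)

# Restores optimizer.pt, scheduler.pt, rng_state.pth, and trainer_state.json,
# so training continues from global_step 70 instead of step 0.
trainer.train(resume_from_checkpoint="last-checkpoint")
```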
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1349c39706954f73b87ce2e0d506e00758e8b213bb17e1a6dd1aed5c3f715b64
+oid sha256:a5ddf2f84301434e699d9c2f35b9d9105c9f4f2d8b564220a9bd359c56851edc
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76e9ed812ac31db5016397a98d34a65c97abc515da8c38a3f1a9ed81553a05d4
+oid sha256:d469aaf72c006ee445cf4f9b87e5f5cc4be9a600f4e6cccf5710fa260e9f24f8
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78f4cc9cdab22e1e0f660a41fa1d4cd32f78600a13210572d12e13002a5dbf56
+oid sha256:342d36d482e38fbc36cc0a28040792e6a4257d06cd930a5ace345aee66811294
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54b996514a941dd419a3f7869454171b960cb51cf1b91d9b10dbdcf1b1e50a10
+oid sha256:1074437c4c638775a20a86de2e689d1a14d0ff4e4137df2ddeb45e94a776caef
 size 1064
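The four files above are Git LFS pointers: the repository tracks only a version line, a SHA-256 object id, and a byte size, while the binaries live in LFS storage. A minimal, stdlib-only sketch of how downloaded files can be checked against these pointers (the oid/size pairs are copied from this commit; nothing else is assumed):

```python
import hashlib
import os

# oid/size pairs copied from the updated LFS pointers in this commit.
EXPECTED = {
    "last-checkpoint/adapter_model.safetensors": (
        "a5ddf2f84301434e699d9c2f35b9d9105c9f4f2d8b564220a9bd359c56851edc", 90207248),
    "last-checkpoint/optimizer.pt": (
        "d469aaf72c006ee445cf4f9b87e5f5cc4be9a600f4e6cccf5710fa260e9f24f8", 46057082),
    "last-checkpoint/rng_state.pth": (
        "342d36d482e38fbc36cc0a28040792e6a4257d06cd930a5ace345aee66811294", 14244),
    "last-checkpoint/scheduler.pt": (
        "1074437c4c638775a20a86de2e689d1a14d0ff4e4137df2ddeb45e94a776caef", 1064),
}

def verify(path, oid, size):
    """Return True if the local file matches the LFS pointer's oid and size."""
    if os.path.getsize(path) != size:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
    return sha.hexdigest() == oid

for path, (oid, size) in EXPECTED.items():
    print(path, "OK" if verify(path, oid, size) else "MISMATCH")
```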
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.633779525756836,
-  "best_model_checkpoint": "miner_id_24/checkpoint-60",
-  "epoch": 0.01694316978468055,
+  "best_metric": 1.6125953197479248,
+  "best_model_checkpoint": "miner_id_24/checkpoint-70",
+  "epoch": 0.019767031415460642,
   "eval_steps": 10,
-  "global_step": 60,
+  "global_step": 70,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -483,6 +483,84 @@
       "eval_samples_per_second": 5.597,
       "eval_steps_per_second": 5.597,
       "step": 60
+    },
+    {
+      "epoch": 0.01722555594775856,
+      "grad_norm": 1.3225600719451904,
+      "learning_rate": 0.00019470159657616215,
+      "loss": 1.7811,
+      "step": 61
+    },
+    {
+      "epoch": 0.017507942110836568,
+      "grad_norm": 1.632053017616272,
+      "learning_rate": 0.00019449372563954293,
+      "loss": 1.8656,
+      "step": 62
+    },
+    {
+      "epoch": 0.01779032827391458,
+      "grad_norm": 1.7111440896987915,
+      "learning_rate": 0.0001942819704359693,
+      "loss": 0.8098,
+      "step": 63
+    },
+    {
+      "epoch": 0.018072714436992588,
+      "grad_norm": 1.5115035772323608,
+      "learning_rate": 0.00019406633966986828,
+      "loss": 1.0582,
+      "step": 64
+    },
+    {
+      "epoch": 0.018355100600070597,
+      "grad_norm": 2.999513626098633,
+      "learning_rate": 0.00019384684220497605,
+      "loss": 2.1569,
+      "step": 65
+    },
+    {
+      "epoch": 0.018637486763148605,
+      "grad_norm": 1.0796102285385132,
+      "learning_rate": 0.00019362348706397373,
+      "loss": 1.4516,
+      "step": 66
+    },
+    {
+      "epoch": 0.018919872926226614,
+      "grad_norm": 2.9733681678771973,
+      "learning_rate": 0.00019339628342811632,
+      "loss": 1.6857,
+      "step": 67
+    },
+    {
+      "epoch": 0.019202259089304625,
+      "grad_norm": 2.086916923522949,
+      "learning_rate": 0.0001931652406368554,
+      "loss": 0.7689,
+      "step": 68
+    },
+    {
+      "epoch": 0.019484645252382634,
+      "grad_norm": 1.511246919631958,
+      "learning_rate": 0.0001929303681874552,
+      "loss": 1.5469,
+      "step": 69
+    },
+    {
+      "epoch": 0.019767031415460642,
+      "grad_norm": 2.029017925262451,
+      "learning_rate": 0.0001926916757346022,
+      "loss": 1.3705,
+      "step": 70
+    },
+    {
+      "epoch": 0.019767031415460642,
+      "eval_loss": 1.6125953197479248,
+      "eval_runtime": 133.426,
+      "eval_samples_per_second": 5.591,
+      "eval_steps_per_second": 5.591,
+      "step": 70
     }
   ],
   "logging_steps": 1,
@@ -511,7 +589,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5872821511127040.0,
+  "total_flos": 6851625096314880.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null