Training in progress, step 2520, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50503544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd41ebaf0126bcbc9cb857ac5378df1eabfc3bdefaf0428fc58c9548b03fb716
|
3 |
size 50503544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 25986468
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e1fc2cc37d953fa48873d103c237d567951d0ea49cb5dad104417190acaae8c
|
3 |
size 25986468
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7f712a67228f3bcffa37c019c35e988ecd34b25e0960d7fa983f4ae67e4c1e5
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfd166862c66a199e9571546b71d0678fd9a94525d7d5a7b2b2e955704972144
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.023890294134616852,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2500",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17715,6 +17715,146 @@
|
|
17715 |
"eval_samples_per_second": 27.795,
|
17716 |
"eval_steps_per_second": 6.949,
|
17717 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17718 |
}
|
17719 |
],
|
17720 |
"logging_steps": 1,
|
@@ -17738,12 +17878,12 @@
|
|
17738 |
"should_evaluate": false,
|
17739 |
"should_log": false,
|
17740 |
"should_save": true,
|
17741 |
-
"should_training_stop":
|
17742 |
},
|
17743 |
"attributes": {}
|
17744 |
}
|
17745 |
},
|
17746 |
-
"total_flos": 1.
|
17747 |
"train_batch_size": 4,
|
17748 |
"trial_name": null,
|
17749 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.023890294134616852,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2500",
|
4 |
+
"epoch": 1.4070351758793969,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2520,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17715 |
"eval_samples_per_second": 27.795,
|
17716 |
"eval_steps_per_second": 6.949,
|
17717 |
"step": 2500
|
17718 |
+
},
|
17719 |
+
{
|
17720 |
+
"epoch": 1.3964265773311,
|
17721 |
+
"grad_norm": 0.06104286387562752,
|
17722 |
+
"learning_rate": 2.8275418474443814e-08,
|
17723 |
+
"loss": 0.0183,
|
17724 |
+
"step": 2501
|
17725 |
+
},
|
17726 |
+
{
|
17727 |
+
"epoch": 1.3969849246231156,
|
17728 |
+
"grad_norm": 0.06953344494104385,
|
17729 |
+
"learning_rate": 2.537750647535475e-08,
|
17730 |
+
"loss": 0.0209,
|
17731 |
+
"step": 2502
|
17732 |
+
},
|
17733 |
+
{
|
17734 |
+
"epoch": 1.3975432719151313,
|
17735 |
+
"grad_norm": 0.08410750329494476,
|
17736 |
+
"learning_rate": 2.26362126006352e-08,
|
17737 |
+
"loss": 0.0195,
|
17738 |
+
"step": 2503
|
17739 |
+
},
|
17740 |
+
{
|
17741 |
+
"epoch": 1.398101619207147,
|
17742 |
+
"grad_norm": 0.07485437393188477,
|
17743 |
+
"learning_rate": 2.0051541144749943e-08,
|
17744 |
+
"loss": 0.0232,
|
17745 |
+
"step": 2504
|
17746 |
+
},
|
17747 |
+
{
|
17748 |
+
"epoch": 1.3986599664991624,
|
17749 |
+
"grad_norm": 0.06569929420948029,
|
17750 |
+
"learning_rate": 1.762349615677117e-08,
|
17751 |
+
"loss": 0.0212,
|
17752 |
+
"step": 2505
|
17753 |
+
},
|
17754 |
+
{
|
17755 |
+
"epoch": 1.3992183137911782,
|
17756 |
+
"grad_norm": 0.05091328173875809,
|
17757 |
+
"learning_rate": 1.5352081440422884e-08,
|
17758 |
+
"loss": 0.016,
|
17759 |
+
"step": 2506
|
17760 |
+
},
|
17761 |
+
{
|
17762 |
+
"epoch": 1.3997766610831937,
|
17763 |
+
"grad_norm": 0.045666009187698364,
|
17764 |
+
"learning_rate": 1.3237300554069798e-08,
|
17765 |
+
"loss": 0.0142,
|
17766 |
+
"step": 2507
|
17767 |
+
},
|
17768 |
+
{
|
17769 |
+
"epoch": 1.4003350083752093,
|
17770 |
+
"grad_norm": 0.05261168256402016,
|
17771 |
+
"learning_rate": 1.1279156810684034e-08,
|
17772 |
+
"loss": 0.0145,
|
17773 |
+
"step": 2508
|
17774 |
+
},
|
17775 |
+
{
|
17776 |
+
"epoch": 1.400893355667225,
|
17777 |
+
"grad_norm": 0.05364307388663292,
|
17778 |
+
"learning_rate": 9.477653277834009e-09,
|
17779 |
+
"loss": 0.0143,
|
17780 |
+
"step": 2509
|
17781 |
+
},
|
17782 |
+
{
|
17783 |
+
"epoch": 1.4014517029592406,
|
17784 |
+
"grad_norm": 0.07531214505434036,
|
17785 |
+
"learning_rate": 7.83279277773996e-09,
|
17786 |
+
"loss": 0.0189,
|
17787 |
+
"step": 2510
|
17788 |
+
},
|
17789 |
+
{
|
17790 |
+
"epoch": 1.4020100502512562,
|
17791 |
+
"grad_norm": 0.07607190310955048,
|
17792 |
+
"learning_rate": 6.344577887185121e-09,
|
17793 |
+
"loss": 0.0206,
|
17794 |
+
"step": 2511
|
17795 |
+
},
|
17796 |
+
{
|
17797 |
+
"epoch": 1.402568397543272,
|
17798 |
+
"grad_norm": 0.08191350847482681,
|
17799 |
+
"learning_rate": 5.01301093758233e-09,
|
17800 |
+
"loss": 0.0211,
|
17801 |
+
"step": 2512
|
17802 |
+
},
|
17803 |
+
{
|
17804 |
+
"epoch": 1.4031267448352875,
|
17805 |
+
"grad_norm": 0.07381971925497055,
|
17806 |
+
"learning_rate": 3.838094014940729e-09,
|
17807 |
+
"loss": 0.0184,
|
17808 |
+
"step": 2513
|
17809 |
+
},
|
17810 |
+
{
|
17811 |
+
"epoch": 1.4036850921273032,
|
17812 |
+
"grad_norm": 0.06222749873995781,
|
17813 |
+
"learning_rate": 2.8198289598435625e-09,
|
17814 |
+
"loss": 0.0184,
|
17815 |
+
"step": 2514
|
17816 |
+
},
|
17817 |
+
{
|
17818 |
+
"epoch": 1.4042434394193188,
|
17819 |
+
"grad_norm": 0.07456585019826889,
|
17820 |
+
"learning_rate": 1.958217367514781e-09,
|
17821 |
+
"loss": 0.0239,
|
17822 |
+
"step": 2515
|
17823 |
+
},
|
17824 |
+
{
|
17825 |
+
"epoch": 1.4048017867113345,
|
17826 |
+
"grad_norm": 0.07716374844312668,
|
17827 |
+
"learning_rate": 1.2532605877080273e-09,
|
17828 |
+
"loss": 0.0228,
|
17829 |
+
"step": 2516
|
17830 |
+
},
|
17831 |
+
{
|
17832 |
+
"epoch": 1.4053601340033501,
|
17833 |
+
"grad_norm": 0.08897180110216141,
|
17834 |
+
"learning_rate": 7.049597248065532e-10,
|
17835 |
+
"loss": 0.0247,
|
17836 |
+
"step": 2517
|
17837 |
+
},
|
17838 |
+
{
|
17839 |
+
"epoch": 1.4059184812953658,
|
17840 |
+
"grad_norm": 0.061453305184841156,
|
17841 |
+
"learning_rate": 3.13315637756606e-10,
|
17842 |
+
"loss": 0.0199,
|
17843 |
+
"step": 2518
|
17844 |
+
},
|
17845 |
+
{
|
17846 |
+
"epoch": 1.4064768285873814,
|
17847 |
+
"grad_norm": 0.07491685450077057,
|
17848 |
+
"learning_rate": 7.832894011183811e-11,
|
17849 |
+
"loss": 0.0209,
|
17850 |
+
"step": 2519
|
17851 |
+
},
|
17852 |
+
{
|
17853 |
+
"epoch": 1.4070351758793969,
|
17854 |
+
"grad_norm": 0.10247643291950226,
|
17855 |
+
"learning_rate": 0.0,
|
17856 |
+
"loss": 0.0198,
|
17857 |
+
"step": 2520
|
17858 |
}
|
17859 |
],
|
17860 |
"logging_steps": 1,
|
|
|
17878 |
"should_evaluate": false,
|
17879 |
"should_log": false,
|
17880 |
"should_save": true,
|
17881 |
+
"should_training_stop": true
|
17882 |
},
|
17883 |
"attributes": {}
|
17884 |
}
|
17885 |
},
|
17886 |
+
"total_flos": 1.0376037222108365e+18,
|
17887 |
"train_batch_size": 4,
|
17888 |
"trial_name": null,
|
17889 |
"trial_params": null
|