Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d679d225ca5e07bc96074f009eb38d0586a5368763bbb78f1c8bfae0e14a8bb
 size 628216

 version https://git-lfs.github.com/spec/v1
+oid sha256:47e9e11086022bdb672109255bcb5da3ea9c9c5b728778ba9dee332b610d60ba
 size 628216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11e09ac261eeddadc9997bc627fd1ed1e786842110bc5ca577418ab8940db125
 size 1273162

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a0d8dfdabfc6df4fb78d5072c2dd9120dd8b8db86fbfe2b9aabf4be73804c1d
 size 1273162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59e46fccc4be1a8d61042b97c95ce2311a6460b315d5f85e0f154f1defc5f482
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:626c6d4cfa38cc6b99d3ede7924a255742fa64c0b1d284a9b68cafb35460e388
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c9fd70aa4cf68daad2242bc04a8a03f22adc681e42e4ebf5294902cea9d0a87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.751439094543457,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.021607605877268798,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 287.073,
       "eval_steps_per_second": 143.555,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 69847090200576.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.746877670288086,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.032411408815903195,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 287.073,
       "eval_steps_per_second": 143.555,
       "step": 100
+    },
+    {
+      "epoch": 0.021823681936041485,
+      "grad_norm": 0.21080626547336578,
+      "learning_rate": 6.279213887972179e-05,
+      "loss": 10.7706,
+      "step": 101
+    },
+    {
+      "epoch": 0.022039757994814176,
+      "grad_norm": 0.20775455236434937,
+      "learning_rate": 6.189675975213094e-05,
+      "loss": 10.7617,
+      "step": 102
+    },
+    {
+      "epoch": 0.022255834053586863,
+      "grad_norm": 0.1883164793252945,
+      "learning_rate": 6.099731789198344e-05,
+      "loss": 10.7622,
+      "step": 103
+    },
+    {
+      "epoch": 0.02247191011235955,
+      "grad_norm": 0.22989457845687866,
+      "learning_rate": 6.009412045785051e-05,
+      "loss": 10.767,
+      "step": 104
+    },
+    {
+      "epoch": 0.022687986171132237,
+      "grad_norm": 0.21413500607013702,
+      "learning_rate": 5.918747589082853e-05,
+      "loss": 10.7754,
+      "step": 105
+    },
+    {
+      "epoch": 0.022904062229904928,
+      "grad_norm": 0.20524542033672333,
+      "learning_rate": 5.82776938092065e-05,
+      "loss": 10.7728,
+      "step": 106
+    },
+    {
+      "epoch": 0.023120138288677615,
+      "grad_norm": 0.276962012052536,
+      "learning_rate": 5.736508490273188e-05,
+      "loss": 10.7504,
+      "step": 107
+    },
+    {
+      "epoch": 0.023336214347450302,
+      "grad_norm": 0.2452148199081421,
+      "learning_rate": 5.644996082651017e-05,
+      "loss": 10.7616,
+      "step": 108
+    },
+    {
+      "epoch": 0.02355229040622299,
+      "grad_norm": 0.25284409523010254,
+      "learning_rate": 5.553263409457504e-05,
+      "loss": 10.7604,
+      "step": 109
+    },
+    {
+      "epoch": 0.02376836646499568,
+      "grad_norm": 0.247440367937088,
+      "learning_rate": 5.4613417973165106e-05,
+      "loss": 10.7641,
+      "step": 110
+    },
+    {
+      "epoch": 0.023984442523768367,
+      "grad_norm": 0.18067599833011627,
+      "learning_rate": 5.3692626373743706e-05,
+      "loss": 10.7746,
+      "step": 111
+    },
+    {
+      "epoch": 0.024200518582541054,
+      "grad_norm": 0.19137042760849,
+      "learning_rate": 5.27705737457985e-05,
+      "loss": 10.7714,
+      "step": 112
+    },
+    {
+      "epoch": 0.02441659464131374,
+      "grad_norm": 0.2149994820356369,
+      "learning_rate": 5.184757496945726e-05,
+      "loss": 10.7733,
+      "step": 113
+    },
+    {
+      "epoch": 0.024632670700086432,
+      "grad_norm": 0.20906798541545868,
+      "learning_rate": 5.092394524795649e-05,
+      "loss": 10.7726,
+      "step": 114
+    },
+    {
+      "epoch": 0.02484874675885912,
+      "grad_norm": 0.24666734039783478,
+      "learning_rate": 5e-05,
+      "loss": 10.7536,
+      "step": 115
+    },
+    {
+      "epoch": 0.025064822817631807,
+      "grad_norm": 0.23656940460205078,
+      "learning_rate": 4.907605475204352e-05,
+      "loss": 10.7602,
+      "step": 116
+    },
+    {
+      "epoch": 0.025280898876404494,
+      "grad_norm": 0.25157082080841064,
+      "learning_rate": 4.8152425030542766e-05,
+      "loss": 10.7619,
+      "step": 117
+    },
+    {
+      "epoch": 0.02549697493517718,
+      "grad_norm": 0.2460283637046814,
+      "learning_rate": 4.72294262542015e-05,
+      "loss": 10.7613,
+      "step": 118
+    },
+    {
+      "epoch": 0.02571305099394987,
+      "grad_norm": 0.1980646699666977,
+      "learning_rate": 4.6307373626256306e-05,
+      "loss": 10.7639,
+      "step": 119
+    },
+    {
+      "epoch": 0.02592912705272256,
+      "grad_norm": 0.20079004764556885,
+      "learning_rate": 4.5386582026834906e-05,
+      "loss": 10.7587,
+      "step": 120
+    },
+    {
+      "epoch": 0.026145203111495246,
+      "grad_norm": 0.22045651078224182,
+      "learning_rate": 4.446736590542497e-05,
+      "loss": 10.7678,
+      "step": 121
+    },
+    {
+      "epoch": 0.026361279170267933,
+      "grad_norm": 0.22736740112304688,
+      "learning_rate": 4.3550039173489845e-05,
+      "loss": 10.7542,
+      "step": 122
+    },
+    {
+      "epoch": 0.026577355229040624,
+      "grad_norm": 0.17214606702327728,
+      "learning_rate": 4.2634915097268115e-05,
+      "loss": 10.7608,
+      "step": 123
+    },
+    {
+      "epoch": 0.02679343128781331,
+      "grad_norm": 0.18513911962509155,
+      "learning_rate": 4.1722306190793495e-05,
+      "loss": 10.7495,
+      "step": 124
+    },
+    {
+      "epoch": 0.027009507346585998,
+      "grad_norm": 0.2161649465560913,
+      "learning_rate": 4.0812524109171476e-05,
+      "loss": 10.7578,
+      "step": 125
+    },
+    {
+      "epoch": 0.027225583405358685,
+      "grad_norm": 0.2477547526359558,
+      "learning_rate": 3.99058795421495e-05,
+      "loss": 10.7566,
+      "step": 126
+    },
+    {
+      "epoch": 0.027441659464131376,
+      "grad_norm": 0.19045643508434296,
+      "learning_rate": 3.9002682108016585e-05,
+      "loss": 10.753,
+      "step": 127
+    },
+    {
+      "epoch": 0.027657735522904063,
+      "grad_norm": 0.2026197761297226,
+      "learning_rate": 3.8103240247869075e-05,
+      "loss": 10.7643,
+      "step": 128
+    },
+    {
+      "epoch": 0.02787381158167675,
+      "grad_norm": 0.23443067073822021,
+      "learning_rate": 3.720786112027822e-05,
+      "loss": 10.7545,
+      "step": 129
+    },
+    {
+      "epoch": 0.028089887640449437,
+      "grad_norm": 0.21837399899959564,
+      "learning_rate": 3.631685049639586e-05,
+      "loss": 10.7466,
+      "step": 130
+    },
+    {
+      "epoch": 0.028305963699222125,
+      "grad_norm": 0.228903129696846,
+      "learning_rate": 3.543051265553377e-05,
+      "loss": 10.7553,
+      "step": 131
+    },
+    {
+      "epoch": 0.028522039757994815,
+      "grad_norm": 0.22999686002731323,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 10.7365,
+      "step": 132
+    },
+    {
+      "epoch": 0.028738115816767502,
+      "grad_norm": 0.2587832808494568,
+      "learning_rate": 3.367306435799584e-05,
+      "loss": 10.7495,
+      "step": 133
+    },
+    {
+      "epoch": 0.02895419187554019,
+      "grad_norm": 0.2272672802209854,
+      "learning_rate": 3.2802554068303596e-05,
+      "loss": 10.7485,
+      "step": 134
+    },
+    {
+      "epoch": 0.029170267934312877,
+      "grad_norm": 0.1977323740720749,
+      "learning_rate": 3.1937916690642356e-05,
+      "loss": 10.7478,
+      "step": 135
+    },
+    {
+      "epoch": 0.029386343993085567,
+      "grad_norm": 0.20933398604393005,
+      "learning_rate": 3.107944749788449e-05,
+      "loss": 10.759,
+      "step": 136
+    },
+    {
+      "epoch": 0.029602420051858255,
+      "grad_norm": 0.2519747316837311,
+      "learning_rate": 3.0227439656472877e-05,
+      "loss": 10.7503,
+      "step": 137
+    },
+    {
+      "epoch": 0.029818496110630942,
+      "grad_norm": 0.2388688176870346,
+      "learning_rate": 2.9382184126304834e-05,
+      "loss": 10.7351,
+      "step": 138
+    },
+    {
+      "epoch": 0.03003457216940363,
+      "grad_norm": 0.29849356412887573,
+      "learning_rate": 2.8543969561369556e-05,
+      "loss": 10.7364,
+      "step": 139
+    },
+    {
+      "epoch": 0.03025064822817632,
+      "grad_norm": 0.2356027066707611,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 10.7399,
+      "step": 140
+    },
+    {
+      "epoch": 0.030466724286949007,
+      "grad_norm": 0.25474485754966736,
+      "learning_rate": 2.688980582298435e-05,
+      "loss": 10.74,
+      "step": 141
+    },
+    {
+      "epoch": 0.030682800345721694,
+      "grad_norm": 0.23734347522258759,
+      "learning_rate": 2.607442154493568e-05,
+      "loss": 10.7393,
+      "step": 142
+    },
+    {
+      "epoch": 0.03089887640449438,
+      "grad_norm": 0.28821271657943726,
+      "learning_rate": 2.5267207830011068e-05,
+      "loss": 10.7336,
+      "step": 143
+    },
+    {
+      "epoch": 0.03111495246326707,
+      "grad_norm": 0.2773192822933197,
+      "learning_rate": 2.446844034095466e-05,
+      "loss": 10.7414,
+      "step": 144
+    },
+    {
+      "epoch": 0.031331028522039756,
+      "grad_norm": 0.3964485824108124,
+      "learning_rate": 2.3678391856132204e-05,
+      "loss": 10.742,
+      "step": 145
+    },
+    {
+      "epoch": 0.031547104580812446,
+      "grad_norm": 0.33878329396247864,
+      "learning_rate": 2.2897332176377528e-05,
+      "loss": 10.7489,
+      "step": 146
+    },
+    {
+      "epoch": 0.03176318063958514,
+      "grad_norm": 0.3486817479133606,
+      "learning_rate": 2.2125528032855724e-05,
+      "loss": 10.7379,
+      "step": 147
+    },
+    {
+      "epoch": 0.03197925669835782,
+      "grad_norm": 0.38555988669395447,
+      "learning_rate": 2.136324299597474e-05,
+      "loss": 10.7374,
+      "step": 148
+    },
+    {
+      "epoch": 0.03219533275713051,
+      "grad_norm": 0.521436333656311,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 10.7291,
+      "step": 149
+    },
+    {
+      "epoch": 0.032411408815903195,
+      "grad_norm": 0.8618770241737366,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 10.7151,
+      "step": 150
+    },
+    {
+      "epoch": 0.032411408815903195,
+      "eval_loss": 10.746877670288086,
+      "eval_runtime": 27.2327,
+      "eval_samples_per_second": 286.237,
+      "eval_steps_per_second": 143.137,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 104685247660032.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null