Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dae14b45b2aa05876691e23f79429617bf62e1e8afb4d0a5743568fd619d2c54
 size 9047632

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cc56f67deb779d6052abb900ce052fcd11684d60029cbce871306aed255e2ae
 size 9047632

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1c05f6d980eff832bf6dd2c25396e96004d3c0e2848e7d831ccd6a4b3f7893a
 size 18111946

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d1fe780a142dab3ee2cf45d502b8d4414e7c362871d6e9ab783dc65ff47baea
 size 18111946

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da8220ce55eaafcaa352ab22e4d8c0c6f1d426253d50805c3fec604c30f7d073
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4ec831143f16e46d12aa833eb8d03bc97aa2607d0befc8ab206536cbaaa4d29
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c9fd70aa4cf68daad2242bc04a8a03f22adc681e42e4ebf5294902cea9d0a87
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9128943681716919,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.017075773745997867,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 275.732,
       "eval_steps_per_second": 137.866,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 954972241920000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8842025995254517,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.025613660618996798,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 275.732,
       "eval_steps_per_second": 137.866,
       "step": 100
+    },
+    {
+      "epoch": 0.017246531483457846,
+      "grad_norm": 7.36553955078125,
+      "learning_rate": 6.279213887972179e-05,
+      "loss": 3.389,
+      "step": 101
+    },
+    {
+      "epoch": 0.01741728922091782,
+      "grad_norm": 5.0720367431640625,
+      "learning_rate": 6.189675975213094e-05,
+      "loss": 1.5951,
+      "step": 102
+    },
+    {
+      "epoch": 0.0175880469583778,
+      "grad_norm": 4.1941752433776855,
+      "learning_rate": 6.099731789198344e-05,
+      "loss": 1.0009,
+      "step": 103
+    },
+    {
+      "epoch": 0.01775880469583778,
+      "grad_norm": 2.9722445011138916,
+      "learning_rate": 6.009412045785051e-05,
+      "loss": 1.0415,
+      "step": 104
+    },
+    {
+      "epoch": 0.01792956243329776,
+      "grad_norm": 1.8852466344833374,
+      "learning_rate": 5.918747589082853e-05,
+      "loss": 0.9703,
+      "step": 105
+    },
+    {
+      "epoch": 0.018100320170757737,
+      "grad_norm": 1.7154128551483154,
+      "learning_rate": 5.82776938092065e-05,
+      "loss": 0.8274,
+      "step": 106
+    },
+    {
+      "epoch": 0.018271077908217716,
+      "grad_norm": 1.148832082748413,
+      "learning_rate": 5.736508490273188e-05,
+      "loss": 0.6555,
+      "step": 107
+    },
+    {
+      "epoch": 0.018441835645677696,
+      "grad_norm": 1.6996549367904663,
+      "learning_rate": 5.644996082651017e-05,
+      "loss": 1.1596,
+      "step": 108
+    },
+    {
+      "epoch": 0.018612593383137675,
+      "grad_norm": 1.3889989852905273,
+      "learning_rate": 5.553263409457504e-05,
+      "loss": 0.897,
+      "step": 109
+    },
+    {
+      "epoch": 0.018783351120597654,
+      "grad_norm": 1.1142041683197021,
+      "learning_rate": 5.4613417973165106e-05,
+      "loss": 0.8444,
+      "step": 110
+    },
+    {
+      "epoch": 0.01895410885805763,
+      "grad_norm": 1.0875897407531738,
+      "learning_rate": 5.3692626373743706e-05,
+      "loss": 0.7941,
+      "step": 111
+    },
+    {
+      "epoch": 0.019124866595517608,
+      "grad_norm": 1.482211947441101,
+      "learning_rate": 5.27705737457985e-05,
+      "loss": 0.8514,
+      "step": 112
+    },
+    {
+      "epoch": 0.019295624332977587,
+      "grad_norm": 1.3092451095581055,
+      "learning_rate": 5.184757496945726e-05,
+      "loss": 0.8415,
+      "step": 113
+    },
+    {
+      "epoch": 0.019466382070437566,
+      "grad_norm": 1.1167197227478027,
+      "learning_rate": 5.092394524795649e-05,
+      "loss": 0.8627,
+      "step": 114
+    },
+    {
+      "epoch": 0.019637139807897545,
+      "grad_norm": 1.0100393295288086,
+      "learning_rate": 5e-05,
+      "loss": 0.7654,
+      "step": 115
+    },
+    {
+      "epoch": 0.019807897545357524,
+      "grad_norm": 1.2014217376708984,
+      "learning_rate": 4.907605475204352e-05,
+      "loss": 0.7972,
+      "step": 116
+    },
+    {
+      "epoch": 0.019978655282817503,
+      "grad_norm": 0.9819137454032898,
+      "learning_rate": 4.8152425030542766e-05,
+      "loss": 0.7866,
+      "step": 117
+    },
+    {
+      "epoch": 0.020149413020277483,
+      "grad_norm": 1.2597284317016602,
+      "learning_rate": 4.72294262542015e-05,
+      "loss": 0.8979,
+      "step": 118
+    },
+    {
+      "epoch": 0.02032017075773746,
+      "grad_norm": 0.9694597721099854,
+      "learning_rate": 4.6307373626256306e-05,
+      "loss": 0.8589,
+      "step": 119
+    },
+    {
+      "epoch": 0.020490928495197437,
+      "grad_norm": 1.2638909816741943,
+      "learning_rate": 4.5386582026834906e-05,
+      "loss": 0.9064,
+      "step": 120
+    },
+    {
+      "epoch": 0.020661686232657416,
+      "grad_norm": 1.3041003942489624,
+      "learning_rate": 4.446736590542497e-05,
+      "loss": 0.6922,
+      "step": 121
+    },
+    {
+      "epoch": 0.020832443970117395,
+      "grad_norm": 1.2819812297821045,
+      "learning_rate": 4.3550039173489845e-05,
+      "loss": 0.9398,
+      "step": 122
+    },
+    {
+      "epoch": 0.021003201707577374,
+      "grad_norm": 1.2087786197662354,
+      "learning_rate": 4.2634915097268115e-05,
+      "loss": 0.7732,
+      "step": 123
+    },
+    {
+      "epoch": 0.021173959445037353,
+      "grad_norm": 1.1736648082733154,
+      "learning_rate": 4.1722306190793495e-05,
+      "loss": 0.7672,
+      "step": 124
+    },
+    {
+      "epoch": 0.021344717182497332,
+      "grad_norm": 1.046942114830017,
+      "learning_rate": 4.0812524109171476e-05,
+      "loss": 0.7634,
+      "step": 125
+    },
+    {
+      "epoch": 0.02151547491995731,
+      "grad_norm": 1.1425883769989014,
+      "learning_rate": 3.99058795421495e-05,
+      "loss": 0.808,
+      "step": 126
+    },
+    {
+      "epoch": 0.02168623265741729,
+      "grad_norm": 0.9221328496932983,
+      "learning_rate": 3.9002682108016585e-05,
+      "loss": 0.718,
+      "step": 127
+    },
+    {
+      "epoch": 0.02185699039487727,
+      "grad_norm": 0.9747080206871033,
+      "learning_rate": 3.8103240247869075e-05,
+      "loss": 0.769,
+      "step": 128
+    },
+    {
+      "epoch": 0.022027748132337245,
+      "grad_norm": 1.0447397232055664,
+      "learning_rate": 3.720786112027822e-05,
+      "loss": 0.8581,
+      "step": 129
+    },
+    {
+      "epoch": 0.022198505869797224,
+      "grad_norm": 1.0637578964233398,
+      "learning_rate": 3.631685049639586e-05,
+      "loss": 0.6822,
+      "step": 130
+    },
+    {
+      "epoch": 0.022369263607257203,
+      "grad_norm": 1.0002477169036865,
+      "learning_rate": 3.543051265553377e-05,
+      "loss": 0.8158,
+      "step": 131
+    },
+    {
+      "epoch": 0.022540021344717182,
+      "grad_norm": 0.9726616144180298,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.7542,
+      "step": 132
+    },
+    {
+      "epoch": 0.02271077908217716,
+      "grad_norm": 1.1409964561462402,
+      "learning_rate": 3.367306435799584e-05,
+      "loss": 0.9922,
+      "step": 133
+    },
+    {
+      "epoch": 0.02288153681963714,
+      "grad_norm": 1.2967826128005981,
+      "learning_rate": 3.2802554068303596e-05,
+      "loss": 0.912,
+      "step": 134
+    },
+    {
+      "epoch": 0.02305229455709712,
+      "grad_norm": 1.1848523616790771,
+      "learning_rate": 3.1937916690642356e-05,
+      "loss": 0.7371,
+      "step": 135
+    },
+    {
+      "epoch": 0.0232230522945571,
+      "grad_norm": 1.1654822826385498,
+      "learning_rate": 3.107944749788449e-05,
+      "loss": 0.865,
+      "step": 136
+    },
+    {
+      "epoch": 0.023393810032017077,
+      "grad_norm": 1.1176100969314575,
+      "learning_rate": 3.0227439656472877e-05,
+      "loss": 0.685,
+      "step": 137
+    },
+    {
+      "epoch": 0.023564567769477053,
+      "grad_norm": 1.2199519872665405,
+      "learning_rate": 2.9382184126304834e-05,
+      "loss": 0.7731,
+      "step": 138
+    },
+    {
+      "epoch": 0.023735325506937032,
+      "grad_norm": 1.148708462715149,
+      "learning_rate": 2.8543969561369556e-05,
+      "loss": 0.9054,
+      "step": 139
+    },
+    {
+      "epoch": 0.02390608324439701,
+      "grad_norm": 1.3541618585586548,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 0.8916,
+      "step": 140
+    },
+    {
+      "epoch": 0.02407684098185699,
+      "grad_norm": 1.2918407917022705,
+      "learning_rate": 2.688980582298435e-05,
+      "loss": 0.7598,
+      "step": 141
+    },
+    {
+      "epoch": 0.02424759871931697,
+      "grad_norm": 1.2371271848678589,
+      "learning_rate": 2.607442154493568e-05,
+      "loss": 0.7015,
+      "step": 142
+    },
+    {
+      "epoch": 0.02441835645677695,
+      "grad_norm": 1.3515294790267944,
+      "learning_rate": 2.5267207830011068e-05,
+      "loss": 0.806,
+      "step": 143
+    },
+    {
+      "epoch": 0.024589114194236927,
+      "grad_norm": 1.6061891317367554,
+      "learning_rate": 2.446844034095466e-05,
+      "loss": 0.7577,
+      "step": 144
+    },
+    {
+      "epoch": 0.024759871931696906,
+      "grad_norm": 1.211403250694275,
+      "learning_rate": 2.3678391856132204e-05,
+      "loss": 0.6685,
+      "step": 145
+    },
+    {
+      "epoch": 0.024930629669156882,
+      "grad_norm": 1.245577096939087,
+      "learning_rate": 2.2897332176377528e-05,
+      "loss": 0.6328,
+      "step": 146
+    },
+    {
+      "epoch": 0.02510138740661686,
+      "grad_norm": 1.5203893184661865,
+      "learning_rate": 2.2125528032855724e-05,
+      "loss": 0.7963,
+      "step": 147
+    },
+    {
+      "epoch": 0.02527214514407684,
+      "grad_norm": 1.4067859649658203,
+      "learning_rate": 2.136324299597474e-05,
+      "loss": 0.7993,
+      "step": 148
+    },
+    {
+      "epoch": 0.02544290288153682,
+      "grad_norm": 1.456285834312439,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.7195,
+      "step": 149
+    },
+    {
+      "epoch": 0.025613660618996798,
+      "grad_norm": 1.5420877933502197,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 0.7393,
+      "step": 150
+    },
+    {
+      "epoch": 0.025613660618996798,
+      "eval_loss": 0.8842025995254517,
+      "eval_runtime": 35.6967,
+      "eval_samples_per_second": 276.328,
+      "eval_steps_per_second": 138.164,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1431334866124800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null