Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +283 -3

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd7280b72137a3f45684f0a4365181f6a4e76516dc937372728ffd427374896b
 size 83946192

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b82ba4c676b3b5446a0c3c0895e6170fde811e7522a584fefd422d795d05e4d
 size 83946192

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f39105b02b5a28736f00337347099a101737aeeff1bee35761b81446d63727b7
 size 168150290

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6255ec5b843f16274c3a630ceda1999d9a6ff2db1125db8a13388a7d838f220
 size 168150290

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa9292ce99046a75ac58c8e68f29aec19b32f8b2c2b1414bfaa45151498cf09a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:44ab34006e4ff8e3ddb1b3e0970e22b7afa1b47af9f1338b5e8a38648238a8fe
 size 14244

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a89103c425bccf7ac0286a5e2c396ce22a8dc4396452a8ba49448cd162c1206
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e97643a2c9855ef59e9a06836cda3285ce9299fbc4d864a30671a903ea3632d3
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3118271586606281,
   "eval_steps": 500,
-  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2954,6 +2954,286 @@
       "learning_rate": 0.0001738967827178286,
       "loss": 0.9547,
       "step": 2100
     }
   ],
   "logging_steps": 5,
@@ -2961,7 +3241,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 2.953144006936101e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.34152498329497366,
   "eval_steps": 500,
+  "global_step": 2300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001738967827178286,
       "loss": 0.9547,
       "step": 2100
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.333984375,
+      "learning_rate": 0.00017372189614169947,
+      "loss": 0.966,
+      "step": 2105
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00017354651423998733,
+      "loss": 0.9754,
+      "step": 2110
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.341796875,
+      "learning_rate": 0.00017337063819105496,
+      "loss": 0.928,
+      "step": 2115
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 0.337890625,
+      "learning_rate": 0.00017319426917658537,
+      "loss": 0.9721,
+      "step": 2120
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.3515625,
+      "learning_rate": 0.00017301740838157362,
+      "loss": 0.9716,
+      "step": 2125
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.34375,
+      "learning_rate": 0.00017284005699431896,
+      "loss": 0.9606,
+      "step": 2130
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.341796875,
+      "learning_rate": 0.000172662216206417,
+      "loss": 0.9619,
+      "step": 2135
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.3359375,
+      "learning_rate": 0.00017248388721275129,
+      "loss": 0.9236,
+      "step": 2140
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.333984375,
+      "learning_rate": 0.00017230507121148575,
+      "loss": 0.9592,
+      "step": 2145
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.3359375,
+      "learning_rate": 0.00017212576940405647,
+      "loss": 0.9507,
+      "step": 2150
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.369140625,
+      "learning_rate": 0.00017194598299516338,
+      "loss": 0.9622,
+      "step": 2155
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00017176571319276257,
+      "loss": 0.9527,
+      "step": 2160
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.330078125,
+      "learning_rate": 0.00017158496120805788,
+      "loss": 0.9426,
+      "step": 2165
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00017140372825549284,
+      "loss": 0.9517,
+      "step": 2170
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.33984375,
+      "learning_rate": 0.00017122201555274261,
+      "loss": 0.9285,
+      "step": 2175
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.33984375,
+      "learning_rate": 0.00017103982432070563,
+      "loss": 0.9671,
+      "step": 2180
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.34375,
+      "learning_rate": 0.00017085715578349557,
+      "loss": 0.9375,
+      "step": 2185
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.333984375,
+      "learning_rate": 0.00017067401116843296,
+      "loss": 0.954,
+      "step": 2190
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.345703125,
+      "learning_rate": 0.0001704903917060371,
+      "loss": 0.9486,
+      "step": 2195
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.337890625,
+      "learning_rate": 0.00017030629863001764,
+      "loss": 0.9823,
+      "step": 2200
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.330078125,
+      "learning_rate": 0.0001701217331772664,
+      "loss": 0.9401,
+      "step": 2205
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.34375,
+      "learning_rate": 0.00016993669658784904,
+      "loss": 0.9571,
+      "step": 2210
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.328125,
+      "learning_rate": 0.0001697511901049967,
+      "loss": 0.9801,
+      "step": 2215
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.3359375,
+      "learning_rate": 0.00016956521497509764,
+      "loss": 0.943,
+      "step": 2220
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.34375,
+      "learning_rate": 0.0001693787724476889,
+      "loss": 0.973,
+      "step": 2225
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.345703125,
+      "learning_rate": 0.00016919186377544788,
+      "loss": 0.9662,
+      "step": 2230
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.353515625,
+      "learning_rate": 0.00016900449021418394,
+      "loss": 0.9618,
+      "step": 2235
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.33984375,
+      "learning_rate": 0.00016881665302282995,
+      "loss": 0.9454,
+      "step": 2240
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.3359375,
+      "learning_rate": 0.00016862835346343385,
+      "loss": 0.9414,
+      "step": 2245
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00016843959280115015,
+      "loss": 0.9437,
+      "step": 2250
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 0.353515625,
+      "learning_rate": 0.00016825037230423139,
+      "loss": 0.9761,
+      "step": 2255
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.345703125,
+      "learning_rate": 0.00016806069324401977,
+      "loss": 0.9458,
+      "step": 2260
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.33203125,
+      "learning_rate": 0.00016787055689493837,
+      "loss": 0.9676,
+      "step": 2265
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.341796875,
+      "learning_rate": 0.00016767996453448283,
+      "loss": 0.9729,
+      "step": 2270
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.33984375,
+      "learning_rate": 0.00016748891744321263,
+      "loss": 0.9613,
+      "step": 2275
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.34765625,
+      "learning_rate": 0.0001672974169047425,
+      "loss": 0.9453,
+      "step": 2280
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.33203125,
+      "learning_rate": 0.00016710546420573377,
+      "loss": 0.9538,
+      "step": 2285
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00016691306063588583,
+      "loss": 0.9484,
+      "step": 2290
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.3359375,
+      "learning_rate": 0.0001667202074879274,
+      "loss": 0.9501,
+      "step": 2295
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 0.3515625,
+      "learning_rate": 0.00016652690605760775,
+      "loss": 0.9739,
+      "step": 2300
     }
   ],
   "logging_steps": 5,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
+  "total_flos": 3.2343958171460567e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null