Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

adapter_model.safetensors +1 -1
latest +1 -1
rng_state_0.pth +1 -1
rng_state_1.pth +1 -1
rng_state_2.pth +1 -1
rng_state_3.pth +1 -1
rng_state_4.pth +1 -1
rng_state_5.pth +1 -1
rng_state_6.pth +1 -1
rng_state_7.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +283 -3

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a4d62d92c4b35ef29608ae3d385e2dbbd3bd2e44e1e9c274952fed6b383e48e
 size 469105640

 version https://git-lfs.github.com/spec/v1
+oid sha256:86a85eb22246650e7064af73ff633a6e5db5926d868fcace188a50b2339da322
 size 469105640

latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step800~~


1	+ global_step1200

rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a343dd93cd21bdc90d289f3ca48ab49de24b9f748799acb23184c62f5d2b505a
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d5c385708fb05661a4a8830505a505fe5e9b78fa137b27d24db7b55c3109e66
 size 15920

rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e78f906506770f43e59c54fef023c80264ba4db0c95909db5aa497d4875f1e32
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:54a0e9ba9c486d9f2d3a1e1833dde7d1e5e24be602bbe39591f9ce42d6c1d9a2
 size 15920

rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1801fc92aac20f4b2cd6c241493cc948c1ce8800b14797fdefee2b1f494d7b9f
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f01bf9dc1bbe7ddeac01f70fb90763087099c832e1eb46ff1d0a18b90cb42f0
 size 15920

rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb7617cd6b75e491a348879fed069c07f2a2f52647a39a51812a3039227e011e
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:046d1a25a22d9a76cfa2bcfa7cf0f015d6f3fdda3ed5ca4852edd19999e520a4
 size 15920

rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2aa9e524787be3fd2130cbb1a33ce0d917090fdf18cf026905505c6c1f67c64
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bcd470f6b81981751baca34844a802fa863605a53d6e5c33cf9b95de794f264
 size 15920

rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:048ab222ccc631300416028b25a3132d82f849b7a32356b338d26e9eef8ae3fb
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:633063a8621dab82d328021c44ce11580d5ca0bed894eac1db835cd5550054bf
 size 15920

rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9670c00e2e4b001bb5f458d57d181a0ae7bf4587cc05947eef4b84e438e4178
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:831f5f2bccebc23f6a90b480779eb8ce9444452ff9525537f298c2fe07f58208
 size 15920

rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da5ed04d69158bc88c3dc621620dae175703ddfab9924471e44fc939b4c4386c
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffb457efa4dbdf26174186fe237d62d8862065efcd11448470f26066ab373ab0
 size 15920

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:901dc2645bb26444439097220bce3343e3d0a315e276f271fbd122fb8170ad53
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:748e88435c8297b9cdf1b7b8ccd7e64d6ff7fe7e782a39c2866f34b8b9e4e95f
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12325706802249442,
   "eval_steps": 500,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -567,6 +567,286 @@
       "learning_rate": 2.9041681386148966e-05,
       "loss": 0.6447,
       "step": 800
     }
   ],
   "logging_steps": 10,
@@ -586,7 +866,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.289521573986304e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.18488560203374163,
   "eval_steps": 500,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.9041681386148966e-05,
       "loss": 0.6447,
       "step": 800
+    },
+    {
+      "epoch": 0.1247977813727756,
+      "grad_norm": 5.542855195057769,
+      "learning_rate": 2.9015718212400918e-05,
+      "loss": 0.6486,
+      "step": 810
+    },
+    {
+      "epoch": 0.12633849472305678,
+      "grad_norm": 3.60243198052599,
+      "learning_rate": 2.8989419943632992e-05,
+      "loss": 0.6552,
+      "step": 820
+    },
+    {
+      "epoch": 0.12787920807333797,
+      "grad_norm": 10.54124596601907,
+      "learning_rate": 2.896278720859776e-05,
+      "loss": 0.667,
+      "step": 830
+    },
+    {
+      "epoch": 0.12941992142361913,
+      "grad_norm": 8.542824965925352,
+      "learning_rate": 2.8935820644044398e-05,
+      "loss": 0.697,
+      "step": 840
+    },
+    {
+      "epoch": 0.13096063477390033,
+      "grad_norm": 6.327341591650607,
+      "learning_rate": 2.890852089470343e-05,
+      "loss": 0.65,
+      "step": 850
+    },
+    {
+      "epoch": 0.1325013481241815,
+      "grad_norm": 7.687827002540841,
+      "learning_rate": 2.888088861327135e-05,
+      "loss": 0.6435,
+      "step": 860
+    },
+    {
+      "epoch": 0.13404206147446268,
+      "grad_norm": 3.356453126127434,
+      "learning_rate": 2.885292446039499e-05,
+      "loss": 0.6721,
+      "step": 870
+    },
+    {
+      "epoch": 0.13558277482474385,
+      "grad_norm": 8.406402473059597,
+      "learning_rate": 2.8824629104655736e-05,
+      "loss": 0.6694,
+      "step": 880
+    },
+    {
+      "epoch": 0.13712348817502504,
+      "grad_norm": 11.653019434398818,
+      "learning_rate": 2.8796003222553558e-05,
+      "loss": 0.6531,
+      "step": 890
+    },
+    {
+      "epoch": 0.13866420152530623,
+      "grad_norm": 6.371551478518258,
+      "learning_rate": 2.8767047498490798e-05,
+      "loss": 0.6568,
+      "step": 900
+    },
+    {
+      "epoch": 0.1402049148755874,
+      "grad_norm": 26.71523757066426,
+      "learning_rate": 2.8737762624755846e-05,
+      "loss": 0.6857,
+      "step": 910
+    },
+    {
+      "epoch": 0.1417456282258686,
+      "grad_norm": 4.417578021376778,
+      "learning_rate": 2.8708149301506573e-05,
+      "loss": 0.665,
+      "step": 920
+    },
+    {
+      "epoch": 0.14328634157614975,
+      "grad_norm": 5.335327649767265,
+      "learning_rate": 2.8678208236753577e-05,
+      "loss": 0.7014,
+      "step": 930
+    },
+    {
+      "epoch": 0.14482705492643094,
+      "grad_norm": 4.155520038033631,
+      "learning_rate": 2.8647940146343278e-05,
+      "loss": 0.6767,
+      "step": 940
+    },
+    {
+      "epoch": 0.1463677682767121,
+      "grad_norm": 3.815046866792752,
+      "learning_rate": 2.86173457539408e-05,
+      "loss": 0.6557,
+      "step": 950
+    },
+    {
+      "epoch": 0.1479084816269933,
+      "grad_norm": 3.7651811393538552,
+      "learning_rate": 2.8586425791012648e-05,
+      "loss": 0.675,
+      "step": 960
+    },
+    {
+      "epoch": 0.1494491949772745,
+      "grad_norm": 4.382966553943605,
+      "learning_rate": 2.8555180996809246e-05,
+      "loss": 0.6313,
+      "step": 970
+    },
+    {
+      "epoch": 0.15098990832755566,
+      "grad_norm": 10.103890347717131,
+      "learning_rate": 2.8523612118347245e-05,
+      "loss": 0.645,
+      "step": 980
+    },
+    {
+      "epoch": 0.15253062167783685,
+      "grad_norm": 6.294074332584702,
+      "learning_rate": 2.8491719910391685e-05,
+      "loss": 0.659,
+      "step": 990
+    },
+    {
+      "epoch": 0.154071335028118,
+      "grad_norm": 2.5808531227457565,
+      "learning_rate": 2.845950513543791e-05,
+      "loss": 0.6688,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1556120483783992,
+      "grad_norm": 2.927888770737132,
+      "learning_rate": 2.842696856369338e-05,
+      "loss": 0.6381,
+      "step": 1010
+    },
+    {
+      "epoch": 0.15715276172868037,
+      "grad_norm": 14.062433268070832,
+      "learning_rate": 2.8394110973059233e-05,
+      "loss": 0.6946,
+      "step": 1020
+    },
+    {
+      "epoch": 0.15869347507896156,
+      "grad_norm": 2.4470437840581054,
+      "learning_rate": 2.8360933149111695e-05,
+      "loss": 0.6844,
+      "step": 1030
+    },
+    {
+      "epoch": 0.16023418842924275,
+      "grad_norm": 3.8078577514013343,
+      "learning_rate": 2.8327435885083292e-05,
+      "loss": 0.64,
+      "step": 1040
+    },
+    {
+      "epoch": 0.16177490177952392,
+      "grad_norm": 25.680768915471432,
+      "learning_rate": 2.8293619981843887e-05,
+      "loss": 0.6329,
+      "step": 1050
+    },
+    {
+      "epoch": 0.1633156151298051,
+      "grad_norm": 5.0369491995422715,
+      "learning_rate": 2.8259486247881537e-05,
+      "loss": 0.6604,
+      "step": 1060
+    },
+    {
+      "epoch": 0.16485632848008627,
+      "grad_norm": 3.9026521516961608,
+      "learning_rate": 2.8225035499283155e-05,
+      "loss": 0.6564,
+      "step": 1070
+    },
+    {
+      "epoch": 0.16639704183036746,
+      "grad_norm": 4.945417598915296,
+      "learning_rate": 2.8190268559715017e-05,
+      "loss": 0.6655,
+      "step": 1080
+    },
+    {
+      "epoch": 0.16793775518064863,
+      "grad_norm": 3.222466850494984,
+      "learning_rate": 2.815518626040304e-05,
+      "loss": 0.6603,
+      "step": 1090
+    },
+    {
+      "epoch": 0.16947846853092982,
+      "grad_norm": 6.539136587655263,
+      "learning_rate": 2.811978944011293e-05,
+      "loss": 0.7036,
+      "step": 1100
+    },
+    {
+      "epoch": 0.171019181881211,
+      "grad_norm": 8.14211829139052,
+      "learning_rate": 2.8084078945130117e-05,
+      "loss": 0.6356,
+      "step": 1110
+    },
+    {
+      "epoch": 0.17255989523149218,
+      "grad_norm": 4.1954200205175605,
+      "learning_rate": 2.8048055629239543e-05,
+      "loss": 0.6591,
+      "step": 1120
+    },
+    {
+      "epoch": 0.17410060858177337,
+      "grad_norm": 4.333940585698679,
+      "learning_rate": 2.8011720353705224e-05,
+      "loss": 0.6575,
+      "step": 1130
+    },
+    {
+      "epoch": 0.17564132193205453,
+      "grad_norm": 4.2805487065333,
+      "learning_rate": 2.797507398724966e-05,
+      "loss": 0.6624,
+      "step": 1140
+    },
+    {
+      "epoch": 0.17718203528233573,
+      "grad_norm": 4.7173213185412,
+      "learning_rate": 2.7938117406033086e-05,
+      "loss": 0.623,
+      "step": 1150
+    },
+    {
+      "epoch": 0.1787227486326169,
+      "grad_norm": 99.71383370833006,
+      "learning_rate": 2.7900851493632508e-05,
+      "loss": 0.6591,
+      "step": 1160
+    },
+    {
+      "epoch": 0.18026346198289808,
+      "grad_norm": 3.6747989781213954,
+      "learning_rate": 2.786327714102058e-05,
+      "loss": 0.692,
+      "step": 1170
+    },
+    {
+      "epoch": 0.18180417533317927,
+      "grad_norm": 2.5009166944220604,
+      "learning_rate": 2.78253952465443e-05,
+      "loss": 0.6614,
+      "step": 1180
+    },
+    {
+      "epoch": 0.18334488868346044,
+      "grad_norm": 3.144011687958325,
+      "learning_rate": 2.7787206715903543e-05,
+      "loss": 0.6406,
+      "step": 1190
+    },
+    {
+      "epoch": 0.18488560203374163,
+      "grad_norm": 4.063731315051197,
+      "learning_rate": 2.7748712462129396e-05,
+      "loss": 0.6444,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.0934282360979456e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null