Training in progress, step 22800, checkpoint

Browse files

Files changed (5) hide show

.gitattributes +1 -1
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +273 -3

.gitattributes CHANGED Viewed

@@ -34,4 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
-checkpoint-22800/tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce5bc62b462cec671c252739f2dc62bd7fba894afdb3be9a2d2c7507560195bc
 size 3541119728

 version https://git-lfs.github.com/spec/v1
+oid sha256:237f5c17c55df679a8e8f4a65ad9de09e2a99a2eaba9876aace075096abcfb63
 size 3541119728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ed691e41a2ff7f7d475bdb81ca2c3a7572376d28d14ce571a0d4355792b6e26
 size 778374186

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cc82f803cbf27fa5d02dc20006fbaf09405895a4d61a6169c832576c2db2940
 size 778374186

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21ea56942b563c971fd85c03a66bbd99dd86b68826dc8364e7984139050ee071
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:959293b55cb243a7a3af582584a0698f2aeb95373b8b27dd72c03d8f0bdce376
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0737302998126916,
   "eval_steps": 500,
-  "global_step": 22500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20258,6 +20258,276 @@
       "mean_token_accuracy": 0.8939898759126663,
       "num_tokens": 37361844.0,
       "step": 22500
     }
   ],
   "logging_steps": 10,
@@ -20277,7 +20547,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.413914307123446e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0880468628831172,
   "eval_steps": 500,
+  "global_step": 22800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.8939898759126663,
       "num_tokens": 37361844.0,
       "step": 22500
+    },
+    {
+      "epoch": 1.074207518581706,
+      "grad_norm": 0.37823808193206787,
+      "learning_rate": 9.258410880458124e-06,
+      "loss": 0.6272,
+      "mean_token_accuracy": 0.8770518571138382,
+      "num_tokens": 37379000.0,
+      "step": 22510
+    },
+    {
+      "epoch": 1.07468473735072,
+      "grad_norm": 0.4005347788333893,
+      "learning_rate": 9.253638749701743e-06,
+      "loss": 0.6015,
+      "mean_token_accuracy": 0.8748599126935005,
+      "num_tokens": 37395773.0,
+      "step": 22520
+    },
+    {
+      "epoch": 1.075161956119734,
+      "grad_norm": 0.39781296253204346,
+      "learning_rate": 9.24886661894536e-06,
+      "loss": 0.6869,
+      "mean_token_accuracy": 0.8716883912682534,
+      "num_tokens": 37415210.0,
+      "step": 22530
+    },
+    {
+      "epoch": 1.0756391748887484,
+      "grad_norm": 0.39247405529022217,
+      "learning_rate": 9.244094488188978e-06,
+      "loss": 0.7395,
+      "mean_token_accuracy": 0.8584993034601212,
+      "num_tokens": 37432118.0,
+      "step": 22540
+    },
+    {
+      "epoch": 1.0761163936577625,
+      "grad_norm": 0.3580706715583801,
+      "learning_rate": 9.239322357432594e-06,
+      "loss": 0.79,
+      "mean_token_accuracy": 0.8506992489099503,
+      "num_tokens": 37452181.0,
+      "step": 22550
+    },
+    {
+      "epoch": 1.0765936124267768,
+      "grad_norm": 0.3170486092567444,
+      "learning_rate": 9.234550226676213e-06,
+      "loss": 0.6072,
+      "mean_token_accuracy": 0.8733228012919426,
+      "num_tokens": 37468470.0,
+      "step": 22560
+    },
+    {
+      "epoch": 1.0770708311957908,
+      "grad_norm": 0.44699838757514954,
+      "learning_rate": 9.22977809591983e-06,
+      "loss": 0.5566,
+      "mean_token_accuracy": 0.8752368673682213,
+      "num_tokens": 37485065.0,
+      "step": 22570
+    },
+    {
+      "epoch": 1.0775480499648051,
+      "grad_norm": 0.5032857656478882,
+      "learning_rate": 9.225005965163446e-06,
+      "loss": 0.656,
+      "mean_token_accuracy": 0.8768767550587654,
+      "num_tokens": 37501662.0,
+      "step": 22580
+    },
+    {
+      "epoch": 1.0780252687338192,
+      "grad_norm": 0.3128605782985687,
+      "learning_rate": 9.220233834407063e-06,
+      "loss": 0.6645,
+      "mean_token_accuracy": 0.8662539958953858,
+      "num_tokens": 37519154.0,
+      "step": 22590
+    },
+    {
+      "epoch": 1.0785024875028335,
+      "grad_norm": 0.3933728039264679,
+      "learning_rate": 9.215461703650681e-06,
+      "loss": 0.5397,
+      "mean_token_accuracy": 0.8856153175234794,
+      "num_tokens": 37534651.0,
+      "step": 22600
+    },
+    {
+      "epoch": 1.0789797062718476,
+      "grad_norm": 0.5340325832366943,
+      "learning_rate": 9.210689572894298e-06,
+      "loss": 0.6269,
+      "mean_token_accuracy": 0.8772767931222916,
+      "num_tokens": 37551794.0,
+      "step": 22610
+    },
+    {
+      "epoch": 1.0794569250408619,
+      "grad_norm": 0.3841538429260254,
+      "learning_rate": 9.205917442137915e-06,
+      "loss": 0.6175,
+      "mean_token_accuracy": 0.8755568400025368,
+      "num_tokens": 37567508.0,
+      "step": 22620
+    },
+    {
+      "epoch": 1.079934143809876,
+      "grad_norm": 0.37845683097839355,
+      "learning_rate": 9.201145311381533e-06,
+      "loss": 0.5757,
+      "mean_token_accuracy": 0.879976649582386,
+      "num_tokens": 37582508.0,
+      "step": 22630
+    },
+    {
+      "epoch": 1.0804113625788903,
+      "grad_norm": 0.3559890687465668,
+      "learning_rate": 9.19637318062515e-06,
+      "loss": 0.8135,
+      "mean_token_accuracy": 0.8407615974545479,
+      "num_tokens": 37601326.0,
+      "step": 22640
+    },
+    {
+      "epoch": 1.0808885813479043,
+      "grad_norm": 0.32038992643356323,
+      "learning_rate": 9.191601049868766e-06,
+      "loss": 0.5877,
+      "mean_token_accuracy": 0.885163950920105,
+      "num_tokens": 37616610.0,
+      "step": 22650
+    },
+    {
+      "epoch": 1.0813658001169186,
+      "grad_norm": 0.366234689950943,
+      "learning_rate": 9.186828919112385e-06,
+      "loss": 0.6438,
+      "mean_token_accuracy": 0.8744160294532776,
+      "num_tokens": 37633602.0,
+      "step": 22660
+    },
+    {
+      "epoch": 1.0818430188859327,
+      "grad_norm": 0.32627347111701965,
+      "learning_rate": 9.182056788356001e-06,
+      "loss": 0.6948,
+      "mean_token_accuracy": 0.8610922127962113,
+      "num_tokens": 37651592.0,
+      "step": 22670
+    },
+    {
+      "epoch": 1.082320237654947,
+      "grad_norm": 0.3474673628807068,
+      "learning_rate": 9.17728465759962e-06,
+      "loss": 0.6262,
+      "mean_token_accuracy": 0.8750404015183448,
+      "num_tokens": 37668010.0,
+      "step": 22680
+    },
+    {
+      "epoch": 1.082797456423961,
+      "grad_norm": 0.3955213129520416,
+      "learning_rate": 9.172512526843236e-06,
+      "loss": 0.5588,
+      "mean_token_accuracy": 0.8861236184835434,
+      "num_tokens": 37684538.0,
+      "step": 22690
+    },
+    {
+      "epoch": 1.0832746751929754,
+      "grad_norm": 0.4451896846294403,
+      "learning_rate": 9.167740396086855e-06,
+      "loss": 0.5774,
+      "mean_token_accuracy": 0.8859012797474861,
+      "num_tokens": 37700694.0,
+      "step": 22700
+    },
+    {
+      "epoch": 1.0837518939619895,
+      "grad_norm": 0.41938453912734985,
+      "learning_rate": 9.162968265330471e-06,
+      "loss": 0.6575,
+      "mean_token_accuracy": 0.8762999802827836,
+      "num_tokens": 37716717.0,
+      "step": 22710
+    },
+    {
+      "epoch": 1.0842291127310038,
+      "grad_norm": 0.38627904653549194,
+      "learning_rate": 9.158196134574088e-06,
+      "loss": 0.6263,
+      "mean_token_accuracy": 0.8728866443037987,
+      "num_tokens": 37734196.0,
+      "step": 22720
+    },
+    {
+      "epoch": 1.0847063315000178,
+      "grad_norm": 0.39531171321868896,
+      "learning_rate": 9.153424003817706e-06,
+      "loss": 0.5782,
+      "mean_token_accuracy": 0.8879543572664261,
+      "num_tokens": 37750684.0,
+      "step": 22730
+    },
+    {
+      "epoch": 1.0851835502690321,
+      "grad_norm": 0.3783516585826874,
+      "learning_rate": 9.148651873061323e-06,
+      "loss": 0.599,
+      "mean_token_accuracy": 0.8701232433319092,
+      "num_tokens": 37767146.0,
+      "step": 22740
+    },
+    {
+      "epoch": 1.0856607690380462,
+      "grad_norm": 0.39319974184036255,
+      "learning_rate": 9.14387974230494e-06,
+      "loss": 0.6413,
+      "mean_token_accuracy": 0.8686925515532493,
+      "num_tokens": 37787034.0,
+      "step": 22750
+    },
+    {
+      "epoch": 1.0861379878070605,
+      "grad_norm": 0.41720524430274963,
+      "learning_rate": 9.139107611548556e-06,
+      "loss": 0.669,
+      "mean_token_accuracy": 0.8737372472882271,
+      "num_tokens": 37802820.0,
+      "step": 22760
+    },
+    {
+      "epoch": 1.0866152065760746,
+      "grad_norm": 0.5915963053703308,
+      "learning_rate": 9.134335480792175e-06,
+      "loss": 0.7127,
+      "mean_token_accuracy": 0.8542029947042465,
+      "num_tokens": 37820083.0,
+      "step": 22770
+    },
+    {
+      "epoch": 1.0870924253450889,
+      "grad_norm": 0.48407578468322754,
+      "learning_rate": 9.129563350035791e-06,
+      "loss": 0.6094,
+      "mean_token_accuracy": 0.8686896711587906,
+      "num_tokens": 37836877.0,
+      "step": 22780
+    },
+    {
+      "epoch": 1.087569644114103,
+      "grad_norm": 0.411697119474411,
+      "learning_rate": 9.124791219279408e-06,
+      "loss": 0.5874,
+      "mean_token_accuracy": 0.8753976777195931,
+      "num_tokens": 37852853.0,
+      "step": 22790
+    },
+    {
+      "epoch": 1.0880468628831172,
+      "grad_norm": 0.43069422245025635,
+      "learning_rate": 9.120019088523026e-06,
+      "loss": 0.6337,
+      "mean_token_accuracy": 0.8802076116204262,
+      "num_tokens": 37869204.0,
+      "step": 22800
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.528831995220091e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null