Training in progress, step 10000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb42262712eb0446298aefaf9502d1bce878381fa4256b4c412cb875cf7676dd
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9da6829b1edfacc61441699b4ac6d5dc6abb737be9152be8f29e5862abecd54
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37b61fef6d8dab3892dcb676937372c6938b18c4b8be84f3a00936c78dd241b6
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d9c41bcb1f7e3d0ff7cf1e9246c52eba5532bd32a5af7bbe5d88c8501561fc3
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:482021b320968c1aef3bb227f66c018b401e7317860a8a4bae46f36ed2c71427
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0f02b717c272316648da49ca6391d63601d6d8e37a3b73ce0655aa44e0b1efd
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4784f3b1ac308d4093c525f58ebfb1ed5c4e7ca17828bd58e2e6a8e2baed20b5
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:53471871a37f3cc35b4a656a6f0cfda18046c304a91d9bf8b29b14eea2ccc156
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.6050008447372868,
   "eval_steps": 500,
-  "global_step": 9500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6817,6 +6817,364 @@
       "eval_samples_per_second": 129.654,
       "eval_steps_per_second": 2.723,
       "step": 9500
     }
   ],
   "logging_steps": 10,
@@ -6836,7 +7194,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.177318894608056e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.6894745734076704,
   "eval_steps": 500,
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 129.654,
       "eval_steps_per_second": 2.723,
       "step": 9500
+    },
+    {
+      "epoch": 1.6066903193106943,
+      "grad_norm": 0.4606820344924927,
+      "learning_rate": 3.960727047894527e-05,
+      "loss": 4.359199523925781,
+      "step": 9510
+    },
+    {
+      "epoch": 1.608379793884102,
+      "grad_norm": 0.48804476857185364,
+      "learning_rate": 3.928353538569023e-05,
+      "loss": 4.32340087890625,
+      "step": 9520
+    },
+    {
+      "epoch": 1.6100692684575098,
+      "grad_norm": 0.4648666977882385,
+      "learning_rate": 3.8960929302853074e-05,
+      "loss": 4.31898078918457,
+      "step": 9530
+    },
+    {
+      "epoch": 1.6117587430309173,
+      "grad_norm": 0.48212724924087524,
+      "learning_rate": 3.863945552014892e-05,
+      "loss": 4.320017242431641,
+      "step": 9540
+    },
+    {
+      "epoch": 1.6134482176043252,
+      "grad_norm": 0.46979817748069763,
+      "learning_rate": 3.831911731574648e-05,
+      "loss": 4.365304946899414,
+      "step": 9550
+    },
+    {
+      "epoch": 1.6151376921777327,
+      "grad_norm": 0.47188496589660645,
+      "learning_rate": 3.799991795623471e-05,
+      "loss": 4.329359817504883,
+      "step": 9560
+    },
+    {
+      "epoch": 1.6168271667511402,
+      "grad_norm": 0.47442197799682617,
+      "learning_rate": 3.7681860696589216e-05,
+      "loss": 4.333200836181641,
+      "step": 9570
+    },
+    {
+      "epoch": 1.6185166413245482,
+      "grad_norm": 0.46460849046707153,
+      "learning_rate": 3.7364948780139344e-05,
+      "loss": 4.2955772399902346,
+      "step": 9580
+    },
+    {
+      "epoch": 1.6202061158979557,
+      "grad_norm": 0.4687038064002991,
+      "learning_rate": 3.70491854385351e-05,
+      "loss": 4.287596893310547,
+      "step": 9590
+    },
+    {
+      "epoch": 1.6218955904713634,
+      "grad_norm": 0.4717998802661896,
+      "learning_rate": 3.673457389171401e-05,
+      "loss": 4.3026374816894535,
+      "step": 9600
+    },
+    {
+      "epoch": 1.6235850650447712,
+      "grad_norm": 0.47237226366996765,
+      "learning_rate": 3.642111734786833e-05,
+      "loss": 4.3385662078857425,
+      "step": 9610
+    },
+    {
+      "epoch": 1.6252745396181787,
+      "grad_norm": 0.48337623476982117,
+      "learning_rate": 3.610881900341261e-05,
+      "loss": 4.29266357421875,
+      "step": 9620
+    },
+    {
+      "epoch": 1.6269640141915864,
+      "grad_norm": 0.46639102697372437,
+      "learning_rate": 3.579768204295063e-05,
+      "loss": 4.3327476501464846,
+      "step": 9630
+    },
+    {
+      "epoch": 1.6286534887649942,
+      "grad_norm": 0.4697898030281067,
+      "learning_rate": 3.54877096392434e-05,
+      "loss": 4.336753463745117,
+      "step": 9640
+    },
+    {
+      "epoch": 1.6303429633384017,
+      "grad_norm": 0.46316251158714294,
+      "learning_rate": 3.5178904953176354e-05,
+      "loss": 4.306925964355469,
+      "step": 9650
+    },
+    {
+      "epoch": 1.6320324379118094,
+      "grad_norm": 0.4708452820777893,
+      "learning_rate": 3.487127113372755e-05,
+      "loss": 4.326674270629883,
+      "step": 9660
+    },
+    {
+      "epoch": 1.6337219124852171,
+      "grad_norm": 0.4727766811847687,
+      "learning_rate": 3.4564811317935235e-05,
+      "loss": 4.304772186279297,
+      "step": 9670
+    },
+    {
+      "epoch": 1.6354113870586247,
+      "grad_norm": 0.47584787011146545,
+      "learning_rate": 3.4259528630865995e-05,
+      "loss": 4.3285400390625,
+      "step": 9680
+    },
+    {
+      "epoch": 1.6371008616320324,
+      "grad_norm": 0.4718579947948456,
+      "learning_rate": 3.3955426185582826e-05,
+      "loss": 4.310879135131836,
+      "step": 9690
+    },
+    {
+      "epoch": 1.6387903362054401,
+      "grad_norm": 0.466880738735199,
+      "learning_rate": 3.365250708311352e-05,
+      "loss": 4.325877380371094,
+      "step": 9700
+    },
+    {
+      "epoch": 1.6404798107788476,
+      "grad_norm": 0.46377378702163696,
+      "learning_rate": 3.335077441241895e-05,
+      "loss": 4.307848358154297,
+      "step": 9710
+    },
+    {
+      "epoch": 1.6421692853522556,
+      "grad_norm": 0.718170166015625,
+      "learning_rate": 3.305023125036148e-05,
+      "loss": 4.313734436035157,
+      "step": 9720
+    },
+    {
+      "epoch": 1.643858759925663,
+      "grad_norm": 0.463375985622406,
+      "learning_rate": 3.275088066167369e-05,
+      "loss": 4.3089752197265625,
+      "step": 9730
+    },
+    {
+      "epoch": 1.6455482344990708,
+      "grad_norm": 0.47580841183662415,
+      "learning_rate": 3.245272569892727e-05,
+      "loss": 4.3522186279296875,
+      "step": 9740
+    },
+    {
+      "epoch": 1.6472377090724786,
+      "grad_norm": 0.46081092953681946,
+      "learning_rate": 3.215576940250155e-05,
+      "loss": 4.3113548278808596,
+      "step": 9750
+    },
+    {
+      "epoch": 1.648927183645886,
+      "grad_norm": 0.47329118847846985,
+      "learning_rate": 3.1860014800552734e-05,
+      "loss": 4.3111930847167965,
+      "step": 9760
+    },
+    {
+      "epoch": 1.6506166582192938,
+      "grad_norm": 0.4813630282878876,
+      "learning_rate": 3.15654649089831e-05,
+      "loss": 4.312236404418945,
+      "step": 9770
+    },
+    {
+      "epoch": 1.6523061327927016,
+      "grad_norm": 0.5134222507476807,
+      "learning_rate": 3.1272122731409916e-05,
+      "loss": 4.3267356872558596,
+      "step": 9780
+    },
+    {
+      "epoch": 1.653995607366109,
+      "grad_norm": 0.4687715768814087,
+      "learning_rate": 3.097999125913518e-05,
+      "loss": 4.311066055297852,
+      "step": 9790
+    },
+    {
+      "epoch": 1.6556850819395168,
+      "grad_norm": 0.4736403524875641,
+      "learning_rate": 3.068907347111485e-05,
+      "loss": 4.3107654571533205,
+      "step": 9800
+    },
+    {
+      "epoch": 1.6573745565129245,
+      "grad_norm": 0.4813496172428131,
+      "learning_rate": 3.0399372333928644e-05,
+      "loss": 4.314376449584961,
+      "step": 9810
+    },
+    {
+      "epoch": 1.659064031086332,
+      "grad_norm": 0.49036741256713867,
+      "learning_rate": 3.0110890801749627e-05,
+      "loss": 4.307826995849609,
+      "step": 9820
+    },
+    {
+      "epoch": 1.6607535056597398,
+      "grad_norm": 0.4669703543186188,
+      "learning_rate": 2.982363181631418e-05,
+      "loss": 4.303530883789063,
+      "step": 9830
+    },
+    {
+      "epoch": 1.6624429802331475,
+      "grad_norm": 0.4788713753223419,
+      "learning_rate": 2.9537598306892103e-05,
+      "loss": 4.308844375610351,
+      "step": 9840
+    },
+    {
+      "epoch": 1.664132454806555,
+      "grad_norm": 0.5307414531707764,
+      "learning_rate": 2.9252793190256447e-05,
+      "loss": 4.285565567016602,
+      "step": 9850
+    },
+    {
+      "epoch": 1.665821929379963,
+      "grad_norm": 0.4659578502178192,
+      "learning_rate": 2.896921937065419e-05,
+      "loss": 4.313910675048828,
+      "step": 9860
+    },
+    {
+      "epoch": 1.6675114039533705,
+      "grad_norm": 0.46300381422042847,
+      "learning_rate": 2.8686879739776137e-05,
+      "loss": 4.31811408996582,
+      "step": 9870
+    },
+    {
+      "epoch": 1.669200878526778,
+      "grad_norm": 0.4717971086502075,
+      "learning_rate": 2.8405777176727924e-05,
+      "loss": 4.318044662475586,
+      "step": 9880
+    },
+    {
+      "epoch": 1.670890353100186,
+      "grad_norm": 0.45347994565963745,
+      "learning_rate": 2.8125914548000243e-05,
+      "loss": 4.295824432373047,
+      "step": 9890
+    },
+    {
+      "epoch": 1.6725798276735935,
+      "grad_norm": 0.4703952670097351,
+      "learning_rate": 2.7847294707439828e-05,
+      "loss": 4.28874626159668,
+      "step": 9900
+    },
+    {
+      "epoch": 1.6742693022470012,
+      "grad_norm": 0.4726548194885254,
+      "learning_rate": 2.7569920496220398e-05,
+      "loss": 4.304931259155273,
+      "step": 9910
+    },
+    {
+      "epoch": 1.675958776820409,
+      "grad_norm": 0.47394225001335144,
+      "learning_rate": 2.729379474281352e-05,
+      "loss": 4.3050182342529295,
+      "step": 9920
+    },
+    {
+      "epoch": 1.6776482513938165,
+      "grad_norm": 0.49833500385284424,
+      "learning_rate": 2.701892026295979e-05,
+      "loss": 4.331858062744141,
+      "step": 9930
+    },
+    {
+      "epoch": 1.6793377259672242,
+      "grad_norm": 0.4709710478782654,
+      "learning_rate": 2.6745299859640318e-05,
+      "loss": 4.332807159423828,
+      "step": 9940
+    },
+    {
+      "epoch": 1.681027200540632,
+      "grad_norm": 0.48379939794540405,
+      "learning_rate": 2.6472936323047972e-05,
+      "loss": 4.311476516723633,
+      "step": 9950
+    },
+    {
+      "epoch": 1.6827166751140394,
+      "grad_norm": 0.475941926240921,
+      "learning_rate": 2.6201832430558866e-05,
+      "loss": 4.314311599731445,
+      "step": 9960
+    },
+    {
+      "epoch": 1.6844061496874472,
+      "grad_norm": 0.4633561372756958,
+      "learning_rate": 2.5931990946704206e-05,
+      "loss": 4.312783050537109,
+      "step": 9970
+    },
+    {
+      "epoch": 1.686095624260855,
+      "grad_norm": 0.4624374806880951,
+      "learning_rate": 2.5663414623141943e-05,
+      "loss": 4.315936279296875,
+      "step": 9980
+    },
+    {
+      "epoch": 1.6877850988342624,
+      "grad_norm": 0.46104687452316284,
+      "learning_rate": 2.5396106198628947e-05,
+      "loss": 4.317576217651367,
+      "step": 9990
+    },
+    {
+      "epoch": 1.6894745734076704,
+      "grad_norm": 0.46486878395080566,
+      "learning_rate": 2.5130068398992716e-05,
+      "loss": 4.3148681640625,
+      "step": 10000
+    },
+    {
+      "epoch": 1.6894745734076704,
+      "eval_loss": 4.282918930053711,
+      "eval_runtime": 3.8826,
+      "eval_samples_per_second": 257.563,
+      "eval_steps_per_second": 5.409,
+      "step": 10000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.344547305037496e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null