Training in progress, step 130000

Browse files

Files changed (17) hide show

.gitattributes +0 -0
.gitignore +0 -0
README.md +0 -0
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +303 -3
last-checkpoint/training_args.bin +1 -1
merges.txt +0 -0
pytorch_model.bin +1 -1
special_tokens_map.json +0 -0
tokenizer.json +0 -0
tokenizer_config.json +0 -0
training_args.bin +1 -1
vocab.json +0 -0

.gitattributes CHANGED Viewed

File without changes

.gitignore CHANGED Viewed

File without changes

README.md CHANGED Viewed

File without changes

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05e2d269f333c96c29da8075e36a6de506892a84e8ab7a1d79c6b5baf653edf5
 size 586828837

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbf6fc53cf1912d3e7691ef7613cd375aa4ebf7b7ad451c5645721500ea0ccf0
 size 586828837

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cc943ae46672312ee4175b7b0df7b2bcb16bb1598452afd869122102f93e701
 size 146774203

 version https://git-lfs.github.com/spec/v1
+oid sha256:a97d7b725676a32c62a89d7830c299ebb9d3dfbfb1d9ac8f927a0fd779341bb2
 size 146774203

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf3e3ff5ca04195d00ae182843134a34efdb2e565df68413f5842b7a4a84c37b
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:0eb0b61bfcc70468942923a7fe3124d17f4bfdbc8fb34ab21c173f5aa5dd9170
 size 14503

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17a6c740782a206d1a7821b1fbc9827af7a83dbc888bd997056c93056ef861be
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c5b714fbb6e17634404af5447fa5ba38c8fc02f762871048ad92bc1ddf9e592
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2812a708855da00be5c7a2b5b6519990cb027a8d82f04f202c74834685819f6
 size 733555848

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e15b10691a90bb623d65c87c7e8ed415b9ec774dd56c0ff077ab98b237c15c1
 size 733555848

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.51566991968441,
-  "global_step": 120000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3606,11 +3606,311 @@
       "learning_rate": 0.003714012897878298,
       "loss": 8.5978,
       "step": 120000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 5,
-  "total_flos": 1.9125959786496e+17,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5586424129914441,
+  "global_step": 130000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.003714012897878298,
       "loss": 8.5978,
       "step": 120000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037235754018389664,
+      "loss": 8.5986,
+      "step": 120200
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037331443821634266,
+      "loss": 8.6062,
+      "step": 120400
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037427197968771695,
+      "loss": 8.5854,
+      "step": 120600
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037523016039774605,
+      "loss": 8.5959,
+      "step": 120800
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.003761841804919297,
+      "loss": 8.6054,
+      "step": 121000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.00377143623923569,
+      "loss": 8.5871,
+      "step": 121200
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037810369399734253,
+      "loss": 8.5885,
+      "step": 121400
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0037906438650188523,
+      "loss": 8.5805,
+      "step": 121600
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0038002569722310163,
+      "loss": 8.5889,
+      "step": 121800
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 0.0038098762194418477,
+      "loss": 8.5866,
+      "step": 122000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038195015644563388,
+      "loss": 8.5782,
+      "step": 122200
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038291329650527338,
+      "loss": 8.579,
+      "step": 122400
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038387703789827194,
+      "loss": 8.5773,
+      "step": 122600
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038484137639716006,
+      "loss": 8.582,
+      "step": 122800
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038580148164719733,
+      "loss": 8.5778,
+      "step": 123000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038676699873231536,
+      "loss": 8.5789,
+      "step": 123200
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0038773310024645593,
+      "loss": 8.578,
+      "step": 123400
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.003886997819517974,
+      "loss": 8.5609,
+      "step": 123600
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.003896670396079725,
+      "loss": 8.5707,
+      "step": 123800
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.003906300284101649,
+      "loss": 8.5732,
+      "step": 124000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.003915984224100703,
+      "loss": 8.5731,
+      "step": 124200
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.003925673796458692,
+      "loss": 8.5496,
+      "step": 124400
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0039353689586721285,
+      "loss": 8.5692,
+      "step": 124600
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0039450696682130065,
+      "loss": 8.5704,
+      "step": 124800
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.003954775882528979,
+      "loss": 8.5663,
+      "step": 125000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.003964487559043562,
+      "loss": 8.5697,
+      "step": 125200
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.003974204655156306,
+      "loss": 8.5784,
+      "step": 125400
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.003983927128242989,
+      "loss": 8.566,
+      "step": 125600
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.003993654935655802,
+      "loss": 8.5501,
+      "step": 125800
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.004003388034723539,
+      "loss": 8.5701,
+      "step": 126000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.004013077678025505,
+      "loss": 8.563,
+      "step": 126200
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.00402282120637189,
+      "loss": 8.5541,
+      "step": 126400
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.004032569898434814,
+      "loss": 8.5581,
+      "step": 126600
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.004042323711451458,
+      "loss": 8.5513,
+      "step": 126800
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004052082602636542,
+      "loss": 8.5474,
+      "step": 127000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004061846529182508,
+      "loss": 8.5427,
+      "step": 127200
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004071615448259712,
+      "loss": 8.5414,
+      "step": 127400
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.00408138931701661,
+      "loss": 8.5516,
+      "step": 127600
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004091168092579948,
+      "loss": 8.5422,
+      "step": 127800
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004100951732054943,
+      "loss": 8.5457,
+      "step": 128000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004110740192525482,
+      "loss": 8.5488,
+      "step": 128200
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.0041205334310543025,
+      "loss": 8.5424,
+      "step": 128400
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004130331404683179,
+      "loss": 8.5408,
+      "step": 128600
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004140134070433124,
+      "loss": 8.5473,
+      "step": 128800
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.004149892337236666,
+      "loss": 8.5442,
+      "step": 129000
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.004159704235286162,
+      "loss": 8.5338,
+      "step": 129200
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.004169520696612262,
+      "loss": 8.5434,
+      "step": 129400
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.004179341678154871,
+      "loss": 8.5343,
+      "step": 129600
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.0041891671368340785,
+      "loss": 8.5373,
+      "step": 129800
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.0041989970295503234,
+      "loss": 8.5275,
+      "step": 130000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 5,
+  "total_flos": 2.0719789768704e+17,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c33b030231937c49711cbb55890f65b86f81a68638a6a5c30e4f67ed0b41b6f
 size 3375

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f266c72fbc4bc9e80a8144b40b5365bf1402777bd57f354fdb5cde802d5943e
 size 3375

merges.txt CHANGED Viewed

File without changes

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cc943ae46672312ee4175b7b0df7b2bcb16bb1598452afd869122102f93e701
 size 146774203

 version https://git-lfs.github.com/spec/v1
+oid sha256:a97d7b725676a32c62a89d7830c299ebb9d3dfbfb1d9ac8f927a0fd779341bb2
 size 146774203

special_tokens_map.json CHANGED Viewed

File without changes

tokenizer.json CHANGED Viewed

File without changes

tokenizer_config.json CHANGED Viewed

File without changes

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c33b030231937c49711cbb55890f65b86f81a68638a6a5c30e4f67ed0b41b6f
 size 3375

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f266c72fbc4bc9e80a8144b40b5365bf1402777bd57f354fdb5cde802d5943e
 size 3375

vocab.json CHANGED Viewed

File without changes