Training in progress, step 55000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 487156538
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:436bf79533e258070c96b4760436afa1f9251b1590c7ae2a2f60dc7519e9b64b
|
| 3 |
size 487156538
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1059459406
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab2a7667c52f9cc64e61b137dc5df66439292fcc32acda2e7782c8372f9c8172
|
| 3 |
size 1059459406
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61a042d8f729c4f51ba538ba4c747cf1d8cbb1b59cf032f3422995a579b49f8a
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63b64a2edbbd5cf896abcb8f817b204e5a511d27c7efe13e0a92b23dc6a3b777
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0540aa39d91dc61087d3dd380d7b7750dc1d19afff10c530b1d0895a416cf32f
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47442bd8ad617950fc9791e10321850b084b057926e59d24f6b5e09aefa3043b
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1887d8c6d2dc250cfb0b7c57e61e4fa0abc40fda0dbe8977a6841b90daceb70
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -18908,6 +18908,356 @@
|
|
| 18908 |
"learning_rate": 0.0004867885958015101,
|
| 18909 |
"loss": 16.9579,
|
| 18910 |
"step": 54000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18911 |
}
|
| 18912 |
],
|
| 18913 |
"logging_steps": 20,
|
|
@@ -18927,7 +19277,7 @@
|
|
| 18927 |
"attributes": {}
|
| 18928 |
}
|
| 18929 |
},
|
| 18930 |
-
"total_flos": 1.
|
| 18931 |
"train_batch_size": 48,
|
| 18932 |
"trial_name": null,
|
| 18933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.08147230830306514,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 55000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 18908 |
"learning_rate": 0.0004867885958015101,
|
| 18909 |
"loss": 16.9579,
|
| 18910 |
"step": 54000
|
| 18911 |
+
},
|
| 18912 |
+
{
|
| 18913 |
+
"epoch": 0.08002061990057416,
|
| 18914 |
+
"grad_norm": 8.375,
|
| 18915 |
+
"learning_rate": 0.00048678365686650256,
|
| 18916 |
+
"loss": 16.8883,
|
| 18917 |
+
"step": 54020
|
| 18918 |
+
},
|
| 18919 |
+
{
|
| 18920 |
+
"epoch": 0.08005024619450254,
|
| 18921 |
+
"grad_norm": 6.28125,
|
| 18922 |
+
"learning_rate": 0.000486778717931495,
|
| 18923 |
+
"loss": 16.9452,
|
| 18924 |
+
"step": 54040
|
| 18925 |
+
},
|
| 18926 |
+
{
|
| 18927 |
+
"epoch": 0.08007987248843093,
|
| 18928 |
+
"grad_norm": 8.25,
|
| 18929 |
+
"learning_rate": 0.00048677377899648746,
|
| 18930 |
+
"loss": 16.8946,
|
| 18931 |
+
"step": 54060
|
| 18932 |
+
},
|
| 18933 |
+
{
|
| 18934 |
+
"epoch": 0.08010949878235932,
|
| 18935 |
+
"grad_norm": 8.5,
|
| 18936 |
+
"learning_rate": 0.00048676884006147985,
|
| 18937 |
+
"loss": 16.9801,
|
| 18938 |
+
"step": 54080
|
| 18939 |
+
},
|
| 18940 |
+
{
|
| 18941 |
+
"epoch": 0.0801391250762877,
|
| 18942 |
+
"grad_norm": 6.15625,
|
| 18943 |
+
"learning_rate": 0.0004867639011264723,
|
| 18944 |
+
"loss": 16.9925,
|
| 18945 |
+
"step": 54100
|
| 18946 |
+
},
|
| 18947 |
+
{
|
| 18948 |
+
"epoch": 0.08016875137021609,
|
| 18949 |
+
"grad_norm": 6.6875,
|
| 18950 |
+
"learning_rate": 0.00048675896219146475,
|
| 18951 |
+
"loss": 16.9666,
|
| 18952 |
+
"step": 54120
|
| 18953 |
+
},
|
| 18954 |
+
{
|
| 18955 |
+
"epoch": 0.08019837766414448,
|
| 18956 |
+
"grad_norm": 7.28125,
|
| 18957 |
+
"learning_rate": 0.0004867540232564572,
|
| 18958 |
+
"loss": 16.9161,
|
| 18959 |
+
"step": 54140
|
| 18960 |
+
},
|
| 18961 |
+
{
|
| 18962 |
+
"epoch": 0.08022800395807286,
|
| 18963 |
+
"grad_norm": 7.78125,
|
| 18964 |
+
"learning_rate": 0.0004867490843214496,
|
| 18965 |
+
"loss": 17.0152,
|
| 18966 |
+
"step": 54160
|
| 18967 |
+
},
|
| 18968 |
+
{
|
| 18969 |
+
"epoch": 0.08025763025200125,
|
| 18970 |
+
"grad_norm": 7.40625,
|
| 18971 |
+
"learning_rate": 0.00048674414538644204,
|
| 18972 |
+
"loss": 16.9116,
|
| 18973 |
+
"step": 54180
|
| 18974 |
+
},
|
| 18975 |
+
{
|
| 18976 |
+
"epoch": 0.08028725654592965,
|
| 18977 |
+
"grad_norm": 7.15625,
|
| 18978 |
+
"learning_rate": 0.0004867392064514345,
|
| 18979 |
+
"loss": 16.9337,
|
| 18980 |
+
"step": 54200
|
| 18981 |
+
},
|
| 18982 |
+
{
|
| 18983 |
+
"epoch": 0.08031688283985804,
|
| 18984 |
+
"grad_norm": 8.375,
|
| 18985 |
+
"learning_rate": 0.00048673426751642693,
|
| 18986 |
+
"loss": 16.9931,
|
| 18987 |
+
"step": 54220
|
| 18988 |
+
},
|
| 18989 |
+
{
|
| 18990 |
+
"epoch": 0.08034650913378642,
|
| 18991 |
+
"grad_norm": 7.3125,
|
| 18992 |
+
"learning_rate": 0.0004867293285814193,
|
| 18993 |
+
"loss": 17.0016,
|
| 18994 |
+
"step": 54240
|
| 18995 |
+
},
|
| 18996 |
+
{
|
| 18997 |
+
"epoch": 0.08037613542771481,
|
| 18998 |
+
"grad_norm": 11.3125,
|
| 18999 |
+
"learning_rate": 0.0004867243896464118,
|
| 19000 |
+
"loss": 16.9347,
|
| 19001 |
+
"step": 54260
|
| 19002 |
+
},
|
| 19003 |
+
{
|
| 19004 |
+
"epoch": 0.0804057617216432,
|
| 19005 |
+
"grad_norm": 9.875,
|
| 19006 |
+
"learning_rate": 0.0004867194507114042,
|
| 19007 |
+
"loss": 16.9192,
|
| 19008 |
+
"step": 54280
|
| 19009 |
+
},
|
| 19010 |
+
{
|
| 19011 |
+
"epoch": 0.08043538801557158,
|
| 19012 |
+
"grad_norm": 8.6875,
|
| 19013 |
+
"learning_rate": 0.0004867145117763966,
|
| 19014 |
+
"loss": 16.8667,
|
| 19015 |
+
"step": 54300
|
| 19016 |
+
},
|
| 19017 |
+
{
|
| 19018 |
+
"epoch": 0.08046501430949997,
|
| 19019 |
+
"grad_norm": 11.875,
|
| 19020 |
+
"learning_rate": 0.00048670957284138906,
|
| 19021 |
+
"loss": 16.9558,
|
| 19022 |
+
"step": 54320
|
| 19023 |
+
},
|
| 19024 |
+
{
|
| 19025 |
+
"epoch": 0.08049464060342836,
|
| 19026 |
+
"grad_norm": 8.6875,
|
| 19027 |
+
"learning_rate": 0.0004867046339063815,
|
| 19028 |
+
"loss": 17.0024,
|
| 19029 |
+
"step": 54340
|
| 19030 |
+
},
|
| 19031 |
+
{
|
| 19032 |
+
"epoch": 0.08052426689735674,
|
| 19033 |
+
"grad_norm": 6.625,
|
| 19034 |
+
"learning_rate": 0.00048669969497137396,
|
| 19035 |
+
"loss": 16.9562,
|
| 19036 |
+
"step": 54360
|
| 19037 |
+
},
|
| 19038 |
+
{
|
| 19039 |
+
"epoch": 0.08055389319128513,
|
| 19040 |
+
"grad_norm": 6.9375,
|
| 19041 |
+
"learning_rate": 0.00048669475603636635,
|
| 19042 |
+
"loss": 16.9164,
|
| 19043 |
+
"step": 54380
|
| 19044 |
+
},
|
| 19045 |
+
{
|
| 19046 |
+
"epoch": 0.08058351948521351,
|
| 19047 |
+
"grad_norm": 9.625,
|
| 19048 |
+
"learning_rate": 0.0004866898171013588,
|
| 19049 |
+
"loss": 16.9818,
|
| 19050 |
+
"step": 54400
|
| 19051 |
+
},
|
| 19052 |
+
{
|
| 19053 |
+
"epoch": 0.0806131457791419,
|
| 19054 |
+
"grad_norm": 6.875,
|
| 19055 |
+
"learning_rate": 0.00048668487816635125,
|
| 19056 |
+
"loss": 16.9353,
|
| 19057 |
+
"step": 54420
|
| 19058 |
+
},
|
| 19059 |
+
{
|
| 19060 |
+
"epoch": 0.08064277207307029,
|
| 19061 |
+
"grad_norm": 9.25,
|
| 19062 |
+
"learning_rate": 0.0004866799392313437,
|
| 19063 |
+
"loss": 16.9368,
|
| 19064 |
+
"step": 54440
|
| 19065 |
+
},
|
| 19066 |
+
{
|
| 19067 |
+
"epoch": 0.08067239836699867,
|
| 19068 |
+
"grad_norm": 8.0625,
|
| 19069 |
+
"learning_rate": 0.0004866750002963361,
|
| 19070 |
+
"loss": 16.9073,
|
| 19071 |
+
"step": 54460
|
| 19072 |
+
},
|
| 19073 |
+
{
|
| 19074 |
+
"epoch": 0.08070202466092706,
|
| 19075 |
+
"grad_norm": 6.71875,
|
| 19076 |
+
"learning_rate": 0.00048667006136132854,
|
| 19077 |
+
"loss": 16.9283,
|
| 19078 |
+
"step": 54480
|
| 19079 |
+
},
|
| 19080 |
+
{
|
| 19081 |
+
"epoch": 0.08073165095485545,
|
| 19082 |
+
"grad_norm": 6.84375,
|
| 19083 |
+
"learning_rate": 0.000486665122426321,
|
| 19084 |
+
"loss": 16.9585,
|
| 19085 |
+
"step": 54500
|
| 19086 |
+
},
|
| 19087 |
+
{
|
| 19088 |
+
"epoch": 0.08076127724878385,
|
| 19089 |
+
"grad_norm": 6.84375,
|
| 19090 |
+
"learning_rate": 0.00048666018349131343,
|
| 19091 |
+
"loss": 16.9718,
|
| 19092 |
+
"step": 54520
|
| 19093 |
+
},
|
| 19094 |
+
{
|
| 19095 |
+
"epoch": 0.08079090354271223,
|
| 19096 |
+
"grad_norm": 6.6875,
|
| 19097 |
+
"learning_rate": 0.0004866552445563058,
|
| 19098 |
+
"loss": 16.974,
|
| 19099 |
+
"step": 54540
|
| 19100 |
+
},
|
| 19101 |
+
{
|
| 19102 |
+
"epoch": 0.08082052983664062,
|
| 19103 |
+
"grad_norm": 7.25,
|
| 19104 |
+
"learning_rate": 0.0004866503056212983,
|
| 19105 |
+
"loss": 16.9887,
|
| 19106 |
+
"step": 54560
|
| 19107 |
+
},
|
| 19108 |
+
{
|
| 19109 |
+
"epoch": 0.080850156130569,
|
| 19110 |
+
"grad_norm": 8.375,
|
| 19111 |
+
"learning_rate": 0.0004866453666862907,
|
| 19112 |
+
"loss": 17.0524,
|
| 19113 |
+
"step": 54580
|
| 19114 |
+
},
|
| 19115 |
+
{
|
| 19116 |
+
"epoch": 0.08087978242449739,
|
| 19117 |
+
"grad_norm": 7.375,
|
| 19118 |
+
"learning_rate": 0.00048664042775128317,
|
| 19119 |
+
"loss": 16.9761,
|
| 19120 |
+
"step": 54600
|
| 19121 |
+
},
|
| 19122 |
+
{
|
| 19123 |
+
"epoch": 0.08090940871842578,
|
| 19124 |
+
"grad_norm": 7.0625,
|
| 19125 |
+
"learning_rate": 0.00048663548881627556,
|
| 19126 |
+
"loss": 16.8978,
|
| 19127 |
+
"step": 54620
|
| 19128 |
+
},
|
| 19129 |
+
{
|
| 19130 |
+
"epoch": 0.08093903501235417,
|
| 19131 |
+
"grad_norm": 7.40625,
|
| 19132 |
+
"learning_rate": 0.000486630549881268,
|
| 19133 |
+
"loss": 16.9383,
|
| 19134 |
+
"step": 54640
|
| 19135 |
+
},
|
| 19136 |
+
{
|
| 19137 |
+
"epoch": 0.08096866130628255,
|
| 19138 |
+
"grad_norm": 7.1875,
|
| 19139 |
+
"learning_rate": 0.00048662561094626046,
|
| 19140 |
+
"loss": 16.9231,
|
| 19141 |
+
"step": 54660
|
| 19142 |
+
},
|
| 19143 |
+
{
|
| 19144 |
+
"epoch": 0.08099828760021094,
|
| 19145 |
+
"grad_norm": 7.90625,
|
| 19146 |
+
"learning_rate": 0.00048662067201125285,
|
| 19147 |
+
"loss": 16.9352,
|
| 19148 |
+
"step": 54680
|
| 19149 |
+
},
|
| 19150 |
+
{
|
| 19151 |
+
"epoch": 0.08102791389413933,
|
| 19152 |
+
"grad_norm": 11.6875,
|
| 19153 |
+
"learning_rate": 0.0004866157330762453,
|
| 19154 |
+
"loss": 16.9842,
|
| 19155 |
+
"step": 54700
|
| 19156 |
+
},
|
| 19157 |
+
{
|
| 19158 |
+
"epoch": 0.08105754018806771,
|
| 19159 |
+
"grad_norm": 6.625,
|
| 19160 |
+
"learning_rate": 0.00048661079414123775,
|
| 19161 |
+
"loss": 16.9285,
|
| 19162 |
+
"step": 54720
|
| 19163 |
+
},
|
| 19164 |
+
{
|
| 19165 |
+
"epoch": 0.0810871664819961,
|
| 19166 |
+
"grad_norm": 6.28125,
|
| 19167 |
+
"learning_rate": 0.0004866058552062302,
|
| 19168 |
+
"loss": 16.9533,
|
| 19169 |
+
"step": 54740
|
| 19170 |
+
},
|
| 19171 |
+
{
|
| 19172 |
+
"epoch": 0.08111679277592448,
|
| 19173 |
+
"grad_norm": 10.5,
|
| 19174 |
+
"learning_rate": 0.0004866009162712226,
|
| 19175 |
+
"loss": 16.9167,
|
| 19176 |
+
"step": 54760
|
| 19177 |
+
},
|
| 19178 |
+
{
|
| 19179 |
+
"epoch": 0.08114641906985287,
|
| 19180 |
+
"grad_norm": 12.5,
|
| 19181 |
+
"learning_rate": 0.00048659597733621504,
|
| 19182 |
+
"loss": 16.8854,
|
| 19183 |
+
"step": 54780
|
| 19184 |
+
},
|
| 19185 |
+
{
|
| 19186 |
+
"epoch": 0.08117604536378126,
|
| 19187 |
+
"grad_norm": 6.875,
|
| 19188 |
+
"learning_rate": 0.0004865910384012075,
|
| 19189 |
+
"loss": 16.9078,
|
| 19190 |
+
"step": 54800
|
| 19191 |
+
},
|
| 19192 |
+
{
|
| 19193 |
+
"epoch": 0.08120567165770964,
|
| 19194 |
+
"grad_norm": 6.65625,
|
| 19195 |
+
"learning_rate": 0.00048658609946619993,
|
| 19196 |
+
"loss": 16.9198,
|
| 19197 |
+
"step": 54820
|
| 19198 |
+
},
|
| 19199 |
+
{
|
| 19200 |
+
"epoch": 0.08123529795163804,
|
| 19201 |
+
"grad_norm": 7.9375,
|
| 19202 |
+
"learning_rate": 0.0004865811605311923,
|
| 19203 |
+
"loss": 16.9367,
|
| 19204 |
+
"step": 54840
|
| 19205 |
+
},
|
| 19206 |
+
{
|
| 19207 |
+
"epoch": 0.08126492424556643,
|
| 19208 |
+
"grad_norm": 7.1875,
|
| 19209 |
+
"learning_rate": 0.0004865762215961848,
|
| 19210 |
+
"loss": 16.9474,
|
| 19211 |
+
"step": 54860
|
| 19212 |
+
},
|
| 19213 |
+
{
|
| 19214 |
+
"epoch": 0.08129455053949482,
|
| 19215 |
+
"grad_norm": 7.0625,
|
| 19216 |
+
"learning_rate": 0.0004865712826611772,
|
| 19217 |
+
"loss": 16.93,
|
| 19218 |
+
"step": 54880
|
| 19219 |
+
},
|
| 19220 |
+
{
|
| 19221 |
+
"epoch": 0.0813241768334232,
|
| 19222 |
+
"grad_norm": 7.25,
|
| 19223 |
+
"learning_rate": 0.00048656634372616967,
|
| 19224 |
+
"loss": 16.9968,
|
| 19225 |
+
"step": 54900
|
| 19226 |
+
},
|
| 19227 |
+
{
|
| 19228 |
+
"epoch": 0.08135380312735159,
|
| 19229 |
+
"grad_norm": 7.40625,
|
| 19230 |
+
"learning_rate": 0.00048656140479116206,
|
| 19231 |
+
"loss": 16.9388,
|
| 19232 |
+
"step": 54920
|
| 19233 |
+
},
|
| 19234 |
+
{
|
| 19235 |
+
"epoch": 0.08138342942127998,
|
| 19236 |
+
"grad_norm": 6.375,
|
| 19237 |
+
"learning_rate": 0.00048655646585615456,
|
| 19238 |
+
"loss": 16.9977,
|
| 19239 |
+
"step": 54940
|
| 19240 |
+
},
|
| 19241 |
+
{
|
| 19242 |
+
"epoch": 0.08141305571520836,
|
| 19243 |
+
"grad_norm": 8.125,
|
| 19244 |
+
"learning_rate": 0.00048655152692114696,
|
| 19245 |
+
"loss": 16.9571,
|
| 19246 |
+
"step": 54960
|
| 19247 |
+
},
|
| 19248 |
+
{
|
| 19249 |
+
"epoch": 0.08144268200913675,
|
| 19250 |
+
"grad_norm": 7.59375,
|
| 19251 |
+
"learning_rate": 0.00048654658798613935,
|
| 19252 |
+
"loss": 16.9437,
|
| 19253 |
+
"step": 54980
|
| 19254 |
+
},
|
| 19255 |
+
{
|
| 19256 |
+
"epoch": 0.08147230830306514,
|
| 19257 |
+
"grad_norm": 6.34375,
|
| 19258 |
+
"learning_rate": 0.0004865416490511318,
|
| 19259 |
+
"loss": 16.92,
|
| 19260 |
+
"step": 55000
|
| 19261 |
}
|
| 19262 |
],
|
| 19263 |
"logging_steps": 20,
|
|
|
|
| 19277 |
"attributes": {}
|
| 19278 |
}
|
| 19279 |
},
|
| 19280 |
+
"total_flos": 1.224059432458917e+20,
|
| 19281 |
"train_batch_size": 48,
|
| 19282 |
"trial_name": null,
|
| 19283 |
"trial_params": null
|