Training in progress, step 1700, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 159967880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0930d6e64875820c22c7cffca09ca1acaf6cdcc1e7968d0c5a856968a87824e8
|
3 |
size 159967880
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 81735892
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b9d593632762fa95cd51142f1adb4c560e59f1f9d92ecb7de05d6485c887205
|
3 |
size 81735892
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b283d316b0c499174401fc8457651f1fb183c6003c46a4d25e29dfecd151147
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a6797f0f81e1d80bc4d2d6295ad3c421b4b433370ca9e0c209b11267f3ef64f
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bd17fa23f67ef7fbf6e377f7e0c23474bf385755bb96f63949a2752039f1de4
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:212929e3bfad92319ef54b8b509922f96991c6c7d7791e9983b6f58b96c35aff
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671918de7ffc87d6187292033f79bb1cacaa6a7d5996a986d5989df4cdad43d1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11207,6 +11207,706 @@
|
|
11207 |
"learning_rate": 6.759906759906761e-05,
|
11208 |
"loss": 0.0034,
|
11209 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11210 |
}
|
11211 |
],
|
11212 |
"logging_steps": 1,
|
@@ -11226,7 +11926,7 @@
|
|
11226 |
"attributes": {}
|
11227 |
}
|
11228 |
},
|
11229 |
-
"total_flos":
|
11230 |
"train_batch_size": 4,
|
11231 |
"trial_name": null,
|
11232 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9906759906759907,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11207 |
"learning_rate": 6.759906759906761e-05,
|
11208 |
"loss": 0.0034,
|
11209 |
"step": 1600
|
11210 |
+
},
|
11211 |
+
{
|
11212 |
+
"epoch": 0.932983682983683,
|
11213 |
+
"grad_norm": 0.0013091769069433212,
|
11214 |
+
"learning_rate": 6.701631701631703e-05,
|
11215 |
+
"loss": 0.0038,
|
11216 |
+
"step": 1601
|
11217 |
+
},
|
11218 |
+
{
|
11219 |
+
"epoch": 0.9335664335664335,
|
11220 |
+
"grad_norm": 0.0009162210044451058,
|
11221 |
+
"learning_rate": 6.643356643356644e-05,
|
11222 |
+
"loss": 0.0028,
|
11223 |
+
"step": 1602
|
11224 |
+
},
|
11225 |
+
{
|
11226 |
+
"epoch": 0.9341491841491841,
|
11227 |
+
"grad_norm": 0.0008717044838704169,
|
11228 |
+
"learning_rate": 6.585081585081586e-05,
|
11229 |
+
"loss": 0.003,
|
11230 |
+
"step": 1603
|
11231 |
+
},
|
11232 |
+
{
|
11233 |
+
"epoch": 0.9347319347319347,
|
11234 |
+
"grad_norm": 0.0010865787044167519,
|
11235 |
+
"learning_rate": 6.526806526806527e-05,
|
11236 |
+
"loss": 0.0035,
|
11237 |
+
"step": 1604
|
11238 |
+
},
|
11239 |
+
{
|
11240 |
+
"epoch": 0.9353146853146853,
|
11241 |
+
"grad_norm": 0.0007628489984199405,
|
11242 |
+
"learning_rate": 6.46853146853147e-05,
|
11243 |
+
"loss": 0.0025,
|
11244 |
+
"step": 1605
|
11245 |
+
},
|
11246 |
+
{
|
11247 |
+
"epoch": 0.9358974358974359,
|
11248 |
+
"grad_norm": 0.0009772854391485453,
|
11249 |
+
"learning_rate": 6.41025641025641e-05,
|
11250 |
+
"loss": 0.0041,
|
11251 |
+
"step": 1606
|
11252 |
+
},
|
11253 |
+
{
|
11254 |
+
"epoch": 0.9364801864801865,
|
11255 |
+
"grad_norm": 0.0008730532717891037,
|
11256 |
+
"learning_rate": 6.351981351981353e-05,
|
11257 |
+
"loss": 0.0028,
|
11258 |
+
"step": 1607
|
11259 |
+
},
|
11260 |
+
{
|
11261 |
+
"epoch": 0.9370629370629371,
|
11262 |
+
"grad_norm": 0.0007411614060401917,
|
11263 |
+
"learning_rate": 6.293706293706295e-05,
|
11264 |
+
"loss": 0.0032,
|
11265 |
+
"step": 1608
|
11266 |
+
},
|
11267 |
+
{
|
11268 |
+
"epoch": 0.9376456876456877,
|
11269 |
+
"grad_norm": 0.0009279727819375694,
|
11270 |
+
"learning_rate": 6.235431235431236e-05,
|
11271 |
+
"loss": 0.0022,
|
11272 |
+
"step": 1609
|
11273 |
+
},
|
11274 |
+
{
|
11275 |
+
"epoch": 0.9382284382284383,
|
11276 |
+
"grad_norm": 0.0010616903891786933,
|
11277 |
+
"learning_rate": 6.177156177156177e-05,
|
11278 |
+
"loss": 0.0044,
|
11279 |
+
"step": 1610
|
11280 |
+
},
|
11281 |
+
{
|
11282 |
+
"epoch": 0.9388111888111889,
|
11283 |
+
"grad_norm": 0.0009532080148346722,
|
11284 |
+
"learning_rate": 6.118881118881119e-05,
|
11285 |
+
"loss": 0.0026,
|
11286 |
+
"step": 1611
|
11287 |
+
},
|
11288 |
+
{
|
11289 |
+
"epoch": 0.9393939393939394,
|
11290 |
+
"grad_norm": 0.0007426452939398587,
|
11291 |
+
"learning_rate": 6.060606060606061e-05,
|
11292 |
+
"loss": 0.0027,
|
11293 |
+
"step": 1612
|
11294 |
+
},
|
11295 |
+
{
|
11296 |
+
"epoch": 0.9399766899766899,
|
11297 |
+
"grad_norm": 0.0010714689269661903,
|
11298 |
+
"learning_rate": 6.002331002331003e-05,
|
11299 |
+
"loss": 0.0041,
|
11300 |
+
"step": 1613
|
11301 |
+
},
|
11302 |
+
{
|
11303 |
+
"epoch": 0.9405594405594405,
|
11304 |
+
"grad_norm": 0.0008739576442167163,
|
11305 |
+
"learning_rate": 5.944055944055944e-05,
|
11306 |
+
"loss": 0.0035,
|
11307 |
+
"step": 1614
|
11308 |
+
},
|
11309 |
+
{
|
11310 |
+
"epoch": 0.9411421911421911,
|
11311 |
+
"grad_norm": 0.0018457169644534588,
|
11312 |
+
"learning_rate": 5.885780885780886e-05,
|
11313 |
+
"loss": 0.0048,
|
11314 |
+
"step": 1615
|
11315 |
+
},
|
11316 |
+
{
|
11317 |
+
"epoch": 0.9417249417249417,
|
11318 |
+
"grad_norm": 0.0008747098036110401,
|
11319 |
+
"learning_rate": 5.8275058275058275e-05,
|
11320 |
+
"loss": 0.003,
|
11321 |
+
"step": 1616
|
11322 |
+
},
|
11323 |
+
{
|
11324 |
+
"epoch": 0.9423076923076923,
|
11325 |
+
"grad_norm": 0.001420872751623392,
|
11326 |
+
"learning_rate": 5.76923076923077e-05,
|
11327 |
+
"loss": 0.0048,
|
11328 |
+
"step": 1617
|
11329 |
+
},
|
11330 |
+
{
|
11331 |
+
"epoch": 0.9428904428904429,
|
11332 |
+
"grad_norm": 0.0008954692748375237,
|
11333 |
+
"learning_rate": 5.7109557109557114e-05,
|
11334 |
+
"loss": 0.0026,
|
11335 |
+
"step": 1618
|
11336 |
+
},
|
11337 |
+
{
|
11338 |
+
"epoch": 0.9434731934731935,
|
11339 |
+
"grad_norm": 0.000952814007177949,
|
11340 |
+
"learning_rate": 5.652680652680653e-05,
|
11341 |
+
"loss": 0.004,
|
11342 |
+
"step": 1619
|
11343 |
+
},
|
11344 |
+
{
|
11345 |
+
"epoch": 0.9440559440559441,
|
11346 |
+
"grad_norm": 0.0011160552967339754,
|
11347 |
+
"learning_rate": 5.5944055944055945e-05,
|
11348 |
+
"loss": 0.0034,
|
11349 |
+
"step": 1620
|
11350 |
+
},
|
11351 |
+
{
|
11352 |
+
"epoch": 0.9446386946386947,
|
11353 |
+
"grad_norm": 0.0009834656957536936,
|
11354 |
+
"learning_rate": 5.536130536130536e-05,
|
11355 |
+
"loss": 0.0042,
|
11356 |
+
"step": 1621
|
11357 |
+
},
|
11358 |
+
{
|
11359 |
+
"epoch": 0.9452214452214452,
|
11360 |
+
"grad_norm": 0.0007495367899537086,
|
11361 |
+
"learning_rate": 5.477855477855478e-05,
|
11362 |
+
"loss": 0.0028,
|
11363 |
+
"step": 1622
|
11364 |
+
},
|
11365 |
+
{
|
11366 |
+
"epoch": 0.9458041958041958,
|
11367 |
+
"grad_norm": 0.0011641675373539329,
|
11368 |
+
"learning_rate": 5.419580419580419e-05,
|
11369 |
+
"loss": 0.004,
|
11370 |
+
"step": 1623
|
11371 |
+
},
|
11372 |
+
{
|
11373 |
+
"epoch": 0.9463869463869464,
|
11374 |
+
"grad_norm": 0.0013844856293871999,
|
11375 |
+
"learning_rate": 5.3613053613053616e-05,
|
11376 |
+
"loss": 0.014,
|
11377 |
+
"step": 1624
|
11378 |
+
},
|
11379 |
+
{
|
11380 |
+
"epoch": 0.946969696969697,
|
11381 |
+
"grad_norm": 0.0008486348669975996,
|
11382 |
+
"learning_rate": 5.303030303030303e-05,
|
11383 |
+
"loss": 0.0029,
|
11384 |
+
"step": 1625
|
11385 |
+
},
|
11386 |
+
{
|
11387 |
+
"epoch": 0.9475524475524476,
|
11388 |
+
"grad_norm": 0.001107304822653532,
|
11389 |
+
"learning_rate": 5.244755244755245e-05,
|
11390 |
+
"loss": 0.0028,
|
11391 |
+
"step": 1626
|
11392 |
+
},
|
11393 |
+
{
|
11394 |
+
"epoch": 0.9481351981351981,
|
11395 |
+
"grad_norm": 0.001052669482305646,
|
11396 |
+
"learning_rate": 5.1864801864801863e-05,
|
11397 |
+
"loss": 0.0035,
|
11398 |
+
"step": 1627
|
11399 |
+
},
|
11400 |
+
{
|
11401 |
+
"epoch": 0.9487179487179487,
|
11402 |
+
"grad_norm": 0.001127295778132975,
|
11403 |
+
"learning_rate": 5.128205128205128e-05,
|
11404 |
+
"loss": 0.0033,
|
11405 |
+
"step": 1628
|
11406 |
+
},
|
11407 |
+
{
|
11408 |
+
"epoch": 0.9493006993006993,
|
11409 |
+
"grad_norm": 0.00099327159114182,
|
11410 |
+
"learning_rate": 5.0699300699300695e-05,
|
11411 |
+
"loss": 0.0029,
|
11412 |
+
"step": 1629
|
11413 |
+
},
|
11414 |
+
{
|
11415 |
+
"epoch": 0.9498834498834499,
|
11416 |
+
"grad_norm": 0.0008510001935064793,
|
11417 |
+
"learning_rate": 5.011655011655012e-05,
|
11418 |
+
"loss": 0.0031,
|
11419 |
+
"step": 1630
|
11420 |
+
},
|
11421 |
+
{
|
11422 |
+
"epoch": 0.9504662004662005,
|
11423 |
+
"grad_norm": 0.0006990230758674443,
|
11424 |
+
"learning_rate": 4.9533799533799534e-05,
|
11425 |
+
"loss": 0.0028,
|
11426 |
+
"step": 1631
|
11427 |
+
},
|
11428 |
+
{
|
11429 |
+
"epoch": 0.951048951048951,
|
11430 |
+
"grad_norm": 0.0008159316494129598,
|
11431 |
+
"learning_rate": 4.895104895104895e-05,
|
11432 |
+
"loss": 0.0028,
|
11433 |
+
"step": 1632
|
11434 |
+
},
|
11435 |
+
{
|
11436 |
+
"epoch": 0.9516317016317016,
|
11437 |
+
"grad_norm": 0.0008230661042034626,
|
11438 |
+
"learning_rate": 4.836829836829837e-05,
|
11439 |
+
"loss": 0.0033,
|
11440 |
+
"step": 1633
|
11441 |
+
},
|
11442 |
+
{
|
11443 |
+
"epoch": 0.9522144522144522,
|
11444 |
+
"grad_norm": 0.0009854782838374376,
|
11445 |
+
"learning_rate": 4.778554778554779e-05,
|
11446 |
+
"loss": 0.0034,
|
11447 |
+
"step": 1634
|
11448 |
+
},
|
11449 |
+
{
|
11450 |
+
"epoch": 0.9527972027972028,
|
11451 |
+
"grad_norm": 0.0010709573980420828,
|
11452 |
+
"learning_rate": 4.7202797202797204e-05,
|
11453 |
+
"loss": 0.0029,
|
11454 |
+
"step": 1635
|
11455 |
+
},
|
11456 |
+
{
|
11457 |
+
"epoch": 0.9533799533799534,
|
11458 |
+
"grad_norm": 0.0010027334792539477,
|
11459 |
+
"learning_rate": 4.662004662004663e-05,
|
11460 |
+
"loss": 0.0033,
|
11461 |
+
"step": 1636
|
11462 |
+
},
|
11463 |
+
{
|
11464 |
+
"epoch": 0.953962703962704,
|
11465 |
+
"grad_norm": 0.0009396614041179419,
|
11466 |
+
"learning_rate": 4.603729603729604e-05,
|
11467 |
+
"loss": 0.0088,
|
11468 |
+
"step": 1637
|
11469 |
+
},
|
11470 |
+
{
|
11471 |
+
"epoch": 0.9545454545454546,
|
11472 |
+
"grad_norm": 0.0007274977397173643,
|
11473 |
+
"learning_rate": 4.545454545454546e-05,
|
11474 |
+
"loss": 0.0032,
|
11475 |
+
"step": 1638
|
11476 |
+
},
|
11477 |
+
{
|
11478 |
+
"epoch": 0.9551282051282052,
|
11479 |
+
"grad_norm": 0.0010892600985243917,
|
11480 |
+
"learning_rate": 4.4871794871794874e-05,
|
11481 |
+
"loss": 0.0042,
|
11482 |
+
"step": 1639
|
11483 |
+
},
|
11484 |
+
{
|
11485 |
+
"epoch": 0.9557109557109557,
|
11486 |
+
"grad_norm": 0.0007885160739533603,
|
11487 |
+
"learning_rate": 4.428904428904429e-05,
|
11488 |
+
"loss": 0.0031,
|
11489 |
+
"step": 1640
|
11490 |
+
},
|
11491 |
+
{
|
11492 |
+
"epoch": 0.9562937062937062,
|
11493 |
+
"grad_norm": 0.0012906527845188975,
|
11494 |
+
"learning_rate": 4.3706293706293706e-05,
|
11495 |
+
"loss": 0.0044,
|
11496 |
+
"step": 1641
|
11497 |
+
},
|
11498 |
+
{
|
11499 |
+
"epoch": 0.9568764568764568,
|
11500 |
+
"grad_norm": 0.0012814976507797837,
|
11501 |
+
"learning_rate": 4.312354312354312e-05,
|
11502 |
+
"loss": 0.0038,
|
11503 |
+
"step": 1642
|
11504 |
+
},
|
11505 |
+
{
|
11506 |
+
"epoch": 0.9574592074592074,
|
11507 |
+
"grad_norm": 0.0011575610842555761,
|
11508 |
+
"learning_rate": 4.2540792540792545e-05,
|
11509 |
+
"loss": 0.004,
|
11510 |
+
"step": 1643
|
11511 |
+
},
|
11512 |
+
{
|
11513 |
+
"epoch": 0.958041958041958,
|
11514 |
+
"grad_norm": 0.0012061079032719135,
|
11515 |
+
"learning_rate": 4.195804195804196e-05,
|
11516 |
+
"loss": 0.0043,
|
11517 |
+
"step": 1644
|
11518 |
+
},
|
11519 |
+
{
|
11520 |
+
"epoch": 0.9586247086247086,
|
11521 |
+
"grad_norm": 0.0008972581708803773,
|
11522 |
+
"learning_rate": 4.1375291375291377e-05,
|
11523 |
+
"loss": 0.0038,
|
11524 |
+
"step": 1645
|
11525 |
+
},
|
11526 |
+
{
|
11527 |
+
"epoch": 0.9592074592074592,
|
11528 |
+
"grad_norm": 0.0008104901062324643,
|
11529 |
+
"learning_rate": 4.079254079254079e-05,
|
11530 |
+
"loss": 0.0028,
|
11531 |
+
"step": 1646
|
11532 |
+
},
|
11533 |
+
{
|
11534 |
+
"epoch": 0.9597902097902098,
|
11535 |
+
"grad_norm": 0.0007863112259656191,
|
11536 |
+
"learning_rate": 4.020979020979021e-05,
|
11537 |
+
"loss": 0.003,
|
11538 |
+
"step": 1647
|
11539 |
+
},
|
11540 |
+
{
|
11541 |
+
"epoch": 0.9603729603729604,
|
11542 |
+
"grad_norm": 0.0006893305107951164,
|
11543 |
+
"learning_rate": 3.9627039627039624e-05,
|
11544 |
+
"loss": 0.0026,
|
11545 |
+
"step": 1648
|
11546 |
+
},
|
11547 |
+
{
|
11548 |
+
"epoch": 0.960955710955711,
|
11549 |
+
"grad_norm": 0.0009396909736096859,
|
11550 |
+
"learning_rate": 3.904428904428905e-05,
|
11551 |
+
"loss": 0.0029,
|
11552 |
+
"step": 1649
|
11553 |
+
},
|
11554 |
+
{
|
11555 |
+
"epoch": 0.9615384615384616,
|
11556 |
+
"grad_norm": 0.0007228578324429691,
|
11557 |
+
"learning_rate": 3.846153846153846e-05,
|
11558 |
+
"loss": 0.0023,
|
11559 |
+
"step": 1650
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 0.9621212121212122,
|
11563 |
+
"grad_norm": 0.0011825780384242535,
|
11564 |
+
"learning_rate": 3.787878787878788e-05,
|
11565 |
+
"loss": 0.0044,
|
11566 |
+
"step": 1651
|
11567 |
+
},
|
11568 |
+
{
|
11569 |
+
"epoch": 0.9627039627039627,
|
11570 |
+
"grad_norm": 0.0007647788152098656,
|
11571 |
+
"learning_rate": 3.7296037296037295e-05,
|
11572 |
+
"loss": 0.003,
|
11573 |
+
"step": 1652
|
11574 |
+
},
|
11575 |
+
{
|
11576 |
+
"epoch": 0.9632867132867133,
|
11577 |
+
"grad_norm": 0.0010042464127764106,
|
11578 |
+
"learning_rate": 3.671328671328671e-05,
|
11579 |
+
"loss": 0.0041,
|
11580 |
+
"step": 1653
|
11581 |
+
},
|
11582 |
+
{
|
11583 |
+
"epoch": 0.9638694638694638,
|
11584 |
+
"grad_norm": 0.0009029952925629914,
|
11585 |
+
"learning_rate": 3.6130536130536126e-05,
|
11586 |
+
"loss": 0.0032,
|
11587 |
+
"step": 1654
|
11588 |
+
},
|
11589 |
+
{
|
11590 |
+
"epoch": 0.9644522144522144,
|
11591 |
+
"grad_norm": 0.0010356158018112183,
|
11592 |
+
"learning_rate": 3.554778554778554e-05,
|
11593 |
+
"loss": 0.0031,
|
11594 |
+
"step": 1655
|
11595 |
+
},
|
11596 |
+
{
|
11597 |
+
"epoch": 0.965034965034965,
|
11598 |
+
"grad_norm": 0.0010094497120007873,
|
11599 |
+
"learning_rate": 3.4965034965034965e-05,
|
11600 |
+
"loss": 0.0043,
|
11601 |
+
"step": 1656
|
11602 |
+
},
|
11603 |
+
{
|
11604 |
+
"epoch": 0.9656177156177156,
|
11605 |
+
"grad_norm": 0.0008370497962459922,
|
11606 |
+
"learning_rate": 3.438228438228439e-05,
|
11607 |
+
"loss": 0.0035,
|
11608 |
+
"step": 1657
|
11609 |
+
},
|
11610 |
+
{
|
11611 |
+
"epoch": 0.9662004662004662,
|
11612 |
+
"grad_norm": 0.000803111121058464,
|
11613 |
+
"learning_rate": 3.3799533799533804e-05,
|
11614 |
+
"loss": 0.0029,
|
11615 |
+
"step": 1658
|
11616 |
+
},
|
11617 |
+
{
|
11618 |
+
"epoch": 0.9667832167832168,
|
11619 |
+
"grad_norm": 0.0011143162846565247,
|
11620 |
+
"learning_rate": 3.321678321678322e-05,
|
11621 |
+
"loss": 0.0035,
|
11622 |
+
"step": 1659
|
11623 |
+
},
|
11624 |
+
{
|
11625 |
+
"epoch": 0.9673659673659674,
|
11626 |
+
"grad_norm": 0.0008031773613765836,
|
11627 |
+
"learning_rate": 3.2634032634032635e-05,
|
11628 |
+
"loss": 0.0031,
|
11629 |
+
"step": 1660
|
11630 |
+
},
|
11631 |
+
{
|
11632 |
+
"epoch": 0.967948717948718,
|
11633 |
+
"grad_norm": 0.0011737227905541658,
|
11634 |
+
"learning_rate": 3.205128205128205e-05,
|
11635 |
+
"loss": 0.004,
|
11636 |
+
"step": 1661
|
11637 |
+
},
|
11638 |
+
{
|
11639 |
+
"epoch": 0.9685314685314685,
|
11640 |
+
"grad_norm": 0.001068048644810915,
|
11641 |
+
"learning_rate": 3.1468531468531474e-05,
|
11642 |
+
"loss": 0.0038,
|
11643 |
+
"step": 1662
|
11644 |
+
},
|
11645 |
+
{
|
11646 |
+
"epoch": 0.9691142191142191,
|
11647 |
+
"grad_norm": 0.0008520625997334719,
|
11648 |
+
"learning_rate": 3.088578088578088e-05,
|
11649 |
+
"loss": 0.0032,
|
11650 |
+
"step": 1663
|
11651 |
+
},
|
11652 |
+
{
|
11653 |
+
"epoch": 0.9696969696969697,
|
11654 |
+
"grad_norm": 0.0006241014925763011,
|
11655 |
+
"learning_rate": 3.0303030303030306e-05,
|
11656 |
+
"loss": 0.0023,
|
11657 |
+
"step": 1664
|
11658 |
+
},
|
11659 |
+
{
|
11660 |
+
"epoch": 0.9702797202797203,
|
11661 |
+
"grad_norm": 0.0013019571779295802,
|
11662 |
+
"learning_rate": 2.972027972027972e-05,
|
11663 |
+
"loss": 0.0048,
|
11664 |
+
"step": 1665
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 0.9708624708624709,
|
11668 |
+
"grad_norm": 0.0011066205333918333,
|
11669 |
+
"learning_rate": 2.9137529137529138e-05,
|
11670 |
+
"loss": 0.0031,
|
11671 |
+
"step": 1666
|
11672 |
+
},
|
11673 |
+
{
|
11674 |
+
"epoch": 0.9714452214452215,
|
11675 |
+
"grad_norm": 0.0010444342624396086,
|
11676 |
+
"learning_rate": 2.8554778554778557e-05,
|
11677 |
+
"loss": 0.0033,
|
11678 |
+
"step": 1667
|
11679 |
+
},
|
11680 |
+
{
|
11681 |
+
"epoch": 0.972027972027972,
|
11682 |
+
"grad_norm": 0.0007983744144439697,
|
11683 |
+
"learning_rate": 2.7972027972027973e-05,
|
11684 |
+
"loss": 0.0032,
|
11685 |
+
"step": 1668
|
11686 |
+
},
|
11687 |
+
{
|
11688 |
+
"epoch": 0.9726107226107226,
|
11689 |
+
"grad_norm": 0.0008884937269613147,
|
11690 |
+
"learning_rate": 2.738927738927739e-05,
|
11691 |
+
"loss": 0.0037,
|
11692 |
+
"step": 1669
|
11693 |
+
},
|
11694 |
+
{
|
11695 |
+
"epoch": 0.9731934731934732,
|
11696 |
+
"grad_norm": 0.0012803805293515325,
|
11697 |
+
"learning_rate": 2.6806526806526808e-05,
|
11698 |
+
"loss": 0.0034,
|
11699 |
+
"step": 1670
|
11700 |
+
},
|
11701 |
+
{
|
11702 |
+
"epoch": 0.9737762237762237,
|
11703 |
+
"grad_norm": 0.0011096763191744685,
|
11704 |
+
"learning_rate": 2.6223776223776224e-05,
|
11705 |
+
"loss": 0.0035,
|
11706 |
+
"step": 1671
|
11707 |
+
},
|
11708 |
+
{
|
11709 |
+
"epoch": 0.9743589743589743,
|
11710 |
+
"grad_norm": 0.0011874607298523188,
|
11711 |
+
"learning_rate": 2.564102564102564e-05,
|
11712 |
+
"loss": 0.0043,
|
11713 |
+
"step": 1672
|
11714 |
+
},
|
11715 |
+
{
|
11716 |
+
"epoch": 0.9749417249417249,
|
11717 |
+
"grad_norm": 0.0009078698931261897,
|
11718 |
+
"learning_rate": 2.505827505827506e-05,
|
11719 |
+
"loss": 0.0027,
|
11720 |
+
"step": 1673
|
11721 |
+
},
|
11722 |
+
{
|
11723 |
+
"epoch": 0.9755244755244755,
|
11724 |
+
"grad_norm": 0.0009188731200993061,
|
11725 |
+
"learning_rate": 2.4475524475524475e-05,
|
11726 |
+
"loss": 0.0046,
|
11727 |
+
"step": 1674
|
11728 |
+
},
|
11729 |
+
{
|
11730 |
+
"epoch": 0.9761072261072261,
|
11731 |
+
"grad_norm": 0.0009996923618018627,
|
11732 |
+
"learning_rate": 2.3892773892773894e-05,
|
11733 |
+
"loss": 0.006,
|
11734 |
+
"step": 1675
|
11735 |
+
},
|
11736 |
+
{
|
11737 |
+
"epoch": 0.9766899766899767,
|
11738 |
+
"grad_norm": 0.0008249058737419546,
|
11739 |
+
"learning_rate": 2.3310023310023313e-05,
|
11740 |
+
"loss": 0.0033,
|
11741 |
+
"step": 1676
|
11742 |
+
},
|
11743 |
+
{
|
11744 |
+
"epoch": 0.9772727272727273,
|
11745 |
+
"grad_norm": 0.0008003967232070863,
|
11746 |
+
"learning_rate": 2.272727272727273e-05,
|
11747 |
+
"loss": 0.003,
|
11748 |
+
"step": 1677
|
11749 |
+
},
|
11750 |
+
{
|
11751 |
+
"epoch": 0.9778554778554779,
|
11752 |
+
"grad_norm": 0.0014063924318179488,
|
11753 |
+
"learning_rate": 2.2144522144522145e-05,
|
11754 |
+
"loss": 0.0036,
|
11755 |
+
"step": 1678
|
11756 |
+
},
|
11757 |
+
{
|
11758 |
+
"epoch": 0.9784382284382285,
|
11759 |
+
"grad_norm": 0.0008004964329302311,
|
11760 |
+
"learning_rate": 2.156177156177156e-05,
|
11761 |
+
"loss": 0.0028,
|
11762 |
+
"step": 1679
|
11763 |
+
},
|
11764 |
+
{
|
11765 |
+
"epoch": 0.9790209790209791,
|
11766 |
+
"grad_norm": 0.0009626666433177888,
|
11767 |
+
"learning_rate": 2.097902097902098e-05,
|
11768 |
+
"loss": 0.0027,
|
11769 |
+
"step": 1680
|
11770 |
+
},
|
11771 |
+
{
|
11772 |
+
"epoch": 0.9796037296037297,
|
11773 |
+
"grad_norm": 0.0008628361392766237,
|
11774 |
+
"learning_rate": 2.0396270396270396e-05,
|
11775 |
+
"loss": 0.0027,
|
11776 |
+
"step": 1681
|
11777 |
+
},
|
11778 |
+
{
|
11779 |
+
"epoch": 0.9801864801864801,
|
11780 |
+
"grad_norm": 0.0008874722989276052,
|
11781 |
+
"learning_rate": 1.9813519813519812e-05,
|
11782 |
+
"loss": 0.0042,
|
11783 |
+
"step": 1682
|
11784 |
+
},
|
11785 |
+
{
|
11786 |
+
"epoch": 0.9807692307692307,
|
11787 |
+
"grad_norm": 0.0015157037414610386,
|
11788 |
+
"learning_rate": 1.923076923076923e-05,
|
11789 |
+
"loss": 0.0052,
|
11790 |
+
"step": 1683
|
11791 |
+
},
|
11792 |
+
{
|
11793 |
+
"epoch": 0.9813519813519813,
|
11794 |
+
"grad_norm": 0.0011951492633670568,
|
11795 |
+
"learning_rate": 1.8648018648018647e-05,
|
11796 |
+
"loss": 0.0049,
|
11797 |
+
"step": 1684
|
11798 |
+
},
|
11799 |
+
{
|
11800 |
+
"epoch": 0.9819347319347319,
|
11801 |
+
"grad_norm": 0.0009483549511060119,
|
11802 |
+
"learning_rate": 1.8065268065268063e-05,
|
11803 |
+
"loss": 0.0026,
|
11804 |
+
"step": 1685
|
11805 |
+
},
|
11806 |
+
{
|
11807 |
+
"epoch": 0.9825174825174825,
|
11808 |
+
"grad_norm": 0.000737398280762136,
|
11809 |
+
"learning_rate": 1.7482517482517483e-05,
|
11810 |
+
"loss": 0.0027,
|
11811 |
+
"step": 1686
|
11812 |
+
},
|
11813 |
+
{
|
11814 |
+
"epoch": 0.9831002331002331,
|
11815 |
+
"grad_norm": 0.001031695050187409,
|
11816 |
+
"learning_rate": 1.6899766899766902e-05,
|
11817 |
+
"loss": 0.0025,
|
11818 |
+
"step": 1687
|
11819 |
+
},
|
11820 |
+
{
|
11821 |
+
"epoch": 0.9836829836829837,
|
11822 |
+
"grad_norm": 0.0009539015591144562,
|
11823 |
+
"learning_rate": 1.6317016317016318e-05,
|
11824 |
+
"loss": 0.0027,
|
11825 |
+
"step": 1688
|
11826 |
+
},
|
11827 |
+
{
|
11828 |
+
"epoch": 0.9842657342657343,
|
11829 |
+
"grad_norm": 0.0007205713191069663,
|
11830 |
+
"learning_rate": 1.5734265734265737e-05,
|
11831 |
+
"loss": 0.0026,
|
11832 |
+
"step": 1689
|
11833 |
+
},
|
11834 |
+
{
|
11835 |
+
"epoch": 0.9848484848484849,
|
11836 |
+
"grad_norm": 0.0009316445211879909,
|
11837 |
+
"learning_rate": 1.5151515151515153e-05,
|
11838 |
+
"loss": 0.0032,
|
11839 |
+
"step": 1690
|
11840 |
+
},
|
11841 |
+
{
|
11842 |
+
"epoch": 0.9854312354312355,
|
11843 |
+
"grad_norm": 0.0010426414664834738,
|
11844 |
+
"learning_rate": 1.4568764568764569e-05,
|
11845 |
+
"loss": 0.0041,
|
11846 |
+
"step": 1691
|
11847 |
+
},
|
11848 |
+
{
|
11849 |
+
"epoch": 0.986013986013986,
|
11850 |
+
"grad_norm": 0.0006400300771929324,
|
11851 |
+
"learning_rate": 1.3986013986013986e-05,
|
11852 |
+
"loss": 0.0024,
|
11853 |
+
"step": 1692
|
11854 |
+
},
|
11855 |
+
{
|
11856 |
+
"epoch": 0.9865967365967366,
|
11857 |
+
"grad_norm": 0.0009383864235132933,
|
11858 |
+
"learning_rate": 1.3403263403263404e-05,
|
11859 |
+
"loss": 0.0029,
|
11860 |
+
"step": 1693
|
11861 |
+
},
|
11862 |
+
{
|
11863 |
+
"epoch": 0.9871794871794872,
|
11864 |
+
"grad_norm": 0.0008284033392556012,
|
11865 |
+
"learning_rate": 1.282051282051282e-05,
|
11866 |
+
"loss": 0.0029,
|
11867 |
+
"step": 1694
|
11868 |
+
},
|
11869 |
+
{
|
11870 |
+
"epoch": 0.9877622377622378,
|
11871 |
+
"grad_norm": 0.0010577579960227013,
|
11872 |
+
"learning_rate": 1.2237762237762237e-05,
|
11873 |
+
"loss": 0.0052,
|
11874 |
+
"step": 1695
|
11875 |
+
},
|
11876 |
+
{
|
11877 |
+
"epoch": 0.9883449883449883,
|
11878 |
+
"grad_norm": 0.0009795171208679676,
|
11879 |
+
"learning_rate": 1.1655011655011657e-05,
|
11880 |
+
"loss": 0.0046,
|
11881 |
+
"step": 1696
|
11882 |
+
},
|
11883 |
+
{
|
11884 |
+
"epoch": 0.9889277389277389,
|
11885 |
+
"grad_norm": 0.0007197922095656395,
|
11886 |
+
"learning_rate": 1.1072261072261073e-05,
|
11887 |
+
"loss": 0.0031,
|
11888 |
+
"step": 1697
|
11889 |
+
},
|
11890 |
+
{
|
11891 |
+
"epoch": 0.9895104895104895,
|
11892 |
+
"grad_norm": 0.0010326344054192305,
|
11893 |
+
"learning_rate": 1.048951048951049e-05,
|
11894 |
+
"loss": 0.0028,
|
11895 |
+
"step": 1698
|
11896 |
+
},
|
11897 |
+
{
|
11898 |
+
"epoch": 0.9900932400932401,
|
11899 |
+
"grad_norm": 0.0008666233043186367,
|
11900 |
+
"learning_rate": 9.906759906759906e-06,
|
11901 |
+
"loss": 0.003,
|
11902 |
+
"step": 1699
|
11903 |
+
},
|
11904 |
+
{
|
11905 |
+
"epoch": 0.9906759906759907,
|
11906 |
+
"grad_norm": 0.0008774434681981802,
|
11907 |
+
"learning_rate": 9.324009324009324e-06,
|
11908 |
+
"loss": 0.003,
|
11909 |
+
"step": 1700
|
11910 |
}
|
11911 |
],
|
11912 |
"logging_steps": 1,
|
|
|
11926 |
"attributes": {}
|
11927 |
}
|
11928 |
},
|
11929 |
+
"total_flos": 7.109695330572042e+19,
|
11930 |
"train_batch_size": 4,
|
11931 |
"trial_name": null,
|
11932 |
"trial_params": null
|