Initial commit
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- scheduler.pt +1 -1
- trainer_state.json +483 -3
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 655348487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d55c7235936d0242cf392bec1a2ea0817b12aa635dec8779c8a4eacdb4938ed
|
3 |
size 655348487
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 333975623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:492888ed30a69737bf3290cd99f0992bde40cc710fa370b0ccded98a91f294b8
|
3 |
size 333975623
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f51219b2cb4bc18c5893bf94b3f2834354e493062268fb89b16464b4a442a743
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5286,11 +5286,491 @@
|
|
5286 |
"learning_rate": 4.3991797676008206e-05,
|
5287 |
"loss": 1.9818,
|
5288 |
"step": 440000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5289 |
}
|
5290 |
],
|
5291 |
"max_steps": 3658000,
|
5292 |
"num_train_epochs": 1000,
|
5293 |
-
"total_flos":
|
5294 |
"trial_name": null,
|
5295 |
"trial_params": null
|
5296 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 131.21924548933845,
|
5 |
+
"global_step": 480000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5286 |
"learning_rate": 4.3991797676008206e-05,
|
5287 |
"loss": 1.9818,
|
5288 |
"step": 440000
|
5289 |
+
},
|
5290 |
+
{
|
5291 |
+
"epoch": 120.42,
|
5292 |
+
"learning_rate": 4.398496240601504e-05,
|
5293 |
+
"loss": 1.9914,
|
5294 |
+
"step": 440500
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 120.56,
|
5298 |
+
"learning_rate": 4.3978127136021876e-05,
|
5299 |
+
"loss": 1.9949,
|
5300 |
+
"step": 441000
|
5301 |
+
},
|
5302 |
+
{
|
5303 |
+
"epoch": 120.69,
|
5304 |
+
"learning_rate": 4.397129186602871e-05,
|
5305 |
+
"loss": 2.0031,
|
5306 |
+
"step": 441500
|
5307 |
+
},
|
5308 |
+
{
|
5309 |
+
"epoch": 120.83,
|
5310 |
+
"learning_rate": 4.3964456596035545e-05,
|
5311 |
+
"loss": 2.0048,
|
5312 |
+
"step": 442000
|
5313 |
+
},
|
5314 |
+
{
|
5315 |
+
"epoch": 120.97,
|
5316 |
+
"learning_rate": 4.395762132604238e-05,
|
5317 |
+
"loss": 2.018,
|
5318 |
+
"step": 442500
|
5319 |
+
},
|
5320 |
+
{
|
5321 |
+
"epoch": 121.1,
|
5322 |
+
"learning_rate": 4.3950786056049215e-05,
|
5323 |
+
"loss": 1.9803,
|
5324 |
+
"step": 443000
|
5325 |
+
},
|
5326 |
+
{
|
5327 |
+
"epoch": 121.24,
|
5328 |
+
"learning_rate": 4.3943950786056056e-05,
|
5329 |
+
"loss": 1.9769,
|
5330 |
+
"step": 443500
|
5331 |
+
},
|
5332 |
+
{
|
5333 |
+
"epoch": 121.38,
|
5334 |
+
"learning_rate": 4.393711551606289e-05,
|
5335 |
+
"loss": 1.9786,
|
5336 |
+
"step": 444000
|
5337 |
+
},
|
5338 |
+
{
|
5339 |
+
"epoch": 121.51,
|
5340 |
+
"learning_rate": 4.393028024606972e-05,
|
5341 |
+
"loss": 1.99,
|
5342 |
+
"step": 444500
|
5343 |
+
},
|
5344 |
+
{
|
5345 |
+
"epoch": 121.65,
|
5346 |
+
"learning_rate": 4.3923444976076554e-05,
|
5347 |
+
"loss": 1.9929,
|
5348 |
+
"step": 445000
|
5349 |
+
},
|
5350 |
+
{
|
5351 |
+
"epoch": 121.79,
|
5352 |
+
"learning_rate": 4.3916609706083396e-05,
|
5353 |
+
"loss": 2.0034,
|
5354 |
+
"step": 445500
|
5355 |
+
},
|
5356 |
+
{
|
5357 |
+
"epoch": 121.92,
|
5358 |
+
"learning_rate": 4.390977443609023e-05,
|
5359 |
+
"loss": 2.0121,
|
5360 |
+
"step": 446000
|
5361 |
+
},
|
5362 |
+
{
|
5363 |
+
"epoch": 122.06,
|
5364 |
+
"learning_rate": 4.3902939166097065e-05,
|
5365 |
+
"loss": 1.9899,
|
5366 |
+
"step": 446500
|
5367 |
+
},
|
5368 |
+
{
|
5369 |
+
"epoch": 122.2,
|
5370 |
+
"learning_rate": 4.389610389610389e-05,
|
5371 |
+
"loss": 1.9649,
|
5372 |
+
"step": 447000
|
5373 |
+
},
|
5374 |
+
{
|
5375 |
+
"epoch": 122.33,
|
5376 |
+
"learning_rate": 4.3889268626110735e-05,
|
5377 |
+
"loss": 1.9764,
|
5378 |
+
"step": 447500
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 122.47,
|
5382 |
+
"learning_rate": 4.388243335611757e-05,
|
5383 |
+
"loss": 1.9795,
|
5384 |
+
"step": 448000
|
5385 |
+
},
|
5386 |
+
{
|
5387 |
+
"epoch": 122.61,
|
5388 |
+
"learning_rate": 4.3875598086124404e-05,
|
5389 |
+
"loss": 1.9988,
|
5390 |
+
"step": 448500
|
5391 |
+
},
|
5392 |
+
{
|
5393 |
+
"epoch": 122.74,
|
5394 |
+
"learning_rate": 4.386876281613124e-05,
|
5395 |
+
"loss": 1.9975,
|
5396 |
+
"step": 449000
|
5397 |
+
},
|
5398 |
+
{
|
5399 |
+
"epoch": 122.88,
|
5400 |
+
"learning_rate": 4.3861927546138074e-05,
|
5401 |
+
"loss": 2.007,
|
5402 |
+
"step": 449500
|
5403 |
+
},
|
5404 |
+
{
|
5405 |
+
"epoch": 123.02,
|
5406 |
+
"learning_rate": 4.385509227614491e-05,
|
5407 |
+
"loss": 2.0015,
|
5408 |
+
"step": 450000
|
5409 |
+
},
|
5410 |
+
{
|
5411 |
+
"epoch": 123.15,
|
5412 |
+
"learning_rate": 4.3848257006151743e-05,
|
5413 |
+
"loss": 1.9606,
|
5414 |
+
"step": 450500
|
5415 |
+
},
|
5416 |
+
{
|
5417 |
+
"epoch": 123.29,
|
5418 |
+
"learning_rate": 4.384142173615858e-05,
|
5419 |
+
"loss": 1.9646,
|
5420 |
+
"step": 451000
|
5421 |
+
},
|
5422 |
+
{
|
5423 |
+
"epoch": 123.43,
|
5424 |
+
"learning_rate": 4.383458646616542e-05,
|
5425 |
+
"loss": 1.9758,
|
5426 |
+
"step": 451500
|
5427 |
+
},
|
5428 |
+
{
|
5429 |
+
"epoch": 123.56,
|
5430 |
+
"learning_rate": 4.382775119617225e-05,
|
5431 |
+
"loss": 1.9893,
|
5432 |
+
"step": 452000
|
5433 |
+
},
|
5434 |
+
{
|
5435 |
+
"epoch": 123.7,
|
5436 |
+
"learning_rate": 4.382091592617908e-05,
|
5437 |
+
"loss": 1.9943,
|
5438 |
+
"step": 452500
|
5439 |
+
},
|
5440 |
+
{
|
5441 |
+
"epoch": 123.84,
|
5442 |
+
"learning_rate": 4.3814080656185924e-05,
|
5443 |
+
"loss": 1.9997,
|
5444 |
+
"step": 453000
|
5445 |
+
},
|
5446 |
+
{
|
5447 |
+
"epoch": 123.97,
|
5448 |
+
"learning_rate": 4.380724538619276e-05,
|
5449 |
+
"loss": 2.0071,
|
5450 |
+
"step": 453500
|
5451 |
+
},
|
5452 |
+
{
|
5453 |
+
"epoch": 124.11,
|
5454 |
+
"learning_rate": 4.3800410116199594e-05,
|
5455 |
+
"loss": 1.9643,
|
5456 |
+
"step": 454000
|
5457 |
+
},
|
5458 |
+
{
|
5459 |
+
"epoch": 124.25,
|
5460 |
+
"learning_rate": 4.379357484620643e-05,
|
5461 |
+
"loss": 1.9644,
|
5462 |
+
"step": 454500
|
5463 |
+
},
|
5464 |
+
{
|
5465 |
+
"epoch": 124.38,
|
5466 |
+
"learning_rate": 4.378673957621326e-05,
|
5467 |
+
"loss": 1.9718,
|
5468 |
+
"step": 455000
|
5469 |
+
},
|
5470 |
+
{
|
5471 |
+
"epoch": 124.52,
|
5472 |
+
"learning_rate": 4.37799043062201e-05,
|
5473 |
+
"loss": 1.9801,
|
5474 |
+
"step": 455500
|
5475 |
+
},
|
5476 |
+
{
|
5477 |
+
"epoch": 124.66,
|
5478 |
+
"learning_rate": 4.377306903622693e-05,
|
5479 |
+
"loss": 1.9836,
|
5480 |
+
"step": 456000
|
5481 |
+
},
|
5482 |
+
{
|
5483 |
+
"epoch": 124.79,
|
5484 |
+
"learning_rate": 4.376623376623377e-05,
|
5485 |
+
"loss": 1.9961,
|
5486 |
+
"step": 456500
|
5487 |
+
},
|
5488 |
+
{
|
5489 |
+
"epoch": 124.93,
|
5490 |
+
"learning_rate": 4.37593984962406e-05,
|
5491 |
+
"loss": 1.9996,
|
5492 |
+
"step": 457000
|
5493 |
+
},
|
5494 |
+
{
|
5495 |
+
"epoch": 125.07,
|
5496 |
+
"learning_rate": 4.375256322624744e-05,
|
5497 |
+
"loss": 1.9744,
|
5498 |
+
"step": 457500
|
5499 |
+
},
|
5500 |
+
{
|
5501 |
+
"epoch": 125.21,
|
5502 |
+
"learning_rate": 4.374572795625427e-05,
|
5503 |
+
"loss": 1.9558,
|
5504 |
+
"step": 458000
|
5505 |
+
},
|
5506 |
+
{
|
5507 |
+
"epoch": 125.34,
|
5508 |
+
"learning_rate": 4.373889268626111e-05,
|
5509 |
+
"loss": 1.9701,
|
5510 |
+
"step": 458500
|
5511 |
+
},
|
5512 |
+
{
|
5513 |
+
"epoch": 125.48,
|
5514 |
+
"learning_rate": 4.373205741626795e-05,
|
5515 |
+
"loss": 1.971,
|
5516 |
+
"step": 459000
|
5517 |
+
},
|
5518 |
+
{
|
5519 |
+
"epoch": 125.62,
|
5520 |
+
"learning_rate": 4.372522214627478e-05,
|
5521 |
+
"loss": 1.9868,
|
5522 |
+
"step": 459500
|
5523 |
+
},
|
5524 |
+
{
|
5525 |
+
"epoch": 125.75,
|
5526 |
+
"learning_rate": 4.371838687628161e-05,
|
5527 |
+
"loss": 1.9827,
|
5528 |
+
"step": 460000
|
5529 |
+
},
|
5530 |
+
{
|
5531 |
+
"epoch": 125.89,
|
5532 |
+
"learning_rate": 4.3711551606288446e-05,
|
5533 |
+
"loss": 1.9944,
|
5534 |
+
"step": 460500
|
5535 |
+
},
|
5536 |
+
{
|
5537 |
+
"epoch": 126.03,
|
5538 |
+
"learning_rate": 4.370471633629529e-05,
|
5539 |
+
"loss": 1.9864,
|
5540 |
+
"step": 461000
|
5541 |
+
},
|
5542 |
+
{
|
5543 |
+
"epoch": 126.16,
|
5544 |
+
"learning_rate": 4.369788106630212e-05,
|
5545 |
+
"loss": 1.9455,
|
5546 |
+
"step": 461500
|
5547 |
+
},
|
5548 |
+
{
|
5549 |
+
"epoch": 126.3,
|
5550 |
+
"learning_rate": 4.369104579630896e-05,
|
5551 |
+
"loss": 1.9598,
|
5552 |
+
"step": 462000
|
5553 |
+
},
|
5554 |
+
{
|
5555 |
+
"epoch": 126.44,
|
5556 |
+
"learning_rate": 4.368421052631579e-05,
|
5557 |
+
"loss": 1.9695,
|
5558 |
+
"step": 462500
|
5559 |
+
},
|
5560 |
+
{
|
5561 |
+
"epoch": 126.57,
|
5562 |
+
"learning_rate": 4.367737525632263e-05,
|
5563 |
+
"loss": 1.9791,
|
5564 |
+
"step": 463000
|
5565 |
+
},
|
5566 |
+
{
|
5567 |
+
"epoch": 126.71,
|
5568 |
+
"learning_rate": 4.367053998632946e-05,
|
5569 |
+
"loss": 1.9733,
|
5570 |
+
"step": 463500
|
5571 |
+
},
|
5572 |
+
{
|
5573 |
+
"epoch": 126.85,
|
5574 |
+
"learning_rate": 4.3663704716336296e-05,
|
5575 |
+
"loss": 1.9899,
|
5576 |
+
"step": 464000
|
5577 |
+
},
|
5578 |
+
{
|
5579 |
+
"epoch": 126.98,
|
5580 |
+
"learning_rate": 4.365686944634314e-05,
|
5581 |
+
"loss": 1.9975,
|
5582 |
+
"step": 464500
|
5583 |
+
},
|
5584 |
+
{
|
5585 |
+
"epoch": 127.12,
|
5586 |
+
"learning_rate": 4.3650034176349966e-05,
|
5587 |
+
"loss": 1.9522,
|
5588 |
+
"step": 465000
|
5589 |
+
},
|
5590 |
+
{
|
5591 |
+
"epoch": 127.26,
|
5592 |
+
"learning_rate": 4.36431989063568e-05,
|
5593 |
+
"loss": 1.9489,
|
5594 |
+
"step": 465500
|
5595 |
+
},
|
5596 |
+
{
|
5597 |
+
"epoch": 127.39,
|
5598 |
+
"learning_rate": 4.3636363636363636e-05,
|
5599 |
+
"loss": 1.961,
|
5600 |
+
"step": 466000
|
5601 |
+
},
|
5602 |
+
{
|
5603 |
+
"epoch": 127.53,
|
5604 |
+
"learning_rate": 4.362952836637048e-05,
|
5605 |
+
"loss": 1.9685,
|
5606 |
+
"step": 466500
|
5607 |
+
},
|
5608 |
+
{
|
5609 |
+
"epoch": 127.67,
|
5610 |
+
"learning_rate": 4.362269309637731e-05,
|
5611 |
+
"loss": 1.9775,
|
5612 |
+
"step": 467000
|
5613 |
+
},
|
5614 |
+
{
|
5615 |
+
"epoch": 127.8,
|
5616 |
+
"learning_rate": 4.361585782638414e-05,
|
5617 |
+
"loss": 1.983,
|
5618 |
+
"step": 467500
|
5619 |
+
},
|
5620 |
+
{
|
5621 |
+
"epoch": 127.94,
|
5622 |
+
"learning_rate": 4.3609022556390975e-05,
|
5623 |
+
"loss": 1.9919,
|
5624 |
+
"step": 468000
|
5625 |
+
},
|
5626 |
+
{
|
5627 |
+
"epoch": 128.08,
|
5628 |
+
"learning_rate": 4.3602187286397816e-05,
|
5629 |
+
"loss": 1.9609,
|
5630 |
+
"step": 468500
|
5631 |
+
},
|
5632 |
+
{
|
5633 |
+
"epoch": 128.21,
|
5634 |
+
"learning_rate": 4.359535201640465e-05,
|
5635 |
+
"loss": 1.9508,
|
5636 |
+
"step": 469000
|
5637 |
+
},
|
5638 |
+
{
|
5639 |
+
"epoch": 128.35,
|
5640 |
+
"learning_rate": 4.3588516746411486e-05,
|
5641 |
+
"loss": 1.9594,
|
5642 |
+
"step": 469500
|
5643 |
+
},
|
5644 |
+
{
|
5645 |
+
"epoch": 128.49,
|
5646 |
+
"learning_rate": 4.358168147641832e-05,
|
5647 |
+
"loss": 1.9627,
|
5648 |
+
"step": 470000
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 128.62,
|
5652 |
+
"learning_rate": 4.3574846206425156e-05,
|
5653 |
+
"loss": 1.9675,
|
5654 |
+
"step": 470500
|
5655 |
+
},
|
5656 |
+
{
|
5657 |
+
"epoch": 128.76,
|
5658 |
+
"learning_rate": 4.356801093643199e-05,
|
5659 |
+
"loss": 1.9777,
|
5660 |
+
"step": 471000
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 128.9,
|
5664 |
+
"learning_rate": 4.3561175666438825e-05,
|
5665 |
+
"loss": 1.9811,
|
5666 |
+
"step": 471500
|
5667 |
+
},
|
5668 |
+
{
|
5669 |
+
"epoch": 129.03,
|
5670 |
+
"learning_rate": 4.355434039644567e-05,
|
5671 |
+
"loss": 1.975,
|
5672 |
+
"step": 472000
|
5673 |
+
},
|
5674 |
+
{
|
5675 |
+
"epoch": 129.17,
|
5676 |
+
"learning_rate": 4.3547505126452495e-05,
|
5677 |
+
"loss": 1.9393,
|
5678 |
+
"step": 472500
|
5679 |
+
},
|
5680 |
+
{
|
5681 |
+
"epoch": 129.31,
|
5682 |
+
"learning_rate": 4.354066985645933e-05,
|
5683 |
+
"loss": 1.9465,
|
5684 |
+
"step": 473000
|
5685 |
+
},
|
5686 |
+
{
|
5687 |
+
"epoch": 129.44,
|
5688 |
+
"learning_rate": 4.3533834586466164e-05,
|
5689 |
+
"loss": 1.9567,
|
5690 |
+
"step": 473500
|
5691 |
+
},
|
5692 |
+
{
|
5693 |
+
"epoch": 129.58,
|
5694 |
+
"learning_rate": 4.3526999316473006e-05,
|
5695 |
+
"loss": 1.963,
|
5696 |
+
"step": 474000
|
5697 |
+
},
|
5698 |
+
{
|
5699 |
+
"epoch": 129.72,
|
5700 |
+
"learning_rate": 4.352016404647984e-05,
|
5701 |
+
"loss": 1.9731,
|
5702 |
+
"step": 474500
|
5703 |
+
},
|
5704 |
+
{
|
5705 |
+
"epoch": 129.85,
|
5706 |
+
"learning_rate": 4.3513328776486675e-05,
|
5707 |
+
"loss": 1.9787,
|
5708 |
+
"step": 475000
|
5709 |
+
},
|
5710 |
+
{
|
5711 |
+
"epoch": 129.99,
|
5712 |
+
"learning_rate": 4.3506493506493503e-05,
|
5713 |
+
"loss": 1.9779,
|
5714 |
+
"step": 475500
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 130.13,
|
5718 |
+
"learning_rate": 4.3499658236500345e-05,
|
5719 |
+
"loss": 1.9351,
|
5720 |
+
"step": 476000
|
5721 |
+
},
|
5722 |
+
{
|
5723 |
+
"epoch": 130.26,
|
5724 |
+
"learning_rate": 4.349282296650718e-05,
|
5725 |
+
"loss": 1.9439,
|
5726 |
+
"step": 476500
|
5727 |
+
},
|
5728 |
+
{
|
5729 |
+
"epoch": 130.4,
|
5730 |
+
"learning_rate": 4.3485987696514015e-05,
|
5731 |
+
"loss": 1.953,
|
5732 |
+
"step": 477000
|
5733 |
+
},
|
5734 |
+
{
|
5735 |
+
"epoch": 130.54,
|
5736 |
+
"learning_rate": 4.347915242652085e-05,
|
5737 |
+
"loss": 1.9584,
|
5738 |
+
"step": 477500
|
5739 |
+
},
|
5740 |
+
{
|
5741 |
+
"epoch": 130.67,
|
5742 |
+
"learning_rate": 4.3472317156527684e-05,
|
5743 |
+
"loss": 1.9635,
|
5744 |
+
"step": 478000
|
5745 |
+
},
|
5746 |
+
{
|
5747 |
+
"epoch": 130.81,
|
5748 |
+
"learning_rate": 4.346548188653452e-05,
|
5749 |
+
"loss": 1.9722,
|
5750 |
+
"step": 478500
|
5751 |
+
},
|
5752 |
+
{
|
5753 |
+
"epoch": 130.95,
|
5754 |
+
"learning_rate": 4.3458646616541354e-05,
|
5755 |
+
"loss": 1.9806,
|
5756 |
+
"step": 479000
|
5757 |
+
},
|
5758 |
+
{
|
5759 |
+
"epoch": 131.08,
|
5760 |
+
"learning_rate": 4.345181134654819e-05,
|
5761 |
+
"loss": 1.9505,
|
5762 |
+
"step": 479500
|
5763 |
+
},
|
5764 |
+
{
|
5765 |
+
"epoch": 131.22,
|
5766 |
+
"learning_rate": 4.344497607655503e-05,
|
5767 |
+
"loss": 1.9355,
|
5768 |
+
"step": 480000
|
5769 |
}
|
5770 |
],
|
5771 |
"max_steps": 3658000,
|
5772 |
"num_train_epochs": 1000,
|
5773 |
+
"total_flos": 966230618169802752,
|
5774 |
"trial_name": null,
|
5775 |
"trial_params": null
|
5776 |
}
|