Your name
committed on
Commit
•
ebfc8f8
1
Parent(s):
9177a94
Initial commit
Browse files
- config.json +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- scheduler.pt +1 -1
- trainer_state.json +603 -3
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/content/drive/MyDrive/ggpt2/checkpoint-
|
3 |
"_num_labels": 1,
|
4 |
"activation_function": "gelu_new",
|
5 |
"architectures": [
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/content/drive/MyDrive/ggpt2/checkpoint-360000",
|
3 |
"_num_labels": 1,
|
4 |
"activation_function": "gelu_new",
|
5 |
"architectures": [
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 655348487
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25aad45ca03f3ac4902671d5f30fcb071be626a4dddaf248bee4b6f553ec9a29
|
3 |
size 655348487
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 333975623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7c4d6d6d56f177aadbd34ba09347ff85284717d7a559aac93a6e51fad2a1d41
|
3 |
size 333975623
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:617b3a41c1dd383619fbebee52cdee21b1f69f1f07f755cce1d5f1686ff9115a
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4326,11 +4326,611 @@
|
|
4326 |
"learning_rate": 4.5085440874914565e-05,
|
4327 |
"loss": 2.0832,
|
4328 |
"step": 360000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4329 |
}
|
4330 |
],
|
4331 |
"max_steps": 3658000,
|
4332 |
"num_train_epochs": 1000,
|
4333 |
-
"total_flos":
|
4334 |
"trial_name": null,
|
4335 |
"trial_params": null
|
4336 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 112.08310552214324,
|
5 |
+
"global_step": 410000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4326 |
"learning_rate": 4.5085440874914565e-05,
|
4327 |
"loss": 2.0832,
|
4328 |
"step": 360000
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 98.55,
|
4332 |
+
"learning_rate": 4.50786056049214e-05,
|
4333 |
+
"loss": 0.0029,
|
4334 |
+
"step": 360500
|
4335 |
+
},
|
4336 |
+
{
|
4337 |
+
"epoch": 98.69,
|
4338 |
+
"learning_rate": 4.5071770334928234e-05,
|
4339 |
+
"loss": 2.0982,
|
4340 |
+
"step": 361000
|
4341 |
+
},
|
4342 |
+
{
|
4343 |
+
"epoch": 98.82,
|
4344 |
+
"learning_rate": 4.506493506493506e-05,
|
4345 |
+
"loss": 2.0996,
|
4346 |
+
"step": 361500
|
4347 |
+
},
|
4348 |
+
{
|
4349 |
+
"epoch": 98.96,
|
4350 |
+
"learning_rate": 4.5058099794941904e-05,
|
4351 |
+
"loss": 2.1084,
|
4352 |
+
"step": 362000
|
4353 |
+
},
|
4354 |
+
{
|
4355 |
+
"epoch": 99.1,
|
4356 |
+
"learning_rate": 4.505126452494874e-05,
|
4357 |
+
"loss": 2.0753,
|
4358 |
+
"step": 362500
|
4359 |
+
},
|
4360 |
+
{
|
4361 |
+
"epoch": 99.23,
|
4362 |
+
"learning_rate": 4.504442925495557e-05,
|
4363 |
+
"loss": 2.0645,
|
4364 |
+
"step": 363000
|
4365 |
+
},
|
4366 |
+
{
|
4367 |
+
"epoch": 99.37,
|
4368 |
+
"learning_rate": 4.503759398496241e-05,
|
4369 |
+
"loss": 2.0722,
|
4370 |
+
"step": 363500
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 99.51,
|
4374 |
+
"learning_rate": 4.503075871496924e-05,
|
4375 |
+
"loss": 2.0849,
|
4376 |
+
"step": 364000
|
4377 |
+
},
|
4378 |
+
{
|
4379 |
+
"epoch": 99.64,
|
4380 |
+
"learning_rate": 4.502392344497608e-05,
|
4381 |
+
"loss": 2.0903,
|
4382 |
+
"step": 364500
|
4383 |
+
},
|
4384 |
+
{
|
4385 |
+
"epoch": 99.78,
|
4386 |
+
"learning_rate": 4.501708817498291e-05,
|
4387 |
+
"loss": 2.0973,
|
4388 |
+
"step": 365000
|
4389 |
+
},
|
4390 |
+
{
|
4391 |
+
"epoch": 99.92,
|
4392 |
+
"learning_rate": 4.501025290498975e-05,
|
4393 |
+
"loss": 2.1043,
|
4394 |
+
"step": 365500
|
4395 |
+
},
|
4396 |
+
{
|
4397 |
+
"epoch": 100.05,
|
4398 |
+
"learning_rate": 4.500341763499659e-05,
|
4399 |
+
"loss": 2.0809,
|
4400 |
+
"step": 366000
|
4401 |
+
},
|
4402 |
+
{
|
4403 |
+
"epoch": 100.19,
|
4404 |
+
"learning_rate": 4.499658236500342e-05,
|
4405 |
+
"loss": 2.0633,
|
4406 |
+
"step": 366500
|
4407 |
+
},
|
4408 |
+
{
|
4409 |
+
"epoch": 100.33,
|
4410 |
+
"learning_rate": 4.498974709501025e-05,
|
4411 |
+
"loss": 2.0694,
|
4412 |
+
"step": 367000
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 100.46,
|
4416 |
+
"learning_rate": 4.4982911825017086e-05,
|
4417 |
+
"loss": 2.0763,
|
4418 |
+
"step": 367500
|
4419 |
+
},
|
4420 |
+
{
|
4421 |
+
"epoch": 100.6,
|
4422 |
+
"learning_rate": 4.497607655502393e-05,
|
4423 |
+
"loss": 2.0849,
|
4424 |
+
"step": 368000
|
4425 |
+
},
|
4426 |
+
{
|
4427 |
+
"epoch": 100.74,
|
4428 |
+
"learning_rate": 4.496924128503076e-05,
|
4429 |
+
"loss": 2.0855,
|
4430 |
+
"step": 368500
|
4431 |
+
},
|
4432 |
+
{
|
4433 |
+
"epoch": 100.87,
|
4434 |
+
"learning_rate": 4.49624060150376e-05,
|
4435 |
+
"loss": 2.0935,
|
4436 |
+
"step": 369000
|
4437 |
+
},
|
4438 |
+
{
|
4439 |
+
"epoch": 101.01,
|
4440 |
+
"learning_rate": 4.495557074504443e-05,
|
4441 |
+
"loss": 2.1002,
|
4442 |
+
"step": 369500
|
4443 |
+
},
|
4444 |
+
{
|
4445 |
+
"epoch": 101.15,
|
4446 |
+
"learning_rate": 4.494873547505127e-05,
|
4447 |
+
"loss": 2.0488,
|
4448 |
+
"step": 370000
|
4449 |
+
},
|
4450 |
+
{
|
4451 |
+
"epoch": 101.28,
|
4452 |
+
"learning_rate": 4.49419002050581e-05,
|
4453 |
+
"loss": 2.0613,
|
4454 |
+
"step": 370500
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 101.42,
|
4458 |
+
"learning_rate": 4.493506493506494e-05,
|
4459 |
+
"loss": 2.0637,
|
4460 |
+
"step": 371000
|
4461 |
+
},
|
4462 |
+
{
|
4463 |
+
"epoch": 101.56,
|
4464 |
+
"learning_rate": 4.492822966507177e-05,
|
4465 |
+
"loss": 2.0769,
|
4466 |
+
"step": 371500
|
4467 |
+
},
|
4468 |
+
{
|
4469 |
+
"epoch": 101.69,
|
4470 |
+
"learning_rate": 4.4921394395078606e-05,
|
4471 |
+
"loss": 2.086,
|
4472 |
+
"step": 372000
|
4473 |
+
},
|
4474 |
+
{
|
4475 |
+
"epoch": 101.83,
|
4476 |
+
"learning_rate": 4.491455912508544e-05,
|
4477 |
+
"loss": 2.0914,
|
4478 |
+
"step": 372500
|
4479 |
+
},
|
4480 |
+
{
|
4481 |
+
"epoch": 101.97,
|
4482 |
+
"learning_rate": 4.4907723855092276e-05,
|
4483 |
+
"loss": 2.0974,
|
4484 |
+
"step": 373000
|
4485 |
+
},
|
4486 |
+
{
|
4487 |
+
"epoch": 102.1,
|
4488 |
+
"learning_rate": 4.490088858509912e-05,
|
4489 |
+
"loss": 2.0534,
|
4490 |
+
"step": 373500
|
4491 |
+
},
|
4492 |
+
{
|
4493 |
+
"epoch": 102.24,
|
4494 |
+
"learning_rate": 4.489405331510595e-05,
|
4495 |
+
"loss": 2.0527,
|
4496 |
+
"step": 374000
|
4497 |
+
},
|
4498 |
+
{
|
4499 |
+
"epoch": 102.38,
|
4500 |
+
"learning_rate": 4.488721804511278e-05,
|
4501 |
+
"loss": 2.0637,
|
4502 |
+
"step": 374500
|
4503 |
+
},
|
4504 |
+
{
|
4505 |
+
"epoch": 102.52,
|
4506 |
+
"learning_rate": 4.4880382775119615e-05,
|
4507 |
+
"loss": 2.0692,
|
4508 |
+
"step": 375000
|
4509 |
+
},
|
4510 |
+
{
|
4511 |
+
"epoch": 102.65,
|
4512 |
+
"learning_rate": 4.487354750512646e-05,
|
4513 |
+
"loss": 2.078,
|
4514 |
+
"step": 375500
|
4515 |
+
},
|
4516 |
+
{
|
4517 |
+
"epoch": 102.79,
|
4518 |
+
"learning_rate": 4.486671223513329e-05,
|
4519 |
+
"loss": 2.0788,
|
4520 |
+
"step": 376000
|
4521 |
+
},
|
4522 |
+
{
|
4523 |
+
"epoch": 102.93,
|
4524 |
+
"learning_rate": 4.4859876965140126e-05,
|
4525 |
+
"loss": 2.089,
|
4526 |
+
"step": 376500
|
4527 |
+
},
|
4528 |
+
{
|
4529 |
+
"epoch": 103.06,
|
4530 |
+
"learning_rate": 4.485304169514696e-05,
|
4531 |
+
"loss": 2.0704,
|
4532 |
+
"step": 377000
|
4533 |
+
},
|
4534 |
+
{
|
4535 |
+
"epoch": 103.2,
|
4536 |
+
"learning_rate": 4.4846206425153796e-05,
|
4537 |
+
"loss": 2.0524,
|
4538 |
+
"step": 377500
|
4539 |
+
},
|
4540 |
+
{
|
4541 |
+
"epoch": 103.34,
|
4542 |
+
"learning_rate": 4.483937115516063e-05,
|
4543 |
+
"loss": 2.0481,
|
4544 |
+
"step": 378000
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 103.47,
|
4548 |
+
"learning_rate": 4.4832535885167465e-05,
|
4549 |
+
"loss": 2.0607,
|
4550 |
+
"step": 378500
|
4551 |
+
},
|
4552 |
+
{
|
4553 |
+
"epoch": 103.61,
|
4554 |
+
"learning_rate": 4.482570061517431e-05,
|
4555 |
+
"loss": 2.0742,
|
4556 |
+
"step": 379000
|
4557 |
+
},
|
4558 |
+
{
|
4559 |
+
"epoch": 103.75,
|
4560 |
+
"learning_rate": 4.4818865345181135e-05,
|
4561 |
+
"loss": 2.0698,
|
4562 |
+
"step": 379500
|
4563 |
+
},
|
4564 |
+
{
|
4565 |
+
"epoch": 103.88,
|
4566 |
+
"learning_rate": 4.481203007518797e-05,
|
4567 |
+
"loss": 2.0826,
|
4568 |
+
"step": 380000
|
4569 |
+
},
|
4570 |
+
{
|
4571 |
+
"epoch": 104.02,
|
4572 |
+
"learning_rate": 4.4805194805194805e-05,
|
4573 |
+
"loss": 2.0789,
|
4574 |
+
"step": 380500
|
4575 |
+
},
|
4576 |
+
{
|
4577 |
+
"epoch": 104.16,
|
4578 |
+
"learning_rate": 4.4798359535201646e-05,
|
4579 |
+
"loss": 2.0342,
|
4580 |
+
"step": 381000
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 104.29,
|
4584 |
+
"learning_rate": 4.479152426520848e-05,
|
4585 |
+
"loss": 2.0422,
|
4586 |
+
"step": 381500
|
4587 |
+
},
|
4588 |
+
{
|
4589 |
+
"epoch": 104.43,
|
4590 |
+
"learning_rate": 4.4784688995215316e-05,
|
4591 |
+
"loss": 2.0582,
|
4592 |
+
"step": 382000
|
4593 |
+
},
|
4594 |
+
{
|
4595 |
+
"epoch": 104.57,
|
4596 |
+
"learning_rate": 4.4777853725222144e-05,
|
4597 |
+
"loss": 2.0651,
|
4598 |
+
"step": 382500
|
4599 |
+
},
|
4600 |
+
{
|
4601 |
+
"epoch": 104.7,
|
4602 |
+
"learning_rate": 4.4771018455228985e-05,
|
4603 |
+
"loss": 2.0687,
|
4604 |
+
"step": 383000
|
4605 |
+
},
|
4606 |
+
{
|
4607 |
+
"epoch": 104.84,
|
4608 |
+
"learning_rate": 4.476418318523582e-05,
|
4609 |
+
"loss": 2.0763,
|
4610 |
+
"step": 383500
|
4611 |
+
},
|
4612 |
+
{
|
4613 |
+
"epoch": 104.98,
|
4614 |
+
"learning_rate": 4.4757347915242655e-05,
|
4615 |
+
"loss": 2.0866,
|
4616 |
+
"step": 384000
|
4617 |
+
},
|
4618 |
+
{
|
4619 |
+
"epoch": 105.11,
|
4620 |
+
"learning_rate": 4.475051264524949e-05,
|
4621 |
+
"loss": 2.0387,
|
4622 |
+
"step": 384500
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 105.25,
|
4626 |
+
"learning_rate": 4.4743677375256325e-05,
|
4627 |
+
"loss": 2.0398,
|
4628 |
+
"step": 385000
|
4629 |
+
},
|
4630 |
+
{
|
4631 |
+
"epoch": 105.39,
|
4632 |
+
"learning_rate": 4.473684210526316e-05,
|
4633 |
+
"loss": 2.0542,
|
4634 |
+
"step": 385500
|
4635 |
+
},
|
4636 |
+
{
|
4637 |
+
"epoch": 105.52,
|
4638 |
+
"learning_rate": 4.4730006835269994e-05,
|
4639 |
+
"loss": 2.0551,
|
4640 |
+
"step": 386000
|
4641 |
+
},
|
4642 |
+
{
|
4643 |
+
"epoch": 105.66,
|
4644 |
+
"learning_rate": 4.472317156527683e-05,
|
4645 |
+
"loss": 2.0639,
|
4646 |
+
"step": 386500
|
4647 |
+
},
|
4648 |
+
{
|
4649 |
+
"epoch": 105.8,
|
4650 |
+
"learning_rate": 4.471633629528367e-05,
|
4651 |
+
"loss": 2.0706,
|
4652 |
+
"step": 387000
|
4653 |
+
},
|
4654 |
+
{
|
4655 |
+
"epoch": 105.93,
|
4656 |
+
"learning_rate": 4.47095010252905e-05,
|
4657 |
+
"loss": 2.0742,
|
4658 |
+
"step": 387500
|
4659 |
+
},
|
4660 |
+
{
|
4661 |
+
"epoch": 106.07,
|
4662 |
+
"learning_rate": 4.470266575529733e-05,
|
4663 |
+
"loss": 2.0517,
|
4664 |
+
"step": 388000
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 106.21,
|
4668 |
+
"learning_rate": 4.4695830485304175e-05,
|
4669 |
+
"loss": 2.0298,
|
4670 |
+
"step": 388500
|
4671 |
+
},
|
4672 |
+
{
|
4673 |
+
"epoch": 106.34,
|
4674 |
+
"learning_rate": 4.468899521531101e-05,
|
4675 |
+
"loss": 2.0385,
|
4676 |
+
"step": 389000
|
4677 |
+
},
|
4678 |
+
{
|
4679 |
+
"epoch": 106.48,
|
4680 |
+
"learning_rate": 4.4682159945317844e-05,
|
4681 |
+
"loss": 2.051,
|
4682 |
+
"step": 389500
|
4683 |
+
},
|
4684 |
+
{
|
4685 |
+
"epoch": 106.62,
|
4686 |
+
"learning_rate": 4.467532467532467e-05,
|
4687 |
+
"loss": 2.0592,
|
4688 |
+
"step": 390000
|
4689 |
+
},
|
4690 |
+
{
|
4691 |
+
"epoch": 106.75,
|
4692 |
+
"learning_rate": 4.4668489405331514e-05,
|
4693 |
+
"loss": 2.0676,
|
4694 |
+
"step": 390500
|
4695 |
+
},
|
4696 |
+
{
|
4697 |
+
"epoch": 106.89,
|
4698 |
+
"learning_rate": 4.466165413533835e-05,
|
4699 |
+
"loss": 2.0695,
|
4700 |
+
"step": 391000
|
4701 |
+
},
|
4702 |
+
{
|
4703 |
+
"epoch": 107.03,
|
4704 |
+
"learning_rate": 4.4654818865345184e-05,
|
4705 |
+
"loss": 2.0598,
|
4706 |
+
"step": 391500
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 107.16,
|
4710 |
+
"learning_rate": 4.464798359535202e-05,
|
4711 |
+
"loss": 2.024,
|
4712 |
+
"step": 392000
|
4713 |
+
},
|
4714 |
+
{
|
4715 |
+
"epoch": 107.3,
|
4716 |
+
"learning_rate": 4.464114832535885e-05,
|
4717 |
+
"loss": 2.0372,
|
4718 |
+
"step": 392500
|
4719 |
+
},
|
4720 |
+
{
|
4721 |
+
"epoch": 107.44,
|
4722 |
+
"learning_rate": 4.463431305536569e-05,
|
4723 |
+
"loss": 2.0433,
|
4724 |
+
"step": 393000
|
4725 |
+
},
|
4726 |
+
{
|
4727 |
+
"epoch": 107.57,
|
4728 |
+
"learning_rate": 4.462747778537252e-05,
|
4729 |
+
"loss": 2.0472,
|
4730 |
+
"step": 393500
|
4731 |
+
},
|
4732 |
+
{
|
4733 |
+
"epoch": 107.71,
|
4734 |
+
"learning_rate": 4.462064251537936e-05,
|
4735 |
+
"loss": 2.0579,
|
4736 |
+
"step": 394000
|
4737 |
+
},
|
4738 |
+
{
|
4739 |
+
"epoch": 107.85,
|
4740 |
+
"learning_rate": 4.46138072453862e-05,
|
4741 |
+
"loss": 2.0605,
|
4742 |
+
"step": 394500
|
4743 |
+
},
|
4744 |
+
{
|
4745 |
+
"epoch": 107.98,
|
4746 |
+
"learning_rate": 4.460697197539303e-05,
|
4747 |
+
"loss": 2.0745,
|
4748 |
+
"step": 395000
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 108.12,
|
4752 |
+
"learning_rate": 4.460013670539986e-05,
|
4753 |
+
"loss": 2.026,
|
4754 |
+
"step": 395500
|
4755 |
+
},
|
4756 |
+
{
|
4757 |
+
"epoch": 108.26,
|
4758 |
+
"learning_rate": 4.45933014354067e-05,
|
4759 |
+
"loss": 2.0251,
|
4760 |
+
"step": 396000
|
4761 |
+
},
|
4762 |
+
{
|
4763 |
+
"epoch": 108.39,
|
4764 |
+
"learning_rate": 4.458646616541354e-05,
|
4765 |
+
"loss": 2.0438,
|
4766 |
+
"step": 396500
|
4767 |
+
},
|
4768 |
+
{
|
4769 |
+
"epoch": 108.53,
|
4770 |
+
"learning_rate": 4.457963089542037e-05,
|
4771 |
+
"loss": 2.0407,
|
4772 |
+
"step": 397000
|
4773 |
+
},
|
4774 |
+
{
|
4775 |
+
"epoch": 108.67,
|
4776 |
+
"learning_rate": 4.457279562542721e-05,
|
4777 |
+
"loss": 2.0477,
|
4778 |
+
"step": 397500
|
4779 |
+
},
|
4780 |
+
{
|
4781 |
+
"epoch": 108.8,
|
4782 |
+
"learning_rate": 4.456596035543404e-05,
|
4783 |
+
"loss": 2.0536,
|
4784 |
+
"step": 398000
|
4785 |
+
},
|
4786 |
+
{
|
4787 |
+
"epoch": 108.94,
|
4788 |
+
"learning_rate": 4.455912508544088e-05,
|
4789 |
+
"loss": 2.065,
|
4790 |
+
"step": 398500
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 109.08,
|
4794 |
+
"learning_rate": 4.455228981544771e-05,
|
4795 |
+
"loss": 2.0385,
|
4796 |
+
"step": 399000
|
4797 |
+
},
|
4798 |
+
{
|
4799 |
+
"epoch": 109.21,
|
4800 |
+
"learning_rate": 4.454545454545455e-05,
|
4801 |
+
"loss": 2.0245,
|
4802 |
+
"step": 399500
|
4803 |
+
},
|
4804 |
+
{
|
4805 |
+
"epoch": 109.35,
|
4806 |
+
"learning_rate": 4.453861927546138e-05,
|
4807 |
+
"loss": 2.0304,
|
4808 |
+
"step": 400000
|
4809 |
+
},
|
4810 |
+
{
|
4811 |
+
"epoch": 109.49,
|
4812 |
+
"learning_rate": 4.453178400546822e-05,
|
4813 |
+
"loss": 2.0395,
|
4814 |
+
"step": 400500
|
4815 |
+
},
|
4816 |
+
{
|
4817 |
+
"epoch": 109.62,
|
4818 |
+
"learning_rate": 4.452494873547505e-05,
|
4819 |
+
"loss": 2.0405,
|
4820 |
+
"step": 401000
|
4821 |
+
},
|
4822 |
+
{
|
4823 |
+
"epoch": 109.76,
|
4824 |
+
"learning_rate": 4.4518113465481886e-05,
|
4825 |
+
"loss": 2.0475,
|
4826 |
+
"step": 401500
|
4827 |
+
},
|
4828 |
+
{
|
4829 |
+
"epoch": 109.9,
|
4830 |
+
"learning_rate": 4.451127819548873e-05,
|
4831 |
+
"loss": 2.0538,
|
4832 |
+
"step": 402000
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 110.03,
|
4836 |
+
"learning_rate": 4.450444292549556e-05,
|
4837 |
+
"loss": 2.0518,
|
4838 |
+
"step": 402500
|
4839 |
+
},
|
4840 |
+
{
|
4841 |
+
"epoch": 110.17,
|
4842 |
+
"learning_rate": 4.449760765550239e-05,
|
4843 |
+
"loss": 2.0132,
|
4844 |
+
"step": 403000
|
4845 |
+
},
|
4846 |
+
{
|
4847 |
+
"epoch": 110.31,
|
4848 |
+
"learning_rate": 4.4490772385509225e-05,
|
4849 |
+
"loss": 2.0259,
|
4850 |
+
"step": 403500
|
4851 |
+
},
|
4852 |
+
{
|
4853 |
+
"epoch": 110.44,
|
4854 |
+
"learning_rate": 4.448393711551607e-05,
|
4855 |
+
"loss": 2.0302,
|
4856 |
+
"step": 404000
|
4857 |
+
},
|
4858 |
+
{
|
4859 |
+
"epoch": 110.58,
|
4860 |
+
"learning_rate": 4.44771018455229e-05,
|
4861 |
+
"loss": 2.0392,
|
4862 |
+
"step": 404500
|
4863 |
+
},
|
4864 |
+
{
|
4865 |
+
"epoch": 110.72,
|
4866 |
+
"learning_rate": 4.4470266575529737e-05,
|
4867 |
+
"loss": 2.0429,
|
4868 |
+
"step": 405000
|
4869 |
+
},
|
4870 |
+
{
|
4871 |
+
"epoch": 110.85,
|
4872 |
+
"learning_rate": 4.446343130553657e-05,
|
4873 |
+
"loss": 2.0444,
|
4874 |
+
"step": 405500
|
4875 |
+
},
|
4876 |
+
{
|
4877 |
+
"epoch": 110.99,
|
4878 |
+
"learning_rate": 4.4456596035543406e-05,
|
4879 |
+
"loss": 2.0542,
|
4880 |
+
"step": 406000
|
4881 |
+
},
|
4882 |
+
{
|
4883 |
+
"epoch": 111.13,
|
4884 |
+
"learning_rate": 4.444976076555024e-05,
|
4885 |
+
"loss": 2.0086,
|
4886 |
+
"step": 406500
|
4887 |
+
},
|
4888 |
+
{
|
4889 |
+
"epoch": 111.26,
|
4890 |
+
"learning_rate": 4.4442925495557076e-05,
|
4891 |
+
"loss": 2.0216,
|
4892 |
+
"step": 407000
|
4893 |
+
},
|
4894 |
+
{
|
4895 |
+
"epoch": 111.4,
|
4896 |
+
"learning_rate": 4.443609022556392e-05,
|
4897 |
+
"loss": 2.0197,
|
4898 |
+
"step": 407500
|
4899 |
+
},
|
4900 |
+
{
|
4901 |
+
"epoch": 111.54,
|
4902 |
+
"learning_rate": 4.4429254955570745e-05,
|
4903 |
+
"loss": 2.032,
|
4904 |
+
"step": 408000
|
4905 |
+
},
|
4906 |
+
{
|
4907 |
+
"epoch": 111.67,
|
4908 |
+
"learning_rate": 4.442241968557758e-05,
|
4909 |
+
"loss": 2.0413,
|
4910 |
+
"step": 408500
|
4911 |
+
},
|
4912 |
+
{
|
4913 |
+
"epoch": 111.81,
|
4914 |
+
"learning_rate": 4.4415584415584415e-05,
|
4915 |
+
"loss": 2.0456,
|
4916 |
+
"step": 409000
|
4917 |
+
},
|
4918 |
+
{
|
4919 |
+
"epoch": 111.95,
|
4920 |
+
"learning_rate": 4.4408749145591257e-05,
|
4921 |
+
"loss": 2.047,
|
4922 |
+
"step": 409500
|
4923 |
+
},
|
4924 |
+
{
|
4925 |
+
"epoch": 112.08,
|
4926 |
+
"learning_rate": 4.440191387559809e-05,
|
4927 |
+
"loss": 2.0248,
|
4928 |
+
"step": 410000
|
4929 |
}
|
4930 |
],
|
4931 |
"max_steps": 3658000,
|
4932 |
"num_train_epochs": 1000,
|
4933 |
+
"total_flos": 825321947035336704,
|
4934 |
"trial_name": null,
|
4935 |
"trial_params": null
|
4936 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2031
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93d2fc2ca4a3f894c9391bd5e40a96e46714817c7477b582da0320beb734993f
|
3 |
size 2031
|