amazingvince
commited on
Commit
•
9f7ec5a
1
Parent(s):
a54bfee
Upload folder using huggingface_hub
Browse files- latest +1 -1
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- trainer_state.json +971 -3
latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step12000
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4944210912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f2f922cbafd08d7c3c9709c63870639b4fe5a77197b1aed02013fa0a46d37ab
|
3 |
size 4944210912
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999819336
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b990bef30e1c5b9ae4bb3b37e37b31d793cfbf6af883deeba8aafd44531998bc
|
3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4541564920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:308a00b64f20b918c2170564dfd59fab520e889ef27bfcfd89fb157907315e40
|
3 |
size 4541564920
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 800,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -13565,6 +13565,974 @@
|
|
13565 |
"eval_samples_per_second": 16.393,
|
13566 |
"eval_steps_per_second": 2.737,
|
13567 |
"step": 11200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13568 |
}
|
13569 |
],
|
13570 |
"logging_steps": 5,
|
@@ -13572,7 +14540,7 @@
|
|
13572 |
"num_input_tokens_seen": 0,
|
13573 |
"num_train_epochs": 1,
|
13574 |
"save_steps": 400,
|
13575 |
-
"total_flos":
|
13576 |
"trial_name": null,
|
13577 |
"trial_params": null
|
13578 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.48742098448884263,
|
5 |
"eval_steps": 800,
|
6 |
+
"global_step": 12000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
13565 |
"eval_samples_per_second": 16.393,
|
13566 |
"eval_steps_per_second": 2.737,
|
13567 |
"step": 11200
|
13568 |
+
},
|
13569 |
+
{
|
13570 |
+
"epoch": 0.46,
|
13571 |
+
"learning_rate": 1.15763110831054e-06,
|
13572 |
+
"loss": 0.6646,
|
13573 |
+
"step": 11205
|
13574 |
+
},
|
13575 |
+
{
|
13576 |
+
"epoch": 0.46,
|
13577 |
+
"learning_rate": 1.1569946246316182e-06,
|
13578 |
+
"loss": 0.7085,
|
13579 |
+
"step": 11210
|
13580 |
+
},
|
13581 |
+
{
|
13582 |
+
"epoch": 0.46,
|
13583 |
+
"learning_rate": 1.156358075738495e-06,
|
13584 |
+
"loss": 0.6621,
|
13585 |
+
"step": 11215
|
13586 |
+
},
|
13587 |
+
{
|
13588 |
+
"epoch": 0.46,
|
13589 |
+
"learning_rate": 1.1557214618955868e-06,
|
13590 |
+
"loss": 0.6703,
|
13591 |
+
"step": 11220
|
13592 |
+
},
|
13593 |
+
{
|
13594 |
+
"epoch": 0.46,
|
13595 |
+
"learning_rate": 1.1550847833673374e-06,
|
13596 |
+
"loss": 0.7204,
|
13597 |
+
"step": 11225
|
13598 |
+
},
|
13599 |
+
{
|
13600 |
+
"epoch": 0.46,
|
13601 |
+
"learning_rate": 1.154448040418218e-06,
|
13602 |
+
"loss": 0.6923,
|
13603 |
+
"step": 11230
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 0.46,
|
13607 |
+
"learning_rate": 1.1538112333127253e-06,
|
13608 |
+
"loss": 0.6608,
|
13609 |
+
"step": 11235
|
13610 |
+
},
|
13611 |
+
{
|
13612 |
+
"epoch": 0.46,
|
13613 |
+
"learning_rate": 1.1531743623153842e-06,
|
13614 |
+
"loss": 0.6824,
|
13615 |
+
"step": 11240
|
13616 |
+
},
|
13617 |
+
{
|
13618 |
+
"epoch": 0.46,
|
13619 |
+
"learning_rate": 1.1525374276907449e-06,
|
13620 |
+
"loss": 0.7322,
|
13621 |
+
"step": 11245
|
13622 |
+
},
|
13623 |
+
{
|
13624 |
+
"epoch": 0.46,
|
13625 |
+
"learning_rate": 1.1519004297033847e-06,
|
13626 |
+
"loss": 0.6432,
|
13627 |
+
"step": 11250
|
13628 |
+
},
|
13629 |
+
{
|
13630 |
+
"epoch": 0.46,
|
13631 |
+
"learning_rate": 1.1512633686179071e-06,
|
13632 |
+
"loss": 0.6795,
|
13633 |
+
"step": 11255
|
13634 |
+
},
|
13635 |
+
{
|
13636 |
+
"epoch": 0.46,
|
13637 |
+
"learning_rate": 1.1506262446989417e-06,
|
13638 |
+
"loss": 0.7229,
|
13639 |
+
"step": 11260
|
13640 |
+
},
|
13641 |
+
{
|
13642 |
+
"epoch": 0.46,
|
13643 |
+
"learning_rate": 1.149989058211144e-06,
|
13644 |
+
"loss": 0.6954,
|
13645 |
+
"step": 11265
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 0.46,
|
13649 |
+
"learning_rate": 1.149351809419196e-06,
|
13650 |
+
"loss": 0.6879,
|
13651 |
+
"step": 11270
|
13652 |
+
},
|
13653 |
+
{
|
13654 |
+
"epoch": 0.46,
|
13655 |
+
"learning_rate": 1.148714498587805e-06,
|
13656 |
+
"loss": 0.6642,
|
13657 |
+
"step": 11275
|
13658 |
+
},
|
13659 |
+
{
|
13660 |
+
"epoch": 0.46,
|
13661 |
+
"learning_rate": 1.1480771259817048e-06,
|
13662 |
+
"loss": 0.7015,
|
13663 |
+
"step": 11280
|
13664 |
+
},
|
13665 |
+
{
|
13666 |
+
"epoch": 0.46,
|
13667 |
+
"learning_rate": 1.147439691865654e-06,
|
13668 |
+
"loss": 0.6467,
|
13669 |
+
"step": 11285
|
13670 |
+
},
|
13671 |
+
{
|
13672 |
+
"epoch": 0.46,
|
13673 |
+
"learning_rate": 1.1468021965044377e-06,
|
13674 |
+
"loss": 0.7045,
|
13675 |
+
"step": 11290
|
13676 |
+
},
|
13677 |
+
{
|
13678 |
+
"epoch": 0.46,
|
13679 |
+
"learning_rate": 1.1461646401628654e-06,
|
13680 |
+
"loss": 0.6635,
|
13681 |
+
"step": 11295
|
13682 |
+
},
|
13683 |
+
{
|
13684 |
+
"epoch": 0.46,
|
13685 |
+
"learning_rate": 1.1455270231057728e-06,
|
13686 |
+
"loss": 0.6943,
|
13687 |
+
"step": 11300
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 0.46,
|
13691 |
+
"learning_rate": 1.14488934559802e-06,
|
13692 |
+
"loss": 0.6626,
|
13693 |
+
"step": 11305
|
13694 |
+
},
|
13695 |
+
{
|
13696 |
+
"epoch": 0.46,
|
13697 |
+
"learning_rate": 1.1442516079044932e-06,
|
13698 |
+
"loss": 0.6716,
|
13699 |
+
"step": 11310
|
13700 |
+
},
|
13701 |
+
{
|
13702 |
+
"epoch": 0.46,
|
13703 |
+
"learning_rate": 1.1436138102901031e-06,
|
13704 |
+
"loss": 0.6748,
|
13705 |
+
"step": 11315
|
13706 |
+
},
|
13707 |
+
{
|
13708 |
+
"epoch": 0.46,
|
13709 |
+
"learning_rate": 1.142975953019785e-06,
|
13710 |
+
"loss": 0.7028,
|
13711 |
+
"step": 11320
|
13712 |
+
},
|
13713 |
+
{
|
13714 |
+
"epoch": 0.46,
|
13715 |
+
"learning_rate": 1.1423380363584999e-06,
|
13716 |
+
"loss": 0.6044,
|
13717 |
+
"step": 11325
|
13718 |
+
},
|
13719 |
+
{
|
13720 |
+
"epoch": 0.46,
|
13721 |
+
"learning_rate": 1.1417000605712316e-06,
|
13722 |
+
"loss": 0.6831,
|
13723 |
+
"step": 11330
|
13724 |
+
},
|
13725 |
+
{
|
13726 |
+
"epoch": 0.46,
|
13727 |
+
"learning_rate": 1.1410620259229908e-06,
|
13728 |
+
"loss": 0.6632,
|
13729 |
+
"step": 11335
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 0.46,
|
13733 |
+
"learning_rate": 1.1404239326788115e-06,
|
13734 |
+
"loss": 0.6393,
|
13735 |
+
"step": 11340
|
13736 |
+
},
|
13737 |
+
{
|
13738 |
+
"epoch": 0.46,
|
13739 |
+
"learning_rate": 1.1397857811037512e-06,
|
13740 |
+
"loss": 0.6501,
|
13741 |
+
"step": 11345
|
13742 |
+
},
|
13743 |
+
{
|
13744 |
+
"epoch": 0.46,
|
13745 |
+
"learning_rate": 1.1391475714628932e-06,
|
13746 |
+
"loss": 0.6398,
|
13747 |
+
"step": 11350
|
13748 |
+
},
|
13749 |
+
{
|
13750 |
+
"epoch": 0.46,
|
13751 |
+
"learning_rate": 1.138509304021344e-06,
|
13752 |
+
"loss": 0.6784,
|
13753 |
+
"step": 11355
|
13754 |
+
},
|
13755 |
+
{
|
13756 |
+
"epoch": 0.46,
|
13757 |
+
"learning_rate": 1.1378709790442346e-06,
|
13758 |
+
"loss": 0.7065,
|
13759 |
+
"step": 11360
|
13760 |
+
},
|
13761 |
+
{
|
13762 |
+
"epoch": 0.46,
|
13763 |
+
"learning_rate": 1.1372325967967196e-06,
|
13764 |
+
"loss": 0.6189,
|
13765 |
+
"step": 11365
|
13766 |
+
},
|
13767 |
+
{
|
13768 |
+
"epoch": 0.46,
|
13769 |
+
"learning_rate": 1.1365941575439772e-06,
|
13770 |
+
"loss": 0.6652,
|
13771 |
+
"step": 11370
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 0.46,
|
13775 |
+
"learning_rate": 1.1359556615512099e-06,
|
13776 |
+
"loss": 0.6752,
|
13777 |
+
"step": 11375
|
13778 |
+
},
|
13779 |
+
{
|
13780 |
+
"epoch": 0.46,
|
13781 |
+
"learning_rate": 1.1353171090836427e-06,
|
13782 |
+
"loss": 0.6668,
|
13783 |
+
"step": 11380
|
13784 |
+
},
|
13785 |
+
{
|
13786 |
+
"epoch": 0.46,
|
13787 |
+
"learning_rate": 1.134678500406525e-06,
|
13788 |
+
"loss": 0.6587,
|
13789 |
+
"step": 11385
|
13790 |
+
},
|
13791 |
+
{
|
13792 |
+
"epoch": 0.46,
|
13793 |
+
"learning_rate": 1.13403983578513e-06,
|
13794 |
+
"loss": 0.6873,
|
13795 |
+
"step": 11390
|
13796 |
+
},
|
13797 |
+
{
|
13798 |
+
"epoch": 0.46,
|
13799 |
+
"learning_rate": 1.1334011154847527e-06,
|
13800 |
+
"loss": 0.6975,
|
13801 |
+
"step": 11395
|
13802 |
+
},
|
13803 |
+
{
|
13804 |
+
"epoch": 0.46,
|
13805 |
+
"learning_rate": 1.1327623397707122e-06,
|
13806 |
+
"loss": 0.6784,
|
13807 |
+
"step": 11400
|
13808 |
+
},
|
13809 |
+
{
|
13810 |
+
"epoch": 0.46,
|
13811 |
+
"learning_rate": 1.1321235089083502e-06,
|
13812 |
+
"loss": 0.6643,
|
13813 |
+
"step": 11405
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 0.46,
|
13817 |
+
"learning_rate": 1.1314846231630315e-06,
|
13818 |
+
"loss": 0.6754,
|
13819 |
+
"step": 11410
|
13820 |
+
},
|
13821 |
+
{
|
13822 |
+
"epoch": 0.46,
|
13823 |
+
"learning_rate": 1.1308456828001441e-06,
|
13824 |
+
"loss": 0.6689,
|
13825 |
+
"step": 11415
|
13826 |
+
},
|
13827 |
+
{
|
13828 |
+
"epoch": 0.46,
|
13829 |
+
"learning_rate": 1.1302066880850975e-06,
|
13830 |
+
"loss": 0.6594,
|
13831 |
+
"step": 11420
|
13832 |
+
},
|
13833 |
+
{
|
13834 |
+
"epoch": 0.46,
|
13835 |
+
"learning_rate": 1.1295676392833253e-06,
|
13836 |
+
"loss": 0.6416,
|
13837 |
+
"step": 11425
|
13838 |
+
},
|
13839 |
+
{
|
13840 |
+
"epoch": 0.46,
|
13841 |
+
"learning_rate": 1.1289285366602826e-06,
|
13842 |
+
"loss": 0.7223,
|
13843 |
+
"step": 11430
|
13844 |
+
},
|
13845 |
+
{
|
13846 |
+
"epoch": 0.46,
|
13847 |
+
"learning_rate": 1.1282893804814468e-06,
|
13848 |
+
"loss": 0.6944,
|
13849 |
+
"step": 11435
|
13850 |
+
},
|
13851 |
+
{
|
13852 |
+
"epoch": 0.46,
|
13853 |
+
"learning_rate": 1.127650171012318e-06,
|
13854 |
+
"loss": 0.6598,
|
13855 |
+
"step": 11440
|
13856 |
+
},
|
13857 |
+
{
|
13858 |
+
"epoch": 0.46,
|
13859 |
+
"learning_rate": 1.1270109085184182e-06,
|
13860 |
+
"loss": 0.7102,
|
13861 |
+
"step": 11445
|
13862 |
+
},
|
13863 |
+
{
|
13864 |
+
"epoch": 0.47,
|
13865 |
+
"learning_rate": 1.1263715932652919e-06,
|
13866 |
+
"loss": 0.6803,
|
13867 |
+
"step": 11450
|
13868 |
+
},
|
13869 |
+
{
|
13870 |
+
"epoch": 0.47,
|
13871 |
+
"learning_rate": 1.1257322255185044e-06,
|
13872 |
+
"loss": 0.65,
|
13873 |
+
"step": 11455
|
13874 |
+
},
|
13875 |
+
{
|
13876 |
+
"epoch": 0.47,
|
13877 |
+
"learning_rate": 1.1250928055436443e-06,
|
13878 |
+
"loss": 0.7018,
|
13879 |
+
"step": 11460
|
13880 |
+
},
|
13881 |
+
{
|
13882 |
+
"epoch": 0.47,
|
13883 |
+
"learning_rate": 1.12445333360632e-06,
|
13884 |
+
"loss": 0.6409,
|
13885 |
+
"step": 11465
|
13886 |
+
},
|
13887 |
+
{
|
13888 |
+
"epoch": 0.47,
|
13889 |
+
"learning_rate": 1.1238138099721634e-06,
|
13890 |
+
"loss": 0.6724,
|
13891 |
+
"step": 11470
|
13892 |
+
},
|
13893 |
+
{
|
13894 |
+
"epoch": 0.47,
|
13895 |
+
"learning_rate": 1.1231742349068271e-06,
|
13896 |
+
"loss": 0.6854,
|
13897 |
+
"step": 11475
|
13898 |
+
},
|
13899 |
+
{
|
13900 |
+
"epoch": 0.47,
|
13901 |
+
"learning_rate": 1.1225346086759846e-06,
|
13902 |
+
"loss": 0.7102,
|
13903 |
+
"step": 11480
|
13904 |
+
},
|
13905 |
+
{
|
13906 |
+
"epoch": 0.47,
|
13907 |
+
"learning_rate": 1.1218949315453314e-06,
|
13908 |
+
"loss": 0.6584,
|
13909 |
+
"step": 11485
|
13910 |
+
},
|
13911 |
+
{
|
13912 |
+
"epoch": 0.47,
|
13913 |
+
"learning_rate": 1.1212552037805836e-06,
|
13914 |
+
"loss": 0.6584,
|
13915 |
+
"step": 11490
|
13916 |
+
},
|
13917 |
+
{
|
13918 |
+
"epoch": 0.47,
|
13919 |
+
"learning_rate": 1.1206154256474786e-06,
|
13920 |
+
"loss": 0.6846,
|
13921 |
+
"step": 11495
|
13922 |
+
},
|
13923 |
+
{
|
13924 |
+
"epoch": 0.47,
|
13925 |
+
"learning_rate": 1.119975597411775e-06,
|
13926 |
+
"loss": 0.7037,
|
13927 |
+
"step": 11500
|
13928 |
+
},
|
13929 |
+
{
|
13930 |
+
"epoch": 0.47,
|
13931 |
+
"learning_rate": 1.1193357193392512e-06,
|
13932 |
+
"loss": 0.6689,
|
13933 |
+
"step": 11505
|
13934 |
+
},
|
13935 |
+
{
|
13936 |
+
"epoch": 0.47,
|
13937 |
+
"learning_rate": 1.1186957916957078e-06,
|
13938 |
+
"loss": 0.6556,
|
13939 |
+
"step": 11510
|
13940 |
+
},
|
13941 |
+
{
|
13942 |
+
"epoch": 0.47,
|
13943 |
+
"learning_rate": 1.1180558147469642e-06,
|
13944 |
+
"loss": 0.6557,
|
13945 |
+
"step": 11515
|
13946 |
+
},
|
13947 |
+
{
|
13948 |
+
"epoch": 0.47,
|
13949 |
+
"learning_rate": 1.1174157887588623e-06,
|
13950 |
+
"loss": 0.6662,
|
13951 |
+
"step": 11520
|
13952 |
+
},
|
13953 |
+
{
|
13954 |
+
"epoch": 0.47,
|
13955 |
+
"learning_rate": 1.1167757139972626e-06,
|
13956 |
+
"loss": 0.6702,
|
13957 |
+
"step": 11525
|
13958 |
+
},
|
13959 |
+
{
|
13960 |
+
"epoch": 0.47,
|
13961 |
+
"learning_rate": 1.116135590728047e-06,
|
13962 |
+
"loss": 0.6682,
|
13963 |
+
"step": 11530
|
13964 |
+
},
|
13965 |
+
{
|
13966 |
+
"epoch": 0.47,
|
13967 |
+
"learning_rate": 1.115495419217117e-06,
|
13968 |
+
"loss": 0.6855,
|
13969 |
+
"step": 11535
|
13970 |
+
},
|
13971 |
+
{
|
13972 |
+
"epoch": 0.47,
|
13973 |
+
"learning_rate": 1.114855199730394e-06,
|
13974 |
+
"loss": 0.6796,
|
13975 |
+
"step": 11540
|
13976 |
+
},
|
13977 |
+
{
|
13978 |
+
"epoch": 0.47,
|
13979 |
+
"learning_rate": 1.1142149325338199e-06,
|
13980 |
+
"loss": 0.6481,
|
13981 |
+
"step": 11545
|
13982 |
+
},
|
13983 |
+
{
|
13984 |
+
"epoch": 0.47,
|
13985 |
+
"learning_rate": 1.1135746178933563e-06,
|
13986 |
+
"loss": 0.7167,
|
13987 |
+
"step": 11550
|
13988 |
+
},
|
13989 |
+
{
|
13990 |
+
"epoch": 0.47,
|
13991 |
+
"learning_rate": 1.112934256074984e-06,
|
13992 |
+
"loss": 0.6602,
|
13993 |
+
"step": 11555
|
13994 |
+
},
|
13995 |
+
{
|
13996 |
+
"epoch": 0.47,
|
13997 |
+
"learning_rate": 1.1122938473447038e-06,
|
13998 |
+
"loss": 0.6848,
|
13999 |
+
"step": 11560
|
14000 |
+
},
|
14001 |
+
{
|
14002 |
+
"epoch": 0.47,
|
14003 |
+
"learning_rate": 1.1116533919685361e-06,
|
14004 |
+
"loss": 0.6628,
|
14005 |
+
"step": 11565
|
14006 |
+
},
|
14007 |
+
{
|
14008 |
+
"epoch": 0.47,
|
14009 |
+
"learning_rate": 1.1110128902125201e-06,
|
14010 |
+
"loss": 0.6826,
|
14011 |
+
"step": 11570
|
14012 |
+
},
|
14013 |
+
{
|
14014 |
+
"epoch": 0.47,
|
14015 |
+
"learning_rate": 1.1103723423427153e-06,
|
14016 |
+
"loss": 0.6527,
|
14017 |
+
"step": 11575
|
14018 |
+
},
|
14019 |
+
{
|
14020 |
+
"epoch": 0.47,
|
14021 |
+
"learning_rate": 1.1097317486251992e-06,
|
14022 |
+
"loss": 0.6737,
|
14023 |
+
"step": 11580
|
14024 |
+
},
|
14025 |
+
{
|
14026 |
+
"epoch": 0.47,
|
14027 |
+
"learning_rate": 1.109091109326069e-06,
|
14028 |
+
"loss": 0.6854,
|
14029 |
+
"step": 11585
|
14030 |
+
},
|
14031 |
+
{
|
14032 |
+
"epoch": 0.47,
|
14033 |
+
"learning_rate": 1.1084504247114406e-06,
|
14034 |
+
"loss": 0.7145,
|
14035 |
+
"step": 11590
|
14036 |
+
},
|
14037 |
+
{
|
14038 |
+
"epoch": 0.47,
|
14039 |
+
"learning_rate": 1.107809695047449e-06,
|
14040 |
+
"loss": 0.6756,
|
14041 |
+
"step": 11595
|
14042 |
+
},
|
14043 |
+
{
|
14044 |
+
"epoch": 0.47,
|
14045 |
+
"learning_rate": 1.1071689206002474e-06,
|
14046 |
+
"loss": 0.6725,
|
14047 |
+
"step": 11600
|
14048 |
+
},
|
14049 |
+
{
|
14050 |
+
"epoch": 0.47,
|
14051 |
+
"learning_rate": 1.1065281016360083e-06,
|
14052 |
+
"loss": 0.7145,
|
14053 |
+
"step": 11605
|
14054 |
+
},
|
14055 |
+
{
|
14056 |
+
"epoch": 0.47,
|
14057 |
+
"learning_rate": 1.1058872384209224e-06,
|
14058 |
+
"loss": 0.6899,
|
14059 |
+
"step": 11610
|
14060 |
+
},
|
14061 |
+
{
|
14062 |
+
"epoch": 0.47,
|
14063 |
+
"learning_rate": 1.1052463312211983e-06,
|
14064 |
+
"loss": 0.6344,
|
14065 |
+
"step": 11615
|
14066 |
+
},
|
14067 |
+
{
|
14068 |
+
"epoch": 0.47,
|
14069 |
+
"learning_rate": 1.1046053803030637e-06,
|
14070 |
+
"loss": 0.654,
|
14071 |
+
"step": 11620
|
14072 |
+
},
|
14073 |
+
{
|
14074 |
+
"epoch": 0.47,
|
14075 |
+
"learning_rate": 1.1039643859327635e-06,
|
14076 |
+
"loss": 0.6741,
|
14077 |
+
"step": 11625
|
14078 |
+
},
|
14079 |
+
{
|
14080 |
+
"epoch": 0.47,
|
14081 |
+
"learning_rate": 1.1033233483765615e-06,
|
14082 |
+
"loss": 0.6814,
|
14083 |
+
"step": 11630
|
14084 |
+
},
|
14085 |
+
{
|
14086 |
+
"epoch": 0.47,
|
14087 |
+
"learning_rate": 1.1026822679007395e-06,
|
14088 |
+
"loss": 0.6565,
|
14089 |
+
"step": 11635
|
14090 |
+
},
|
14091 |
+
{
|
14092 |
+
"epoch": 0.47,
|
14093 |
+
"learning_rate": 1.1020411447715961e-06,
|
14094 |
+
"loss": 0.6916,
|
14095 |
+
"step": 11640
|
14096 |
+
},
|
14097 |
+
{
|
14098 |
+
"epoch": 0.47,
|
14099 |
+
"learning_rate": 1.1013999792554486e-06,
|
14100 |
+
"loss": 0.6894,
|
14101 |
+
"step": 11645
|
14102 |
+
},
|
14103 |
+
{
|
14104 |
+
"epoch": 0.47,
|
14105 |
+
"learning_rate": 1.1007587716186317e-06,
|
14106 |
+
"loss": 0.6698,
|
14107 |
+
"step": 11650
|
14108 |
+
},
|
14109 |
+
{
|
14110 |
+
"epoch": 0.47,
|
14111 |
+
"learning_rate": 1.1001175221274968e-06,
|
14112 |
+
"loss": 0.7096,
|
14113 |
+
"step": 11655
|
14114 |
+
},
|
14115 |
+
{
|
14116 |
+
"epoch": 0.47,
|
14117 |
+
"learning_rate": 1.0994762310484142e-06,
|
14118 |
+
"loss": 0.6887,
|
14119 |
+
"step": 11660
|
14120 |
+
},
|
14121 |
+
{
|
14122 |
+
"epoch": 0.47,
|
14123 |
+
"learning_rate": 1.0988348986477705e-06,
|
14124 |
+
"loss": 0.671,
|
14125 |
+
"step": 11665
|
14126 |
+
},
|
14127 |
+
{
|
14128 |
+
"epoch": 0.47,
|
14129 |
+
"learning_rate": 1.0981935251919693e-06,
|
14130 |
+
"loss": 0.6727,
|
14131 |
+
"step": 11670
|
14132 |
+
},
|
14133 |
+
{
|
14134 |
+
"epoch": 0.47,
|
14135 |
+
"learning_rate": 1.0975521109474318e-06,
|
14136 |
+
"loss": 0.6777,
|
14137 |
+
"step": 11675
|
14138 |
+
},
|
14139 |
+
{
|
14140 |
+
"epoch": 0.47,
|
14141 |
+
"learning_rate": 1.0969106561805952e-06,
|
14142 |
+
"loss": 0.6661,
|
14143 |
+
"step": 11680
|
14144 |
+
},
|
14145 |
+
{
|
14146 |
+
"epoch": 0.47,
|
14147 |
+
"learning_rate": 1.0962691611579154e-06,
|
14148 |
+
"loss": 0.6576,
|
14149 |
+
"step": 11685
|
14150 |
+
},
|
14151 |
+
{
|
14152 |
+
"epoch": 0.47,
|
14153 |
+
"learning_rate": 1.0956276261458629e-06,
|
14154 |
+
"loss": 0.6415,
|
14155 |
+
"step": 11690
|
14156 |
+
},
|
14157 |
+
{
|
14158 |
+
"epoch": 0.48,
|
14159 |
+
"learning_rate": 1.0949860514109264e-06,
|
14160 |
+
"loss": 0.6485,
|
14161 |
+
"step": 11695
|
14162 |
+
},
|
14163 |
+
{
|
14164 |
+
"epoch": 0.48,
|
14165 |
+
"learning_rate": 1.09434443721961e-06,
|
14166 |
+
"loss": 0.6966,
|
14167 |
+
"step": 11700
|
14168 |
+
},
|
14169 |
+
{
|
14170 |
+
"epoch": 0.48,
|
14171 |
+
"learning_rate": 1.0937027838384345e-06,
|
14172 |
+
"loss": 0.6746,
|
14173 |
+
"step": 11705
|
14174 |
+
},
|
14175 |
+
{
|
14176 |
+
"epoch": 0.48,
|
14177 |
+
"learning_rate": 1.093061091533938e-06,
|
14178 |
+
"loss": 0.6565,
|
14179 |
+
"step": 11710
|
14180 |
+
},
|
14181 |
+
{
|
14182 |
+
"epoch": 0.48,
|
14183 |
+
"learning_rate": 1.0924193605726733e-06,
|
14184 |
+
"loss": 0.7127,
|
14185 |
+
"step": 11715
|
14186 |
+
},
|
14187 |
+
{
|
14188 |
+
"epoch": 0.48,
|
14189 |
+
"learning_rate": 1.0917775912212099e-06,
|
14190 |
+
"loss": 0.6738,
|
14191 |
+
"step": 11720
|
14192 |
+
},
|
14193 |
+
{
|
14194 |
+
"epoch": 0.48,
|
14195 |
+
"learning_rate": 1.0911357837461332e-06,
|
14196 |
+
"loss": 0.6712,
|
14197 |
+
"step": 11725
|
14198 |
+
},
|
14199 |
+
{
|
14200 |
+
"epoch": 0.48,
|
14201 |
+
"learning_rate": 1.0904939384140445e-06,
|
14202 |
+
"loss": 0.6617,
|
14203 |
+
"step": 11730
|
14204 |
+
},
|
14205 |
+
{
|
14206 |
+
"epoch": 0.48,
|
14207 |
+
"learning_rate": 1.0898520554915607e-06,
|
14208 |
+
"loss": 0.6919,
|
14209 |
+
"step": 11735
|
14210 |
+
},
|
14211 |
+
{
|
14212 |
+
"epoch": 0.48,
|
14213 |
+
"learning_rate": 1.0892101352453142e-06,
|
14214 |
+
"loss": 0.6731,
|
14215 |
+
"step": 11740
|
14216 |
+
},
|
14217 |
+
{
|
14218 |
+
"epoch": 0.48,
|
14219 |
+
"learning_rate": 1.0885681779419537e-06,
|
14220 |
+
"loss": 0.6931,
|
14221 |
+
"step": 11745
|
14222 |
+
},
|
14223 |
+
{
|
14224 |
+
"epoch": 0.48,
|
14225 |
+
"learning_rate": 1.0879261838481426e-06,
|
14226 |
+
"loss": 0.631,
|
14227 |
+
"step": 11750
|
14228 |
+
},
|
14229 |
+
{
|
14230 |
+
"epoch": 0.48,
|
14231 |
+
"learning_rate": 1.0872841532305587e-06,
|
14232 |
+
"loss": 0.6515,
|
14233 |
+
"step": 11755
|
14234 |
+
},
|
14235 |
+
{
|
14236 |
+
"epoch": 0.48,
|
14237 |
+
"learning_rate": 1.0866420863558969e-06,
|
14238 |
+
"loss": 0.6674,
|
14239 |
+
"step": 11760
|
14240 |
+
},
|
14241 |
+
{
|
14242 |
+
"epoch": 0.48,
|
14243 |
+
"learning_rate": 1.0859999834908657e-06,
|
14244 |
+
"loss": 0.6514,
|
14245 |
+
"step": 11765
|
14246 |
+
},
|
14247 |
+
{
|
14248 |
+
"epoch": 0.48,
|
14249 |
+
"learning_rate": 1.0853578449021896e-06,
|
14250 |
+
"loss": 0.6393,
|
14251 |
+
"step": 11770
|
14252 |
+
},
|
14253 |
+
{
|
14254 |
+
"epoch": 0.48,
|
14255 |
+
"learning_rate": 1.084715670856607e-06,
|
14256 |
+
"loss": 0.6941,
|
14257 |
+
"step": 11775
|
14258 |
+
},
|
14259 |
+
{
|
14260 |
+
"epoch": 0.48,
|
14261 |
+
"learning_rate": 1.0840734616208712e-06,
|
14262 |
+
"loss": 0.664,
|
14263 |
+
"step": 11780
|
14264 |
+
},
|
14265 |
+
{
|
14266 |
+
"epoch": 0.48,
|
14267 |
+
"learning_rate": 1.0834312174617508e-06,
|
14268 |
+
"loss": 0.6636,
|
14269 |
+
"step": 11785
|
14270 |
+
},
|
14271 |
+
{
|
14272 |
+
"epoch": 0.48,
|
14273 |
+
"learning_rate": 1.0827889386460281e-06,
|
14274 |
+
"loss": 0.6756,
|
14275 |
+
"step": 11790
|
14276 |
+
},
|
14277 |
+
{
|
14278 |
+
"epoch": 0.48,
|
14279 |
+
"learning_rate": 1.0821466254405004e-06,
|
14280 |
+
"loss": 0.7116,
|
14281 |
+
"step": 11795
|
14282 |
+
},
|
14283 |
+
{
|
14284 |
+
"epoch": 0.48,
|
14285 |
+
"learning_rate": 1.0815042781119788e-06,
|
14286 |
+
"loss": 0.6647,
|
14287 |
+
"step": 11800
|
14288 |
+
},
|
14289 |
+
{
|
14290 |
+
"epoch": 0.48,
|
14291 |
+
"learning_rate": 1.0808618969272888e-06,
|
14292 |
+
"loss": 0.6404,
|
14293 |
+
"step": 11805
|
14294 |
+
},
|
14295 |
+
{
|
14296 |
+
"epoch": 0.48,
|
14297 |
+
"learning_rate": 1.0802194821532702e-06,
|
14298 |
+
"loss": 0.6711,
|
14299 |
+
"step": 11810
|
14300 |
+
},
|
14301 |
+
{
|
14302 |
+
"epoch": 0.48,
|
14303 |
+
"learning_rate": 1.079577034056776e-06,
|
14304 |
+
"loss": 0.6717,
|
14305 |
+
"step": 11815
|
14306 |
+
},
|
14307 |
+
{
|
14308 |
+
"epoch": 0.48,
|
14309 |
+
"learning_rate": 1.078934552904674e-06,
|
14310 |
+
"loss": 0.6705,
|
14311 |
+
"step": 11820
|
14312 |
+
},
|
14313 |
+
{
|
14314 |
+
"epoch": 0.48,
|
14315 |
+
"learning_rate": 1.0782920389638452e-06,
|
14316 |
+
"loss": 0.6713,
|
14317 |
+
"step": 11825
|
14318 |
+
},
|
14319 |
+
{
|
14320 |
+
"epoch": 0.48,
|
14321 |
+
"learning_rate": 1.0776494925011846e-06,
|
14322 |
+
"loss": 0.6474,
|
14323 |
+
"step": 11830
|
14324 |
+
},
|
14325 |
+
{
|
14326 |
+
"epoch": 0.48,
|
14327 |
+
"learning_rate": 1.0770069137836e-06,
|
14328 |
+
"loss": 0.6745,
|
14329 |
+
"step": 11835
|
14330 |
+
},
|
14331 |
+
{
|
14332 |
+
"epoch": 0.48,
|
14333 |
+
"learning_rate": 1.0763643030780126e-06,
|
14334 |
+
"loss": 0.6773,
|
14335 |
+
"step": 11840
|
14336 |
+
},
|
14337 |
+
{
|
14338 |
+
"epoch": 0.48,
|
14339 |
+
"learning_rate": 1.075721660651358e-06,
|
14340 |
+
"loss": 0.6767,
|
14341 |
+
"step": 11845
|
14342 |
+
},
|
14343 |
+
{
|
14344 |
+
"epoch": 0.48,
|
14345 |
+
"learning_rate": 1.0750789867705843e-06,
|
14346 |
+
"loss": 0.6758,
|
14347 |
+
"step": 11850
|
14348 |
+
},
|
14349 |
+
{
|
14350 |
+
"epoch": 0.48,
|
14351 |
+
"learning_rate": 1.0744362817026524e-06,
|
14352 |
+
"loss": 0.686,
|
14353 |
+
"step": 11855
|
14354 |
+
},
|
14355 |
+
{
|
14356 |
+
"epoch": 0.48,
|
14357 |
+
"learning_rate": 1.0737935457145364e-06,
|
14358 |
+
"loss": 0.6736,
|
14359 |
+
"step": 11860
|
14360 |
+
},
|
14361 |
+
{
|
14362 |
+
"epoch": 0.48,
|
14363 |
+
"learning_rate": 1.073150779073223e-06,
|
14364 |
+
"loss": 0.668,
|
14365 |
+
"step": 11865
|
14366 |
+
},
|
14367 |
+
{
|
14368 |
+
"epoch": 0.48,
|
14369 |
+
"learning_rate": 1.0725079820457123e-06,
|
14370 |
+
"loss": 0.7003,
|
14371 |
+
"step": 11870
|
14372 |
+
},
|
14373 |
+
{
|
14374 |
+
"epoch": 0.48,
|
14375 |
+
"learning_rate": 1.0718651548990163e-06,
|
14376 |
+
"loss": 0.682,
|
14377 |
+
"step": 11875
|
14378 |
+
},
|
14379 |
+
{
|
14380 |
+
"epoch": 0.48,
|
14381 |
+
"learning_rate": 1.0712222979001602e-06,
|
14382 |
+
"loss": 0.6445,
|
14383 |
+
"step": 11880
|
14384 |
+
},
|
14385 |
+
{
|
14386 |
+
"epoch": 0.48,
|
14387 |
+
"learning_rate": 1.0705794113161808e-06,
|
14388 |
+
"loss": 0.6872,
|
14389 |
+
"step": 11885
|
14390 |
+
},
|
14391 |
+
{
|
14392 |
+
"epoch": 0.48,
|
14393 |
+
"learning_rate": 1.0699364954141276e-06,
|
14394 |
+
"loss": 0.6936,
|
14395 |
+
"step": 11890
|
14396 |
+
},
|
14397 |
+
{
|
14398 |
+
"epoch": 0.48,
|
14399 |
+
"learning_rate": 1.0692935504610625e-06,
|
14400 |
+
"loss": 0.6195,
|
14401 |
+
"step": 11895
|
14402 |
+
},
|
14403 |
+
{
|
14404 |
+
"epoch": 0.48,
|
14405 |
+
"learning_rate": 1.068650576724059e-06,
|
14406 |
+
"loss": 0.6658,
|
14407 |
+
"step": 11900
|
14408 |
+
},
|
14409 |
+
{
|
14410 |
+
"epoch": 0.48,
|
14411 |
+
"learning_rate": 1.0680075744702034e-06,
|
14412 |
+
"loss": 0.6799,
|
14413 |
+
"step": 11905
|
14414 |
+
},
|
14415 |
+
{
|
14416 |
+
"epoch": 0.48,
|
14417 |
+
"learning_rate": 1.0673645439665925e-06,
|
14418 |
+
"loss": 0.6823,
|
14419 |
+
"step": 11910
|
14420 |
+
},
|
14421 |
+
{
|
14422 |
+
"epoch": 0.48,
|
14423 |
+
"learning_rate": 1.0667214854803357e-06,
|
14424 |
+
"loss": 0.6677,
|
14425 |
+
"step": 11915
|
14426 |
+
},
|
14427 |
+
{
|
14428 |
+
"epoch": 0.48,
|
14429 |
+
"learning_rate": 1.0660783992785541e-06,
|
14430 |
+
"loss": 0.6648,
|
14431 |
+
"step": 11920
|
14432 |
+
},
|
14433 |
+
{
|
14434 |
+
"epoch": 0.48,
|
14435 |
+
"learning_rate": 1.06543528562838e-06,
|
14436 |
+
"loss": 0.6313,
|
14437 |
+
"step": 11925
|
14438 |
+
},
|
14439 |
+
{
|
14440 |
+
"epoch": 0.48,
|
14441 |
+
"learning_rate": 1.0647921447969577e-06,
|
14442 |
+
"loss": 0.667,
|
14443 |
+
"step": 11930
|
14444 |
+
},
|
14445 |
+
{
|
14446 |
+
"epoch": 0.48,
|
14447 |
+
"learning_rate": 1.0641489770514418e-06,
|
14448 |
+
"loss": 0.6567,
|
14449 |
+
"step": 11935
|
14450 |
+
},
|
14451 |
+
{
|
14452 |
+
"epoch": 0.48,
|
14453 |
+
"learning_rate": 1.0635057826589987e-06,
|
14454 |
+
"loss": 0.6727,
|
14455 |
+
"step": 11940
|
14456 |
+
},
|
14457 |
+
{
|
14458 |
+
"epoch": 0.49,
|
14459 |
+
"learning_rate": 1.0628625618868056e-06,
|
14460 |
+
"loss": 0.6835,
|
14461 |
+
"step": 11945
|
14462 |
+
},
|
14463 |
+
{
|
14464 |
+
"epoch": 0.49,
|
14465 |
+
"learning_rate": 1.062219315002051e-06,
|
14466 |
+
"loss": 0.6329,
|
14467 |
+
"step": 11950
|
14468 |
+
},
|
14469 |
+
{
|
14470 |
+
"epoch": 0.49,
|
14471 |
+
"learning_rate": 1.061576042271934e-06,
|
14472 |
+
"loss": 0.6823,
|
14473 |
+
"step": 11955
|
14474 |
+
},
|
14475 |
+
{
|
14476 |
+
"epoch": 0.49,
|
14477 |
+
"learning_rate": 1.0609327439636647e-06,
|
14478 |
+
"loss": 0.6514,
|
14479 |
+
"step": 11960
|
14480 |
+
},
|
14481 |
+
{
|
14482 |
+
"epoch": 0.49,
|
14483 |
+
"learning_rate": 1.0602894203444633e-06,
|
14484 |
+
"loss": 0.716,
|
14485 |
+
"step": 11965
|
14486 |
+
},
|
14487 |
+
{
|
14488 |
+
"epoch": 0.49,
|
14489 |
+
"learning_rate": 1.0596460716815612e-06,
|
14490 |
+
"loss": 0.6778,
|
14491 |
+
"step": 11970
|
14492 |
+
},
|
14493 |
+
{
|
14494 |
+
"epoch": 0.49,
|
14495 |
+
"learning_rate": 1.059002698242199e-06,
|
14496 |
+
"loss": 0.6671,
|
14497 |
+
"step": 11975
|
14498 |
+
},
|
14499 |
+
{
|
14500 |
+
"epoch": 0.49,
|
14501 |
+
"learning_rate": 1.0583593002936298e-06,
|
14502 |
+
"loss": 0.6936,
|
14503 |
+
"step": 11980
|
14504 |
+
},
|
14505 |
+
{
|
14506 |
+
"epoch": 0.49,
|
14507 |
+
"learning_rate": 1.0577158781031147e-06,
|
14508 |
+
"loss": 0.7211,
|
14509 |
+
"step": 11985
|
14510 |
+
},
|
14511 |
+
{
|
14512 |
+
"epoch": 0.49,
|
14513 |
+
"learning_rate": 1.0570724319379254e-06,
|
14514 |
+
"loss": 0.6667,
|
14515 |
+
"step": 11990
|
14516 |
+
},
|
14517 |
+
{
|
14518 |
+
"epoch": 0.49,
|
14519 |
+
"learning_rate": 1.0564289620653446e-06,
|
14520 |
+
"loss": 0.7181,
|
14521 |
+
"step": 11995
|
14522 |
+
},
|
14523 |
+
{
|
14524 |
+
"epoch": 0.49,
|
14525 |
+
"learning_rate": 1.0557854687526632e-06,
|
14526 |
+
"loss": 0.6431,
|
14527 |
+
"step": 12000
|
14528 |
+
},
|
14529 |
+
{
|
14530 |
+
"epoch": 0.49,
|
14531 |
+
"eval_loss": 0.6379530429840088,
|
14532 |
+
"eval_runtime": 144.8913,
|
14533 |
+
"eval_samples_per_second": 16.329,
|
14534 |
+
"eval_steps_per_second": 2.726,
|
14535 |
+
"step": 12000
|
14536 |
}
|
14537 |
],
|
14538 |
"logging_steps": 5,
|
|
|
14540 |
"num_input_tokens_seen": 0,
|
14541 |
"num_train_epochs": 1,
|
14542 |
"save_steps": 400,
|
14543 |
+
"total_flos": 1676879253282816.0,
|
14544 |
"trial_name": null,
|
14545 |
"trial_params": null
|
14546 |
}
|