|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.2053878141384922, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 1.9456, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3010299956639812e-05, |
|
"loss": 1.6437, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4771212547196626e-05, |
|
"loss": 1.6438, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6020599913279625e-05, |
|
"loss": 1.493, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6989700043360187e-05, |
|
"loss": 1.5275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7781512503836436e-05, |
|
"loss": 1.4028, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.845098040014257e-05, |
|
"loss": 1.4608, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9030899869919437e-05, |
|
"loss": 1.2957, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.954242509439325e-05, |
|
"loss": 1.3968, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3591, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0413926851582255e-05, |
|
"loss": 1.3462, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0791812460476247e-05, |
|
"loss": 1.3115, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1139433523068368e-05, |
|
"loss": 1.2937, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1461280356782382e-05, |
|
"loss": 1.2266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1760912590556812e-05, |
|
"loss": 1.1994, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.204119982655925e-05, |
|
"loss": 1.2548, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.2304489213782743e-05, |
|
"loss": 1.1843, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.255272505103306e-05, |
|
"loss": 1.2799, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.278753600952829e-05, |
|
"loss": 1.167, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.301029995663981e-05, |
|
"loss": 1.129, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3222192947339193e-05, |
|
"loss": 1.2135, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3424226808222067e-05, |
|
"loss": 1.2332, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.361727836017593e-05, |
|
"loss": 1.1461, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.380211241711606e-05, |
|
"loss": 1.1849, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3979400086720374e-05, |
|
"loss": 1.2342, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.414973347970818e-05, |
|
"loss": 1.2376, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.4313637641589878e-05, |
|
"loss": 1.1966, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.4471580313422194e-05, |
|
"loss": 1.2426, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.462397997898956e-05, |
|
"loss": 1.1851, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.4771212547196627e-05, |
|
"loss": 1.1906, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.491361693834273e-05, |
|
"loss": 1.1677, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.505149978319906e-05, |
|
"loss": 1.2344, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5185139398778874e-05, |
|
"loss": 1.2005, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5314789170422555e-05, |
|
"loss": 1.2777, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5440680443502755e-05, |
|
"loss": 1.2101, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5563025007672876e-05, |
|
"loss": 1.1983, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.568201724066995e-05, |
|
"loss": 1.2566, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5797835966168102e-05, |
|
"loss": 1.1816, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5910646070264996e-05, |
|
"loss": 1.2346, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6020599913279625e-05, |
|
"loss": 1.2128, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.612783856719736e-05, |
|
"loss": 1.1537, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6232492903979004e-05, |
|
"loss": 1.1787, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6334684555795868e-05, |
|
"loss": 1.1315, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.6434526764861875e-05, |
|
"loss": 1.2758, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.653212513775344e-05, |
|
"loss": 1.1824, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.6627578316815746e-05, |
|
"loss": 1.193, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.6720978579357177e-05, |
|
"loss": 1.2263, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.6812412373755874e-05, |
|
"loss": 1.1467, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.690196080028514e-05, |
|
"loss": 1.1846, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.698970004336019e-05, |
|
"loss": 1.1736, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_get_denotation_accuracy": 0.4383124846701006, |
|
"eval_loss": 0.9204946160316467, |
|
"eval_runtime": 236.4251, |
|
"eval_samples_per_second": 17.244, |
|
"eval_steps_per_second": 1.079, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7075701760979362e-05, |
|
"loss": 1.1186, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7160033436347994e-05, |
|
"loss": 1.0984, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7242758696007892e-05, |
|
"loss": 1.175, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.732393759822969e-05, |
|
"loss": 1.0451, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7403626894942436e-05, |
|
"loss": 1.1391, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7481880270062005e-05, |
|
"loss": 1.2073, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.755874855672492e-05, |
|
"loss": 1.091, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7634279935629375e-05, |
|
"loss": 1.19, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7708520116421442e-05, |
|
"loss": 1.0771, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7781512503836438e-05, |
|
"loss": 1.1513, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7853298350107673e-05, |
|
"loss": 1.0998, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7923916894982543e-05, |
|
"loss": 1.2006, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.799340549453582e-05, |
|
"loss": 1.1671, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8061799739838875e-05, |
|
"loss": 1.0936, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.812913356642856e-05, |
|
"loss": 1.1308, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.819543935541869e-05, |
|
"loss": 1.1227, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8260748027008266e-05, |
|
"loss": 1.1403, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8325089127062363e-05, |
|
"loss": 1.1926, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8388490907372553e-05, |
|
"loss": 1.1912, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8450980400142567e-05, |
|
"loss": 1.1237, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8512583487190755e-05, |
|
"loss": 1.0667, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8573324964312687e-05, |
|
"loss": 1.1059, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.863322860120456e-05, |
|
"loss": 1.0878, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8692317197309762e-05, |
|
"loss": 1.0756, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8750612633917003e-05, |
|
"loss": 1.082, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8808135922807917e-05, |
|
"loss": 1.0643, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.886490725172482e-05, |
|
"loss": 1.1299, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8920946026904808e-05, |
|
"loss": 1.1062, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8976270912904416e-05, |
|
"loss": 1.1176, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9030899869919436e-05, |
|
"loss": 1.1123, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9084850188786496e-05, |
|
"loss": 1.2128, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9138138523837167e-05, |
|
"loss": 1.1562, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.919078092376074e-05, |
|
"loss": 1.1277, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.924279286061882e-05, |
|
"loss": 1.0851, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.929418925714293e-05, |
|
"loss": 1.0564, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.934498451243568e-05, |
|
"loss": 1.1252, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.939519252618619e-05, |
|
"loss": 1.0855, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9444826721501686e-05, |
|
"loss": 1.1313, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9493900066449127e-05, |
|
"loss": 1.1188, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9542425094393252e-05, |
|
"loss": 1.1184, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9590413923210936e-05, |
|
"loss": 1.1324, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9637878273455557e-05, |
|
"loss": 1.0961, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.968482948553935e-05, |
|
"loss": 1.1489, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.973127853599699e-05, |
|
"loss": 1.1094, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.977723605288848e-05, |
|
"loss": 1.0903, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9822712330395685e-05, |
|
"loss": 1.0927, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9867717342662454e-05, |
|
"loss": 1.0697, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.991226075692495e-05, |
|
"loss": 1.1705, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9956351945975502e-05, |
|
"loss": 1.0569, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"loss": 1.08, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_get_denotation_accuracy": 0.4383124846701006, |
|
"eval_loss": 0.9220618009567261, |
|
"eval_runtime": 267.6287, |
|
"eval_samples_per_second": 15.234, |
|
"eval_steps_per_second": 0.953, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9985789473684212e-05, |
|
"loss": 1.0803, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.997e-05, |
|
"loss": 1.0548, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.995421052631579e-05, |
|
"loss": 1.0935, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9938421052631577e-05, |
|
"loss": 1.1247, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.992263157894737e-05, |
|
"loss": 1.0353, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.990684210526316e-05, |
|
"loss": 1.1308, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9891052631578948e-05, |
|
"loss": 1.0504, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9875263157894738e-05, |
|
"loss": 1.123, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.985947368421053e-05, |
|
"loss": 1.1322, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.984368421052632e-05, |
|
"loss": 0.9887, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9827894736842106e-05, |
|
"loss": 1.1453, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9812105263157896e-05, |
|
"loss": 1.0818, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9796315789473683e-05, |
|
"loss": 1.0601, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9780526315789477e-05, |
|
"loss": 1.1313, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9764736842105264e-05, |
|
"loss": 1.1357, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9748947368421054e-05, |
|
"loss": 1.0524, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.973315789473684e-05, |
|
"loss": 1.0806, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.971736842105263e-05, |
|
"loss": 1.0605, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.970157894736842e-05, |
|
"loss": 1.1875, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9685789473684212e-05, |
|
"loss": 1.1273, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.967e-05, |
|
"loss": 1.1252, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.965421052631579e-05, |
|
"loss": 1.0654, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9638421052631583e-05, |
|
"loss": 1.0421, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.962263157894737e-05, |
|
"loss": 1.117, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.960684210526316e-05, |
|
"loss": 1.0849, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9591052631578947e-05, |
|
"loss": 1.1126, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9575263157894737e-05, |
|
"loss": 1.039, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9559473684210528e-05, |
|
"loss": 1.084, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9543684210526318e-05, |
|
"loss": 1.1559, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9527894736842105e-05, |
|
"loss": 1.1139, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9512105263157895e-05, |
|
"loss": 0.9822, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9496315789473682e-05, |
|
"loss": 1.0209, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9480526315789476e-05, |
|
"loss": 1.0462, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9464736842105263e-05, |
|
"loss": 1.1401, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9448947368421053e-05, |
|
"loss": 1.0874, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.943315789473684e-05, |
|
"loss": 1.1206, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9417368421052634e-05, |
|
"loss": 1.0679, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.940157894736842e-05, |
|
"loss": 1.101, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.938578947368421e-05, |
|
"loss": 0.9896, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9370000000000002e-05, |
|
"loss": 1.1539, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.935421052631579e-05, |
|
"loss": 1.0817, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9338421052631582e-05, |
|
"loss": 1.0821, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.932263157894737e-05, |
|
"loss": 1.1255, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.930684210526316e-05, |
|
"loss": 1.1148, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9291052631578947e-05, |
|
"loss": 1.0495, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9275263157894737e-05, |
|
"loss": 1.1169, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9259473684210527e-05, |
|
"loss": 1.1052, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9243684210526318e-05, |
|
"loss": 1.1417, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9227894736842105e-05, |
|
"loss": 0.9703, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9212105263157895e-05, |
|
"loss": 1.0687, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_get_denotation_accuracy": 0.4432180524895757, |
|
"eval_loss": 0.8963027000427246, |
|
"eval_runtime": 270.68, |
|
"eval_samples_per_second": 15.062, |
|
"eval_steps_per_second": 0.942, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9196315789473685e-05, |
|
"loss": 1.0476, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9180526315789476e-05, |
|
"loss": 1.1054, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9164736842105263e-05, |
|
"loss": 1.0612, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9148947368421053e-05, |
|
"loss": 1.0617, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9133157894736843e-05, |
|
"loss": 1.1465, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9117368421052634e-05, |
|
"loss": 1.0275, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9101578947368424e-05, |
|
"loss": 1.1222, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.908578947368421e-05, |
|
"loss": 1.0845, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.907e-05, |
|
"loss": 1.0929, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9054210526315788e-05, |
|
"loss": 1.1213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9038421052631582e-05, |
|
"loss": 1.0569, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.902263157894737e-05, |
|
"loss": 1.0698, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.900684210526316e-05, |
|
"loss": 1.0309, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8991052631578946e-05, |
|
"loss": 1.0961, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.897526315789474e-05, |
|
"loss": 1.1316, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8959473684210527e-05, |
|
"loss": 1.073, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8943684210526317e-05, |
|
"loss": 1.077, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8927894736842104e-05, |
|
"loss": 1.0776, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8912105263157894e-05, |
|
"loss": 1.0831, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8896315789473685e-05, |
|
"loss": 1.0339, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8880526315789475e-05, |
|
"loss": 1.0182, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8864736842105265e-05, |
|
"loss": 1.1317, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8848947368421052e-05, |
|
"loss": 1.0502, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8833157894736843e-05, |
|
"loss": 1.0111, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8817368421052633e-05, |
|
"loss": 1.0059, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8801578947368423e-05, |
|
"loss": 1.0628, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.878578947368421e-05, |
|
"loss": 1.0761, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.877e-05, |
|
"loss": 1.0605, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.875421052631579e-05, |
|
"loss": 1.0272, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.873842105263158e-05, |
|
"loss": 1.0903, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8722631578947368e-05, |
|
"loss": 1.1161, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.870684210526316e-05, |
|
"loss": 1.0112, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8691052631578946e-05, |
|
"loss": 1.042, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.867526315789474e-05, |
|
"loss": 1.0366, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8659473684210526e-05, |
|
"loss": 1.0757, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8643684210526317e-05, |
|
"loss": 1.0347, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8627894736842104e-05, |
|
"loss": 1.0715, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8612105263157894e-05, |
|
"loss": 0.9993, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8596315789473688e-05, |
|
"loss": 1.0156, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8580526315789475e-05, |
|
"loss": 1.0539, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8564736842105265e-05, |
|
"loss": 1.0708, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8548947368421052e-05, |
|
"loss": 1.1075, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8533157894736846e-05, |
|
"loss": 1.1084, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8517368421052633e-05, |
|
"loss": 1.0305, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8501578947368423e-05, |
|
"loss": 1.0903, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.848578947368421e-05, |
|
"loss": 1.0329, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.847e-05, |
|
"loss": 1.092, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.845421052631579e-05, |
|
"loss": 1.0535, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.843842105263158e-05, |
|
"loss": 0.9838, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8422631578947368e-05, |
|
"loss": 0.9118, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_get_denotation_accuracy": 0.4255580083394653, |
|
"eval_loss": 0.943108856678009, |
|
"eval_runtime": 233.6145, |
|
"eval_samples_per_second": 17.452, |
|
"eval_steps_per_second": 1.092, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8406842105263158e-05, |
|
"loss": 1.0671, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8391052631578945e-05, |
|
"loss": 1.0006, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.837526315789474e-05, |
|
"loss": 1.0514, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8359473684210526e-05, |
|
"loss": 1.0597, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8343684210526316e-05, |
|
"loss": 1.0267, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8327894736842106e-05, |
|
"loss": 0.9874, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8312105263157897e-05, |
|
"loss": 1.0139, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8296315789473687e-05, |
|
"loss": 0.9834, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8280526315789474e-05, |
|
"loss": 1.1119, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8264736842105264e-05, |
|
"loss": 1.0442, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.824894736842105e-05, |
|
"loss": 1.007, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8233157894736845e-05, |
|
"loss": 1.0275, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8217368421052632e-05, |
|
"loss": 1.0664, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8201578947368422e-05, |
|
"loss": 1.0957, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.818578947368421e-05, |
|
"loss": 1.034, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.817e-05, |
|
"loss": 1.0778, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.815421052631579e-05, |
|
"loss": 1.0958, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.813842105263158e-05, |
|
"loss": 1.0358, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8122631578947367e-05, |
|
"loss": 1.0436, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8106842105263158e-05, |
|
"loss": 1.0577, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8091052631578948e-05, |
|
"loss": 1.0337, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8075263157894738e-05, |
|
"loss": 1.0033, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.805947368421053e-05, |
|
"loss": 1.1064, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8043684210526316e-05, |
|
"loss": 0.9971, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8027894736842106e-05, |
|
"loss": 1.0335, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8012105263157896e-05, |
|
"loss": 0.9978, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7996315789473687e-05, |
|
"loss": 0.9603, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7980526315789474e-05, |
|
"loss": 1.03, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7964736842105264e-05, |
|
"loss": 1.0269, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.794894736842105e-05, |
|
"loss": 1.1107, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7933157894736845e-05, |
|
"loss": 1.0932, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.791736842105263e-05, |
|
"loss": 1.0937, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7901578947368422e-05, |
|
"loss": 1.0952, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.788578947368421e-05, |
|
"loss": 1.0542, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7870000000000003e-05, |
|
"loss": 1.0937, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.785421052631579e-05, |
|
"loss": 1.1312, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.783842105263158e-05, |
|
"loss": 1.0614, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7822631578947367e-05, |
|
"loss": 1.0843, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7806842105263157e-05, |
|
"loss": 1.0934, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.779105263157895e-05, |
|
"loss": 1.0319, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7775263157894738e-05, |
|
"loss": 1.0297, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7759473684210528e-05, |
|
"loss": 0.9999, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7743684210526315e-05, |
|
"loss": 1.0612, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.772789473684211e-05, |
|
"loss": 1.022, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7712105263157896e-05, |
|
"loss": 1.0553, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7696315789473686e-05, |
|
"loss": 1.0087, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7680526315789473e-05, |
|
"loss": 1.0865, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7664736842105263e-05, |
|
"loss": 1.0538, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7648947368421054e-05, |
|
"loss": 1.0232, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7633157894736844e-05, |
|
"loss": 1.0017, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_get_denotation_accuracy": 0.45719892077507973, |
|
"eval_loss": 0.9026257395744324, |
|
"eval_runtime": 232.0877, |
|
"eval_samples_per_second": 17.567, |
|
"eval_steps_per_second": 1.099, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.761736842105263e-05, |
|
"loss": 1.0373, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.760157894736842e-05, |
|
"loss": 1.0501, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7585789473684208e-05, |
|
"loss": 1.0102, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7570000000000002e-05, |
|
"loss": 1.0003, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.755421052631579e-05, |
|
"loss": 1.0132, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.753842105263158e-05, |
|
"loss": 1.1177, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.752263157894737e-05, |
|
"loss": 0.9424, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.750684210526316e-05, |
|
"loss": 1.0153, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.749105263157895e-05, |
|
"loss": 1.0635, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7475263157894737e-05, |
|
"loss": 0.9576, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7459473684210528e-05, |
|
"loss": 1.0302, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7443684210526315e-05, |
|
"loss": 1.0201, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7427894736842108e-05, |
|
"loss": 1.0524, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7412105263157895e-05, |
|
"loss": 1.0559, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7396315789473686e-05, |
|
"loss": 1.018, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7380526315789473e-05, |
|
"loss": 1.0135, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7364736842105263e-05, |
|
"loss": 1.0447, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7348947368421053e-05, |
|
"loss": 1.0601, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7333157894736844e-05, |
|
"loss": 1.0182, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.731736842105263e-05, |
|
"loss": 1.0494, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.730157894736842e-05, |
|
"loss": 1.0011, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.728578947368421e-05, |
|
"loss": 1.0838, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.727e-05, |
|
"loss": 0.9707, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7254210526315792e-05, |
|
"loss": 0.9892, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.723842105263158e-05, |
|
"loss": 1.0623, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.722263157894737e-05, |
|
"loss": 1.0185, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.720684210526316e-05, |
|
"loss": 1.0887, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.719105263157895e-05, |
|
"loss": 1.0204, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7175263157894737e-05, |
|
"loss": 1.015, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7159473684210527e-05, |
|
"loss": 1.0167, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7143684210526314e-05, |
|
"loss": 1.0941, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7127894736842108e-05, |
|
"loss": 1.1229, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7112105263157895e-05, |
|
"loss": 1.0106, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7096315789473685e-05, |
|
"loss": 1.1508, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7080526315789472e-05, |
|
"loss": 1.0192, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7064736842105266e-05, |
|
"loss": 1.0381, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7048947368421053e-05, |
|
"loss": 0.9954, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7033157894736843e-05, |
|
"loss": 1.0687, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7017368421052633e-05, |
|
"loss": 1.0403, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.700157894736842e-05, |
|
"loss": 1.0272, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6985789473684214e-05, |
|
"loss": 1.0096, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.697e-05, |
|
"loss": 0.9641, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.695421052631579e-05, |
|
"loss": 0.975, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6938421052631578e-05, |
|
"loss": 0.9384, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.692263157894737e-05, |
|
"loss": 0.9905, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.690684210526316e-05, |
|
"loss": 0.9804, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.689105263157895e-05, |
|
"loss": 1.0942, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6875263157894736e-05, |
|
"loss": 1.0254, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6859473684210527e-05, |
|
"loss": 1.0575, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6843684210526317e-05, |
|
"loss": 1.0196, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_get_denotation_accuracy": 0.457934755948001, |
|
"eval_loss": 0.876184344291687, |
|
"eval_runtime": 244.0361, |
|
"eval_samples_per_second": 16.707, |
|
"eval_steps_per_second": 1.045, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6827894736842107e-05, |
|
"loss": 1.0563, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6812105263157894e-05, |
|
"loss": 1.0039, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.6796315789473685e-05, |
|
"loss": 0.9485, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.678052631578947e-05, |
|
"loss": 1.0705, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6764736842105265e-05, |
|
"loss": 1.0055, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6748947368421056e-05, |
|
"loss": 1.0035, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6733157894736842e-05, |
|
"loss": 1.0492, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6717368421052633e-05, |
|
"loss": 1.1027, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.670157894736842e-05, |
|
"loss": 1.0883, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6685789473684214e-05, |
|
"loss": 1.0351, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.667e-05, |
|
"loss": 1.0188, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.665421052631579e-05, |
|
"loss": 0.9927, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6638421052631578e-05, |
|
"loss": 0.931, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.662263157894737e-05, |
|
"loss": 1.0144, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.660684210526316e-05, |
|
"loss": 1.0399, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.659105263157895e-05, |
|
"loss": 1.0065, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.6575263157894736e-05, |
|
"loss": 0.9821, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6559473684210526e-05, |
|
"loss": 1.0147, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6543684210526316e-05, |
|
"loss": 1.0128, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6527894736842107e-05, |
|
"loss": 1.1112, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6512105263157894e-05, |
|
"loss": 1.005, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6496315789473684e-05, |
|
"loss": 1.0822, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6480526315789474e-05, |
|
"loss": 1.0297, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6464736842105265e-05, |
|
"loss": 1.0213, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6448947368421055e-05, |
|
"loss": 1.0078, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6433157894736842e-05, |
|
"loss": 1.0381, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6417368421052632e-05, |
|
"loss": 0.9792, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6401578947368423e-05, |
|
"loss": 0.9916, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.6385789473684213e-05, |
|
"loss": 1.1386, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.637e-05, |
|
"loss": 0.9853, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.635421052631579e-05, |
|
"loss": 0.9641, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6338421052631577e-05, |
|
"loss": 1.0471, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.632263157894737e-05, |
|
"loss": 1.0737, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6306842105263158e-05, |
|
"loss": 1.0703, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6291052631578948e-05, |
|
"loss": 0.9755, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6275263157894735e-05, |
|
"loss": 0.9964, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6259473684210526e-05, |
|
"loss": 1.0181, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6243684210526316e-05, |
|
"loss": 1.1007, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6227894736842106e-05, |
|
"loss": 1.0173, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6212105263157897e-05, |
|
"loss": 0.9296, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6196315789473683e-05, |
|
"loss": 0.9584, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6180526315789477e-05, |
|
"loss": 1.0229, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6164736842105264e-05, |
|
"loss": 1.0355, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6148947368421055e-05, |
|
"loss": 1.0586, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.613315789473684e-05, |
|
"loss": 1.0587, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6117368421052632e-05, |
|
"loss": 0.9826, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6101578947368422e-05, |
|
"loss": 0.9878, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6085789473684212e-05, |
|
"loss": 1.0297, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.607e-05, |
|
"loss": 0.9465, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.605421052631579e-05, |
|
"loss": 1.0392, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_get_denotation_accuracy": 0.4650478292862399, |
|
"eval_loss": 0.897882878780365, |
|
"eval_runtime": 223.6292, |
|
"eval_samples_per_second": 18.231, |
|
"eval_steps_per_second": 1.14, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6038421052631577e-05, |
|
"loss": 1.0521, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.602263157894737e-05, |
|
"loss": 1.0067, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.6006842105263157e-05, |
|
"loss": 1.071, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.5991052631578948e-05, |
|
"loss": 1.0192, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5975263157894738e-05, |
|
"loss": 1.0119, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.595947368421053e-05, |
|
"loss": 1.0436, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.594368421052632e-05, |
|
"loss": 0.9783, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5927894736842106e-05, |
|
"loss": 0.927, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5912105263157896e-05, |
|
"loss": 0.9704, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5896315789473683e-05, |
|
"loss": 1.0329, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5880526315789477e-05, |
|
"loss": 0.9621, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5864736842105264e-05, |
|
"loss": 0.9888, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5848947368421054e-05, |
|
"loss": 1.0215, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.583315789473684e-05, |
|
"loss": 1.0729, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.581736842105263e-05, |
|
"loss": 1.0184, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.580157894736842e-05, |
|
"loss": 0.9363, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.5785789473684212e-05, |
|
"loss": 1.0606, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.577e-05, |
|
"loss": 0.9232, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.575421052631579e-05, |
|
"loss": 1.0499, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.573842105263158e-05, |
|
"loss": 1.0413, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.572263157894737e-05, |
|
"loss": 0.9717, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.570684210526316e-05, |
|
"loss": 1.0151, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5691052631578947e-05, |
|
"loss": 1.0533, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5675263157894738e-05, |
|
"loss": 1.022, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5659473684210528e-05, |
|
"loss": 1.0458, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5643684210526318e-05, |
|
"loss": 1.0996, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5627894736842105e-05, |
|
"loss": 0.971, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5612105263157896e-05, |
|
"loss": 1.0158, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5596315789473682e-05, |
|
"loss": 0.9443, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5580526315789476e-05, |
|
"loss": 0.9103, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5564736842105263e-05, |
|
"loss": 1.0915, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5548947368421053e-05, |
|
"loss": 1.043, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.553315789473684e-05, |
|
"loss": 0.9867, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5517368421052634e-05, |
|
"loss": 1.0856, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.550157894736842e-05, |
|
"loss": 1.0218, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.548578947368421e-05, |
|
"loss": 0.9604, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.547e-05, |
|
"loss": 1.0484, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.545421052631579e-05, |
|
"loss": 0.9075, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5438421052631582e-05, |
|
"loss": 1.0553, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.542263157894737e-05, |
|
"loss": 1.0331, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.540684210526316e-05, |
|
"loss": 0.9569, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.5391052631578947e-05, |
|
"loss": 1.0463, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5375263157894737e-05, |
|
"loss": 1.0559, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5359473684210527e-05, |
|
"loss": 1.0503, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5343684210526318e-05, |
|
"loss": 0.9853, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5327894736842105e-05, |
|
"loss": 1.0049, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5312105263157895e-05, |
|
"loss": 1.0293, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5296315789473685e-05, |
|
"loss": 1.0387, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5280526315789476e-05, |
|
"loss": 1.0535, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5264736842105263e-05, |
|
"loss": 0.9677, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_get_denotation_accuracy": 0.4665194996320824, |
|
"eval_loss": 0.889651358127594, |
|
"eval_runtime": 260.5569, |
|
"eval_samples_per_second": 15.647, |
|
"eval_steps_per_second": 0.979, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5248947368421053e-05, |
|
"loss": 1.0439, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.523315789473684e-05, |
|
"loss": 0.9977, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5217368421052634e-05, |
|
"loss": 1.0159, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.520157894736842e-05, |
|
"loss": 1.0015, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.518578947368421e-05, |
|
"loss": 1.0032, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.517e-05, |
|
"loss": 1.0141, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.515421052631579e-05, |
|
"loss": 1.0196, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5138421052631582e-05, |
|
"loss": 1.0079, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.512263157894737e-05, |
|
"loss": 1.101, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.510684210526316e-05, |
|
"loss": 1.0585, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5091052631578946e-05, |
|
"loss": 1.0242, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.507526315789474e-05, |
|
"loss": 0.9472, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5059473684210527e-05, |
|
"loss": 1.0035, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5043684210526317e-05, |
|
"loss": 1.0103, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5027894736842104e-05, |
|
"loss": 1.0457, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5012105263157894e-05, |
|
"loss": 1.0429, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4996315789473685e-05, |
|
"loss": 1.0362, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4980526315789475e-05, |
|
"loss": 1.0056, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4964736842105262e-05, |
|
"loss": 1.014, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4948947368421052e-05, |
|
"loss": 0.9206, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4933157894736843e-05, |
|
"loss": 1.0915, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4917368421052633e-05, |
|
"loss": 0.9606, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4901578947368423e-05, |
|
"loss": 1.0557, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.488578947368421e-05, |
|
"loss": 0.9718, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.487e-05, |
|
"loss": 0.967, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.485421052631579e-05, |
|
"loss": 1.0016, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.483842105263158e-05, |
|
"loss": 1.0913, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.482263157894737e-05, |
|
"loss": 0.9326, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.480684210526316e-05, |
|
"loss": 1.0497, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4791052631578946e-05, |
|
"loss": 0.9351, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.477526315789474e-05, |
|
"loss": 1.024, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4759473684210526e-05, |
|
"loss": 0.9998, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4743684210526317e-05, |
|
"loss": 1.0417, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4727894736842104e-05, |
|
"loss": 0.9122, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4712105263157897e-05, |
|
"loss": 1.1297, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4696315789473684e-05, |
|
"loss": 1.0099, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4680526315789475e-05, |
|
"loss": 1.1269, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.466473684210526e-05, |
|
"loss": 1.0236, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4648947368421052e-05, |
|
"loss": 1.0352, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4633157894736846e-05, |
|
"loss": 0.9973, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4617368421052633e-05, |
|
"loss": 1.0016, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4601578947368423e-05, |
|
"loss": 0.9684, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.458578947368421e-05, |
|
"loss": 1.0226, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.457e-05, |
|
"loss": 0.9922, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.455421052631579e-05, |
|
"loss": 1.0112, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.453842105263158e-05, |
|
"loss": 0.977, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4522631578947368e-05, |
|
"loss": 0.9721, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4506842105263158e-05, |
|
"loss": 1.0738, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.449105263157895e-05, |
|
"loss": 0.9887, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.447526315789474e-05, |
|
"loss": 1.055, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_get_denotation_accuracy": 0.47633063527103264, |
|
"eval_loss": 0.8773789405822754, |
|
"eval_runtime": 254.7458, |
|
"eval_samples_per_second": 16.004, |
|
"eval_steps_per_second": 1.001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4459473684210526e-05, |
|
"loss": 0.953, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4443684210526316e-05, |
|
"loss": 1.0545, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4427894736842103e-05, |
|
"loss": 1.0534, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4412105263157897e-05, |
|
"loss": 1.0373, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4396315789473684e-05, |
|
"loss": 1.002, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4380526315789474e-05, |
|
"loss": 1.0608, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4364736842105264e-05, |
|
"loss": 1.0014, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.434894736842105e-05, |
|
"loss": 1.032, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4333157894736845e-05, |
|
"loss": 1.0434, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4317368421052632e-05, |
|
"loss": 1.057, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4301578947368422e-05, |
|
"loss": 0.9441, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.428578947368421e-05, |
|
"loss": 1.0191, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4270000000000003e-05, |
|
"loss": 1.1007, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.425421052631579e-05, |
|
"loss": 1.0085, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.423842105263158e-05, |
|
"loss": 1.0426, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4222631578947367e-05, |
|
"loss": 1.0341, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4206842105263158e-05, |
|
"loss": 1.0701, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4191052631578948e-05, |
|
"loss": 1.1301, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.417526315789474e-05, |
|
"loss": 1.0237, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4159473684210525e-05, |
|
"loss": 0.9653, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4143684210526316e-05, |
|
"loss": 0.9588, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4127894736842106e-05, |
|
"loss": 0.9547, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4112105263157896e-05, |
|
"loss": 0.9591, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4096315789473687e-05, |
|
"loss": 1.0202, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4080526315789474e-05, |
|
"loss": 1.0633, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4064736842105264e-05, |
|
"loss": 0.9712, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4048947368421054e-05, |
|
"loss": 0.9645, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4033157894736845e-05, |
|
"loss": 0.9802, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.401736842105263e-05, |
|
"loss": 1.0162, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.4001578947368422e-05, |
|
"loss": 1.0307, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.398578947368421e-05, |
|
"loss": 1.0398, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3970000000000003e-05, |
|
"loss": 1.0292, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.395421052631579e-05, |
|
"loss": 0.9821, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.393842105263158e-05, |
|
"loss": 1.0242, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3922631578947367e-05, |
|
"loss": 0.9971, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3906842105263157e-05, |
|
"loss": 0.9161, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3891052631578948e-05, |
|
"loss": 0.9727, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3875263157894738e-05, |
|
"loss": 0.98, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3859473684210528e-05, |
|
"loss": 1.0035, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3843684210526315e-05, |
|
"loss": 0.9578, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.382789473684211e-05, |
|
"loss": 1.0698, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3812105263157896e-05, |
|
"loss": 0.9207, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3796315789473686e-05, |
|
"loss": 0.9419, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3780526315789473e-05, |
|
"loss": 1.0114, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3764736842105263e-05, |
|
"loss": 0.9258, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3748947368421054e-05, |
|
"loss": 0.9218, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3733157894736844e-05, |
|
"loss": 1.0667, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.371736842105263e-05, |
|
"loss": 1.057, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.370157894736842e-05, |
|
"loss": 0.9798, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.368578947368421e-05, |
|
"loss": 1.0044, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_get_denotation_accuracy": 0.46431199411331864, |
|
"eval_loss": 0.9005911350250244, |
|
"eval_runtime": 253.4522, |
|
"eval_samples_per_second": 16.086, |
|
"eval_steps_per_second": 1.006, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3670000000000002e-05, |
|
"loss": 0.984, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.365421052631579e-05, |
|
"loss": 0.9185, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.363842105263158e-05, |
|
"loss": 1.0296, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.3622631578947366e-05, |
|
"loss": 1.0351, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.360684210526316e-05, |
|
"loss": 0.9271, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.359105263157895e-05, |
|
"loss": 1.0033, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3575263157894737e-05, |
|
"loss": 0.9761, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3559473684210528e-05, |
|
"loss": 0.9967, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3543684210526315e-05, |
|
"loss": 1.0026, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.352789473684211e-05, |
|
"loss": 1.0534, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3512105263157895e-05, |
|
"loss": 1.0229, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3496315789473686e-05, |
|
"loss": 0.9959, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3480526315789473e-05, |
|
"loss": 1.0232, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3464736842105263e-05, |
|
"loss": 0.9669, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3448947368421053e-05, |
|
"loss": 1.0366, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3433157894736844e-05, |
|
"loss": 0.991, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.341736842105263e-05, |
|
"loss": 0.9509, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.340157894736842e-05, |
|
"loss": 1.1378, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.338578947368421e-05, |
|
"loss": 1.0201, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.337e-05, |
|
"loss": 0.9643, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.335421052631579e-05, |
|
"loss": 0.9404, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.333842105263158e-05, |
|
"loss": 0.9785, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.332263157894737e-05, |
|
"loss": 0.9684, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.330684210526316e-05, |
|
"loss": 1.0068, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.329105263157895e-05, |
|
"loss": 1.043, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.3275263157894737e-05, |
|
"loss": 0.9789, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.3259473684210527e-05, |
|
"loss": 0.9395, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.3243684210526314e-05, |
|
"loss": 0.9646, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3227894736842108e-05, |
|
"loss": 1.0677, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3212105263157895e-05, |
|
"loss": 1.0751, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3196315789473685e-05, |
|
"loss": 0.9828, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3180526315789472e-05, |
|
"loss": 1.007, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3164736842105266e-05, |
|
"loss": 1.0219, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3148947368421053e-05, |
|
"loss": 1.0095, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.3133157894736843e-05, |
|
"loss": 1.0138, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.311736842105263e-05, |
|
"loss": 0.9335, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.310157894736842e-05, |
|
"loss": 0.9974, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.308578947368421e-05, |
|
"loss": 0.9958, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.307e-05, |
|
"loss": 1.0, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.305421052631579e-05, |
|
"loss": 0.9487, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.303842105263158e-05, |
|
"loss": 0.9297, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.302263157894737e-05, |
|
"loss": 0.9855, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.300684210526316e-05, |
|
"loss": 0.9853, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.299105263157895e-05, |
|
"loss": 1.0419, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2975263157894736e-05, |
|
"loss": 1.0321, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2959473684210527e-05, |
|
"loss": 1.0031, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2943684210526317e-05, |
|
"loss": 0.9801, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2927894736842107e-05, |
|
"loss": 0.9084, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2912105263157894e-05, |
|
"loss": 0.9604, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2896315789473685e-05, |
|
"loss": 0.9234, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_get_denotation_accuracy": 0.46701005641402993, |
|
"eval_loss": 0.9108076095581055, |
|
"eval_runtime": 268.8446, |
|
"eval_samples_per_second": 15.165, |
|
"eval_steps_per_second": 0.949, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.288052631578947e-05, |
|
"loss": 0.9886, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2864736842105265e-05, |
|
"loss": 0.9723, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.2848947368421052e-05, |
|
"loss": 1.0236, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2833157894736843e-05, |
|
"loss": 1.0294, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2817368421052633e-05, |
|
"loss": 0.9711, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.280157894736842e-05, |
|
"loss": 0.9831, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2785789473684214e-05, |
|
"loss": 1.0243, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.277e-05, |
|
"loss": 0.9906, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.275421052631579e-05, |
|
"loss": 1.007, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2738421052631578e-05, |
|
"loss": 0.9364, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.272263157894737e-05, |
|
"loss": 1.0508, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.270684210526316e-05, |
|
"loss": 1.0433, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.269105263157895e-05, |
|
"loss": 1.0452, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2675263157894736e-05, |
|
"loss": 1.0136, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2659473684210526e-05, |
|
"loss": 1.0191, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2643684210526316e-05, |
|
"loss": 0.9874, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2627894736842107e-05, |
|
"loss": 1.044, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2612105263157894e-05, |
|
"loss": 1.0112, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2596315789473684e-05, |
|
"loss": 1.0305, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2580526315789474e-05, |
|
"loss": 1.0982, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2564736842105265e-05, |
|
"loss": 0.9984, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2548947368421055e-05, |
|
"loss": 1.0257, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2533157894736842e-05, |
|
"loss": 0.979, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2517368421052632e-05, |
|
"loss": 1.0156, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2501578947368423e-05, |
|
"loss": 1.0505, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.2485789473684213e-05, |
|
"loss": 0.994, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.247e-05, |
|
"loss": 0.9947, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.245421052631579e-05, |
|
"loss": 1.0242, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2438421052631577e-05, |
|
"loss": 1.0089, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.242263157894737e-05, |
|
"loss": 0.9797, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2406842105263158e-05, |
|
"loss": 0.9705, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.239105263157895e-05, |
|
"loss": 0.9908, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2375263157894735e-05, |
|
"loss": 0.9629, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.235947368421053e-05, |
|
"loss": 0.9659, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2343684210526316e-05, |
|
"loss": 1.0484, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2327894736842106e-05, |
|
"loss": 1.0439, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2312105263157893e-05, |
|
"loss": 0.9348, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2296315789473684e-05, |
|
"loss": 0.956, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2280526315789477e-05, |
|
"loss": 1.0675, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2264736842105264e-05, |
|
"loss": 0.9814, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2248947368421055e-05, |
|
"loss": 0.9494, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.223315789473684e-05, |
|
"loss": 0.9037, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2217368421052632e-05, |
|
"loss": 1.0346, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2201578947368422e-05, |
|
"loss": 0.9887, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2185789473684213e-05, |
|
"loss": 0.9585, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.217e-05, |
|
"loss": 0.9893, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.215421052631579e-05, |
|
"loss": 1.0578, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.213842105263158e-05, |
|
"loss": 0.9491, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.212263157894737e-05, |
|
"loss": 0.9802, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2106842105263157e-05, |
|
"loss": 0.9155, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_get_denotation_accuracy": 0.4527839097375521, |
|
"eval_loss": 0.9444504976272583, |
|
"eval_runtime": 257.8608, |
|
"eval_samples_per_second": 15.811, |
|
"eval_steps_per_second": 0.989, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2091052631578948e-05, |
|
"loss": 1.0323, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2075263157894735e-05, |
|
"loss": 0.9507, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.205947368421053e-05, |
|
"loss": 1.0078, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.2043684210526315e-05, |
|
"loss": 1.0729, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.2027894736842106e-05, |
|
"loss": 1.0941, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.2012105263157896e-05, |
|
"loss": 1.0244, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1996315789473683e-05, |
|
"loss": 1.0042, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1980526315789477e-05, |
|
"loss": 0.93, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1964736842105264e-05, |
|
"loss": 1.0543, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1948947368421054e-05, |
|
"loss": 1.0498, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.193315789473684e-05, |
|
"loss": 0.9669, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1917368421052635e-05, |
|
"loss": 0.9927, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.190157894736842e-05, |
|
"loss": 0.9735, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1885789473684212e-05, |
|
"loss": 0.9773, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.187e-05, |
|
"loss": 0.9675, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.185421052631579e-05, |
|
"loss": 1.062, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.183842105263158e-05, |
|
"loss": 0.9643, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.182263157894737e-05, |
|
"loss": 0.9739, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1806842105263157e-05, |
|
"loss": 0.9491, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1791052631578947e-05, |
|
"loss": 0.9716, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1775263157894734e-05, |
|
"loss": 0.9303, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1759473684210528e-05, |
|
"loss": 0.9936, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1743684210526318e-05, |
|
"loss": 0.9463, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1727894736842105e-05, |
|
"loss": 0.9315, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1712105263157896e-05, |
|
"loss": 1.0116, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1696315789473686e-05, |
|
"loss": 0.9661, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1680526315789476e-05, |
|
"loss": 0.9708, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1664736842105263e-05, |
|
"loss": 0.9284, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.1648947368421054e-05, |
|
"loss": 0.969, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.163315789473684e-05, |
|
"loss": 0.9366, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.1617368421052634e-05, |
|
"loss": 0.9587, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.160157894736842e-05, |
|
"loss": 1.031, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.158578947368421e-05, |
|
"loss": 0.9711, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.157e-05, |
|
"loss": 0.9997, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.155421052631579e-05, |
|
"loss": 1.0411, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.153842105263158e-05, |
|
"loss": 1.0691, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.152263157894737e-05, |
|
"loss": 1.0019, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.1506842105263156e-05, |
|
"loss": 0.9699, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.1491052631578947e-05, |
|
"loss": 0.9594, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.147526315789474e-05, |
|
"loss": 0.9815, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1459473684210527e-05, |
|
"loss": 1.0267, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1443684210526318e-05, |
|
"loss": 1.0074, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1427894736842105e-05, |
|
"loss": 0.9747, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1412105263157895e-05, |
|
"loss": 0.9391, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1396315789473685e-05, |
|
"loss": 1.0059, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1380526315789476e-05, |
|
"loss": 0.9702, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1364736842105263e-05, |
|
"loss": 0.9454, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1348947368421053e-05, |
|
"loss": 0.9945, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.133315789473684e-05, |
|
"loss": 0.9342, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1317368421052634e-05, |
|
"loss": 0.9979, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_get_denotation_accuracy": 0.47387785136129507, |
|
"eval_loss": 0.8795158863067627, |
|
"eval_runtime": 249.8736, |
|
"eval_samples_per_second": 16.316, |
|
"eval_steps_per_second": 1.021, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.130157894736842e-05, |
|
"loss": 0.9221, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.128578947368421e-05, |
|
"loss": 0.9871, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.1269999999999998e-05, |
|
"loss": 0.9627, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.125421052631579e-05, |
|
"loss": 0.9246, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.123842105263158e-05, |
|
"loss": 1.0387, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.122263157894737e-05, |
|
"loss": 0.9356, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.120684210526316e-05, |
|
"loss": 0.9432, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1191052631578946e-05, |
|
"loss": 0.9311, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.117526315789474e-05, |
|
"loss": 0.9183, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1159473684210527e-05, |
|
"loss": 0.9224, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1143684210526317e-05, |
|
"loss": 0.9364, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1127894736842104e-05, |
|
"loss": 1.0059, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1112105263157895e-05, |
|
"loss": 0.9316, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1096315789473685e-05, |
|
"loss": 0.9801, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.1080526315789475e-05, |
|
"loss": 0.9428, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1064736842105262e-05, |
|
"loss": 0.9937, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1048947368421053e-05, |
|
"loss": 0.9906, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1033157894736843e-05, |
|
"loss": 1.0346, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1017368421052633e-05, |
|
"loss": 0.9906, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.100157894736842e-05, |
|
"loss": 1.0524, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.098578947368421e-05, |
|
"loss": 0.961, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.097e-05, |
|
"loss": 0.9522, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.095421052631579e-05, |
|
"loss": 0.9366, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.093842105263158e-05, |
|
"loss": 1.0735, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.092263157894737e-05, |
|
"loss": 0.8852, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.090684210526316e-05, |
|
"loss": 0.9389, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.0891052631578946e-05, |
|
"loss": 1.0394, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.087526315789474e-05, |
|
"loss": 1.0329, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0859473684210526e-05, |
|
"loss": 0.9396, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0843684210526317e-05, |
|
"loss": 0.9846, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0827894736842104e-05, |
|
"loss": 1.0176, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0812105263157897e-05, |
|
"loss": 0.9905, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0796315789473684e-05, |
|
"loss": 0.9654, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0780526315789475e-05, |
|
"loss": 1.002, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.076473684210526e-05, |
|
"loss": 0.9142, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0748947368421052e-05, |
|
"loss": 0.9308, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0733157894736842e-05, |
|
"loss": 0.9985, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0717368421052633e-05, |
|
"loss": 1.0112, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.0701578947368423e-05, |
|
"loss": 0.9749, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.068578947368421e-05, |
|
"loss": 0.9981, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.067e-05, |
|
"loss": 0.9432, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.065421052631579e-05, |
|
"loss": 0.9547, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.063842105263158e-05, |
|
"loss": 1.0422, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0622631578947368e-05, |
|
"loss": 1.0602, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0606842105263158e-05, |
|
"loss": 0.9757, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.059105263157895e-05, |
|
"loss": 1.0051, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.057526315789474e-05, |
|
"loss": 0.9267, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0559473684210526e-05, |
|
"loss": 0.9647, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0543684210526316e-05, |
|
"loss": 1.0339, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0527894736842103e-05, |
|
"loss": 0.9185, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_get_denotation_accuracy": 0.47412312975226883, |
|
"eval_loss": 0.8895652294158936, |
|
"eval_runtime": 240.0315, |
|
"eval_samples_per_second": 16.985, |
|
"eval_steps_per_second": 1.062, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0512105263157897e-05, |
|
"loss": 0.9818, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0496315789473684e-05, |
|
"loss": 0.9737, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.0480526315789474e-05, |
|
"loss": 1.0036, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.046473684210526e-05, |
|
"loss": 0.9822, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.044894736842105e-05, |
|
"loss": 0.9819, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0433157894736845e-05, |
|
"loss": 0.9024, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0417368421052632e-05, |
|
"loss": 1.0799, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0401578947368422e-05, |
|
"loss": 0.9576, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.038578947368421e-05, |
|
"loss": 0.9659, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0370000000000003e-05, |
|
"loss": 0.9756, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.035421052631579e-05, |
|
"loss": 1.0247, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.033842105263158e-05, |
|
"loss": 0.9834, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0322631578947367e-05, |
|
"loss": 0.9607, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0306842105263158e-05, |
|
"loss": 0.8982, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.0291052631578948e-05, |
|
"loss": 0.9772, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.027526315789474e-05, |
|
"loss": 0.9037, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0259473684210525e-05, |
|
"loss": 0.9059, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0243684210526316e-05, |
|
"loss": 0.9981, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0227894736842106e-05, |
|
"loss": 1.0669, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0212105263157896e-05, |
|
"loss": 0.9879, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0196315789473683e-05, |
|
"loss": 1.0508, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0180526315789474e-05, |
|
"loss": 0.9472, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0164736842105264e-05, |
|
"loss": 0.9703, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0148947368421054e-05, |
|
"loss": 1.0772, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0133157894736845e-05, |
|
"loss": 1.0144, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.011736842105263e-05, |
|
"loss": 1.0318, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0101578947368422e-05, |
|
"loss": 0.9503, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.008578947368421e-05, |
|
"loss": 1.0449, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0070000000000003e-05, |
|
"loss": 1.0206, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.005421052631579e-05, |
|
"loss": 0.9528, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.003842105263158e-05, |
|
"loss": 0.9294, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0022631578947367e-05, |
|
"loss": 1.0417, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.000684210526316e-05, |
|
"loss": 1.0689, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9991052631578948e-05, |
|
"loss": 0.8967, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9975263157894738e-05, |
|
"loss": 0.9687, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9959473684210525e-05, |
|
"loss": 1.003, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9943684210526315e-05, |
|
"loss": 0.9772, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9927894736842106e-05, |
|
"loss": 0.9683, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9912105263157896e-05, |
|
"loss": 0.9457, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9896315789473686e-05, |
|
"loss": 0.9867, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9880526315789473e-05, |
|
"loss": 0.9552, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9864736842105264e-05, |
|
"loss": 0.9849, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9848947368421054e-05, |
|
"loss": 0.9488, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9833157894736844e-05, |
|
"loss": 1.0298, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.981736842105263e-05, |
|
"loss": 0.9471, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.980157894736842e-05, |
|
"loss": 0.9553, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9785789473684212e-05, |
|
"loss": 0.9433, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9770000000000002e-05, |
|
"loss": 1.0108, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.975421052631579e-05, |
|
"loss": 0.982, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.973842105263158e-05, |
|
"loss": 0.9941, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_get_denotation_accuracy": 0.4638214373313711, |
|
"eval_loss": 0.9287481904029846, |
|
"eval_runtime": 246.9862, |
|
"eval_samples_per_second": 16.507, |
|
"eval_steps_per_second": 1.032, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9722631578947366e-05, |
|
"loss": 0.9911, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.970684210526316e-05, |
|
"loss": 0.8973, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9691052631578947e-05, |
|
"loss": 0.9224, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9675263157894737e-05, |
|
"loss": 0.9358, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9659473684210528e-05, |
|
"loss": 0.9473, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9643684210526315e-05, |
|
"loss": 0.9648, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.962789473684211e-05, |
|
"loss": 0.9403, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9612105263157895e-05, |
|
"loss": 1.1487, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9596315789473686e-05, |
|
"loss": 1.009, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9580526315789473e-05, |
|
"loss": 0.915, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9564736842105266e-05, |
|
"loss": 0.92, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9548947368421053e-05, |
|
"loss": 0.9518, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9533157894736844e-05, |
|
"loss": 0.9731, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.951736842105263e-05, |
|
"loss": 0.9952, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.950157894736842e-05, |
|
"loss": 0.9623, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.948578947368421e-05, |
|
"loss": 0.9247, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.947e-05, |
|
"loss": 1.0254, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.945421052631579e-05, |
|
"loss": 0.9958, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.943842105263158e-05, |
|
"loss": 0.9872, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9422631578947366e-05, |
|
"loss": 0.9981, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.940684210526316e-05, |
|
"loss": 0.9812, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.939105263157895e-05, |
|
"loss": 0.9483, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9375263157894737e-05, |
|
"loss": 0.9892, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9359473684210527e-05, |
|
"loss": 1.034, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9343684210526318e-05, |
|
"loss": 0.9821, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9327894736842108e-05, |
|
"loss": 0.9924, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9312105263157895e-05, |
|
"loss": 0.9663, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9296315789473685e-05, |
|
"loss": 1.0233, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9280526315789472e-05, |
|
"loss": 1.0078, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9264736842105266e-05, |
|
"loss": 0.9736, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9248947368421053e-05, |
|
"loss": 0.9986, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9233157894736843e-05, |
|
"loss": 1.006, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.921736842105263e-05, |
|
"loss": 0.9502, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.920157894736842e-05, |
|
"loss": 0.9941, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.918578947368421e-05, |
|
"loss": 0.9671, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.917e-05, |
|
"loss": 0.9958, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9154210526315788e-05, |
|
"loss": 0.937, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.913842105263158e-05, |
|
"loss": 0.912, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.9122631578947372e-05, |
|
"loss": 0.9927, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.910684210526316e-05, |
|
"loss": 1.0253, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.909105263157895e-05, |
|
"loss": 0.9658, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9075263157894736e-05, |
|
"loss": 1.0006, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9059473684210527e-05, |
|
"loss": 0.9963, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9043684210526317e-05, |
|
"loss": 0.9182, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9027894736842107e-05, |
|
"loss": 1.0314, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9012105263157894e-05, |
|
"loss": 0.9469, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8996315789473685e-05, |
|
"loss": 0.9323, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.898052631578947e-05, |
|
"loss": 0.8863, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8964736842105265e-05, |
|
"loss": 0.9131, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8948947368421052e-05, |
|
"loss": 0.9525, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_get_denotation_accuracy": 0.4677458915869512, |
|
"eval_loss": 0.9251512289047241, |
|
"eval_runtime": 248.4984, |
|
"eval_samples_per_second": 16.407, |
|
"eval_steps_per_second": 1.026, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8933157894736843e-05, |
|
"loss": 1.0018, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.891736842105263e-05, |
|
"loss": 0.9985, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8901578947368423e-05, |
|
"loss": 0.93, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.888578947368421e-05, |
|
"loss": 0.9973, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.887e-05, |
|
"loss": 1.058, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.885421052631579e-05, |
|
"loss": 1.0463, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8838421052631578e-05, |
|
"loss": 1.0016, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.882263157894737e-05, |
|
"loss": 0.9876, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.880684210526316e-05, |
|
"loss": 0.9702, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.879105263157895e-05, |
|
"loss": 0.9736, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8775263157894736e-05, |
|
"loss": 0.9938, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8759473684210526e-05, |
|
"loss": 0.9937, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8743684210526317e-05, |
|
"loss": 0.9345, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8727894736842107e-05, |
|
"loss": 1.0055, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8712105263157894e-05, |
|
"loss": 0.9901, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8696315789473684e-05, |
|
"loss": 1.0098, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8680526315789474e-05, |
|
"loss": 0.8845, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8664736842105265e-05, |
|
"loss": 0.9496, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8648947368421052e-05, |
|
"loss": 0.9832, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8633157894736842e-05, |
|
"loss": 1.0295, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.861736842105263e-05, |
|
"loss": 0.9603, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8601578947368423e-05, |
|
"loss": 0.9739, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8585789473684213e-05, |
|
"loss": 1.033, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.857e-05, |
|
"loss": 0.9615, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.855421052631579e-05, |
|
"loss": 0.9258, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8538421052631577e-05, |
|
"loss": 1.0015, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.852263157894737e-05, |
|
"loss": 0.9972, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8506842105263158e-05, |
|
"loss": 1.0129, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.849105263157895e-05, |
|
"loss": 1.0155, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8475263157894735e-05, |
|
"loss": 1.0267, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.845947368421053e-05, |
|
"loss": 0.9773, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8443684210526316e-05, |
|
"loss": 0.9881, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8427894736842106e-05, |
|
"loss": 0.9909, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8412105263157893e-05, |
|
"loss": 0.9886, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8396315789473684e-05, |
|
"loss": 0.9674, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8380526315789474e-05, |
|
"loss": 0.9482, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8364736842105264e-05, |
|
"loss": 0.9497, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.834894736842105e-05, |
|
"loss": 0.9422, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.833315789473684e-05, |
|
"loss": 0.8988, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8317368421052632e-05, |
|
"loss": 0.9614, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8301578947368422e-05, |
|
"loss": 0.9971, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8285789473684213e-05, |
|
"loss": 0.8827, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.827e-05, |
|
"loss": 0.9785, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.825421052631579e-05, |
|
"loss": 0.993, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.823842105263158e-05, |
|
"loss": 0.9771, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.822263157894737e-05, |
|
"loss": 0.9741, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8206842105263158e-05, |
|
"loss": 0.9621, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8191052631578948e-05, |
|
"loss": 0.9828, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8175263157894735e-05, |
|
"loss": 0.9377, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.815947368421053e-05, |
|
"loss": 1.0148, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_get_denotation_accuracy": 0.4616139318126073, |
|
"eval_loss": 0.9519839882850647, |
|
"eval_runtime": 259.0893, |
|
"eval_samples_per_second": 15.736, |
|
"eval_steps_per_second": 0.984, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8143684210526315e-05, |
|
"loss": 0.9269, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8127894736842106e-05, |
|
"loss": 1.024, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8112105263157893e-05, |
|
"loss": 0.8278, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8096315789473683e-05, |
|
"loss": 0.9145, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8080526315789473e-05, |
|
"loss": 0.9236, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8064736842105264e-05, |
|
"loss": 0.9723, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8048947368421054e-05, |
|
"loss": 0.9367, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.803315789473684e-05, |
|
"loss": 1.0005, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8017368421052635e-05, |
|
"loss": 1.0059, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.8001578947368422e-05, |
|
"loss": 1.0237, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7985789473684212e-05, |
|
"loss": 0.9098, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.797e-05, |
|
"loss": 0.9344, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.795421052631579e-05, |
|
"loss": 1.0177, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.793842105263158e-05, |
|
"loss": 0.9476, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.792263157894737e-05, |
|
"loss": 0.9419, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7906842105263157e-05, |
|
"loss": 1.0584, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7891052631578947e-05, |
|
"loss": 0.9816, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7875263157894734e-05, |
|
"loss": 0.8823, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7859473684210528e-05, |
|
"loss": 0.8585, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7843684210526315e-05, |
|
"loss": 1.0386, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7827894736842105e-05, |
|
"loss": 1.0144, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7812105263157896e-05, |
|
"loss": 0.8985, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7796315789473686e-05, |
|
"loss": 0.9835, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7780526315789476e-05, |
|
"loss": 1.004, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7764736842105263e-05, |
|
"loss": 1.0201, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7748947368421054e-05, |
|
"loss": 1.0519, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.773315789473684e-05, |
|
"loss": 0.9284, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7717368421052634e-05, |
|
"loss": 0.9925, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.770157894736842e-05, |
|
"loss": 0.9654, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.768578947368421e-05, |
|
"loss": 0.9423, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.767e-05, |
|
"loss": 0.9918, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7654210526315792e-05, |
|
"loss": 0.9739, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.763842105263158e-05, |
|
"loss": 0.9541, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.762263157894737e-05, |
|
"loss": 0.9928, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7606842105263156e-05, |
|
"loss": 0.8675, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7591052631578947e-05, |
|
"loss": 1.0327, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7575263157894737e-05, |
|
"loss": 0.9771, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7559473684210528e-05, |
|
"loss": 0.9863, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7543684210526318e-05, |
|
"loss": 0.9929, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7527894736842105e-05, |
|
"loss": 1.0211, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7512105263157895e-05, |
|
"loss": 0.9558, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7496315789473685e-05, |
|
"loss": 1.0041, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7480526315789476e-05, |
|
"loss": 0.9815, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7464736842105263e-05, |
|
"loss": 0.9055, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7448947368421053e-05, |
|
"loss": 0.962, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7433157894736843e-05, |
|
"loss": 0.9739, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7417368421052634e-05, |
|
"loss": 0.9835, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.740157894736842e-05, |
|
"loss": 0.9523, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.738578947368421e-05, |
|
"loss": 0.9809, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7369999999999998e-05, |
|
"loss": 0.8853, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_get_denotation_accuracy": 0.46455727250429235, |
|
"eval_loss": 0.926638126373291, |
|
"eval_runtime": 236.6855, |
|
"eval_samples_per_second": 17.225, |
|
"eval_steps_per_second": 1.077, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7354210526315792e-05, |
|
"loss": 0.9512, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.733842105263158e-05, |
|
"loss": 0.9229, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.732263157894737e-05, |
|
"loss": 0.9306, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7306842105263156e-05, |
|
"loss": 0.9765, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7291052631578946e-05, |
|
"loss": 0.9626, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.727526315789474e-05, |
|
"loss": 0.9705, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7259473684210527e-05, |
|
"loss": 0.9375, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7243684210526317e-05, |
|
"loss": 0.9996, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7227894736842104e-05, |
|
"loss": 0.9406, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7212105263157898e-05, |
|
"loss": 0.975, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7196315789473685e-05, |
|
"loss": 1.0174, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7180526315789475e-05, |
|
"loss": 0.9311, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7164736842105262e-05, |
|
"loss": 0.9646, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7148947368421053e-05, |
|
"loss": 1.0456, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7133157894736843e-05, |
|
"loss": 0.9987, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7117368421052633e-05, |
|
"loss": 0.8931, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.710157894736842e-05, |
|
"loss": 0.921, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.708578947368421e-05, |
|
"loss": 1.0058, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7069999999999998e-05, |
|
"loss": 0.9869, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.705421052631579e-05, |
|
"loss": 0.9661, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7038421052631578e-05, |
|
"loss": 0.9359, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.702263157894737e-05, |
|
"loss": 0.9785, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.700684210526316e-05, |
|
"loss": 0.9785, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.699105263157895e-05, |
|
"loss": 0.9692, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.697526315789474e-05, |
|
"loss": 0.9677, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6959473684210526e-05, |
|
"loss": 0.9376, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6943684210526317e-05, |
|
"loss": 0.9206, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6927894736842104e-05, |
|
"loss": 0.9357, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6912105263157897e-05, |
|
"loss": 0.9246, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6896315789473684e-05, |
|
"loss": 1.0461, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6880526315789475e-05, |
|
"loss": 0.9399, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6864736842105262e-05, |
|
"loss": 0.9181, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6848947368421052e-05, |
|
"loss": 1.0124, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6833157894736842e-05, |
|
"loss": 1.0314, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6817368421052633e-05, |
|
"loss": 0.9966, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.680157894736842e-05, |
|
"loss": 0.8777, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.678578947368421e-05, |
|
"loss": 0.9791, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.677e-05, |
|
"loss": 0.9, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.675421052631579e-05, |
|
"loss": 1.0118, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.673842105263158e-05, |
|
"loss": 0.9493, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6722631578947368e-05, |
|
"loss": 0.9875, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.670684210526316e-05, |
|
"loss": 0.981, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.669105263157895e-05, |
|
"loss": 0.9798, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.667526315789474e-05, |
|
"loss": 0.909, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6659473684210526e-05, |
|
"loss": 0.9592, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6643684210526316e-05, |
|
"loss": 0.9579, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6627894736842103e-05, |
|
"loss": 0.9824, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6612105263157897e-05, |
|
"loss": 0.9537, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6596315789473684e-05, |
|
"loss": 0.9407, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6580526315789474e-05, |
|
"loss": 0.9297, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_get_denotation_accuracy": 0.4782928623988227, |
|
"eval_loss": 0.9149366021156311, |
|
"eval_runtime": 246.4757, |
|
"eval_samples_per_second": 16.541, |
|
"eval_steps_per_second": 1.035, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.656473684210526e-05, |
|
"loss": 0.9581, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6548947368421055e-05, |
|
"loss": 1.0649, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6533157894736842e-05, |
|
"loss": 0.8826, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6517368421052632e-05, |
|
"loss": 0.9945, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6501578947368423e-05, |
|
"loss": 0.9891, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.648578947368421e-05, |
|
"loss": 0.9985, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6470000000000003e-05, |
|
"loss": 0.919, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.645421052631579e-05, |
|
"loss": 0.9338, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.643842105263158e-05, |
|
"loss": 0.957, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6422631578947367e-05, |
|
"loss": 1.0452, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6406842105263158e-05, |
|
"loss": 1.0478, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6391052631578948e-05, |
|
"loss": 1.0028, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.637526315789474e-05, |
|
"loss": 0.9938, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6359473684210525e-05, |
|
"loss": 0.9639, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6343684210526316e-05, |
|
"loss": 1.0243, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6327894736842106e-05, |
|
"loss": 1.0092, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6312105263157896e-05, |
|
"loss": 0.9481, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6296315789473683e-05, |
|
"loss": 1.0041, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6280526315789474e-05, |
|
"loss": 0.8919, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.626473684210526e-05, |
|
"loss": 0.923, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6248947368421054e-05, |
|
"loss": 0.971, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6233157894736845e-05, |
|
"loss": 0.9875, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6217368421052632e-05, |
|
"loss": 0.9286, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6201578947368422e-05, |
|
"loss": 0.915, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.618578947368421e-05, |
|
"loss": 0.9622, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6170000000000003e-05, |
|
"loss": 1.0241, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.615421052631579e-05, |
|
"loss": 0.9866, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.613842105263158e-05, |
|
"loss": 1.0167, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6122631578947367e-05, |
|
"loss": 0.9456, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.610684210526316e-05, |
|
"loss": 0.9988, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6091052631578948e-05, |
|
"loss": 0.9432, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6075263157894738e-05, |
|
"loss": 0.9822, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6059473684210525e-05, |
|
"loss": 0.9149, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6043684210526315e-05, |
|
"loss": 0.8641, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6027894736842106e-05, |
|
"loss": 0.9149, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6012105263157896e-05, |
|
"loss": 0.9868, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.5996315789473683e-05, |
|
"loss": 0.9498, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.5980526315789473e-05, |
|
"loss": 0.9213, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.5964736842105264e-05, |
|
"loss": 0.9884, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5948947368421054e-05, |
|
"loss": 1.0028, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5933157894736844e-05, |
|
"loss": 0.9571, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.591736842105263e-05, |
|
"loss": 0.9484, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.590157894736842e-05, |
|
"loss": 0.9944, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5885789473684212e-05, |
|
"loss": 0.9915, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5870000000000002e-05, |
|
"loss": 1.0459, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.585421052631579e-05, |
|
"loss": 1.0178, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.583842105263158e-05, |
|
"loss": 0.9805, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5822631578947366e-05, |
|
"loss": 0.9694, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.580684210526316e-05, |
|
"loss": 0.9671, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5791052631578947e-05, |
|
"loss": 0.9269, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_get_denotation_accuracy": 0.4726514594064263, |
|
"eval_loss": 0.9314199686050415, |
|
"eval_runtime": 229.6675, |
|
"eval_samples_per_second": 17.752, |
|
"eval_steps_per_second": 1.11, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.5775263157894737e-05, |
|
"loss": 0.9701, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5759473684210524e-05, |
|
"loss": 0.9517, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5743684210526315e-05, |
|
"loss": 0.9066, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5727894736842105e-05, |
|
"loss": 1.0189, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5712105263157895e-05, |
|
"loss": 1.101, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5696315789473686e-05, |
|
"loss": 0.9877, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5680526315789473e-05, |
|
"loss": 1.002, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5664736842105266e-05, |
|
"loss": 0.9769, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5648947368421053e-05, |
|
"loss": 0.9049, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5633157894736844e-05, |
|
"loss": 0.9607, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.561736842105263e-05, |
|
"loss": 0.9429, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.560157894736842e-05, |
|
"loss": 0.9807, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.558578947368421e-05, |
|
"loss": 0.8555, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5570000000000002e-05, |
|
"loss": 0.9652, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.555421052631579e-05, |
|
"loss": 0.9268, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.553842105263158e-05, |
|
"loss": 1.0229, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5522631578947366e-05, |
|
"loss": 0.9754, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.550684210526316e-05, |
|
"loss": 1.0279, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5491052631578947e-05, |
|
"loss": 0.9685, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5475263157894737e-05, |
|
"loss": 0.9277, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5459473684210524e-05, |
|
"loss": 0.8861, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5443684210526318e-05, |
|
"loss": 0.9403, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5427894736842108e-05, |
|
"loss": 0.9502, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5412105263157895e-05, |
|
"loss": 0.9233, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5396315789473685e-05, |
|
"loss": 0.9991, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.5380526315789472e-05, |
|
"loss": 0.9546, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5364736842105266e-05, |
|
"loss": 0.9074, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5348947368421053e-05, |
|
"loss": 0.9613, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5333157894736843e-05, |
|
"loss": 0.9134, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.531736842105263e-05, |
|
"loss": 0.919, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.530157894736842e-05, |
|
"loss": 0.9763, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.528578947368421e-05, |
|
"loss": 0.9336, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.527e-05, |
|
"loss": 0.9394, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.525421052631579e-05, |
|
"loss": 1.007, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5238421052631578e-05, |
|
"loss": 0.8863, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.522263157894737e-05, |
|
"loss": 1.0601, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5206842105263159e-05, |
|
"loss": 0.9999, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5191052631578948e-05, |
|
"loss": 1.0112, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5175263157894736e-05, |
|
"loss": 0.8849, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5159473684210525e-05, |
|
"loss": 0.9973, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5143684210526317e-05, |
|
"loss": 1.0451, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5127894736842106e-05, |
|
"loss": 0.9296, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5112105263157894e-05, |
|
"loss": 0.9863, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5096315789473683e-05, |
|
"loss": 0.9274, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5080526315789475e-05, |
|
"loss": 0.989, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5064736842105264e-05, |
|
"loss": 1.0079, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5048947368421054e-05, |
|
"loss": 0.8703, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5033157894736843e-05, |
|
"loss": 0.9883, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5017368421052631e-05, |
|
"loss": 0.9986, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5001578947368423e-05, |
|
"loss": 0.9734, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_get_denotation_accuracy": 0.4677458915869512, |
|
"eval_loss": 0.9230586886405945, |
|
"eval_runtime": 241.6832, |
|
"eval_samples_per_second": 16.869, |
|
"eval_steps_per_second": 1.055, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4985789473684212e-05, |
|
"loss": 0.9085, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.497e-05, |
|
"loss": 1.0361, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.495421052631579e-05, |
|
"loss": 0.9538, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.493842105263158e-05, |
|
"loss": 1.0394, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4922631578947368e-05, |
|
"loss": 0.9146, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4906842105263159e-05, |
|
"loss": 0.8953, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4891052631578947e-05, |
|
"loss": 1.0349, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4875263157894738e-05, |
|
"loss": 0.9213, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4859473684210526e-05, |
|
"loss": 0.9519, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4843684210526315e-05, |
|
"loss": 0.9537, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4827894736842105e-05, |
|
"loss": 0.9251, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4812105263157894e-05, |
|
"loss": 0.9124, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4796315789473684e-05, |
|
"loss": 0.9608, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4780526315789475e-05, |
|
"loss": 0.9014, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4764736842105265e-05, |
|
"loss": 0.9642, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4748947368421054e-05, |
|
"loss": 1.0173, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4733157894736842e-05, |
|
"loss": 0.9941, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4717368421052633e-05, |
|
"loss": 0.9867, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4701578947368421e-05, |
|
"loss": 0.9687, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4685789473684212e-05, |
|
"loss": 0.9218, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.467e-05, |
|
"loss": 0.9298, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.465421052631579e-05, |
|
"loss": 0.9505, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4638421052631579e-05, |
|
"loss": 0.9584, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4622631578947368e-05, |
|
"loss": 0.8743, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4606842105263158e-05, |
|
"loss": 0.9154, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4591052631578947e-05, |
|
"loss": 0.9337, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4575263157894737e-05, |
|
"loss": 0.8526, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4559473684210526e-05, |
|
"loss": 0.9251, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4543684210526316e-05, |
|
"loss": 0.9347, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4527894736842105e-05, |
|
"loss": 0.9755, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4512105263157895e-05, |
|
"loss": 0.9797, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4496315789473685e-05, |
|
"loss": 0.9268, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4480526315789474e-05, |
|
"loss": 1.0137, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4464736842105264e-05, |
|
"loss": 0.9812, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4448947368421053e-05, |
|
"loss": 0.9803, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4433157894736843e-05, |
|
"loss": 0.9592, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4417368421052632e-05, |
|
"loss": 0.9835, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.440157894736842e-05, |
|
"loss": 0.961, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4385789473684211e-05, |
|
"loss": 1.0207, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.437e-05, |
|
"loss": 0.9549, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.435421052631579e-05, |
|
"loss": 0.9783, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4338421052631579e-05, |
|
"loss": 0.8843, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4322631578947369e-05, |
|
"loss": 1.0533, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4306842105263158e-05, |
|
"loss": 0.9515, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4291052631578946e-05, |
|
"loss": 0.9166, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4275263157894737e-05, |
|
"loss": 0.9588, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4259473684210527e-05, |
|
"loss": 0.9285, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4243684210526317e-05, |
|
"loss": 1.013, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4227894736842106e-05, |
|
"loss": 0.9996, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4212105263157896e-05, |
|
"loss": 0.9477, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_get_denotation_accuracy": 0.4795192543536914, |
|
"eval_loss": 0.8999451994895935, |
|
"eval_runtime": 241.3624, |
|
"eval_samples_per_second": 16.892, |
|
"eval_steps_per_second": 1.057, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4196315789473685e-05, |
|
"loss": 0.9456, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4180526315789474e-05, |
|
"loss": 0.9384, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4164736842105264e-05, |
|
"loss": 0.9776, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4148947368421053e-05, |
|
"loss": 0.9382, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4133157894736843e-05, |
|
"loss": 1.0121, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4117368421052631e-05, |
|
"loss": 0.9032, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4101578947368422e-05, |
|
"loss": 0.9912, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.408578947368421e-05, |
|
"loss": 0.9444, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4069999999999999e-05, |
|
"loss": 1.0416, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.405421052631579e-05, |
|
"loss": 0.8772, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4038421052631578e-05, |
|
"loss": 0.896, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4022631578947368e-05, |
|
"loss": 0.9684, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4006842105263157e-05, |
|
"loss": 0.9427, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3991052631578949e-05, |
|
"loss": 0.9574, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3975263157894738e-05, |
|
"loss": 0.9848, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3959473684210528e-05, |
|
"loss": 0.9153, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3943684210526317e-05, |
|
"loss": 0.8842, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3927894736842105e-05, |
|
"loss": 0.9311, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3912105263157896e-05, |
|
"loss": 0.9831, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3896315789473684e-05, |
|
"loss": 0.9475, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3880526315789475e-05, |
|
"loss": 1.005, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3864736842105263e-05, |
|
"loss": 0.8998, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3848947368421054e-05, |
|
"loss": 0.9842, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3833157894736842e-05, |
|
"loss": 0.9932, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3817368421052631e-05, |
|
"loss": 0.9029, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3801578947368421e-05, |
|
"loss": 1.0074, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.378578947368421e-05, |
|
"loss": 1.033, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.377e-05, |
|
"loss": 0.9659, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3754210526315789e-05, |
|
"loss": 0.9908, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.373842105263158e-05, |
|
"loss": 0.9173, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.372263157894737e-05, |
|
"loss": 0.946, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3706842105263158e-05, |
|
"loss": 0.9739, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3691052631578949e-05, |
|
"loss": 0.9435, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3675263157894737e-05, |
|
"loss": 0.9885, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3659473684210528e-05, |
|
"loss": 0.893, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3643684210526316e-05, |
|
"loss": 1.0282, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3627894736842107e-05, |
|
"loss": 0.9377, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3612105263157895e-05, |
|
"loss": 1.0104, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3596315789473684e-05, |
|
"loss": 1.0209, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3580526315789474e-05, |
|
"loss": 0.973, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3564736842105263e-05, |
|
"loss": 0.9634, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3548947368421053e-05, |
|
"loss": 0.9921, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3533157894736842e-05, |
|
"loss": 1.0186, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3517368421052632e-05, |
|
"loss": 0.9819, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.350157894736842e-05, |
|
"loss": 0.927, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.348578947368421e-05, |
|
"loss": 0.9331, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3470000000000001e-05, |
|
"loss": 0.9586, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.345421052631579e-05, |
|
"loss": 0.9382, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.343842105263158e-05, |
|
"loss": 0.9198, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3422631578947369e-05, |
|
"loss": 0.9324, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_get_denotation_accuracy": 0.47902869757174393, |
|
"eval_loss": 0.8948884010314941, |
|
"eval_runtime": 232.3445, |
|
"eval_samples_per_second": 17.547, |
|
"eval_steps_per_second": 1.098, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.340684210526316e-05, |
|
"loss": 0.9529, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3391052631578948e-05, |
|
"loss": 0.9848, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3375263157894737e-05, |
|
"loss": 0.968, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3359473684210527e-05, |
|
"loss": 0.9602, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3343684210526316e-05, |
|
"loss": 0.9568, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3327894736842106e-05, |
|
"loss": 1.0069, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3312105263157895e-05, |
|
"loss": 0.9421, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3296315789473685e-05, |
|
"loss": 0.952, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3280526315789474e-05, |
|
"loss": 0.9882, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3264736842105262e-05, |
|
"loss": 1.028, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3248947368421053e-05, |
|
"loss": 0.8531, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3233157894736841e-05, |
|
"loss": 1.0165, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3217368421052632e-05, |
|
"loss": 0.8926, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3201578947368422e-05, |
|
"loss": 0.9704, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3185789473684212e-05, |
|
"loss": 0.9456, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3170000000000001e-05, |
|
"loss": 0.9441, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.315421052631579e-05, |
|
"loss": 0.9143, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.313842105263158e-05, |
|
"loss": 0.9443, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3122631578947369e-05, |
|
"loss": 0.9467, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3106842105263159e-05, |
|
"loss": 0.9355, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3091052631578948e-05, |
|
"loss": 0.9463, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3075263157894738e-05, |
|
"loss": 1.0012, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3059473684210527e-05, |
|
"loss": 0.9949, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3043684210526315e-05, |
|
"loss": 0.9865, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3027894736842106e-05, |
|
"loss": 0.935, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3012105263157894e-05, |
|
"loss": 0.9432, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2996315789473685e-05, |
|
"loss": 0.9185, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2980526315789473e-05, |
|
"loss": 0.9587, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2964736842105263e-05, |
|
"loss": 0.9803, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2948947368421052e-05, |
|
"loss": 1.0278, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2933157894736842e-05, |
|
"loss": 0.9842, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2917368421052633e-05, |
|
"loss": 0.8908, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2901578947368421e-05, |
|
"loss": 0.9412, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2885789473684212e-05, |
|
"loss": 0.8609, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.287e-05, |
|
"loss": 0.9532, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.285421052631579e-05, |
|
"loss": 0.9701, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.283842105263158e-05, |
|
"loss": 0.9302, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2822631578947368e-05, |
|
"loss": 1.0146, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2806842105263158e-05, |
|
"loss": 0.9609, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2791052631578947e-05, |
|
"loss": 0.8539, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2775263157894737e-05, |
|
"loss": 1.0032, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2759473684210526e-05, |
|
"loss": 0.9099, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2743684210526316e-05, |
|
"loss": 0.937, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2727894736842105e-05, |
|
"loss": 0.9219, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2712105263157894e-05, |
|
"loss": 0.9502, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2696315789473684e-05, |
|
"loss": 0.9979, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2680526315789474e-05, |
|
"loss": 0.9364, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2664736842105265e-05, |
|
"loss": 0.9401, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2648947368421053e-05, |
|
"loss": 0.9467, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2633157894736844e-05, |
|
"loss": 0.9598, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_get_denotation_accuracy": 0.46701005641402993, |
|
"eval_loss": 0.9270404577255249, |
|
"eval_runtime": 235.7621, |
|
"eval_samples_per_second": 17.293, |
|
"eval_steps_per_second": 1.082, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2617368421052632e-05, |
|
"loss": 0.9844, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2601578947368421e-05, |
|
"loss": 0.9077, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2585789473684211e-05, |
|
"loss": 0.9628, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.257e-05, |
|
"loss": 0.9696, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.255421052631579e-05, |
|
"loss": 0.9577, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2538421052631579e-05, |
|
"loss": 0.9052, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.252263157894737e-05, |
|
"loss": 0.9735, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2506842105263158e-05, |
|
"loss": 0.9496, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2491052631578947e-05, |
|
"loss": 0.8934, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2475263157894737e-05, |
|
"loss": 0.9669, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2459473684210526e-05, |
|
"loss": 0.8894, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2443684210526316e-05, |
|
"loss": 0.9323, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.2427894736842105e-05, |
|
"loss": 0.8627, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2412105263157897e-05, |
|
"loss": 0.9324, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2396315789473685e-05, |
|
"loss": 0.9671, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2380526315789474e-05, |
|
"loss": 0.9995, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2364736842105264e-05, |
|
"loss": 0.9526, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2348947368421053e-05, |
|
"loss": 0.9975, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2333157894736843e-05, |
|
"loss": 0.8614, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2317368421052632e-05, |
|
"loss": 0.9937, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2301578947368422e-05, |
|
"loss": 0.997, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.228578947368421e-05, |
|
"loss": 0.9323, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.227e-05, |
|
"loss": 0.9201, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.225421052631579e-05, |
|
"loss": 1.0284, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2238421052631578e-05, |
|
"loss": 0.9643, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2222631578947369e-05, |
|
"loss": 0.9623, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2206842105263157e-05, |
|
"loss": 0.9669, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2191052631578948e-05, |
|
"loss": 0.9241, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2175263157894736e-05, |
|
"loss": 0.8715, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2159473684210527e-05, |
|
"loss": 0.9781, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2143684210526317e-05, |
|
"loss": 0.8883, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2127894736842106e-05, |
|
"loss": 1.0113, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2112105263157896e-05, |
|
"loss": 0.9311, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2096315789473685e-05, |
|
"loss": 0.8775, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2080526315789475e-05, |
|
"loss": 0.9003, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2064736842105264e-05, |
|
"loss": 0.9946, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2048947368421052e-05, |
|
"loss": 0.954, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2033157894736843e-05, |
|
"loss": 0.9033, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2017368421052631e-05, |
|
"loss": 1.013, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.2001578947368422e-05, |
|
"loss": 0.9724, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.198578947368421e-05, |
|
"loss": 0.9102, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.197e-05, |
|
"loss": 0.8131, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.195421052631579e-05, |
|
"loss": 0.9199, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1938421052631578e-05, |
|
"loss": 0.9115, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1922631578947368e-05, |
|
"loss": 1.0054, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1906842105263157e-05, |
|
"loss": 0.8474, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1891052631578949e-05, |
|
"loss": 0.8419, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1875263157894738e-05, |
|
"loss": 0.8427, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1859473684210528e-05, |
|
"loss": 0.8966, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1843684210526317e-05, |
|
"loss": 0.8734, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_get_denotation_accuracy": 0.47780230561687514, |
|
"eval_loss": 0.9081824421882629, |
|
"eval_runtime": 227.871, |
|
"eval_samples_per_second": 17.892, |
|
"eval_steps_per_second": 1.119, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1827894736842105e-05, |
|
"loss": 0.9143, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1812105263157896e-05, |
|
"loss": 0.839, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1796315789473684e-05, |
|
"loss": 0.8653, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1780526315789474e-05, |
|
"loss": 0.8873, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1764736842105263e-05, |
|
"loss": 0.8528, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1748947368421053e-05, |
|
"loss": 0.8685, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1733157894736842e-05, |
|
"loss": 0.8989, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.171736842105263e-05, |
|
"loss": 0.8823, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1701578947368421e-05, |
|
"loss": 0.9632, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.168578947368421e-05, |
|
"loss": 0.9019, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.167e-05, |
|
"loss": 0.9031, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1654210526315789e-05, |
|
"loss": 0.8701, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.1638421052631579e-05, |
|
"loss": 0.9202, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.162263157894737e-05, |
|
"loss": 0.9147, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1606842105263158e-05, |
|
"loss": 0.8666, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1591052631578948e-05, |
|
"loss": 0.9134, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1575263157894737e-05, |
|
"loss": 0.8758, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1559473684210527e-05, |
|
"loss": 0.8481, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1543684210526316e-05, |
|
"loss": 0.8792, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1527894736842106e-05, |
|
"loss": 0.899, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1512105263157895e-05, |
|
"loss": 0.8806, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1496315789473685e-05, |
|
"loss": 0.7982, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1480526315789474e-05, |
|
"loss": 0.8807, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1464736842105263e-05, |
|
"loss": 0.8392, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.1448947368421053e-05, |
|
"loss": 0.9382, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1433157894736842e-05, |
|
"loss": 0.9007, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1417368421052632e-05, |
|
"loss": 0.9237, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.140157894736842e-05, |
|
"loss": 0.8415, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1385789473684211e-05, |
|
"loss": 0.8427, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.137e-05, |
|
"loss": 0.8881, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.135421052631579e-05, |
|
"loss": 0.8442, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.133842105263158e-05, |
|
"loss": 0.8685, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1322631578947369e-05, |
|
"loss": 0.8822, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.130684210526316e-05, |
|
"loss": 0.8655, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1291052631578948e-05, |
|
"loss": 0.8487, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1275263157894738e-05, |
|
"loss": 0.9276, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.1259473684210527e-05, |
|
"loss": 0.8529, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1243684210526315e-05, |
|
"loss": 0.826, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1227894736842106e-05, |
|
"loss": 0.8792, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1212105263157894e-05, |
|
"loss": 0.8831, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1196315789473685e-05, |
|
"loss": 0.9012, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1180526315789473e-05, |
|
"loss": 0.8368, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1164736842105264e-05, |
|
"loss": 0.8694, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1148947368421052e-05, |
|
"loss": 0.9176, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1133157894736841e-05, |
|
"loss": 0.8446, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1117368421052631e-05, |
|
"loss": 0.8766, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1101578947368422e-05, |
|
"loss": 0.8434, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1085789473684212e-05, |
|
"loss": 0.8835, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.107e-05, |
|
"loss": 0.7653, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.1054210526315791e-05, |
|
"loss": 0.9371, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_get_denotation_accuracy": 0.46848172675987243, |
|
"eval_loss": 0.9864445924758911, |
|
"eval_runtime": 239.8054, |
|
"eval_samples_per_second": 17.001, |
|
"eval_steps_per_second": 1.063, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.103842105263158e-05, |
|
"loss": 0.8836, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.1022631578947368e-05, |
|
"loss": 0.8513, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.1006842105263159e-05, |
|
"loss": 0.8696, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0991052631578947e-05, |
|
"loss": 0.8958, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0975263157894738e-05, |
|
"loss": 0.8015, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0959473684210526e-05, |
|
"loss": 0.8608, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0943684210526317e-05, |
|
"loss": 0.8399, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0927894736842105e-05, |
|
"loss": 0.9047, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0912105263157894e-05, |
|
"loss": 0.9121, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0896315789473684e-05, |
|
"loss": 0.9045, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0880526315789473e-05, |
|
"loss": 0.8474, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.0864736842105263e-05, |
|
"loss": 0.8228, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0848947368421052e-05, |
|
"loss": 0.819, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0833157894736844e-05, |
|
"loss": 0.9136, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0817368421052633e-05, |
|
"loss": 0.8869, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0801578947368421e-05, |
|
"loss": 0.8792, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0785789473684212e-05, |
|
"loss": 0.9296, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.077e-05, |
|
"loss": 0.9138, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.075421052631579e-05, |
|
"loss": 0.8427, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.073842105263158e-05, |
|
"loss": 0.8654, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.072263157894737e-05, |
|
"loss": 0.8661, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0706842105263158e-05, |
|
"loss": 0.8933, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0691052631578947e-05, |
|
"loss": 0.9463, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0675263157894737e-05, |
|
"loss": 0.8524, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.0659473684210526e-05, |
|
"loss": 0.9295, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0643684210526316e-05, |
|
"loss": 0.8591, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0627894736842105e-05, |
|
"loss": 0.8902, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0612105263157895e-05, |
|
"loss": 0.8687, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0596315789473684e-05, |
|
"loss": 0.833, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0580526315789474e-05, |
|
"loss": 0.8154, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0564736842105264e-05, |
|
"loss": 0.84, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0548947368421053e-05, |
|
"loss": 0.8698, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0533157894736843e-05, |
|
"loss": 0.8736, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0517368421052632e-05, |
|
"loss": 0.8258, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0501578947368422e-05, |
|
"loss": 0.8427, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.0485789473684211e-05, |
|
"loss": 0.8363, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.047e-05, |
|
"loss": 0.8662, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.045421052631579e-05, |
|
"loss": 0.8508, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0438421052631579e-05, |
|
"loss": 0.868, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0422631578947369e-05, |
|
"loss": 0.8478, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0406842105263158e-05, |
|
"loss": 0.8924, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0391052631578948e-05, |
|
"loss": 0.8992, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0375263157894737e-05, |
|
"loss": 0.9264, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0359473684210525e-05, |
|
"loss": 0.9336, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0343684210526316e-05, |
|
"loss": 0.83, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0327894736842104e-05, |
|
"loss": 0.8183, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0312105263157896e-05, |
|
"loss": 0.903, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0296315789473685e-05, |
|
"loss": 0.8715, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0280526315789475e-05, |
|
"loss": 0.8773, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.0264736842105264e-05, |
|
"loss": 0.8945, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_get_denotation_accuracy": 0.47657591366200636, |
|
"eval_loss": 0.9642069339752197, |
|
"eval_runtime": 237.1275, |
|
"eval_samples_per_second": 17.193, |
|
"eval_steps_per_second": 1.075, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0248947368421053e-05, |
|
"loss": 0.8971, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0233157894736843e-05, |
|
"loss": 0.8717, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0217368421052632e-05, |
|
"loss": 0.9304, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0201578947368422e-05, |
|
"loss": 0.974, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.018578947368421e-05, |
|
"loss": 0.8908, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0170000000000001e-05, |
|
"loss": 0.8936, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.015421052631579e-05, |
|
"loss": 0.8176, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0138421052631578e-05, |
|
"loss": 0.839, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0122631578947369e-05, |
|
"loss": 0.9415, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0106842105263157e-05, |
|
"loss": 0.8843, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0091052631578947e-05, |
|
"loss": 0.877, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.0075263157894736e-05, |
|
"loss": 0.9242, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.0059473684210526e-05, |
|
"loss": 0.9133, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.0043684210526317e-05, |
|
"loss": 0.9046, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.0027894736842105e-05, |
|
"loss": 0.9339, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.0012105263157896e-05, |
|
"loss": 0.8824, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.996315789473684e-06, |
|
"loss": 0.8547, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.980526315789475e-06, |
|
"loss": 0.8779, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.964736842105263e-06, |
|
"loss": 0.8072, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.948947368421054e-06, |
|
"loss": 0.927, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.933157894736842e-06, |
|
"loss": 0.8547, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.917368421052631e-06, |
|
"loss": 0.9088, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.901578947368421e-06, |
|
"loss": 0.9095, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.88578947368421e-06, |
|
"loss": 0.8842, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.87e-06, |
|
"loss": 0.8726, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.854210526315789e-06, |
|
"loss": 0.8749, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.83842105263158e-06, |
|
"loss": 0.8684, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.822631578947368e-06, |
|
"loss": 0.9212, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.806842105263157e-06, |
|
"loss": 0.8891, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.791052631578947e-06, |
|
"loss": 0.7833, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.775263157894737e-06, |
|
"loss": 0.9201, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.759473684210528e-06, |
|
"loss": 0.8358, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.743684210526316e-06, |
|
"loss": 0.8905, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.727894736842107e-06, |
|
"loss": 0.8893, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.712105263157895e-06, |
|
"loss": 0.853, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.696315789473684e-06, |
|
"loss": 0.9108, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.680526315789474e-06, |
|
"loss": 0.8194, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.664736842105263e-06, |
|
"loss": 0.8473, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.648947368421053e-06, |
|
"loss": 0.8879, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.633157894736842e-06, |
|
"loss": 0.8999, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.617368421052632e-06, |
|
"loss": 0.9429, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.601578947368421e-06, |
|
"loss": 0.8843, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.58578947368421e-06, |
|
"loss": 0.8704, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.57e-06, |
|
"loss": 0.9064, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.554210526315788e-06, |
|
"loss": 0.8679, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.538421052631579e-06, |
|
"loss": 0.8678, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.52263157894737e-06, |
|
"loss": 0.9391, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.50684210526316e-06, |
|
"loss": 0.8387, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.491052631578948e-06, |
|
"loss": 0.8408, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.475263157894737e-06, |
|
"loss": 0.9746, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_get_denotation_accuracy": 0.4736325729703213, |
|
"eval_loss": 0.96991366147995, |
|
"eval_runtime": 250.0702, |
|
"eval_samples_per_second": 16.303, |
|
"eval_steps_per_second": 1.02, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.459473684210527e-06, |
|
"loss": 0.8949, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.443684210526316e-06, |
|
"loss": 0.8376, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.427894736842106e-06, |
|
"loss": 0.848, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.412105263157895e-06, |
|
"loss": 0.8501, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.396315789473685e-06, |
|
"loss": 0.8243, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.380526315789474e-06, |
|
"loss": 0.8642, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.364736842105262e-06, |
|
"loss": 0.8485, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.348947368421053e-06, |
|
"loss": 0.9008, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.333157894736841e-06, |
|
"loss": 0.8867, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.317368421052632e-06, |
|
"loss": 0.9288, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.30157894736842e-06, |
|
"loss": 0.7863, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.28578947368421e-06, |
|
"loss": 0.9072, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.27e-06, |
|
"loss": 0.8691, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.25421052631579e-06, |
|
"loss": 0.8931, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.23842105263158e-06, |
|
"loss": 0.9736, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.222631578947369e-06, |
|
"loss": 0.8218, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.206842105263159e-06, |
|
"loss": 0.9309, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.191052631578948e-06, |
|
"loss": 0.8458, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.175263157894738e-06, |
|
"loss": 0.8162, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.159473684210527e-06, |
|
"loss": 0.8747, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.143684210526315e-06, |
|
"loss": 0.8609, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.127894736842106e-06, |
|
"loss": 0.8826, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.112105263157894e-06, |
|
"loss": 0.8272, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.096315789473685e-06, |
|
"loss": 0.8725, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.080526315789473e-06, |
|
"loss": 0.8752, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.064736842105264e-06, |
|
"loss": 0.8297, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.048947368421052e-06, |
|
"loss": 0.9066, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.033157894736841e-06, |
|
"loss": 0.8478, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.017368421052631e-06, |
|
"loss": 0.8775, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.001578947368422e-06, |
|
"loss": 0.8973, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.985789473684212e-06, |
|
"loss": 0.842, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.97e-06, |
|
"loss": 0.8286, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.954210526315791e-06, |
|
"loss": 0.8781, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.93842105263158e-06, |
|
"loss": 0.8913, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.92263157894737e-06, |
|
"loss": 0.8063, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.906842105263158e-06, |
|
"loss": 0.8941, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.891052631578947e-06, |
|
"loss": 0.8975, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.875263157894737e-06, |
|
"loss": 0.8663, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.859473684210526e-06, |
|
"loss": 0.8858, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.843684210526316e-06, |
|
"loss": 0.9231, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.827894736842105e-06, |
|
"loss": 0.8422, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.812105263157895e-06, |
|
"loss": 0.8881, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.796315789473684e-06, |
|
"loss": 0.8195, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.780526315789473e-06, |
|
"loss": 0.899, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.764736842105263e-06, |
|
"loss": 0.8601, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.748947368421052e-06, |
|
"loss": 0.9079, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.733157894736844e-06, |
|
"loss": 0.8702, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.717368421052632e-06, |
|
"loss": 0.8858, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.701578947368423e-06, |
|
"loss": 0.9535, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.685789473684211e-06, |
|
"loss": 0.8338, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_get_denotation_accuracy": 0.4780475840078489, |
|
"eval_loss": 0.9859474301338196, |
|
"eval_runtime": 240.5029, |
|
"eval_samples_per_second": 16.952, |
|
"eval_steps_per_second": 1.06, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.67e-06, |
|
"loss": 0.822, |
|
"step": 14510 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.65421052631579e-06, |
|
"loss": 0.8478, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.638421052631579e-06, |
|
"loss": 0.86, |
|
"step": 14530 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.62263157894737e-06, |
|
"loss": 0.9104, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.606842105263158e-06, |
|
"loss": 0.9088, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.591052631578948e-06, |
|
"loss": 0.9169, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.575263157894737e-06, |
|
"loss": 0.8256, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.559473684210526e-06, |
|
"loss": 0.9072, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.543684210526316e-06, |
|
"loss": 0.9258, |
|
"step": 14590 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.527894736842105e-06, |
|
"loss": 0.8833, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.512105263157895e-06, |
|
"loss": 0.7958, |
|
"step": 14610 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.496315789473684e-06, |
|
"loss": 0.8463, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.480526315789474e-06, |
|
"loss": 0.8489, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.464736842105264e-06, |
|
"loss": 0.8576, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.448947368421053e-06, |
|
"loss": 0.8827, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.433157894736843e-06, |
|
"loss": 0.9005, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.417368421052632e-06, |
|
"loss": 0.932, |
|
"step": 14670 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.401578947368422e-06, |
|
"loss": 0.8623, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.38578947368421e-06, |
|
"loss": 0.9182, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.370000000000001e-06, |
|
"loss": 0.8711, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.35421052631579e-06, |
|
"loss": 0.8164, |
|
"step": 14710 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.338421052631578e-06, |
|
"loss": 0.8631, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.322631578947369e-06, |
|
"loss": 0.9048, |
|
"step": 14730 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.306842105263157e-06, |
|
"loss": 0.9186, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.291052631578948e-06, |
|
"loss": 0.9031, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.275263157894736e-06, |
|
"loss": 0.8455, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.259473684210527e-06, |
|
"loss": 0.8546, |
|
"step": 14770 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.243684210526315e-06, |
|
"loss": 0.833, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.227894736842104e-06, |
|
"loss": 0.9135, |
|
"step": 14790 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.212105263157894e-06, |
|
"loss": 0.8699, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.196315789473685e-06, |
|
"loss": 0.8699, |
|
"step": 14810 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.180526315789475e-06, |
|
"loss": 0.8913, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.164736842105264e-06, |
|
"loss": 0.9578, |
|
"step": 14830 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.148947368421054e-06, |
|
"loss": 0.874, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.133157894736843e-06, |
|
"loss": 0.9424, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.117368421052631e-06, |
|
"loss": 0.8701, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.101578947368422e-06, |
|
"loss": 0.7962, |
|
"step": 14870 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.08578947368421e-06, |
|
"loss": 0.8808, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.07e-06, |
|
"loss": 0.8857, |
|
"step": 14890 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.05421052631579e-06, |
|
"loss": 0.9239, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.03842105263158e-06, |
|
"loss": 0.9216, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.022631578947368e-06, |
|
"loss": 0.8288, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.006842105263157e-06, |
|
"loss": 0.868, |
|
"step": 14930 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.991052631578947e-06, |
|
"loss": 0.8339, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.975263157894736e-06, |
|
"loss": 0.9501, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.959473684210526e-06, |
|
"loss": 0.9601, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.943684210526317e-06, |
|
"loss": 0.8391, |
|
"step": 14970 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.927894736842107e-06, |
|
"loss": 0.8242, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.912105263157896e-06, |
|
"loss": 0.8619, |
|
"step": 14990 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.896315789473684e-06, |
|
"loss": 0.9129, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_get_denotation_accuracy": 0.4890851116016679, |
|
"eval_loss": 0.9386902451515198, |
|
"eval_runtime": 230.2048, |
|
"eval_samples_per_second": 17.71, |
|
"eval_steps_per_second": 1.108, |
|
"step": 15000 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 2, |
|
"total_flos": 5.687599974087393e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|