|
{ |
|
"best_metric": 2.7371606826782227, |
|
"best_model_checkpoint": "/data1/attanasiog/babylm/roberta-tiny-10M/checkpoint-4150", |
|
"epoch": 89.57546563904945, |
|
"global_step": 4300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8e-05, |
|
"loss": 10.4287, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00016, |
|
"loss": 9.0477, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00024, |
|
"loss": 7.8228, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00032, |
|
"loss": 7.3343, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0004, |
|
"loss": 7.8031, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_accuracy": 0.06061240850112075, |
|
"eval_loss": 7.355990886688232, |
|
"eval_runtime": 145.9523, |
|
"eval_samples_per_second": 164.814, |
|
"eval_steps_per_second": 5.152, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.000399995625676045, |
|
"loss": 7.2898, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0003999825028955268, |
|
"loss": 7.1829, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0003999606322324786, |
|
"loss": 7.0831, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0003999300146435939, |
|
"loss": 6.8807, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00039989065146818525, |
|
"loss": 7.1948, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.11823707432860285, |
|
"eval_loss": 6.737408638000488, |
|
"eval_runtime": 145.8622, |
|
"eval_samples_per_second": 164.916, |
|
"eval_steps_per_second": 5.156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0003998425444281255, |
|
"loss": 6.659, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00039978569562777234, |
|
"loss": 6.5924, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0003997201075538765, |
|
"loss": 6.5237, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0003996457830754729, |
|
"loss": 6.4927, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00039956272544375493, |
|
"loss": 6.8927, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.1414597356195163, |
|
"eval_loss": 6.502169609069824, |
|
"eval_runtime": 145.9635, |
|
"eval_samples_per_second": 164.801, |
|
"eval_steps_per_second": 5.152, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00039947093829193245, |
|
"loss": 6.4155, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00039937042563507283, |
|
"loss": 6.4041, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.00039926119186992537, |
|
"loss": 6.3875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0003991432417747288, |
|
"loss": 6.3543, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0003990165805090023, |
|
"loss": 6.7339, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_accuracy": 0.1482938589304516, |
|
"eval_loss": 6.400519847869873, |
|
"eval_runtime": 145.8639, |
|
"eval_samples_per_second": 164.914, |
|
"eval_steps_per_second": 5.155, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00039888121361332003, |
|
"loss": 6.3085, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0003987371470090686, |
|
"loss": 6.3213, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00039858438699818784, |
|
"loss": 6.2931, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0003984229402628956, |
|
"loss": 6.2716, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00039825281386539503, |
|
"loss": 6.6609, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_accuracy": 0.1509599365008845, |
|
"eval_loss": 6.3535308837890625, |
|
"eval_runtime": 145.9161, |
|
"eval_samples_per_second": 164.855, |
|
"eval_steps_per_second": 5.154, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.000398074015247566, |
|
"loss": 6.2501, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0003978865522306392, |
|
"loss": 6.2436, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0003976904330148543, |
|
"loss": 6.2418, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00039748566617910113, |
|
"loss": 6.6426, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.0003972722606805445, |
|
"loss": 6.1972, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_accuracy": 0.15188271193711186, |
|
"eval_loss": 6.332435607910156, |
|
"eval_runtime": 146.0198, |
|
"eval_samples_per_second": 164.738, |
|
"eval_steps_per_second": 5.15, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00039705022585423216, |
|
"loss": 6.2183, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0003968195714126868, |
|
"loss": 6.1899, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00039658030744548075, |
|
"loss": 6.192, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.0003963324444187952, |
|
"loss": 6.5971, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0003960759931749619, |
|
"loss": 6.1685, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_accuracy": 0.15276707185574287, |
|
"eval_loss": 6.302943706512451, |
|
"eval_runtime": 145.9601, |
|
"eval_samples_per_second": 164.805, |
|
"eval_steps_per_second": 5.152, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00039581096493198893, |
|
"loss": 6.1653, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0003955373712830703, |
|
"loss": 6.1623, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00039525522419607854, |
|
"loss": 6.1604, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.0003949645360130412, |
|
"loss": 6.5496, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0003946653194496012, |
|
"loss": 6.1302, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_accuracy": 0.152128546451089, |
|
"eval_loss": 6.2827558517456055, |
|
"eval_runtime": 145.9935, |
|
"eval_samples_per_second": 164.768, |
|
"eval_steps_per_second": 5.151, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00039435758759446025, |
|
"loss": 6.1368, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.00039404135390880664, |
|
"loss": 6.1171, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0003937166322257262, |
|
"loss": 6.1463, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.00039338343674959745, |
|
"loss": 6.537, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.00039304178205546976, |
|
"loss": 6.093, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"eval_accuracy": 0.15364162638834264, |
|
"eval_loss": 6.256844520568848, |
|
"eval_runtime": 146.039, |
|
"eval_samples_per_second": 164.716, |
|
"eval_steps_per_second": 5.149, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00039269168308842634, |
|
"loss": 6.0973, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.00039233315516293006, |
|
"loss": 6.1012, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.00039196621396215403, |
|
"loss": 6.0809, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.000391590875537295, |
|
"loss": 6.4765, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 0.00039120715630687155, |
|
"loss": 6.0543, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"eval_accuracy": 0.15444620739515735, |
|
"eval_loss": 6.24298620223999, |
|
"eval_runtime": 145.9243, |
|
"eval_samples_per_second": 164.846, |
|
"eval_steps_per_second": 5.153, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.000390815073056006, |
|
"loss": 6.0777, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 0.00039041464293568983, |
|
"loss": 6.0697, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.00039000588346203374, |
|
"loss": 6.4636, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 0.0003895888125155014, |
|
"loss": 6.0487, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 0.00038916344834012695, |
|
"loss": 6.0479, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_accuracy": 0.1541217862327054, |
|
"eval_loss": 6.234572887420654, |
|
"eval_runtime": 145.8799, |
|
"eval_samples_per_second": 164.896, |
|
"eval_steps_per_second": 5.155, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.00038872980954271757, |
|
"loss": 6.0617, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00038828791509203895, |
|
"loss": 6.0441, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 0.00038783778431798597, |
|
"loss": 6.4461, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 0.0003873794369107369, |
|
"loss": 6.0258, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 0.0003869128929198922, |
|
"loss": 6.0372, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"eval_accuracy": 0.1545538581772011, |
|
"eval_loss": 6.223215103149414, |
|
"eval_runtime": 145.9665, |
|
"eval_samples_per_second": 164.798, |
|
"eval_steps_per_second": 5.152, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.0003864381727535973, |
|
"loss": 6.0353, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.00038595529717765027, |
|
"loss": 6.041, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 0.0003854642873145931, |
|
"loss": 6.4207, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00038496516464278776, |
|
"loss": 6.006, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 0.00038445795099547697, |
|
"loss": 6.0127, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"eval_accuracy": 0.15411265298876436, |
|
"eval_loss": 6.213912010192871, |
|
"eval_runtime": 145.9328, |
|
"eval_samples_per_second": 164.836, |
|
"eval_steps_per_second": 5.153, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 0.0003839426685598287, |
|
"loss": 6.0006, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 0.000383419339875966, |
|
"loss": 6.0152, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.00038288798783598087, |
|
"loss": 6.3908, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 0.0003823486356829329, |
|
"loss": 5.9744, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 0.0003818013070098325, |
|
"loss": 5.968, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"eval_accuracy": 0.15472111663693397, |
|
"eval_loss": 6.20527458190918, |
|
"eval_runtime": 145.9446, |
|
"eval_samples_per_second": 164.823, |
|
"eval_steps_per_second": 5.153, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 0.0003812460257586089, |
|
"loss": 5.9813, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.000380682816219063, |
|
"loss": 6.0108, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 0.00038011170302780446, |
|
"loss": 6.3495, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 0.00037953271116717444, |
|
"loss": 5.9708, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 0.0003789458659641527, |
|
"loss": 5.9635, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_accuracy": 0.15486276242328167, |
|
"eval_loss": 6.199557781219482, |
|
"eval_runtime": 145.9791, |
|
"eval_samples_per_second": 164.784, |
|
"eval_steps_per_second": 5.151, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 0.0003783511930892495, |
|
"loss": 5.9756, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 0.00037774871855538275, |
|
"loss": 6.3631, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 0.00037713846871674045, |
|
"loss": 5.9497, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 0.0003765204702676274, |
|
"loss": 5.9433, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 0.0003758947502412978, |
|
"loss": 5.9479, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"eval_accuracy": 0.15478355696794033, |
|
"eval_loss": 6.195274353027344, |
|
"eval_runtime": 145.939, |
|
"eval_samples_per_second": 164.829, |
|
"eval_steps_per_second": 5.153, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.0003752613360087727, |
|
"loss": 5.9614, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 17.08, |
|
"learning_rate": 0.00037462025527764265, |
|
"loss": 6.326, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 0.00037397153609085553, |
|
"loss": 5.9293, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 0.0003733152068254901, |
|
"loss": 5.9305, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 0.00037265129619151483, |
|
"loss": 5.9371, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_accuracy": 0.15451778319531595, |
|
"eval_loss": 6.1887054443359375, |
|
"eval_runtime": 145.8431, |
|
"eval_samples_per_second": 164.938, |
|
"eval_steps_per_second": 5.156, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 0.00037197983323053143, |
|
"loss": 5.9348, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 0.00037130084731450515, |
|
"loss": 6.2994, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 0.0003706143681444795, |
|
"loss": 5.8969, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 0.0003699204257492774, |
|
"loss": 5.9219, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 0.0003692190504841871, |
|
"loss": 5.9046, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"eval_accuracy": 0.1545486653674884, |
|
"eval_loss": 6.161332130432129, |
|
"eval_runtime": 145.9406, |
|
"eval_samples_per_second": 164.827, |
|
"eval_steps_per_second": 5.153, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 0.00036851027302963493, |
|
"loss": 5.9011, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 0.00036779412438984294, |
|
"loss": 6.2593, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"learning_rate": 0.0003670706358914725, |
|
"loss": 5.8755, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"learning_rate": 0.0003663398391822543, |
|
"loss": 5.8396, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.00036560176622960403, |
|
"loss": 5.8368, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"eval_accuracy": 0.15570189218059025, |
|
"eval_loss": 6.095159530639648, |
|
"eval_runtime": 145.7599, |
|
"eval_samples_per_second": 165.032, |
|
"eval_steps_per_second": 5.159, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.00036485644931922353, |
|
"loss": 5.8184, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 0.0003641039210536889, |
|
"loss": 6.1866, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 20.41, |
|
"learning_rate": 0.0003633442143510245, |
|
"loss": 5.7848, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 0.00036257736244326246, |
|
"loss": 5.7807, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"learning_rate": 0.0003618033988749895, |
|
"loss": 5.7914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"eval_accuracy": 0.15694020859066315, |
|
"eval_loss": 6.032991409301758, |
|
"eval_runtime": 145.9881, |
|
"eval_samples_per_second": 164.774, |
|
"eval_steps_per_second": 5.151, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.04, |
|
"learning_rate": 0.0003610223575018795, |
|
"loss": 6.1552, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"learning_rate": 0.00036023427248921215, |
|
"loss": 5.7428, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"learning_rate": 0.0003594391783103792, |
|
"loss": 5.7276, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 21.66, |
|
"learning_rate": 0.00035863710974537563, |
|
"loss": 5.7289, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 0.00035782810187927875, |
|
"loss": 5.7026, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"eval_accuracy": 0.16123595961673237, |
|
"eval_loss": 5.942953109741211, |
|
"eval_runtime": 145.9911, |
|
"eval_samples_per_second": 164.77, |
|
"eval_steps_per_second": 5.151, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"learning_rate": 0.0003570121901007136, |
|
"loss": 6.0423, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 0.0003561894101003044, |
|
"loss": 5.6495, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"learning_rate": 0.00035535979786911396, |
|
"loss": 5.6223, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 0.00035452338969706876, |
|
"loss": 5.5675, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 0.00035368022217137184, |
|
"loss": 5.491, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"eval_accuracy": 0.19736824293775215, |
|
"eval_loss": 5.609994888305664, |
|
"eval_runtime": 146.0961, |
|
"eval_samples_per_second": 164.652, |
|
"eval_steps_per_second": 5.147, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 23.12, |
|
"learning_rate": 0.00035283033217490227, |
|
"loss": 5.6961, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 0.00035197375688460176, |
|
"loss": 5.239, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 0.0003511105337698484, |
|
"loss": 5.1252, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 0.0003502407005908177, |
|
"loss": 5.0182, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"learning_rate": 0.0003493642953968308, |
|
"loss": 4.9289, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.27019214299497635, |
|
"eval_loss": 4.960735321044922, |
|
"eval_runtime": 146.1516, |
|
"eval_samples_per_second": 164.589, |
|
"eval_steps_per_second": 5.145, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 0.00034848135652469, |
|
"loss": 5.1346, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 0.00034759192259700196, |
|
"loss": 4.7377, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 0.000346696032520488, |
|
"loss": 4.6538, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 0.00034579372548428235, |
|
"loss": 4.608, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 0.00034488504095821784, |
|
"loss": 4.5214, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.3050591252908655, |
|
"eval_loss": 4.579548358917236, |
|
"eval_runtime": 146.015, |
|
"eval_samples_per_second": 164.743, |
|
"eval_steps_per_second": 5.15, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 0.0003439700186910993, |
|
"loss": 4.7508, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"learning_rate": 0.00034304869870896513, |
|
"loss": 4.4132, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 0.00034212112131333587, |
|
"loss": 4.3489, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 25.82, |
|
"learning_rate": 0.0003411873270794518, |
|
"loss": 4.3454, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 26.04, |
|
"learning_rate": 0.00034024735685449773, |
|
"loss": 4.5663, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 26.04, |
|
"eval_accuracy": 0.32645309469898054, |
|
"eval_loss": 4.345365047454834, |
|
"eval_runtime": 146.0915, |
|
"eval_samples_per_second": 164.657, |
|
"eval_steps_per_second": 5.147, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"learning_rate": 0.00033930125175581647, |
|
"loss": 4.2188, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 26.45, |
|
"learning_rate": 0.0003383490531691099, |
|
"loss": 4.1928, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 26.66, |
|
"learning_rate": 0.0003373908027466289, |
|
"loss": 4.1575, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"learning_rate": 0.00033642654240535134, |
|
"loss": 4.1106, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"learning_rate": 0.00033545631432514825, |
|
"loss": 4.3717, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"eval_accuracy": 0.3412254938630985, |
|
"eval_loss": 4.1738104820251465, |
|
"eval_runtime": 145.9707, |
|
"eval_samples_per_second": 164.793, |
|
"eval_steps_per_second": 5.152, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 27.29, |
|
"learning_rate": 0.00033448016094693895, |
|
"loss": 4.007, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 27.49, |
|
"learning_rate": 0.0003334981249708345, |
|
"loss": 4.003, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"learning_rate": 0.00033251024935427, |
|
"loss": 3.9491, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 0.0003315165773101249, |
|
"loss": 3.9411, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 0.00033051715230483374, |
|
"loss": 4.1483, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"eval_accuracy": 0.35552299245507185, |
|
"eval_loss": 4.033575534820557, |
|
"eval_runtime": 145.9738, |
|
"eval_samples_per_second": 164.79, |
|
"eval_steps_per_second": 5.152, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 0.0003295120180564838, |
|
"loss": 3.8395, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"learning_rate": 0.00032850121853290334, |
|
"loss": 3.8271, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 0.000327484797949738, |
|
"loss": 3.8272, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"learning_rate": 0.00032646280076851684, |
|
"loss": 3.7855, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 0.0003254352716947074, |
|
"loss": 3.9988, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"eval_accuracy": 0.3677331361148426, |
|
"eval_loss": 3.91800594329834, |
|
"eval_runtime": 146.056, |
|
"eval_samples_per_second": 164.697, |
|
"eval_steps_per_second": 5.149, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 29.37, |
|
"learning_rate": 0.0003244022556757602, |
|
"loss": 3.7379, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 0.0003233637978991422, |
|
"loss": 3.6974, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 29.78, |
|
"learning_rate": 0.00032231994379036086, |
|
"loss": 3.6966, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 0.0003212707390109765, |
|
"loss": 3.6594, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 0.00032021622945660504, |
|
"loss": 3.8695, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"eval_accuracy": 0.37818666192863265, |
|
"eval_loss": 3.81080961227417, |
|
"eval_runtime": 146.0723, |
|
"eval_samples_per_second": 164.679, |
|
"eval_steps_per_second": 5.148, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"learning_rate": 0.0003191564612549106, |
|
"loss": 3.598, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 30.62, |
|
"learning_rate": 0.0003180914807635874, |
|
"loss": 3.5942, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"learning_rate": 0.00031702133456833236, |
|
"loss": 3.585, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 31.04, |
|
"learning_rate": 0.00031594606948080663, |
|
"loss": 3.7908, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 0.00031486573253658874, |
|
"loss": 3.5017, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"eval_accuracy": 0.3878577124364749, |
|
"eval_loss": 3.7240185737609863, |
|
"eval_runtime": 145.8744, |
|
"eval_samples_per_second": 164.902, |
|
"eval_steps_per_second": 5.155, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 31.45, |
|
"learning_rate": 0.00031378037099311627, |
|
"loss": 3.5206, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"learning_rate": 0.00031269003232761933, |
|
"loss": 3.5049, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 31.86, |
|
"learning_rate": 0.0003115947642350433, |
|
"loss": 3.4852, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 0.00031049461462596267, |
|
"loss": 3.6894, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"learning_rate": 0.00030938963162448544, |
|
"loss": 3.4311, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 32.29, |
|
"eval_accuracy": 0.3973612765821424, |
|
"eval_loss": 3.6425790786743164, |
|
"eval_runtime": 146.1194, |
|
"eval_samples_per_second": 164.626, |
|
"eval_steps_per_second": 5.146, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 32.49, |
|
"learning_rate": 0.0003082798635661476, |
|
"loss": 3.4258, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 32.7, |
|
"learning_rate": 0.0003071653589957993, |
|
"loss": 3.4076, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"learning_rate": 0.000306046166665481, |
|
"loss": 3.4117, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 33.12, |
|
"learning_rate": 0.00030492233553229076, |
|
"loss": 3.5985, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 0.00030379391475624304, |
|
"loss": 3.3517, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_accuracy": 0.40682330082568596, |
|
"eval_loss": 3.5615479946136475, |
|
"eval_runtime": 146.0666, |
|
"eval_samples_per_second": 164.685, |
|
"eval_steps_per_second": 5.148, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 33.53, |
|
"learning_rate": 0.0003026609536981183, |
|
"loss": 3.3431, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 33.74, |
|
"learning_rate": 0.0003015235019173034, |
|
"loss": 3.3546, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 33.95, |
|
"learning_rate": 0.00030038160916962404, |
|
"loss": 3.3378, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 34.16, |
|
"learning_rate": 0.00029923532540516843, |
|
"loss": 3.5305, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 34.37, |
|
"learning_rate": 0.00029808470076610167, |
|
"loss": 3.2856, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 34.37, |
|
"eval_accuracy": 0.41555171151451314, |
|
"eval_loss": 3.4914703369140625, |
|
"eval_runtime": 146.1721, |
|
"eval_samples_per_second": 164.566, |
|
"eval_steps_per_second": 5.145, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"learning_rate": 0.00029692978558447305, |
|
"loss": 3.273, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 0.0002957706303800139, |
|
"loss": 3.278, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"learning_rate": 0.0002946072858579282, |
|
"loss": 3.2614, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 0.0002934398029066739, |
|
"loss": 3.4456, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 35.41, |
|
"learning_rate": 0.0002922682325957376, |
|
"loss": 3.227, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 35.41, |
|
"eval_accuracy": 0.4255488250901363, |
|
"eval_loss": 3.41792893409729, |
|
"eval_runtime": 146.0068, |
|
"eval_samples_per_second": 164.753, |
|
"eval_steps_per_second": 5.15, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 35.62, |
|
"learning_rate": 0.00029109262617339987, |
|
"loss": 3.1995, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 35.82, |
|
"learning_rate": 0.0002899130350644941, |
|
"loss": 3.2058, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 0.00028872951086815685, |
|
"loss": 3.4183, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 36.25, |
|
"learning_rate": 0.00028754210535557036, |
|
"loss": 3.1514, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"learning_rate": 0.00028635087046769857, |
|
"loss": 3.1675, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"eval_accuracy": 0.43245804160401624, |
|
"eval_loss": 3.3635590076446533, |
|
"eval_runtime": 146.1639, |
|
"eval_samples_per_second": 164.575, |
|
"eval_steps_per_second": 5.145, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 36.66, |
|
"learning_rate": 0.00028515585831301456, |
|
"loss": 3.1645, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0002839571211652212, |
|
"loss": 3.1617, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"learning_rate": 0.00028275471146096466, |
|
"loss": 3.3333, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"learning_rate": 0.00028154868179754074, |
|
"loss": 3.1167, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 37.49, |
|
"learning_rate": 0.0002803390849305939, |
|
"loss": 3.0908, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 37.49, |
|
"eval_accuracy": 0.43940471782078516, |
|
"eval_loss": 3.30828595161438, |
|
"eval_runtime": 146.1043, |
|
"eval_samples_per_second": 164.643, |
|
"eval_steps_per_second": 5.147, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"learning_rate": 0.0002791259737718097, |
|
"loss": 3.1214, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 37.9, |
|
"learning_rate": 0.0002779094013866001, |
|
"loss": 3.0987, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 38.12, |
|
"learning_rate": 0.00027668942099178234, |
|
"loss": 3.2767, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 38.33, |
|
"learning_rate": 0.00027546608595325117, |
|
"loss": 3.0716, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 38.53, |
|
"learning_rate": 0.00027423944978364416, |
|
"loss": 3.0561, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 38.53, |
|
"eval_accuracy": 0.44727625227121054, |
|
"eval_loss": 3.25723934173584, |
|
"eval_runtime": 145.9616, |
|
"eval_samples_per_second": 164.804, |
|
"eval_steps_per_second": 5.152, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"learning_rate": 0.00027300956614000115, |
|
"loss": 3.0564, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 0.00027177648882141704, |
|
"loss": 3.0583, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 39.16, |
|
"learning_rate": 0.0002705402717666883, |
|
"loss": 3.2319, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 39.37, |
|
"learning_rate": 0.00026930096905195363, |
|
"loss": 3.0204, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 39.58, |
|
"learning_rate": 0.00026805863488832865, |
|
"loss": 3.0139, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 39.58, |
|
"eval_accuracy": 0.4525324485267982, |
|
"eval_loss": 3.215851306915283, |
|
"eval_runtime": 146.1327, |
|
"eval_samples_per_second": 164.611, |
|
"eval_steps_per_second": 5.146, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 39.78, |
|
"learning_rate": 0.00026681332361953424, |
|
"loss": 3.0053, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 0.0002655650897195195, |
|
"loss": 3.0171, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 40.21, |
|
"learning_rate": 0.0002643139877900791, |
|
"loss": 3.1749, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 40.41, |
|
"learning_rate": 0.00026306007255846436, |
|
"loss": 2.9764, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"learning_rate": 0.00026180339887498953, |
|
"loss": 2.9837, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"eval_accuracy": 0.45754832554207525, |
|
"eval_loss": 3.1789309978485107, |
|
"eval_runtime": 146.1778, |
|
"eval_samples_per_second": 164.56, |
|
"eval_steps_per_second": 5.144, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 40.82, |
|
"learning_rate": 0.00026054402171063267, |
|
"loss": 2.9752, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 41.04, |
|
"learning_rate": 0.0002592819961546308, |
|
"loss": 3.1648, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 41.25, |
|
"learning_rate": 0.00025801737741207005, |
|
"loss": 2.9438, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 41.45, |
|
"learning_rate": 0.000256750220801471, |
|
"loss": 2.941, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 41.66, |
|
"learning_rate": 0.0002554805817523689, |
|
"loss": 2.9387, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 41.66, |
|
"eval_accuracy": 0.46179467604077673, |
|
"eval_loss": 3.1430864334106445, |
|
"eval_runtime": 146.0529, |
|
"eval_samples_per_second": 164.701, |
|
"eval_steps_per_second": 5.149, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"learning_rate": 0.0002542085158028889, |
|
"loss": 2.9371, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 42.08, |
|
"learning_rate": 0.00025293407859731633, |
|
"loss": 3.1085, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 0.00025165732588366334, |
|
"loss": 2.8999, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 42.49, |
|
"learning_rate": 0.00025037831351122967, |
|
"loss": 2.9159, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 42.7, |
|
"learning_rate": 0.0002490970974281599, |
|
"loss": 2.9034, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 42.7, |
|
"eval_accuracy": 0.46535935335872575, |
|
"eval_loss": 3.116283655166626, |
|
"eval_runtime": 146.1195, |
|
"eval_samples_per_second": 164.626, |
|
"eval_steps_per_second": 5.146, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 42.9, |
|
"learning_rate": 0.00024781373367899597, |
|
"loss": 2.8936, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 43.12, |
|
"learning_rate": 0.00024652827840222606, |
|
"loss": 3.0697, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"learning_rate": 0.00024524078782782807, |
|
"loss": 2.8913, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 43.53, |
|
"learning_rate": 0.00024395131827481062, |
|
"loss": 2.8624, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 43.74, |
|
"learning_rate": 0.0002426599261487494, |
|
"loss": 2.8822, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 43.74, |
|
"eval_accuracy": 0.46941429535485324, |
|
"eval_loss": 3.0841524600982666, |
|
"eval_runtime": 146.1268, |
|
"eval_samples_per_second": 164.617, |
|
"eval_steps_per_second": 5.146, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"learning_rate": 0.00024136666793931935, |
|
"loss": 2.8655, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 0.00024007160021782427, |
|
"loss": 3.0323, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 44.37, |
|
"learning_rate": 0.0002387747796347217, |
|
"loss": 2.8446, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 44.58, |
|
"learning_rate": 0.00023747626291714498, |
|
"loss": 2.8433, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 0.000236176106866422, |
|
"loss": 2.836, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"eval_accuracy": 0.47268071006532664, |
|
"eval_loss": 3.0583226680755615, |
|
"eval_runtime": 145.9982, |
|
"eval_samples_per_second": 164.762, |
|
"eval_steps_per_second": 5.151, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"learning_rate": 0.00023487436835559035, |
|
"loss": 2.8457, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 45.21, |
|
"learning_rate": 0.00023357110432690954, |
|
"loss": 2.9941, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 45.41, |
|
"learning_rate": 0.00023226637178937022, |
|
"loss": 2.8208, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"learning_rate": 0.00023096022781620034, |
|
"loss": 2.8154, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 45.82, |
|
"learning_rate": 0.0002296527295423684, |
|
"loss": 2.8129, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 45.82, |
|
"eval_accuracy": 0.47600857452342976, |
|
"eval_loss": 3.035902738571167, |
|
"eval_runtime": 145.9849, |
|
"eval_samples_per_second": 164.777, |
|
"eval_steps_per_second": 5.151, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 46.04, |
|
"learning_rate": 0.00022834393416208486, |
|
"loss": 2.9871, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 46.25, |
|
"learning_rate": 0.0002270338989262994, |
|
"loss": 2.7892, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 0.00022572268114019726, |
|
"loss": 2.7843, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 46.66, |
|
"learning_rate": 0.00022441033816069202, |
|
"loss": 2.7867, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"learning_rate": 0.00022309692739391727, |
|
"loss": 2.7733, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"eval_accuracy": 0.47764141406488453, |
|
"eval_loss": 3.017348051071167, |
|
"eval_runtime": 146.0338, |
|
"eval_samples_per_second": 164.722, |
|
"eval_steps_per_second": 5.149, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 47.08, |
|
"learning_rate": 0.00022178250629271452, |
|
"loss": 2.981, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 47.29, |
|
"learning_rate": 0.00022046713235412103, |
|
"loss": 2.7598, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 47.49, |
|
"learning_rate": 0.00021915086311685404, |
|
"loss": 2.7769, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 47.7, |
|
"learning_rate": 0.00021783375615879415, |
|
"loss": 2.7753, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 47.9, |
|
"learning_rate": 0.0002165158690944665, |
|
"loss": 2.7589, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 47.9, |
|
"eval_accuracy": 0.4811929413931917, |
|
"eval_loss": 2.9977798461914062, |
|
"eval_runtime": 146.0602, |
|
"eval_samples_per_second": 164.692, |
|
"eval_steps_per_second": 5.149, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 48.12, |
|
"learning_rate": 0.00021519725957252063, |
|
"loss": 2.9409, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 48.33, |
|
"learning_rate": 0.00021387798527320882, |
|
"loss": 2.7465, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 0.0002125581039058627, |
|
"loss": 2.7403, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 48.74, |
|
"learning_rate": 0.0002112376732063691, |
|
"loss": 2.7284, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"learning_rate": 0.00020991675093464448, |
|
"loss": 2.7378, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_accuracy": 0.4831324380858166, |
|
"eval_loss": 2.9787769317626953, |
|
"eval_runtime": 146.0148, |
|
"eval_samples_per_second": 164.744, |
|
"eval_steps_per_second": 5.15, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"learning_rate": 0.00020859539487210813, |
|
"loss": 2.9167, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 49.37, |
|
"learning_rate": 0.0002072736628191549, |
|
"loss": 2.7203, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 49.58, |
|
"learning_rate": 0.0002059516125926265, |
|
"loss": 2.7276, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 49.78, |
|
"learning_rate": 0.00020462930202328278, |
|
"loss": 2.7001, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 0.00020330678895327174, |
|
"loss": 2.7138, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"eval_accuracy": 0.4843915093446441, |
|
"eval_loss": 2.967425584793091, |
|
"eval_runtime": 146.0929, |
|
"eval_samples_per_second": 164.655, |
|
"eval_steps_per_second": 5.147, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 50.21, |
|
"learning_rate": 0.00020198413123359926, |
|
"loss": 2.8865, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 50.41, |
|
"learning_rate": 0.00020066138672159903, |
|
"loss": 2.698, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 50.62, |
|
"learning_rate": 0.00019933861327840098, |
|
"loss": 2.6978, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 50.82, |
|
"learning_rate": 0.00019801586876640073, |
|
"loss": 2.704, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 51.04, |
|
"learning_rate": 0.0001966932110467283, |
|
"loss": 2.8692, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 51.04, |
|
"eval_accuracy": 0.4874163939573572, |
|
"eval_loss": 2.9475862979888916, |
|
"eval_runtime": 145.9737, |
|
"eval_samples_per_second": 164.79, |
|
"eval_steps_per_second": 5.152, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 51.25, |
|
"learning_rate": 0.00019537069797671724, |
|
"loss": 2.6734, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 51.45, |
|
"learning_rate": 0.0001940483874073735, |
|
"loss": 2.6636, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 51.66, |
|
"learning_rate": 0.00019272633718084517, |
|
"loss": 2.6756, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 51.86, |
|
"learning_rate": 0.0001914046051278919, |
|
"loss": 2.6808, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 52.08, |
|
"learning_rate": 0.00019008324906535554, |
|
"loss": 2.8462, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 52.08, |
|
"eval_accuracy": 0.48931343115405407, |
|
"eval_loss": 2.934227466583252, |
|
"eval_runtime": 145.9977, |
|
"eval_samples_per_second": 164.763, |
|
"eval_steps_per_second": 5.151, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 52.29, |
|
"learning_rate": 0.0001887623267936309, |
|
"loss": 2.6553, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 52.49, |
|
"learning_rate": 0.00018744189609413734, |
|
"loss": 2.6559, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 52.7, |
|
"learning_rate": 0.0001861220147267912, |
|
"loss": 2.6536, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"learning_rate": 0.0001848027404274794, |
|
"loss": 2.6524, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"learning_rate": 0.00018348413090553354, |
|
"loss": 2.8312, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"eval_accuracy": 0.4900369570164547, |
|
"eval_loss": 2.9268674850463867, |
|
"eval_runtime": 146.0027, |
|
"eval_samples_per_second": 164.757, |
|
"eval_steps_per_second": 5.151, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 0.00018216624384120595, |
|
"loss": 2.6306, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 53.53, |
|
"learning_rate": 0.00018084913688314597, |
|
"loss": 2.6398, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 53.74, |
|
"learning_rate": 0.000179532867645879, |
|
"loss": 2.6318, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 53.95, |
|
"learning_rate": 0.0001782174937072855, |
|
"loss": 2.6358, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 54.16, |
|
"learning_rate": 0.00017690307260608278, |
|
"loss": 2.7834, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 54.16, |
|
"eval_accuracy": 0.4917280711401593, |
|
"eval_loss": 2.911123037338257, |
|
"eval_runtime": 146.0206, |
|
"eval_samples_per_second": 164.737, |
|
"eval_steps_per_second": 5.15, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 54.37, |
|
"learning_rate": 0.000175589661839308, |
|
"loss": 2.6226, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 54.58, |
|
"learning_rate": 0.00017427731885980282, |
|
"loss": 2.6183, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 54.78, |
|
"learning_rate": 0.0001729661010737007, |
|
"loss": 2.6313, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"learning_rate": 0.00017165606583791515, |
|
"loss": 2.6366, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 55.21, |
|
"learning_rate": 0.00017034727045763158, |
|
"loss": 2.7822, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 55.21, |
|
"eval_accuracy": 0.4934482911572486, |
|
"eval_loss": 2.8986542224884033, |
|
"eval_runtime": 146.1152, |
|
"eval_samples_per_second": 164.63, |
|
"eval_steps_per_second": 5.147, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 55.41, |
|
"learning_rate": 0.00016903977218379974, |
|
"loss": 2.5985, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 55.62, |
|
"learning_rate": 0.00016773362821062983, |
|
"loss": 2.6059, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 55.82, |
|
"learning_rate": 0.00016642889567309048, |
|
"loss": 2.6083, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 56.04, |
|
"learning_rate": 0.0001651256316444097, |
|
"loss": 2.7793, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 56.25, |
|
"learning_rate": 0.0001638238931335781, |
|
"loss": 2.584, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 56.25, |
|
"eval_accuracy": 0.49487679829418024, |
|
"eval_loss": 2.8844311237335205, |
|
"eval_runtime": 145.9294, |
|
"eval_samples_per_second": 164.84, |
|
"eval_steps_per_second": 5.153, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 56.45, |
|
"learning_rate": 0.00016252373708285504, |
|
"loss": 2.5884, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 56.66, |
|
"learning_rate": 0.00016122522036527838, |
|
"loss": 2.5881, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"learning_rate": 0.00015992839978217578, |
|
"loss": 2.5866, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 57.08, |
|
"learning_rate": 0.00015863333206068067, |
|
"loss": 2.7644, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 57.29, |
|
"learning_rate": 0.00015734007385125067, |
|
"loss": 2.5668, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 57.29, |
|
"eval_accuracy": 0.49651714759851406, |
|
"eval_loss": 2.880821704864502, |
|
"eval_runtime": 146.1597, |
|
"eval_samples_per_second": 164.58, |
|
"eval_steps_per_second": 5.145, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 57.49, |
|
"learning_rate": 0.0001560486817251894, |
|
"loss": 2.5728, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 57.7, |
|
"learning_rate": 0.000154759212172172, |
|
"loss": 2.5765, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 57.9, |
|
"learning_rate": 0.00015347172159777396, |
|
"loss": 2.5794, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 58.12, |
|
"learning_rate": 0.000152186266321004, |
|
"loss": 2.7342, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 0.0001509029025718402, |
|
"loss": 2.5536, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"eval_accuracy": 0.4981620698137741, |
|
"eval_loss": 2.864001512527466, |
|
"eval_runtime": 146.123, |
|
"eval_samples_per_second": 164.622, |
|
"eval_steps_per_second": 5.146, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 58.53, |
|
"learning_rate": 0.0001496216864887704, |
|
"loss": 2.5466, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 58.74, |
|
"learning_rate": 0.00014834267411633674, |
|
"loss": 2.553, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 58.95, |
|
"learning_rate": 0.0001470659214026837, |
|
"loss": 2.5623, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 59.16, |
|
"learning_rate": 0.00014579148419711119, |
|
"loss": 2.727, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 59.37, |
|
"learning_rate": 0.00014451941824763113, |
|
"loss": 2.5403, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 59.37, |
|
"eval_accuracy": 0.49815218958648255, |
|
"eval_loss": 2.860569953918457, |
|
"eval_runtime": 146.132, |
|
"eval_samples_per_second": 164.611, |
|
"eval_steps_per_second": 5.146, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 59.58, |
|
"learning_rate": 0.000143249779198529, |
|
"loss": 2.5441, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 59.78, |
|
"learning_rate": 0.00014198262258793002, |
|
"loss": 2.5541, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 59.99, |
|
"learning_rate": 0.00014071800384536927, |
|
"loss": 2.5482, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 60.21, |
|
"learning_rate": 0.00013945597828936737, |
|
"loss": 2.6878, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 60.41, |
|
"learning_rate": 0.00013819660112501054, |
|
"loss": 2.5294, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 60.41, |
|
"eval_accuracy": 0.5007705653773009, |
|
"eval_loss": 2.8440916538238525, |
|
"eval_runtime": 146.0675, |
|
"eval_samples_per_second": 164.684, |
|
"eval_steps_per_second": 5.148, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 60.62, |
|
"learning_rate": 0.00013693992744153572, |
|
"loss": 2.5448, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 60.82, |
|
"learning_rate": 0.00013568601220992097, |
|
"loss": 2.5435, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 61.04, |
|
"learning_rate": 0.00013443491028048045, |
|
"loss": 2.71, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 61.25, |
|
"learning_rate": 0.0001331866763804658, |
|
"loss": 2.5199, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 61.45, |
|
"learning_rate": 0.0001319413651116714, |
|
"loss": 2.513, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 61.45, |
|
"eval_accuracy": 0.5013016714921779, |
|
"eval_loss": 2.840217113494873, |
|
"eval_runtime": 146.0072, |
|
"eval_samples_per_second": 164.752, |
|
"eval_steps_per_second": 5.15, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 61.66, |
|
"learning_rate": 0.00013069903094804644, |
|
"loss": 2.5158, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 61.86, |
|
"learning_rate": 0.0001294597282333118, |
|
"loss": 2.5292, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 62.08, |
|
"learning_rate": 0.00012822351117858303, |
|
"loss": 2.6752, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 62.29, |
|
"learning_rate": 0.0001269904338599989, |
|
"loss": 2.5094, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 62.49, |
|
"learning_rate": 0.0001257605502163558, |
|
"loss": 2.5105, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 62.49, |
|
"eval_accuracy": 0.5022339398713631, |
|
"eval_loss": 2.8315513134002686, |
|
"eval_runtime": 146.1095, |
|
"eval_samples_per_second": 164.637, |
|
"eval_steps_per_second": 5.147, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 62.7, |
|
"learning_rate": 0.00012453391404674885, |
|
"loss": 2.4981, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 62.9, |
|
"learning_rate": 0.00012331057900821768, |
|
"loss": 2.5072, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 63.12, |
|
"learning_rate": 0.0001220905986134, |
|
"loss": 2.6561, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"learning_rate": 0.00012087402622819039, |
|
"loss": 2.5062, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 63.53, |
|
"learning_rate": 0.00011966091506940616, |
|
"loss": 2.4897, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 63.53, |
|
"eval_accuracy": 0.502685487439774, |
|
"eval_loss": 2.823685646057129, |
|
"eval_runtime": 146.1084, |
|
"eval_samples_per_second": 164.638, |
|
"eval_steps_per_second": 5.147, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 63.74, |
|
"learning_rate": 0.00011845131820245934, |
|
"loss": 2.4945, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 63.95, |
|
"learning_rate": 0.00011724528853903536, |
|
"loss": 2.5023, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 64.16, |
|
"learning_rate": 0.00011604287883477889, |
|
"loss": 2.637, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 64.37, |
|
"learning_rate": 0.00011484414168698547, |
|
"loss": 2.4841, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 64.58, |
|
"learning_rate": 0.00011364912953230145, |
|
"loss": 2.4974, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 64.58, |
|
"eval_accuracy": 0.5039655187362361, |
|
"eval_loss": 2.818704605102539, |
|
"eval_runtime": 146.0534, |
|
"eval_samples_per_second": 164.7, |
|
"eval_steps_per_second": 5.149, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 64.78, |
|
"learning_rate": 0.00011245789464442964, |
|
"loss": 2.496, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"learning_rate": 0.00011127048913184326, |
|
"loss": 2.4902, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 65.21, |
|
"learning_rate": 0.00011008696493550599, |
|
"loss": 2.6366, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 65.41, |
|
"learning_rate": 0.00010890737382660015, |
|
"loss": 2.4739, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 65.62, |
|
"learning_rate": 0.00010773176740426248, |
|
"loss": 2.4799, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 65.62, |
|
"eval_accuracy": 0.5044451239477096, |
|
"eval_loss": 2.8128514289855957, |
|
"eval_runtime": 146.0215, |
|
"eval_samples_per_second": 164.736, |
|
"eval_steps_per_second": 5.15, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 65.82, |
|
"learning_rate": 0.00010656019709332606, |
|
"loss": 2.4707, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"learning_rate": 0.00010539271414207186, |
|
"loss": 2.6249, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 66.25, |
|
"learning_rate": 0.00010422936961998609, |
|
"loss": 2.4617, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 66.45, |
|
"learning_rate": 0.00010307021441552707, |
|
"loss": 2.4508, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"learning_rate": 0.00010191529923389845, |
|
"loss": 2.4741, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"eval_accuracy": 0.5057173793056381, |
|
"eval_loss": 2.805563449859619, |
|
"eval_runtime": 146.0069, |
|
"eval_samples_per_second": 164.752, |
|
"eval_steps_per_second": 5.15, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 66.86, |
|
"learning_rate": 0.00010076467459483155, |
|
"loss": 2.4658, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 67.08, |
|
"learning_rate": 9.961839083037592e-05, |
|
"loss": 2.6267, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 67.29, |
|
"learning_rate": 9.847649808269658e-05, |
|
"loss": 2.4656, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 67.49, |
|
"learning_rate": 9.733904630188176e-05, |
|
"loss": 2.4421, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 67.7, |
|
"learning_rate": 9.620608524375703e-05, |
|
"loss": 2.4582, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 67.7, |
|
"eval_accuracy": 0.506052237287108, |
|
"eval_loss": 2.80246639251709, |
|
"eval_runtime": 145.9985, |
|
"eval_samples_per_second": 164.762, |
|
"eval_steps_per_second": 5.151, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 67.9, |
|
"learning_rate": 9.507766446770934e-05, |
|
"loss": 2.456, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 68.12, |
|
"learning_rate": 9.39538333345191e-05, |
|
"loss": 2.6204, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 68.33, |
|
"learning_rate": 9.283464100420063e-05, |
|
"loss": 2.4513, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 68.53, |
|
"learning_rate": 9.17201364338524e-05, |
|
"loss": 2.4486, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 68.74, |
|
"learning_rate": 9.061036837551466e-05, |
|
"loss": 2.4389, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 68.74, |
|
"eval_accuracy": 0.5075605292045352, |
|
"eval_loss": 2.791304111480713, |
|
"eval_runtime": 146.0353, |
|
"eval_samples_per_second": 164.72, |
|
"eval_steps_per_second": 5.149, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 68.95, |
|
"learning_rate": 8.950538537403736e-05, |
|
"loss": 2.4384, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 69.16, |
|
"learning_rate": 8.840523576495681e-05, |
|
"loss": 2.5977, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 69.37, |
|
"learning_rate": 8.730996767238072e-05, |
|
"loss": 2.4459, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 69.58, |
|
"learning_rate": 8.621962900688378e-05, |
|
"loss": 2.4281, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 69.78, |
|
"learning_rate": 8.513426746341128e-05, |
|
"loss": 2.4539, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 69.78, |
|
"eval_accuracy": 0.5071934293322717, |
|
"eval_loss": 2.7881319522857666, |
|
"eval_runtime": 145.9867, |
|
"eval_samples_per_second": 164.775, |
|
"eval_steps_per_second": 5.151, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"learning_rate": 8.405393051919333e-05, |
|
"loss": 2.4298, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 70.21, |
|
"learning_rate": 8.29786654316677e-05, |
|
"loss": 2.5885, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 70.41, |
|
"learning_rate": 8.190851923641259e-05, |
|
"loss": 2.4073, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 70.62, |
|
"learning_rate": 8.084353874508947e-05, |
|
"loss": 2.4379, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 70.82, |
|
"learning_rate": 7.978377054339499e-05, |
|
"loss": 2.4252, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 70.82, |
|
"eval_accuracy": 0.5081794918909719, |
|
"eval_loss": 2.7884321212768555, |
|
"eval_runtime": 146.1195, |
|
"eval_samples_per_second": 164.626, |
|
"eval_steps_per_second": 5.146, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 71.04, |
|
"learning_rate": 7.872926098902358e-05, |
|
"loss": 2.5932, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 71.25, |
|
"learning_rate": 7.768005620963916e-05, |
|
"loss": 2.4153, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 71.45, |
|
"learning_rate": 7.663620210085781e-05, |
|
"loss": 2.4195, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 71.66, |
|
"learning_rate": 7.55977443242399e-05, |
|
"loss": 2.4231, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 71.86, |
|
"learning_rate": 7.456472830529259e-05, |
|
"loss": 2.4287, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 71.86, |
|
"eval_accuracy": 0.5093288685486723, |
|
"eval_loss": 2.778383493423462, |
|
"eval_runtime": 145.9882, |
|
"eval_samples_per_second": 164.774, |
|
"eval_steps_per_second": 5.151, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 72.08, |
|
"learning_rate": 7.353719923148324e-05, |
|
"loss": 2.5804, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 72.29, |
|
"learning_rate": 7.251520205026205e-05, |
|
"loss": 2.4048, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 72.49, |
|
"learning_rate": 7.149878146709676e-05, |
|
"loss": 2.4008, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 72.7, |
|
"learning_rate": 7.048798194351625e-05, |
|
"loss": 2.41, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 72.9, |
|
"learning_rate": 6.948284769516627e-05, |
|
"loss": 2.4131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 72.9, |
|
"eval_accuracy": 0.5098879891877023, |
|
"eval_loss": 2.7781522274017334, |
|
"eval_runtime": 146.0156, |
|
"eval_samples_per_second": 164.743, |
|
"eval_steps_per_second": 5.15, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 73.12, |
|
"learning_rate": 6.848342268987511e-05, |
|
"loss": 2.5661, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"learning_rate": 6.748975064573007e-05, |
|
"loss": 2.3994, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 73.53, |
|
"learning_rate": 6.650187502916552e-05, |
|
"loss": 2.4078, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 73.74, |
|
"learning_rate": 6.551983905306107e-05, |
|
"loss": 2.4168, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 73.95, |
|
"learning_rate": 6.454368567485183e-05, |
|
"loss": 2.4016, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 73.95, |
|
"eval_accuracy": 0.5097699735946659, |
|
"eval_loss": 2.772381544113159, |
|
"eval_runtime": 146.0586, |
|
"eval_samples_per_second": 164.694, |
|
"eval_steps_per_second": 5.149, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 74.16, |
|
"learning_rate": 6.35734575946487e-05, |
|
"loss": 2.5732, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 74.37, |
|
"learning_rate": 6.260919725337109e-05, |
|
"loss": 2.3961, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 74.58, |
|
"learning_rate": 6.165094683089015e-05, |
|
"loss": 2.4073, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 74.78, |
|
"learning_rate": 6.069874824418356e-05, |
|
"loss": 2.3997, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"learning_rate": 5.975264314550229e-05, |
|
"loss": 2.3998, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.5110515365958426, |
|
"eval_loss": 2.7658748626708984, |
|
"eval_runtime": 146.0874, |
|
"eval_samples_per_second": 164.662, |
|
"eval_steps_per_second": 5.148, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 75.21, |
|
"learning_rate": 5.881267292054828e-05, |
|
"loss": 2.5492, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 75.41, |
|
"learning_rate": 5.787887868666417e-05, |
|
"loss": 2.3838, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 75.62, |
|
"learning_rate": 5.6951301291034945e-05, |
|
"loss": 2.398, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 75.82, |
|
"learning_rate": 5.602998130890065e-05, |
|
"loss": 2.4025, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 76.04, |
|
"learning_rate": 5.511495904178221e-05, |
|
"loss": 2.5475, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 76.04, |
|
"eval_accuracy": 0.510823536539714, |
|
"eval_loss": 2.7650203704833984, |
|
"eval_runtime": 146.0073, |
|
"eval_samples_per_second": 164.752, |
|
"eval_steps_per_second": 5.15, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 76.25, |
|
"learning_rate": 5.4206274515717736e-05, |
|
"loss": 2.4011, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 76.45, |
|
"learning_rate": 5.330396747951205e-05, |
|
"loss": 2.3818, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 76.66, |
|
"learning_rate": 5.240807740299811e-05, |
|
"loss": 2.3911, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 76.86, |
|
"learning_rate": 5.1518643475310034e-05, |
|
"loss": 2.389, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 77.08, |
|
"learning_rate": 5.0635704603169287e-05, |
|
"loss": 2.5443, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 77.08, |
|
"eval_accuracy": 0.5117344133064243, |
|
"eval_loss": 2.7620205879211426, |
|
"eval_runtime": 146.0022, |
|
"eval_samples_per_second": 164.758, |
|
"eval_steps_per_second": 5.151, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 77.29, |
|
"learning_rate": 4.975929940918236e-05, |
|
"loss": 2.38, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 77.49, |
|
"learning_rate": 4.8889466230151646e-05, |
|
"loss": 2.3758, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 77.7, |
|
"learning_rate": 4.8026243115398314e-05, |
|
"loss": 2.3744, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 77.9, |
|
"learning_rate": 4.7169667825097775e-05, |
|
"loss": 2.3784, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 78.12, |
|
"learning_rate": 4.631977782862824e-05, |
|
"loss": 2.5381, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 78.12, |
|
"eval_accuracy": 0.5115312635222847, |
|
"eval_loss": 2.76308274269104, |
|
"eval_runtime": 146.1953, |
|
"eval_samples_per_second": 164.54, |
|
"eval_steps_per_second": 5.144, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 78.33, |
|
"learning_rate": 4.547661030293129e-05, |
|
"loss": 2.3771, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 78.53, |
|
"learning_rate": 4.464020213088611e-05, |
|
"loss": 2.3786, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 78.74, |
|
"learning_rate": 4.381058989969564e-05, |
|
"loss": 2.3688, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 4.298780989928646e-05, |
|
"loss": 2.3792, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 79.16, |
|
"learning_rate": 4.217189812072131e-05, |
|
"loss": 2.5269, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 79.16, |
|
"eval_accuracy": 0.5122286175796967, |
|
"eval_loss": 2.7577943801879883, |
|
"eval_runtime": 146.122, |
|
"eval_samples_per_second": 164.623, |
|
"eval_steps_per_second": 5.146, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 79.37, |
|
"learning_rate": 4.136289025462443e-05, |
|
"loss": 2.3679, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 79.58, |
|
"learning_rate": 4.0560821689620856e-05, |
|
"loss": 2.3749, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 79.78, |
|
"learning_rate": 3.976572751078782e-05, |
|
"loss": 2.3605, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"learning_rate": 3.8977642498120594e-05, |
|
"loss": 2.3747, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 80.21, |
|
"learning_rate": 3.819660112501053e-05, |
|
"loss": 2.5288, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 80.21, |
|
"eval_accuracy": 0.5124386898610601, |
|
"eval_loss": 2.754046678543091, |
|
"eval_runtime": 146.1606, |
|
"eval_samples_per_second": 164.579, |
|
"eval_steps_per_second": 5.145, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 80.41, |
|
"learning_rate": 3.742263755673758e-05, |
|
"loss": 2.367, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 80.62, |
|
"learning_rate": 3.6655785648975585e-05, |
|
"loss": 2.3667, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 80.82, |
|
"learning_rate": 3.589607894631111e-05, |
|
"loss": 2.3717, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 81.04, |
|
"learning_rate": 3.514355068077655e-05, |
|
"loss": 2.5195, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 81.25, |
|
"learning_rate": 3.439823377039599e-05, |
|
"loss": 2.3669, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 81.25, |
|
"eval_accuracy": 0.5124800918682825, |
|
"eval_loss": 2.752890110015869, |
|
"eval_runtime": 145.9521, |
|
"eval_samples_per_second": 164.814, |
|
"eval_steps_per_second": 5.152, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 81.45, |
|
"learning_rate": 3.36601608177457e-05, |
|
"loss": 2.3595, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 81.66, |
|
"learning_rate": 3.292936410852754e-05, |
|
"loss": 2.3727, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 81.86, |
|
"learning_rate": 3.220587561015709e-05, |
|
"loss": 2.3707, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 3.148972697036507e-05, |
|
"loss": 2.508, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 82.29, |
|
"learning_rate": 3.078094951581289e-05, |
|
"loss": 2.3631, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 82.29, |
|
"eval_accuracy": 0.5132219293707184, |
|
"eval_loss": 2.749772071838379, |
|
"eval_runtime": 146.0679, |
|
"eval_samples_per_second": 164.684, |
|
"eval_steps_per_second": 5.148, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 82.49, |
|
"learning_rate": 3.007957425072265e-05, |
|
"loss": 2.3568, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 82.7, |
|
"learning_rate": 2.9385631855520546e-05, |
|
"loss": 2.3679, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 82.9, |
|
"learning_rate": 2.8699152685494925e-05, |
|
"loss": 2.3504, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 83.12, |
|
"learning_rate": 2.8020166769468616e-05, |
|
"loss": 2.5054, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 2.7348703808485223e-05, |
|
"loss": 2.3499, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_accuracy": 0.5135577468816207, |
|
"eval_loss": 2.7453861236572266, |
|
"eval_runtime": 146.0782, |
|
"eval_samples_per_second": 164.672, |
|
"eval_steps_per_second": 5.148, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 83.53, |
|
"learning_rate": 2.6684793174509915e-05, |
|
"loss": 2.3478, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 83.74, |
|
"learning_rate": 2.6028463909144574e-05, |
|
"loss": 2.3686, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 83.95, |
|
"learning_rate": 2.5379744722357403e-05, |
|
"loss": 2.3636, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 84.16, |
|
"learning_rate": 2.473866399122733e-05, |
|
"loss": 2.5195, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 84.37, |
|
"learning_rate": 2.410524975870221e-05, |
|
"loss": 2.3726, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 84.37, |
|
"eval_accuracy": 0.5140964497348997, |
|
"eval_loss": 2.7446117401123047, |
|
"eval_runtime": 146.09, |
|
"eval_samples_per_second": 164.659, |
|
"eval_steps_per_second": 5.148, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 84.58, |
|
"learning_rate": 2.347952973237262e-05, |
|
"loss": 2.3504, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 84.78, |
|
"learning_rate": 2.286153128325954e-05, |
|
"loss": 2.351, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"learning_rate": 2.2251281444617257e-05, |
|
"loss": 2.3506, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 85.21, |
|
"learning_rate": 2.1648806910750575e-05, |
|
"loss": 2.5104, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 85.41, |
|
"learning_rate": 2.1054134035847307e-05, |
|
"loss": 2.3411, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 85.41, |
|
"eval_accuracy": 0.5143741932133077, |
|
"eval_loss": 2.740255355834961, |
|
"eval_runtime": 146.0438, |
|
"eval_samples_per_second": 164.711, |
|
"eval_steps_per_second": 5.149, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 85.62, |
|
"learning_rate": 2.0467288832825583e-05, |
|
"loss": 2.3666, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 85.82, |
|
"learning_rate": 1.9888296972195587e-05, |
|
"loss": 2.3451, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 86.04, |
|
"learning_rate": 1.931718378093703e-05, |
|
"loss": 2.5151, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 86.25, |
|
"learning_rate": 1.875397424139109e-05, |
|
"loss": 2.3539, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 86.45, |
|
"learning_rate": 1.81986929901675e-05, |
|
"loss": 2.3321, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 86.45, |
|
"eval_accuracy": 0.5146461086764289, |
|
"eval_loss": 2.7371606826782227, |
|
"eval_runtime": 146.1164, |
|
"eval_samples_per_second": 164.629, |
|
"eval_steps_per_second": 5.147, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 86.66, |
|
"learning_rate": 1.765136431706711e-05, |
|
"loss": 2.3573, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 86.86, |
|
"learning_rate": 1.711201216401912e-05, |
|
"loss": 2.3422, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 87.08, |
|
"learning_rate": 1.6580660124034032e-05, |
|
"loss": 2.5055, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 87.29, |
|
"learning_rate": 1.605733144017132e-05, |
|
"loss": 2.3429, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 87.49, |
|
"learning_rate": 1.5542049004523053e-05, |
|
"loss": 2.3456, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 87.49, |
|
"eval_accuracy": 0.5146212850149416, |
|
"eval_loss": 2.7389299869537354, |
|
"eval_runtime": 146.0012, |
|
"eval_samples_per_second": 164.759, |
|
"eval_steps_per_second": 5.151, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 87.7, |
|
"learning_rate": 1.503483535721224e-05, |
|
"loss": 2.3608, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 87.9, |
|
"learning_rate": 1.4535712685406921e-05, |
|
"loss": 2.3466, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 88.12, |
|
"learning_rate": 1.4044702822349731e-05, |
|
"loss": 2.4892, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 88.33, |
|
"learning_rate": 1.3561827246402692e-05, |
|
"loss": 2.3418, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 88.53, |
|
"learning_rate": 1.3087107080107853e-05, |
|
"loss": 2.3372, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 88.53, |
|
"eval_accuracy": 0.515111201963272, |
|
"eval_loss": 2.7384002208709717, |
|
"eval_runtime": 145.8226, |
|
"eval_samples_per_second": 164.961, |
|
"eval_steps_per_second": 5.157, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 88.74, |
|
"learning_rate": 1.2620563089263093e-05, |
|
"loss": 2.3411, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 88.95, |
|
"learning_rate": 1.2162215682014012e-05, |
|
"loss": 2.3637, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 89.16, |
|
"learning_rate": 1.1712084907961053e-05, |
|
"loss": 2.4971, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 89.37, |
|
"learning_rate": 1.127019045728246e-05, |
|
"loss": 2.3476, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 89.58, |
|
"learning_rate": 1.0836551659873074e-05, |
|
"loss": 2.343, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 89.58, |
|
"eval_accuracy": 0.5144067649722459, |
|
"eval_loss": 2.7397918701171875, |
|
"eval_runtime": 146.0005, |
|
"eval_samples_per_second": 164.76, |
|
"eval_steps_per_second": 5.151, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 89.58, |
|
"step": 4300, |
|
"total_flos": 2.954083328682332e+17, |
|
"train_loss": 3.7431876293448516, |
|
"train_runtime": 42244.2763, |
|
"train_samples_per_second": 58.967, |
|
"train_steps_per_second": 0.114 |
|
} |
|
], |
|
"max_steps": 4800, |
|
"num_train_epochs": 100, |
|
"total_flos": 2.954083328682332e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|