|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 371720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.375e-06, |
|
"loss": 6.8593, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.875e-05, |
|
"loss": 5.3677, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8125e-05, |
|
"loss": 5.0449, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.75e-05, |
|
"loss": 4.821, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.6874999999999994e-05, |
|
"loss": 4.6456, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5.625e-05, |
|
"loss": 4.5012, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.5625e-05, |
|
"loss": 4.3871, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.5e-05, |
|
"loss": 4.2893, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.437499999999999e-05, |
|
"loss": 4.2089, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.374999999999999e-05, |
|
"loss": 4.1406, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00010312499999999999, |
|
"loss": 4.0695, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001125, |
|
"loss": 4.0107, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012185624999999998, |
|
"loss": 3.9458, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.000131221875, |
|
"loss": 3.8982, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.000140596875, |
|
"loss": 3.8433, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.000149971875, |
|
"loss": 3.8086, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015933749999999996, |
|
"loss": 3.7712, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00016871249999999996, |
|
"loss": 3.7431, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.34807019476750434, |
|
"eval_loss": 3.8859505653381348, |
|
"eval_runtime": 152.2662, |
|
"eval_samples_per_second": 380.386, |
|
"eval_steps_per_second": 5.944, |
|
"step": 18586 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.000178078125, |
|
"loss": 3.698, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00018745312499999998, |
|
"loss": 3.6713, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019681874999999998, |
|
"loss": 3.6498, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00020618437499999995, |
|
"loss": 3.627, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00021555937499999998, |
|
"loss": 3.6114, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00022493437499999998, |
|
"loss": 3.592, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00023430937499999997, |
|
"loss": 3.5844, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00024367499999999997, |
|
"loss": 3.5658, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00025305, |
|
"loss": 3.5562, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.000262425, |
|
"loss": 3.5434, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002718, |
|
"loss": 3.528, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.000281165625, |
|
"loss": 3.5166, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.000290540625, |
|
"loss": 3.5017, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00029990624999999993, |
|
"loss": 3.4968, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0002991257506181561, |
|
"loss": 3.4872, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0002982435535146591, |
|
"loss": 3.4725, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0002973604733309784, |
|
"loss": 3.4562, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00029647739314729777, |
|
"loss": 3.4495, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00029559519604380077, |
|
"loss": 3.4382, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.37586823096932304, |
|
"eval_loss": 3.62882661819458, |
|
"eval_runtime": 152.7372, |
|
"eval_samples_per_second": 379.213, |
|
"eval_steps_per_second": 5.925, |
|
"step": 37172 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002947129989403037, |
|
"loss": 3.3875, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00029382991875662304, |
|
"loss": 3.3815, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0002929468385729424, |
|
"loss": 3.3706, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0002920637583892617, |
|
"loss": 3.3661, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00029118067820558104, |
|
"loss": 3.3599, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00029029848110208404, |
|
"loss": 3.3582, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00028941540091840335, |
|
"loss": 3.3502, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00028853320381490636, |
|
"loss": 3.3462, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00028765012363122567, |
|
"loss": 3.3367, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00028676880960791236, |
|
"loss": 3.3362, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002858857294242317, |
|
"loss": 3.3322, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00028500264924055104, |
|
"loss": 3.324, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00028411956905687036, |
|
"loss": 3.3157, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00028323737195337336, |
|
"loss": 3.3197, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002823542917696927, |
|
"loss": 3.3102, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.000281471211586012, |
|
"loss": 3.3061, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002805881314023313, |
|
"loss": 3.3007, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0002797059342988343, |
|
"loss": 3.2958, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.38913569232753614, |
|
"eval_loss": 3.5019333362579346, |
|
"eval_runtime": 152.7028, |
|
"eval_samples_per_second": 379.299, |
|
"eval_steps_per_second": 5.927, |
|
"step": 55758 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0002788228541151536, |
|
"loss": 3.2785, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.0002779406570116566, |
|
"loss": 3.2343, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00027705757682797594, |
|
"loss": 3.236, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00027617626280466263, |
|
"loss": 3.2362, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.00027529318262098194, |
|
"loss": 3.2336, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00027441010243730126, |
|
"loss": 3.2288, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00027352702225362063, |
|
"loss": 3.2309, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00027264394206993995, |
|
"loss": 3.2278, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00027176174496644295, |
|
"loss": 3.2271, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.00027087954786294595, |
|
"loss": 3.2284, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.00026999646767926526, |
|
"loss": 3.2305, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0002691133874955846, |
|
"loss": 3.2264, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002682303073119039, |
|
"loss": 3.2214, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0002673472271282232, |
|
"loss": 3.2197, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0002664650300247262, |
|
"loss": 3.2127, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002655828329212292, |
|
"loss": 3.2139, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.00026469975273754853, |
|
"loss": 3.2105, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00026381667255386785, |
|
"loss": 3.2163, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.00026293359237018716, |
|
"loss": 3.2055, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3957415434142357, |
|
"eval_loss": 3.4540581703186035, |
|
"eval_runtime": 152.4456, |
|
"eval_samples_per_second": 379.939, |
|
"eval_steps_per_second": 5.937, |
|
"step": 74344 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0002620505121865065, |
|
"loss": 3.1626, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00026116831508300953, |
|
"loss": 3.144, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00026028523489932885, |
|
"loss": 3.1517, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00025940215471564816, |
|
"loss": 3.15, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0002585190745319675, |
|
"loss": 3.1589, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0002576368774284705, |
|
"loss": 3.1527, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0002567546803249735, |
|
"loss": 3.1576, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.0002558716001412928, |
|
"loss": 3.152, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0002549885199576121, |
|
"loss": 3.1581, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002541063228541151, |
|
"loss": 3.1541, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0002532232426704345, |
|
"loss": 3.1546, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.0002523401624867538, |
|
"loss": 3.1533, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002514570823030731, |
|
"loss": 3.1549, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.00025057576827975975, |
|
"loss": 3.152, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0002496926880960791, |
|
"loss": 3.1516, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00024880960791239844, |
|
"loss": 3.1563, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00024792652772871775, |
|
"loss": 3.1519, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00024704433062522075, |
|
"loss": 3.1533, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.40060643151965947, |
|
"eval_loss": 3.402815103530884, |
|
"eval_runtime": 152.6189, |
|
"eval_samples_per_second": 379.507, |
|
"eval_steps_per_second": 5.93, |
|
"step": 92930 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00024616125044154007, |
|
"loss": 3.1452, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0002452781702578594, |
|
"loss": 3.0832, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0002443959731543624, |
|
"loss": 3.0868, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00024351289297068173, |
|
"loss": 3.092, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00024262981278700104, |
|
"loss": 3.0954, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00024174761568350402, |
|
"loss": 3.098, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00024086541858000705, |
|
"loss": 3.1, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00023998233839632636, |
|
"loss": 3.1025, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00023909925821264568, |
|
"loss": 3.0996, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00023821706110914868, |
|
"loss": 3.1028, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.00023733398092546802, |
|
"loss": 3.1036, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00023645090074178734, |
|
"loss": 3.1031, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00023556782055810665, |
|
"loss": 3.1023, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00023468474037442597, |
|
"loss": 3.1053, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00023380254327092897, |
|
"loss": 3.1094, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00023291946308724831, |
|
"loss": 3.1036, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00023203638290356763, |
|
"loss": 3.1038, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00023115418580007063, |
|
"loss": 3.1039, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00023027110561638995, |
|
"loss": 3.1056, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.40350113936840293, |
|
"eval_loss": 3.380485773086548, |
|
"eval_runtime": 152.853, |
|
"eval_samples_per_second": 378.926, |
|
"eval_steps_per_second": 5.921, |
|
"step": 111516 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00022938802543270926, |
|
"loss": 3.0752, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.0002285058283292123, |
|
"loss": 3.034, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00022762363122571526, |
|
"loss": 3.0444, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00022674055104203458, |
|
"loss": 3.0518, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0002258574708583539, |
|
"loss": 3.0495, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00022497527375485692, |
|
"loss": 3.0551, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.00022409219357117624, |
|
"loss": 3.0553, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00022320911338749556, |
|
"loss": 3.054, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00022232603320381487, |
|
"loss": 3.0601, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00022144383610031787, |
|
"loss": 3.0595, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.00022056075591663722, |
|
"loss": 3.0641, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 0.00021967767573295653, |
|
"loss": 3.0614, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00021879547862945953, |
|
"loss": 3.0655, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.00021791239844577885, |
|
"loss": 3.0667, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00021703020134228188, |
|
"loss": 3.0629, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.0002161471211586012, |
|
"loss": 3.0646, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.0002152640409749205, |
|
"loss": 3.0681, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00021438096079123983, |
|
"loss": 3.0668, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.00021349876368774283, |
|
"loss": 3.0671, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.40417266805683943, |
|
"eval_loss": 3.38333797454834, |
|
"eval_runtime": 152.8266, |
|
"eval_samples_per_second": 378.992, |
|
"eval_steps_per_second": 5.922, |
|
"step": 130102 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00021261656658424583, |
|
"loss": 3.0056, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00021173348640056514, |
|
"loss": 3.004, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00021085128929706814, |
|
"loss": 3.0074, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00020996820911338746, |
|
"loss": 3.0123, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0002090851289297068, |
|
"loss": 3.0156, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0002082029318262098, |
|
"loss": 3.0192, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00020731985164252912, |
|
"loss": 3.0201, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00020643765453903212, |
|
"loss": 3.0221, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00020555457435535144, |
|
"loss": 3.0277, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00020467149417167078, |
|
"loss": 3.0262, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.00020378929706817378, |
|
"loss": 3.0283, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.0002029062168844931, |
|
"loss": 3.028, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.00020202401978099607, |
|
"loss": 3.0256, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00020114093959731544, |
|
"loss": 3.0298, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00020025874249381841, |
|
"loss": 3.0327, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.00019937566231013773, |
|
"loss": 3.0332, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00019849258212645705, |
|
"loss": 3.0385, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00019761038502296007, |
|
"loss": 3.0331, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.40691104864888106, |
|
"eval_loss": 3.366875410079956, |
|
"eval_runtime": 152.6213, |
|
"eval_samples_per_second": 379.501, |
|
"eval_steps_per_second": 5.93, |
|
"step": 148688 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.00019672818791946308, |
|
"loss": 3.0125, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0001958451077357824, |
|
"loss": 2.9641, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.0001949620275521017, |
|
"loss": 2.9757, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.00019407894736842102, |
|
"loss": 2.9761, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.00019319586718474037, |
|
"loss": 2.978, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.00019231367008124337, |
|
"loss": 2.9905, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.00019143058989756268, |
|
"loss": 2.9886, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.00019054839279406568, |
|
"loss": 2.9935, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.00018966531261038503, |
|
"loss": 2.9913, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.00018878223242670434, |
|
"loss": 2.9943, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.00018790003532320734, |
|
"loss": 2.9973, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 0.00018701695513952666, |
|
"loss": 2.9986, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.00018613475803602963, |
|
"loss": 3.0023, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.00018525167785234898, |
|
"loss": 2.9989, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.00018436948074885198, |
|
"loss": 3.0, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.0001834864005651713, |
|
"loss": 3.0023, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.0001826033203814906, |
|
"loss": 3.0023, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.00018172024019780993, |
|
"loss": 3.0039, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.00018083804309431295, |
|
"loss": 3.0072, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.408285882043124, |
|
"eval_loss": 3.3615922927856445, |
|
"eval_runtime": 152.6155, |
|
"eval_samples_per_second": 379.516, |
|
"eval_steps_per_second": 5.93, |
|
"step": 167274 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00017995496291063227, |
|
"loss": 2.9528, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.00017907276580713527, |
|
"loss": 2.9442, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0001781896856234546, |
|
"loss": 2.9515, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.00017730748851995762, |
|
"loss": 2.9502, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 0.00017642440833627693, |
|
"loss": 2.9558, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.00017554132815259625, |
|
"loss": 2.9566, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 0.00017465913104909922, |
|
"loss": 2.9635, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0001737760508654186, |
|
"loss": 2.9669, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.0001728929706817379, |
|
"loss": 2.967, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00017201165665842457, |
|
"loss": 2.9745, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00017112857647474388, |
|
"loss": 2.9685, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.0001702463793712469, |
|
"loss": 2.9699, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 0.00016936329918756623, |
|
"loss": 2.972, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.00016848021900388554, |
|
"loss": 2.9706, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.00016759713882020486, |
|
"loss": 2.9777, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.00016671494171670786, |
|
"loss": 2.9724, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0001658318615330272, |
|
"loss": 2.9779, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.0001649496644295302, |
|
"loss": 2.9771, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4078330234107961, |
|
"eval_loss": 3.3777239322662354, |
|
"eval_runtime": 152.9968, |
|
"eval_samples_per_second": 378.57, |
|
"eval_steps_per_second": 5.915, |
|
"step": 185860 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.00016406658424584952, |
|
"loss": 2.9716, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0001631843871423525, |
|
"loss": 2.9114, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.0001623013069586718, |
|
"loss": 2.9208, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00016141822677499118, |
|
"loss": 2.9265, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.0001605351465913105, |
|
"loss": 2.9286, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 0.00015965294948781347, |
|
"loss": 2.9316, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.00015876986930413278, |
|
"loss": 2.9347, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.0001578876722006358, |
|
"loss": 2.9384, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.00015700459201695513, |
|
"loss": 2.9406, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 0.00015612239491345813, |
|
"loss": 2.9389, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.00015523931472977745, |
|
"loss": 2.944, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.00015435623454609676, |
|
"loss": 2.9508, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0001534731543624161, |
|
"loss": 2.9459, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.00015259007417873542, |
|
"loss": 2.9463, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 0.00015170699399505474, |
|
"loss": 2.9486, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.00015082479689155774, |
|
"loss": 2.9517, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.00014994171670787705, |
|
"loss": 2.9533, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 0.00014905951960438005, |
|
"loss": 2.9536, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.0001481764394206994, |
|
"loss": 2.9534, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.4088908356084872, |
|
"eval_loss": 3.374073028564453, |
|
"eval_runtime": 152.6907, |
|
"eval_samples_per_second": 379.329, |
|
"eval_steps_per_second": 5.927, |
|
"step": 204446 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.00014729335923701871, |
|
"loss": 2.9188, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.00014641116213352171, |
|
"loss": 2.8961, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.00014552808194984103, |
|
"loss": 2.9004, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.00014464588484634403, |
|
"loss": 2.9005, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 0.00014376280466266335, |
|
"loss": 2.9037, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 0.00014287972447898266, |
|
"loss": 2.9129, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.000141996644295302, |
|
"loss": 2.9132, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 0.000141114447191805, |
|
"loss": 2.9156, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 0.000140232250088308, |
|
"loss": 2.9192, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 0.00013934916990462732, |
|
"loss": 2.9185, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 0.00013846608972094667, |
|
"loss": 2.9243, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 0.00013758300953726598, |
|
"loss": 2.9236, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 0.00013670081243376898, |
|
"loss": 2.9244, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 0.0001358177322500883, |
|
"loss": 2.9289, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 0.00013493465206640762, |
|
"loss": 2.928, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 0.0001340533380430943, |
|
"loss": 2.9268, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 0.00013317025785941362, |
|
"loss": 2.9329, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 0.00013228717767573293, |
|
"loss": 2.9315, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00013140498057223594, |
|
"loss": 2.9279, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.40917695412429866, |
|
"eval_loss": 3.384521484375, |
|
"eval_runtime": 152.8405, |
|
"eval_samples_per_second": 378.957, |
|
"eval_steps_per_second": 5.921, |
|
"step": 223032 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.00013052190038855528, |
|
"loss": 2.8706, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 0.0001296388202048746, |
|
"loss": 2.8764, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.0001287557400211939, |
|
"loss": 2.8801, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.00012787265983751323, |
|
"loss": 2.8853, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.00012699046273401623, |
|
"loss": 2.8877, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.00012610738255033557, |
|
"loss": 2.8905, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 0.00012522430236665489, |
|
"loss": 2.8928, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 0.0001243421052631579, |
|
"loss": 2.898, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.0001234590250794772, |
|
"loss": 2.8982, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 0.0001225768279759802, |
|
"loss": 2.8923, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.00012169374779229953, |
|
"loss": 2.9047, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 0.00012081066760861885, |
|
"loss": 2.9063, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.00011992758742493818, |
|
"loss": 2.9029, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.00011904539032144117, |
|
"loss": 2.9057, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 0.0001181623101377605, |
|
"loss": 2.9073, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.00011727922995407981, |
|
"loss": 2.9081, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.00011639614977039916, |
|
"loss": 2.9086, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00011551483574708583, |
|
"loss": 2.9063, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4105062396543199, |
|
"eval_loss": 3.36887264251709, |
|
"eval_runtime": 152.9632, |
|
"eval_samples_per_second": 378.653, |
|
"eval_steps_per_second": 5.916, |
|
"step": 241618 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 0.00011463175556340516, |
|
"loss": 2.8865, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 0.00011374867537972447, |
|
"loss": 2.8543, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 0.00011286647827622747, |
|
"loss": 2.8563, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 0.00011198339809254679, |
|
"loss": 2.8637, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.0001111003179088661, |
|
"loss": 2.866, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 0.00011021812080536912, |
|
"loss": 2.8713, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.00010933504062168844, |
|
"loss": 2.8737, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.00010845196043800777, |
|
"loss": 2.8684, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 0.00010756888025432708, |
|
"loss": 2.8772, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.00010668756623101377, |
|
"loss": 2.8758, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.00010580448604733308, |
|
"loss": 2.8799, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 0.0001049214058636524, |
|
"loss": 2.8829, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00010403920876015541, |
|
"loss": 2.8794, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 0.00010315612857647473, |
|
"loss": 2.8887, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 0.00010227393147297774, |
|
"loss": 2.8846, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 0.00010139085128929706, |
|
"loss": 2.8851, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 0.00010050777110561639, |
|
"loss": 2.8858, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 9.962557400211938e-05, |
|
"loss": 2.8897, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 9.874249381843871e-05, |
|
"loss": 2.8913, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4104661333490982, |
|
"eval_loss": 3.371145486831665, |
|
"eval_runtime": 152.7732, |
|
"eval_samples_per_second": 379.124, |
|
"eval_steps_per_second": 5.924, |
|
"step": 260204 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 9.786029671494171e-05, |
|
"loss": 2.839, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 9.697721653126104e-05, |
|
"loss": 2.8365, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 9.609501942776402e-05, |
|
"loss": 2.8441, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 9.521282232426704e-05, |
|
"loss": 2.8514, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 9.432974214058636e-05, |
|
"loss": 2.8488, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 9.344666195690567e-05, |
|
"loss": 2.8537, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 9.2563581773225e-05, |
|
"loss": 2.8525, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 9.168050158954432e-05, |
|
"loss": 2.8573, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 9.079830448604733e-05, |
|
"loss": 2.8588, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 8.991522430236665e-05, |
|
"loss": 2.8604, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 8.903302719886966e-05, |
|
"loss": 2.8599, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 8.814994701518898e-05, |
|
"loss": 2.8603, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 8.726686683150828e-05, |
|
"loss": 2.8633, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 8.638378664782762e-05, |
|
"loss": 2.8652, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 8.550247262451431e-05, |
|
"loss": 2.865, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 8.461939244083362e-05, |
|
"loss": 2.865, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 8.373719533733661e-05, |
|
"loss": 2.8662, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 8.285411515365594e-05, |
|
"loss": 2.8704, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4098972553045123, |
|
"eval_loss": 3.3779125213623047, |
|
"eval_runtime": 152.7549, |
|
"eval_samples_per_second": 379.169, |
|
"eval_steps_per_second": 5.925, |
|
"step": 278790 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 8.197103496997526e-05, |
|
"loss": 2.8583, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 8.10879547862946e-05, |
|
"loss": 2.8214, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 8.020575768279759e-05, |
|
"loss": 2.8237, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 7.932267749911692e-05, |
|
"loss": 2.8222, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 7.844048039561992e-05, |
|
"loss": 2.8321, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 7.755740021193923e-05, |
|
"loss": 2.8329, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 7.667432002825856e-05, |
|
"loss": 2.8337, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 7.579212292476155e-05, |
|
"loss": 2.8375, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 7.490992582126457e-05, |
|
"loss": 2.8415, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 7.40268456375839e-05, |
|
"loss": 2.8366, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 7.314376545390321e-05, |
|
"loss": 2.8427, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 7.226068527022253e-05, |
|
"loss": 2.8424, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 7.137848816672553e-05, |
|
"loss": 2.8453, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 7.049540798304486e-05, |
|
"loss": 2.8451, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 6.961321087954786e-05, |
|
"loss": 2.8489, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 6.873101377605086e-05, |
|
"loss": 2.8442, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 6.784793359237018e-05, |
|
"loss": 2.844, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 6.69648534086895e-05, |
|
"loss": 2.8475, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 6.608177322500882e-05, |
|
"loss": 2.8491, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4111809257905712, |
|
"eval_loss": 3.376009464263916, |
|
"eval_runtime": 153.0344, |
|
"eval_samples_per_second": 378.477, |
|
"eval_steps_per_second": 5.914, |
|
"step": 297376 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 6.519957612151182e-05, |
|
"loss": 2.8234, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 6.431649593783115e-05, |
|
"loss": 2.804, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 6.343429883433415e-05, |
|
"loss": 2.8093, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 6.255121865065347e-05, |
|
"loss": 2.8146, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 6.16681384669728e-05, |
|
"loss": 2.8127, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 6.07859413634758e-05, |
|
"loss": 2.8193, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 5.990286117979512e-05, |
|
"loss": 2.8189, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 5.901978099611444e-05, |
|
"loss": 2.8211, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 16.46, |
|
"learning_rate": 5.813670081243376e-05, |
|
"loss": 2.817, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 5.725450370893676e-05, |
|
"loss": 2.8202, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 5.6371423525256084e-05, |
|
"loss": 2.8212, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 5.5488343341575414e-05, |
|
"loss": 2.8274, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 16.68, |
|
"learning_rate": 5.4606146238078415e-05, |
|
"loss": 2.8263, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 5.372306605439774e-05, |
|
"loss": 2.8244, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 5.2839985870717054e-05, |
|
"loss": 2.8305, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 5.195778876722006e-05, |
|
"loss": 2.8253, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 5.107470858353938e-05, |
|
"loss": 2.831, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 5.019251148004238e-05, |
|
"loss": 2.83, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4115711057247888, |
|
"eval_loss": 3.375187397003174, |
|
"eval_runtime": 152.7823, |
|
"eval_samples_per_second": 379.101, |
|
"eval_steps_per_second": 5.923, |
|
"step": 315962 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.93094312963617e-05, |
|
"loss": 2.8325, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 4.842635111268103e-05, |
|
"loss": 2.7924, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 4.754415400918403e-05, |
|
"loss": 2.793, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 4.6661073825503354e-05, |
|
"loss": 2.7973, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 4.5778876722006355e-05, |
|
"loss": 2.7998, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 4.489579653832568e-05, |
|
"loss": 2.803, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 4.4012716354644994e-05, |
|
"loss": 2.8033, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 4.3130519251147995e-05, |
|
"loss": 2.8002, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"learning_rate": 4.224743906746732e-05, |
|
"loss": 2.8042, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 4.1365241963970325e-05, |
|
"loss": 2.8048, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 4.048216178028965e-05, |
|
"loss": 2.806, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 3.959908159660897e-05, |
|
"loss": 2.8035, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 3.871688449311197e-05, |
|
"loss": 2.8083, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 3.7833804309431294e-05, |
|
"loss": 2.8089, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 3.6951607205934295e-05, |
|
"loss": 2.8063, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 3.6069410102437296e-05, |
|
"loss": 2.8133, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 3.518632991875662e-05, |
|
"loss": 2.807, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 3.430324973507594e-05, |
|
"loss": 2.8097, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"learning_rate": 3.3420169551395265e-05, |
|
"loss": 2.8136, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.41081936442590883, |
|
"eval_loss": 3.3912463188171387, |
|
"eval_runtime": 153.0224, |
|
"eval_samples_per_second": 378.507, |
|
"eval_steps_per_second": 5.914, |
|
"step": 334548 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 3.2537972447898265e-05, |
|
"loss": 2.7965, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 3.165489226421759e-05, |
|
"loss": 2.7795, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 3.077181208053691e-05, |
|
"loss": 2.7846, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 2.9889614977039912e-05, |
|
"loss": 2.7852, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 2.9006534793359235e-05, |
|
"loss": 2.7818, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 18.29, |
|
"learning_rate": 2.8123454609678557e-05, |
|
"loss": 2.7855, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 2.724125750618156e-05, |
|
"loss": 2.7881, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 2.635817732250088e-05, |
|
"loss": 2.7901, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 2.5475097138820204e-05, |
|
"loss": 2.7895, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 2.4592016955139527e-05, |
|
"loss": 2.787, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 2.3709819851642528e-05, |
|
"loss": 2.7907, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 2.282762274814553e-05, |
|
"loss": 2.7957, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 2.194454256446485e-05, |
|
"loss": 2.7935, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 2.1061462380784174e-05, |
|
"loss": 2.794, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 2.0179265277287175e-05, |
|
"loss": 2.7885, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 1.9296185093606498e-05, |
|
"loss": 2.7946, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 1.8413987990109502e-05, |
|
"loss": 2.7905, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 1.7531790886612503e-05, |
|
"loss": 2.7904, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 1.6648710702931826e-05, |
|
"loss": 2.7924, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.41071033170668786, |
|
"eval_loss": 3.3984034061431885, |
|
"eval_runtime": 153.8265, |
|
"eval_samples_per_second": 376.528, |
|
"eval_steps_per_second": 5.883, |
|
"step": 353134 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 1.5765630519251145e-05, |
|
"loss": 2.7766, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 1.4882550335570468e-05, |
|
"loss": 2.7713, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 1.4000353232073472e-05, |
|
"loss": 2.7724, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 1.3117273048392793e-05, |
|
"loss": 2.7711, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 1.2234192864712116e-05, |
|
"loss": 2.7774, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 1.1351995761215117e-05, |
|
"loss": 2.777, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"learning_rate": 1.046891557753444e-05, |
|
"loss": 2.778, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 9.58583539385376e-06, |
|
"loss": 2.7765, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 19.48, |
|
"learning_rate": 8.703638290356763e-06, |
|
"loss": 2.7765, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 19.53, |
|
"learning_rate": 7.820558106676086e-06, |
|
"loss": 2.7713, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"learning_rate": 6.938361003179088e-06, |
|
"loss": 2.7701, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 6.05528081949841e-06, |
|
"loss": 2.7758, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 5.172200635817732e-06, |
|
"loss": 2.7754, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"learning_rate": 4.290003532320734e-06, |
|
"loss": 2.7755, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 3.406923348640056e-06, |
|
"loss": 2.7752, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 2.523843164959378e-06, |
|
"loss": 2.7719, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 1.6407629812787e-06, |
|
"loss": 2.7787, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 7.576827975980219e-07, |
|
"loss": 2.7792, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4106218559881334, |
|
"eval_loss": 3.404461622238159, |
|
"eval_runtime": 153.6422, |
|
"eval_samples_per_second": 376.98, |
|
"eval_steps_per_second": 5.89, |
|
"step": 371720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 371720, |
|
"total_flos": 1.56614628708864e+18, |
|
"train_loss": 3.0647617085172687, |
|
"train_runtime": 80733.64, |
|
"train_samples_per_second": 147.334, |
|
"train_steps_per_second": 4.604 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 371720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 5000, |
|
"total_flos": 1.56614628708864e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|