{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990627928772259,
  "eval_steps": 134,
  "global_step": 533,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.000000000000001e-06,
      "loss": 3.2302,
      "step": 1
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.179551601409912,
      "eval_runtime": 55.4081,
      "eval_samples_per_second": 24.329,
      "eval_steps_per_second": 1.029,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 8.000000000000001e-06,
      "loss": 3.1024,
      "step": 2
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.2e-05,
      "loss": 3.2198,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.0828,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 2e-05,
      "loss": 2.9847,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.4e-05,
      "loss": 2.7929,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 2.7878,
      "step": 7
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 2.6629,
      "step": 8
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.6e-05,
      "loss": 2.6465,
      "step": 9
    },
    {
      "epoch": 0.02,
      "learning_rate": 4e-05,
      "loss": 2.5353,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 2.5154,
      "step": 11
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8e-05,
      "loss": 2.498,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 2.4848,
      "step": 13
    },
    {
      "epoch": 0.03,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 2.4794,
      "step": 14
    },
    {
      "epoch": 0.03,
      "learning_rate": 6e-05,
      "loss": 2.4636,
      "step": 15
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.400000000000001e-05,
      "loss": 2.4932,
      "step": 16
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.800000000000001e-05,
      "loss": 2.4167,
      "step": 17
    },
    {
      "epoch": 0.03,
      "learning_rate": 7.2e-05,
      "loss": 2.4665,
      "step": 18
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.6e-05,
      "loss": 2.3863,
      "step": 19
    },
    {
      "epoch": 0.04,
      "learning_rate": 8e-05,
      "loss": 2.4089,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.4e-05,
      "loss": 2.4105,
      "step": 21
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.800000000000001e-05,
      "loss": 2.4187,
      "step": 22
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.200000000000001e-05,
      "loss": 2.3987,
      "step": 23
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.6e-05,
      "loss": 2.3898,
      "step": 24
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001,
      "loss": 2.4524,
      "step": 25
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00010400000000000001,
      "loss": 2.4081,
      "step": 26
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00010800000000000001,
      "loss": 2.4231,
      "step": 27
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00011200000000000001,
      "loss": 2.3854,
      "step": 28
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.000116,
      "loss": 2.4511,
      "step": 29
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00012,
      "loss": 2.4137,
      "step": 30
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.000124,
      "loss": 2.4189,
      "step": 31
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00012800000000000002,
      "loss": 2.4123,
      "step": 32
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.000132,
      "loss": 2.3896,
      "step": 33
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00013600000000000003,
      "loss": 2.4448,
      "step": 34
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00014,
      "loss": 2.3936,
      "step": 35
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.000144,
      "loss": 2.4505,
      "step": 36
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.000148,
      "loss": 2.4321,
      "step": 37
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.000152,
      "loss": 2.4273,
      "step": 38
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00015600000000000002,
      "loss": 2.4394,
      "step": 39
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00016,
      "loss": 2.4661,
      "step": 40
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000164,
      "loss": 2.4693,
      "step": 41
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000168,
      "loss": 2.4416,
      "step": 42
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000172,
      "loss": 2.4219,
      "step": 43
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00017600000000000002,
      "loss": 2.4929,
      "step": 44
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00018,
      "loss": 2.5098,
      "step": 45
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00018400000000000003,
      "loss": 2.5142,
      "step": 46
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.000188,
      "loss": 2.4719,
      "step": 47
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.000192,
      "loss": 2.4686,
      "step": 48
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.000196,
      "loss": 2.4731,
      "step": 49
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 2.4735,
      "step": 50
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019999788469031855,
      "loss": 2.5597,
      "step": 51
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019999153885076487,
      "loss": 2.4634,
      "step": 52
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019998096274980728,
      "loss": 2.4727,
      "step": 53
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019996615683488039,
      "loss": 2.4603,
      "step": 54
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019994712173236604,
      "loss": 2.4999,
      "step": 55
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.000199923858247567,
      "loss": 2.4886,
      "step": 56
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019989636736467278,
      "loss": 2.5067,
      "step": 57
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019986465024671794,
      "loss": 2.4929,
      "step": 58
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019982870823553308,
      "loss": 2.4504,
      "step": 59
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019978854285168784,
      "loss": 2.4733,
      "step": 60
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019974415579442675,
      "loss": 2.5515,
      "step": 61
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019969554894159723,
      "loss": 2.4798,
      "step": 62
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019964272434957022,
      "loss": 2.5098,
      "step": 63
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019958568425315314,
      "loss": 2.5603,
      "step": 64
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019952443106549533,
      "loss": 2.68,
      "step": 65
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019945896737798603,
      "loss": 2.7329,
      "step": 66
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001993892959601447,
      "loss": 2.5781,
      "step": 67
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019931541975950378,
      "loss": 2.5786,
      "step": 68
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019923734190148418,
      "loss": 2.5429,
      "step": 69
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001991550656892628,
      "loss": 2.4816,
      "step": 70
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019906859460363307,
      "loss": 2.5149,
      "step": 71
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019897793230285748,
      "loss": 2.5982,
      "step": 72
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019888308262251285,
      "loss": 2.5852,
      "step": 73
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019878404957532814,
      "loss": 2.5715,
      "step": 74
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019868083735101463,
      "loss": 2.52,
      "step": 75
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019857345031608868,
      "loss": 2.5562,
      "step": 76
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001984618930136869,
      "loss": 2.4902,
      "step": 77
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001983461701633742,
      "loss": 2.4734,
      "step": 78
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001982262866609439,
      "loss": 2.4944,
      "step": 79
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019810224757821064,
      "loss": 2.4315,
      "step": 80
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019797405816279585,
      "loss": 2.4392,
      "step": 81
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019784172383790582,
      "loss": 2.4658,
      "step": 82
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019770525020210204,
      "loss": 2.4779,
      "step": 83
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019756464302906465,
      "loss": 2.4132,
      "step": 84
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019741990826734794,
      "loss": 2.4443,
      "step": 85
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0001972710520401287,
      "loss": 2.5367,
      "step": 86
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0001971180806449473,
      "loss": 2.4563,
      "step": 87
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019696100055344124,
      "loss": 2.4693,
      "step": 88
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001967998184110713,
      "loss": 2.4599,
      "step": 89
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001966345410368404,
      "loss": 2.458,
      "step": 90
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00019646517542300514,
      "loss": 2.4395,
      "step": 91
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00019629172873477995,
      "loss": 2.453,
      "step": 92
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.000196114208310034,
      "loss": 2.4638,
      "step": 93
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019593262165898076,
      "loss": 2.4314,
      "step": 94
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019574697646386027,
      "loss": 2.4776,
      "step": 95
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001955572805786141,
      "loss": 2.4625,
      "step": 96
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019536354202855308,
      "loss": 2.4451,
      "step": 97
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001951657690100178,
      "loss": 2.4473,
      "step": 98
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019496396989003193,
      "loss": 2.4928,
      "step": 99
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0001947581532059481,
      "loss": 2.4132,
      "step": 100
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0001945483276650868,
      "loss": 2.4591,
      "step": 101
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019433450214436797,
      "loss": 2.465,
      "step": 102
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0001941166856899355,
      "loss": 2.3805,
      "step": 103
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0001938948875167745,
      "loss": 2.4012,
      "step": 104
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019366911700832145,
      "loss": 2.3852,
      "step": 105
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019343938371606712,
      "loss": 2.3642,
      "step": 106
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019320569735915271,
      "loss": 2.39,
      "step": 107
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001929680678239585,
      "loss": 2.4373,
      "step": 108
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001927265051636856,
      "loss": 2.4666,
      "step": 109
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00019248101959793066,
      "loss": 2.4267,
      "step": 110
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001922316215122536,
      "loss": 2.4158,
      "step": 111
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001919783214577381,
      "loss": 2.3634,
      "step": 112
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00019172113015054532,
      "loss": 2.4179,
      "step": 113
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001914600584714605,
      "loss": 2.364,
      "step": 114
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00019119511746543263,
      "loss": 2.3435,
      "step": 115
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00019092631834110723,
      "loss": 2.2741,
      "step": 116
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00019065367247035213,
      "loss": 2.3201,
      "step": 117
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00019037719138777627,
      "loss": 2.4475,
      "step": 118
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001900968867902419,
      "loss": 2.3559,
      "step": 119
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0001898127705363696,
      "loss": 2.3547,
      "step": 120
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018952485464603664,
      "loss": 2.2853,
      "step": 121
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018923315129986835,
      "loss": 2.4317,
      "step": 122
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018893767283872305,
      "loss": 2.3741,
      "step": 123
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001886384317631697,
      "loss": 2.2959,
      "step": 124
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018833544073295917,
      "loss": 2.3672,
      "step": 125
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018802871256648872,
      "loss": 2.3465,
      "step": 126
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018771826024025946,
      "loss": 2.2856,
      "step": 127
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018740409688832764,
      "loss": 2.28,
      "step": 128
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018708623580174889,
      "loss": 2.2592,
      "step": 129
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018676469042801588,
      "loss": 2.3545,
      "step": 130
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018643947437048944,
      "loss": 2.3505,
      "step": 131
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018611060138782305,
      "loss": 2.3164,
      "step": 132
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018577808539338087,
      "loss": 2.3547,
      "step": 133
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018544194045464886,
      "loss": 2.3772,
      "step": 134
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.3696775436401367,
      "eval_runtime": 55.6786,
      "eval_samples_per_second": 24.21,
      "eval_steps_per_second": 1.024,
      "step": 134
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018510218079263995,
      "loss": 2.4287,
      "step": 135
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018475882078129212,
      "loss": 2.3653,
      "step": 136
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018441187494686053,
      "loss": 2.3392,
      "step": 137
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018406135796730287,
      "loss": 2.3177,
      "step": 138
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018370728467165828,
      "loss": 2.3777,
      "step": 139
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0001833496700394202,
      "loss": 2.3158,
      "step": 140
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018298852919990252,
      "loss": 2.3367,
      "step": 141
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001826238774315995,
      "loss": 2.231,
      "step": 142
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018225573016153945,
      "loss": 2.2859,
      "step": 143
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001818841029646321,
      "loss": 2.2594,
      "step": 144
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018150901156300956,
      "loss": 2.2945,
      "step": 145
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018113047182536127,
      "loss": 2.2277,
      "step": 146
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018074849976626274,
      "loss": 2.285,
      "step": 147
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018036311154549784,
      "loss": 2.287,
      "step": 148
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017997432346737524,
      "loss": 2.2308,
      "step": 149
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017958215198003865,
      "loss": 2.3425,
      "step": 150
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017918661367477098,
      "loss": 2.1763,
      "step": 151
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017878772528529232,
      "loss": 2.3044,
      "step": 152
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00017838550368705217,
      "loss": 2.1648,
      "step": 153
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0001779799658965153,
      "loss": 2.3134,
      "step": 154
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.000177571129070442,
      "loss": 2.2027,
      "step": 155
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0001771590105051622,
      "loss": 2.2242,
      "step": 156
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00017674362763584375,
      "loss": 2.2897,
      "step": 157
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00017632499803575474,
      "loss": 2.1541,
      "step": 158
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00017590313941552002,
      "loss": 2.173,
      "step": 159
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001754780696223722,
      "loss": 2.2662,
      "step": 160
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00017504980663939613,
      "loss": 2.2121,
      "step": 161
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00017461836858476856,
      "loss": 2.2703,
      "step": 162
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017418377371099136,
      "loss": 2.2875,
      "step": 163
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017374604040411935,
      "loss": 2.2159,
      "step": 164
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017330518718298264,
      "loss": 2.2517,
      "step": 165
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017286123269840293,
      "loss": 2.2079,
      "step": 166
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017241419573240462,
      "loss": 2.1622,
      "step": 167
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0001719640951974202,
      "loss": 2.2229,
      "step": 168
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00017151095013548994,
      "loss": 2.2151,
      "step": 169
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00017105477971745666,
      "loss": 2.232,
      "step": 170
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00017059560324215436,
      "loss": 2.2971,
      "step": 171
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00017013344013559197,
      "loss": 2.1772,
      "step": 172
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00016966830995013133,
      "loss": 2.2384,
      "step": 173
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016920023236366002,
      "loss": 2.1376,
      "step": 174
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016872922717875923,
      "loss": 2.1521,
      "step": 175
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016825531432186543,
      "loss": 2.2222,
      "step": 176
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016777851384242766,
      "loss": 2.1104,
      "step": 177
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0001672988459120594,
      "loss": 2.2398,
      "step": 178
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00016681633082368498,
      "loss": 2.1505,
      "step": 179
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001663309889906811,
      "loss": 2.1549,
      "step": 180
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001658428409460134,
      "loss": 2.2301,
      "step": 181
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001653519073413675,
      "loss": 2.1479,
      "step": 182
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001648582089462756,
      "loss": 2.1095,
      "step": 183
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0001643617666472376,
      "loss": 2.1487,
      "step": 184
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00016386260144683745,
      "loss": 2.1334,
      "step": 185
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00016336073446285485,
      "loss": 2.0953,
      "step": 186
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00016285618692737157,
      "loss": 2.1386,
      "step": 187
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00016234898018587337,
      "loss": 2.1415,
      "step": 188
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0001618391356963468,
      "loss": 2.0492,
      "step": 189
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00016132667502837165,
      "loss": 2.056,
      "step": 190
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00016081161986220807,
      "loss": 2.2249,
      "step": 191
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00016029399198787974,
      "loss": 2.1115,
      "step": 192
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00015977381330425163,
      "loss": 2.1275,
      "step": 193
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00015925110581810394,
      "loss": 1.9615,
      "step": 194
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00015872589164320078,
      "loss": 2.1591,
      "step": 195
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001581981929993547,
      "loss": 2.1233,
      "step": 196
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00015766803221148673,
      "loss": 2.2147,
      "step": 197
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001571354317086818,
      "loss": 2.1625,
      "step": 198
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001566004140232399,
      "loss": 2.0565,
      "step": 199
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00015606300178972287,
      "loss": 2.1389,
      "step": 200
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00015552321774399666,
      "loss": 2.126,
      "step": 201
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00015498108472226964,
      "loss": 2.1251,
      "step": 202
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00015443662566012645,
      "loss": 2.0343,
      "step": 203
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00015388986359155758,
      "loss": 2.076,
      "step": 204
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00015334082164798489,
      "loss": 1.9876,
      "step": 205
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00015278952305728324,
      "loss": 2.1316,
      "step": 206
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00015223599114279755,
      "loss": 2.06,
      "step": 207
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00015168024932235617,
      "loss": 2.0479,
      "step": 208
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00015112232110728015,
      "loss": 1.9749,
      "step": 209
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00015056223010138857,
      "loss": 2.1165,
      "step": 210
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00015000000000000001,
      "loss": 2.0656,
      "step": 211
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00014943565458893,
      "loss": 2.0241,
      "step": 212
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00014886921774348472,
      "loss": 2.0393,
      "step": 213
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00014830071342745112,
      "loss": 2.0376,
      "step": 214
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00014773016569208283,
      "loss": 2.0739,
      "step": 215
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001471575986750828,
      "loss": 2.0254,
      "step": 216
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001465830365995821,
      "loss": 2.0989,
      "step": 217
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00014600650377311522,
      "loss": 2.1056,
      "step": 218
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00014542802458659152,
      "loss": 2.0197,
      "step": 219
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00014484762351326343,
      "loss": 2.055,
      "step": 220
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001442653251076912,
      "loss": 2.058,
      "step": 221
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00014368115400470392,
      "loss": 1.9918,
      "step": 222
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00014309513491835734,
      "loss": 2.0102,
      "step": 223
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00014250729264088843,
      "loss": 2.0356,
      "step": 224
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00014191765204166643,
      "loss": 2.0166,
      "step": 225
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00014132623806614063,
      "loss": 1.9309,
      "step": 226
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00014073307573478526,
      "loss": 2.0886,
      "step": 227
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00014013819014204075,
      "loss": 1.973,
      "step": 228
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00013954160645525217,
      "loss": 1.9996,
      "step": 229
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00013894334991360448,
      "loss": 1.9444,
      "step": 230
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00013834344582705474,
      "loss": 2.0583,
      "step": 231
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00013774191957526143,
      "loss": 1.9805,
      "step": 232
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00013713879660651068,
      "loss": 2.0694,
      "step": 233
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00013653410243663952,
      "loss": 2.0294,
      "step": 234
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00013592786264795658,
      "loss": 1.966,
      "step": 235
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001353201028881598,
      "loss": 1.8961,
      "step": 236
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00013471084886925122,
      "loss": 2.0144,
      "step": 237
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00013410012636644935,
      "loss": 1.9803,
      "step": 238
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00013348796121709862,
      "loss": 1.984,
      "step": 239
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0001328743793195764,
      "loss": 2.011,
      "step": 240
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00013225940663219726,
      "loss": 1.9768,
      "step": 241
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00013164306917211476,
      "loss": 1.9221,
      "step": 242
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00013102539301422086,
      "loss": 1.9793,
      "step": 243
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00013040640429004267,
      "loss": 1.944,
      "step": 244
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.000129786129186637,
      "loss": 1.8944,
      "step": 245
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001291645939454825,
      "loss": 2.0456,
      "step": 246
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00012854182486136942,
      "loss": 1.992,
      "step": 247
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00012791784828128724,
      "loss": 1.9942,
      "step": 248
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00012729269060330999,
      "loss": 1.9879,
      "step": 249
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00012666637827547932,
      "loss": 1.9231,
      "step": 250
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00012603893779468604,
      "loss": 1.9368,
      "step": 251
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001254103957055485,
      "loss": 1.856,
      "step": 252
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00012478077859929,
      "loss": 1.9358,
      "step": 253
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001241501131126138,
      "loss": 1.995,
      "step": 254
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00012351842592657613,
      "loss": 1.8455,
      "step": 255
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00012288574376545733,
      "loss": 1.8912,
      "step": 256
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00012225209339563145,
      "loss": 1.9491,
      "step": 257
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00012161750162443371,
      "loss": 1.8341,
      "step": 258
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00012098199529902648,
      "loss": 1.8941,
      "step": 259
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0001203456013052634,
      "loss": 1.8808,
      "step": 260
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00011970834656655199,
      "loss": 1.8794,
      "step": 261
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00011907025804271461,
      "loss": 1.8828,
      "step": 262
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00011843136272884794,
      "loss": 1.818,
      "step": 263
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00011779168765418079,
      "loss": 1.847,
      "step": 264
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00011715125988093074,
      "loss": 1.8181,
      "step": 265
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00011651010650315923,
      "loss": 1.8297,
      "step": 266
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00011586825464562514,
      "loss": 1.9047,
      "step": 267
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00011522573146263744,
      "loss": 1.8538,
      "step": 268
    },
    {
      "epoch": 0.5,
      "eval_loss": 1.8599034547805786,
      "eval_runtime": 55.6724,
      "eval_samples_per_second": 24.213,
      "eval_steps_per_second": 1.024,
      "step": 268
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00011458256413690633,
      "loss": 2.0143,
      "step": 269
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00011393877987839327,
      "loss": 1.7532,
      "step": 270
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00011329440592315971,
      "loss": 1.825,
      "step": 271
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00011264946953221496,
      "loss": 1.7548,
      "step": 272
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00011200399799036288,
      "loss": 1.8505,
      "step": 273
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00011135801860504749,
      "loss": 1.8608,
      "step": 274
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00011071155870519777,
      "loss": 1.8352,
      "step": 275
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00011006464564007138,
      "loss": 1.9602,
      "step": 276
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00010941730677809772,
      "loss": 1.7833,
      "step": 277
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00010876956950572006,
      "loss": 1.8567,
      "step": 278
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00010812146122623683,
      "loss": 1.8606,
      "step": 279
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00010747300935864243,
      "loss": 1.7611,
      "step": 280
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001068242413364671,
      "loss": 1.8188,
      "step": 281
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010617518460661644,
      "loss": 1.7576,
      "step": 282
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010552586662821009,
      "loss": 1.8525,
      "step": 283
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010487631487142017,
      "loss": 1.7134,
      "step": 284
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00010422655681630917,
      "loss": 1.8414,
      "step": 285
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00010357661995166705,
      "loss": 1.815,
      "step": 286
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00010292653177384876,
      "loss": 1.7789,
      "step": 287
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00010227631978561056,
      "loss": 1.82,
      "step": 288
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00010162601149494676,
      "loss": 1.8095,
      "step": 289
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00010097563441392581,
      "loss": 1.7666,
      "step": 290
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00010032521605752646,
      "loss": 1.8287,
      "step": 291
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.967478394247357e-05,
      "loss": 1.8818,
      "step": 292
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.90243655860742e-05,
      "loss": 1.7085,
      "step": 293
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.837398850505324e-05,
      "loss": 1.7111,
      "step": 294
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.772368021438943e-05,
      "loss": 1.7282,
      "step": 295
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.707346822615128e-05,
      "loss": 1.8105,
      "step": 296
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.642338004833295e-05,
      "loss": 1.7592,
      "step": 297
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.577344318369084e-05,
      "loss": 1.6769,
      "step": 298
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.512368512857984e-05,
      "loss": 1.721,
      "step": 299
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.447413337178995e-05,
      "loss": 1.6353,
      "step": 300
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.38248153933836e-05,
      "loss": 1.654,
      "step": 301
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.317575866353292e-05,
      "loss": 1.7222,
      "step": 302
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.252699064135758e-05,
      "loss": 1.7727,
      "step": 303
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.187853877376318e-05,
      "loss": 1.8161,
      "step": 304
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.123043049427995e-05,
      "loss": 1.7088,
      "step": 305
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.058269322190228e-05,
      "loss": 1.7274,
      "step": 306
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.993535435992867e-05,
      "loss": 1.6949,
      "step": 307
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.928844129480227e-05,
      "loss": 1.7429,
      "step": 308
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.86419813949525e-05,
      "loss": 1.7443,
      "step": 309
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.799600200963716e-05,
      "loss": 1.5959,
      "step": 310
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.735053046778506e-05,
      "loss": 1.6557,
      "step": 311
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.670559407684034e-05,
      "loss": 1.6743,
      "step": 312
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.606122012160676e-05,
      "loss": 1.7672,
      "step": 313
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.541743586309365e-05,
      "loss": 1.6774,
      "step": 314
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.477426853736257e-05,
      "loss": 1.6597,
      "step": 315
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.413174535437487e-05,
      "loss": 1.7112,
      "step": 316
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.348989349684076e-05,
      "loss": 1.6499,
      "step": 317
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.284874011906927e-05,
      "loss": 1.625,
      "step": 318
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.220831234581922e-05,
      "loss": 1.6922,
      "step": 319
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.156863727115211e-05,
      "loss": 1.6797,
      "step": 320
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.092974195728542e-05,
      "loss": 1.6772,
      "step": 321
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.029165343344805e-05,
      "loss": 1.6439,
      "step": 322
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.965439869473664e-05,
      "loss": 1.6358,
      "step": 323
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.901800470097355e-05,
      "loss": 1.6808,
      "step": 324
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.838249837556629e-05,
      "loss": 1.6013,
      "step": 325
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.774790660436858e-05,
      "loss": 1.6607,
      "step": 326
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.711425623454267e-05,
      "loss": 1.7424,
      "step": 327
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.648157407342386e-05,
      "loss": 1.6395,
      "step": 328
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.584988688738622e-05,
      "loss": 1.7159,
      "step": 329
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.521922140071002e-05,
      "loss": 1.57,
      "step": 330
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.458960429445157e-05,
      "loss": 1.6616,
      "step": 331
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.396106220531398e-05,
      "loss": 1.6996,
      "step": 332
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.333362172452065e-05,
      "loss": 1.6087,
      "step": 333
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.270730939669006e-05,
      "loss": 1.6516,
      "step": 334
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.208215171871277e-05,
      "loss": 1.6155,
      "step": 335
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.145817513863057e-05,
      "loss": 1.7189,
      "step": 336
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.08354060545175e-05,
      "loss": 1.6217,
      "step": 337
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.021387081336301e-05,
      "loss": 1.6072,
      "step": 338
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.959359570995738e-05,
      "loss": 1.6163,
      "step": 339
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.897460698577918e-05,
      "loss": 1.5596,
      "step": 340
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.835693082788525e-05,
      "loss": 1.5035,
      "step": 341
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.774059336780277e-05,
      "loss": 1.5525,
      "step": 342
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.712562068042361e-05,
      "loss": 1.578,
      "step": 343
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.651203878290139e-05,
      "loss": 1.5724,
      "step": 344
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.589987363355068e-05,
      "loss": 1.6454,
      "step": 345
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.528915113074879e-05,
      "loss": 1.5945,
      "step": 346
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.46798971118402e-05,
      "loss": 1.5424,
      "step": 347
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.407213735204343e-05,
      "loss": 1.6592,
      "step": 348
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.34658975633605e-05,
      "loss": 1.6077,
      "step": 349
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.286120339348935e-05,
      "loss": 1.5977,
      "step": 350
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.225808042473858e-05,
      "loss": 1.6713,
      "step": 351
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.165655417294527e-05,
      "loss": 1.4955,
      "step": 352
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.105665008639557e-05,
      "loss": 1.5353,
      "step": 353
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.045839354474786e-05,
      "loss": 1.4601,
      "step": 354
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.986180985795926e-05,
      "loss": 1.5212,
      "step": 355
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.926692426521474e-05,
      "loss": 1.5534,
      "step": 356
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.867376193385936e-05,
      "loss": 1.5822,
      "step": 357
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.8082347958333625e-05,
      "loss": 1.583,
      "step": 358
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.749270735911158e-05,
      "loss": 1.484,
      "step": 359
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.6904865081642676e-05,
      "loss": 1.404,
      "step": 360
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.631884599529611e-05,
      "loss": 1.4449,
      "step": 361
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.573467489230879e-05,
      "loss": 1.553,
      "step": 362
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.515237648673656e-05,
      "loss": 1.4247,
      "step": 363
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.457197541340853e-05,
      "loss": 1.4967,
      "step": 364
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.399349622688479e-05,
      "loss": 1.5317,
      "step": 365
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.3416963400417905e-05,
      "loss": 1.5611,
      "step": 366
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.284240132491727e-05,
      "loss": 1.588,
      "step": 367
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.226983430791722e-05,
      "loss": 1.5095,
      "step": 368
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.16992865725489e-05,
      "loss": 1.4771,
      "step": 369
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.113078225651529e-05,
      "loss": 1.4928,
      "step": 370
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.0564345411070025e-05,
      "loss": 1.5018,
      "step": 371
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.000000000000002e-05,
      "loss": 1.4768,
      "step": 372
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.943776989861145e-05,
      "loss": 1.5867,
      "step": 373
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.8877678892719866e-05,
      "loss": 1.4886,
      "step": 374
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.831975067764387e-05,
      "loss": 1.5394,
      "step": 375
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.7764008857202425e-05,
      "loss": 1.4141,
      "step": 376
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.721047694271676e-05,
      "loss": 1.4668,
      "step": 377
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.665917835201512e-05,
      "loss": 1.507,
      "step": 378
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.611013640844245e-05,
      "loss": 1.4728,
      "step": 379
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.556337433987359e-05,
      "loss": 1.4566,
      "step": 380
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.501891527773038e-05,
      "loss": 1.4617,
      "step": 381
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.447678225600337e-05,
      "loss": 1.4384,
      "step": 382
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.393699821027716e-05,
      "loss": 1.5093,
      "step": 383
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.3399585976760105e-05,
      "loss": 1.4602,
      "step": 384
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.286456829131821e-05,
      "loss": 1.3591,
      "step": 385
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.2331967788513295e-05,
      "loss": 1.4804,
      "step": 386
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.180180700064531e-05,
      "loss": 1.4427,
      "step": 387
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.127410835679926e-05,
      "loss": 1.3901,
      "step": 388
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.074889418189608e-05,
      "loss": 1.4564,
      "step": 389
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.022618669574839e-05,
      "loss": 1.4548,
      "step": 390
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.97060080121203e-05,
      "loss": 1.4485,
      "step": 391
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.9188380137791936e-05,
      "loss": 1.418,
      "step": 392
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.8673324971628357e-05,
      "loss": 1.4664,
      "step": 393
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.816086430365321e-05,
      "loss": 1.4593,
      "step": 394
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.7651019814126654e-05,
      "loss": 1.4556,
      "step": 395
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.7143813072628465e-05,
      "loss": 1.3472,
      "step": 396
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.663926553714518e-05,
      "loss": 1.2962,
      "step": 397
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.613739855316257e-05,
      "loss": 1.4875,
      "step": 398
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.563823335276244e-05,
      "loss": 1.382,
      "step": 399
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.5141791053724405e-05,
      "loss": 1.3698,
      "step": 400
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.46480926586325e-05,
      "loss": 1.4564,
      "step": 401
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.415715905398664e-05,
      "loss": 1.5117,
      "step": 402
    },
    {
      "epoch": 0.75,
      "eval_loss": 1.422906517982483,
      "eval_runtime": 55.7451,
      "eval_samples_per_second": 24.182,
      "eval_steps_per_second": 1.023,
      "step": 402
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.366901100931892e-05,
      "loss": 1.407,
      "step": 403
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.3183669176315045e-05,
      "loss": 1.3833,
      "step": 404
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.27011540879406e-05,
      "loss": 1.4071,
      "step": 405
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.2221486157572324e-05,
      "loss": 1.439,
      "step": 406
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.174468567813461e-05,
      "loss": 1.4009,
      "step": 407
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.1270772821240776e-05,
      "loss": 1.417,
      "step": 408
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.079976763633996e-05,
      "loss": 1.2888,
      "step": 409
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.033169004986873e-05,
      "loss": 1.475,
      "step": 410
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.986655986440805e-05,
      "loss": 1.3512,
      "step": 411
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.940439675784563e-05,
      "loss": 1.2825,
      "step": 412
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.894522028254334e-05,
      "loss": 1.3424,
      "step": 413
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.8489049864510054e-05,
      "loss": 1.391,
      "step": 414
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.803590480257985e-05,
      "loss": 1.2999,
      "step": 415
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.7585804267595384e-05,
      "loss": 1.39,
      "step": 416
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.7138767301597067e-05,
      "loss": 1.3686,
      "step": 417
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.669481281701739e-05,
      "loss": 1.3871,
      "step": 418
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.6253959595880673e-05,
      "loss": 1.3755,
      "step": 419
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.581622628900868e-05,
      "loss": 1.4435,
      "step": 420
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.5381631415231454e-05,
      "loss": 1.3808,
      "step": 421
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.495019336060387e-05,
      "loss": 1.4653,
      "step": 422
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.4521930377627812e-05,
      "loss": 1.3653,
      "step": 423
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.4096860584479974e-05,
      "loss": 1.4224,
      "step": 424
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.367500196424529e-05,
      "loss": 1.3416,
      "step": 425
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.3256372364156286e-05,
      "loss": 1.3177,
      "step": 426
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.2840989494837793e-05,
      "loss": 1.403,
      "step": 427
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.242887092955801e-05,
      "loss": 1.4582,
      "step": 428
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.202003410348473e-05,
      "loss": 1.4202,
      "step": 429
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.1614496312947852e-05,
      "loss": 1.3492,
      "step": 430
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.121227471470768e-05,
      "loss": 1.201,
      "step": 431
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.0813386325229055e-05,
      "loss": 1.3131,
      "step": 432
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.0417848019961372e-05,
      "loss": 1.3579,
      "step": 433
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.002567653262479e-05,
      "loss": 1.3943,
      "step": 434
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.9636888454502178e-05,
      "loss": 1.4777,
      "step": 435
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.925150023373726e-05,
      "loss": 1.4449,
      "step": 436
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8869528174638752e-05,
      "loss": 1.3711,
      "step": 437
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8490988436990488e-05,
      "loss": 1.2473,
      "step": 438
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8115897035367934e-05,
      "loss": 1.4266,
      "step": 439
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.774426983846058e-05,
      "loss": 1.3646,
      "step": 440
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.7376122568400532e-05,
      "loss": 1.2857,
      "step": 441
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.7011470800097496e-05,
      "loss": 1.3349,
      "step": 442
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6650329960579792e-05,
      "loss": 1.3196,
      "step": 443
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6292715328341712e-05,
      "loss": 1.3533,
      "step": 444
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.593864203269716e-05,
      "loss": 1.3276,
      "step": 445
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.5588125053139468e-05,
      "loss": 1.434,
      "step": 446
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.5241179218707891e-05,
      "loss": 1.4045,
      "step": 447
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4897819207360098e-05,
      "loss": 1.4106,
      "step": 448
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4558059545351143e-05,
      "loss": 1.3125,
      "step": 449
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4221914606619135e-05,
      "loss": 1.2878,
      "step": 450
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3889398612176941e-05,
      "loss": 1.3981,
      "step": 451
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3560525629510568e-05,
      "loss": 1.2914,
      "step": 452
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3235309571984156e-05,
      "loss": 1.2669,
      "step": 453
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2913764198251132e-05,
      "loss": 1.405,
      "step": 454
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.259590311167238e-05,
      "loss": 1.3743,
      "step": 455
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2281739759740574e-05,
      "loss": 1.2633,
      "step": 456
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1971287433511313e-05,
      "loss": 1.2473,
      "step": 457
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.166455926704082e-05,
      "loss": 1.366,
      "step": 458
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1361568236830323e-05,
      "loss": 1.286,
      "step": 459
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1062327161276963e-05,
      "loss": 1.3363,
      "step": 460
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.0766848700131648e-05,
      "loss": 1.3854,
      "step": 461
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.0475145353963389e-05,
      "loss": 1.4039,
      "step": 462
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.01872294636304e-05,
      "loss": 1.446,
      "step": 463
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.903113209758096e-06,
      "loss": 1.3371,
      "step": 464
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.62280861222372e-06,
      "loss": 1.3936,
      "step": 465
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.346327529647868e-06,
      "loss": 1.2979,
      "step": 466
    },
    {
      "epoch": 0.88,
      "learning_rate": 9.073681658892775e-06,
      "loss": 1.3019,
      "step": 467
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.804882534567382e-06,
      "loss": 1.3869,
      "step": 468
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.53994152853952e-06,
      "loss": 1.3959,
      "step": 469
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.278869849454718e-06,
      "loss": 1.2561,
      "step": 470
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.021678542261924e-06,
      "loss": 1.3396,
      "step": 471
    },
    {
      "epoch": 0.88,
      "learning_rate": 7.76837848774642e-06,
      "loss": 1.3443,
      "step": 472
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.5189804020693536e-06,
      "loss": 1.3167,
      "step": 473
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.2734948363144206e-06,
      "loss": 1.2964,
      "step": 474
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.031932176041523e-06,
      "loss": 1.3125,
      "step": 475
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.794302640847294e-06,
      "loss": 1.2952,
      "step": 476
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.560616283932897e-06,
      "loss": 1.2655,
      "step": 477
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.330882991678577e-06,
      "loss": 1.3877,
      "step": 478
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.1051124832254944e-06,
      "loss": 1.33,
      "step": 479
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.883314310064492e-06,
      "loss": 1.3948,
      "step": 480
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.6654978556320405e-06,
      "loss": 1.3569,
      "step": 481
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.451672334913216e-06,
      "loss": 1.2655,
      "step": 482
    },
    {
      "epoch": 0.91,
      "learning_rate": 5.2418467940519185e-06,
      "loss": 1.3639,
      "step": 483
    },
    {
      "epoch": 0.91,
      "learning_rate": 5.036030109968082e-06,
      "loss": 1.427,
      "step": 484
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.834230989982213e-06,
      "loss": 1.3306,
      "step": 485
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.63645797144695e-06,
      "loss": 1.3196,
      "step": 486
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.442719421385922e-06,
      "loss": 1.2518,
      "step": 487
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.253023536139733e-06,
      "loss": 1.2918,
      "step": 488
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.067378341019257e-06,
      "loss": 1.3128,
      "step": 489
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.885791689966023e-06,
      "loss": 1.3452,
      "step": 490
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.7082712652200867e-06,
      "loss": 1.4055,
      "step": 491
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.5348245769948773e-06,
      "loss": 1.2194,
      "step": 492
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.3654589631595955e-06,
      "loss": 1.338,
      "step": 493
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.2001815889286856e-06,
      "loss": 1.158,
      "step": 494
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.0389994465587545e-06,
      "loss": 1.3595,
      "step": 495
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.881919355052709e-06,
      "loss": 1.2421,
      "step": 496
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.728947959871353e-06,
      "loss": 1.3172,
      "step": 497
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.580091732652101e-06,
      "loss": 1.2558,
      "step": 498
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.435356970935354e-06,
      "loss": 1.2086,
      "step": 499
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.294749797897955e-06,
      "loss": 1.3303,
      "step": 500
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.158276162094197e-06,
      "loss": 1.3262,
      "step": 501
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.0259418372041485e-06,
      "loss": 1.3066,
      "step": 502
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.8977524217893783e-06,
      "loss": 1.3071,
      "step": 503
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.7737133390561046e-06,
      "loss": 1.338,
      "step": 504
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.6538298366257976e-06,
      "loss": 1.3407,
      "step": 505
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5381069863131037e-06,
      "loss": 1.4355,
      "step": 506
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.426549683911349e-06,
      "loss": 1.341,
      "step": 507
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.3191626489853615e-06,
      "loss": 1.3546,
      "step": 508
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.2159504246718522e-06,
      "loss": 1.3322,
      "step": 509
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1169173774871478e-06,
      "loss": 1.3374,
      "step": 510
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.0220676971425257e-06,
      "loss": 1.3706,
      "step": 511
    },
    {
      "epoch": 0.96,
      "learning_rate": 9.314053963669245e-07,
      "loss": 1.3249,
      "step": 512
    },
    {
      "epoch": 0.96,
      "learning_rate": 8.44934310737211e-07,
      "loss": 1.4235,
      "step": 513
    },
    {
      "epoch": 0.96,
      "learning_rate": 7.62658098515856e-07,
      "loss": 1.2759,
      "step": 514
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.845802404962243e-07,
      "loss": 1.342,
      "step": 515
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.107040398553144e-07,
      "loss": 1.4004,
      "step": 516
    },
    {
      "epoch": 0.97,
      "learning_rate": 5.410326220139705e-07,
      "loss": 1.3671,
      "step": 517
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.7556893450466653e-07,
      "loss": 1.2746,
      "step": 518
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.143157468468717e-07,
      "loss": 1.3585,
      "step": 519
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.5727565042978915e-07,
      "loss": 1.3258,
      "step": 520
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.044510584027771e-07,
      "loss": 1.2909,
      "step": 521
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.558442055732524e-07,
      "loss": 1.2929,
      "step": 522
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.1145714831216634e-07,
      "loss": 1.389,
      "step": 523
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.7129176446692984e-07,
      "loss": 1.4132,
      "step": 524
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3534975328205467e-07,
      "loss": 1.2848,
      "step": 525
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.0363263532724432e-07,
      "loss": 1.3483,
      "step": 526
    },
    {
      "epoch": 0.99,
      "learning_rate": 7.614175243301213e-08,
      "loss": 1.3588,
      "step": 527
    },
    {
      "epoch": 0.99,
      "learning_rate": 5.287826763398229e-08,
      "loss": 1.3012,
      "step": 528
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.384316511964025e-08,
      "loss": 1.3524,
      "step": 529
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.9037250192732726e-08,
      "loss": 1.3232,
      "step": 530
    },
    {
      "epoch": 1.0,
      "learning_rate": 8.46114923513941e-09,
      "loss": 1.3092,
      "step": 531
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.1153096814607865e-09,
      "loss": 1.2231,
      "step": 532
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 1.4531,
      "step": 533
    }
  ],
  "logging_steps": 1,
  "max_steps": 533,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 4.023661144257331e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}