{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 744020,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 3.125e-05, "loss": 6.2858, "step": 1000 },
    { "epoch": 0.05, "learning_rate": 6.25e-05, "loss": 5.118, "step": 2000 },
    { "epoch": 0.08, "learning_rate": 9.375e-05, "loss": 4.8257, "step": 3000 },
    { "epoch": 0.11, "learning_rate": 0.000125, "loss": 4.62, "step": 4000 },
    { "epoch": 0.13, "learning_rate": 0.00015625, "loss": 4.4689, "step": 5000 },
    { "epoch": 0.16, "learning_rate": 0.0001875, "loss": 4.3516, "step": 6000 },
    { "epoch": 0.19, "learning_rate": 0.00021875, "loss": 4.2636, "step": 7000 },
    { "epoch": 0.22, "learning_rate": 0.00025, "loss": 4.1906, "step": 8000 },
    { "epoch": 0.24, "learning_rate": 0.00028125000000000003, "loss": 4.1354, "step": 9000 },
    { "epoch": 0.27, "learning_rate": 0.0003125, "loss": 4.0582, "step": 10000 },
    { "epoch": 0.3, "learning_rate": 0.00034365625, "loss": 3.9986, "step": 11000 },
    { "epoch": 0.32, "learning_rate": 0.00037490625, "loss": 3.9535, "step": 12000 },
    { "epoch": 0.35, "learning_rate": 0.00040615625, "loss": 3.9158, "step": 13000 },
    { "epoch": 0.38, "learning_rate": 0.00043737500000000005, "loss": 3.8747, "step": 14000 },
    { "epoch": 0.4, "learning_rate": 0.000468625, "loss": 3.8471, "step": 15000 },
    { "epoch": 0.43, "learning_rate": 0.00049984375, "loss": 3.8178, "step": 16000 },
    { "epoch": 0.46, "learning_rate": 0.00053109375, "loss": 3.794, "step": 17000 },
    { "epoch": 0.48, "learning_rate": 0.0005623125, "loss": 3.7705, "step": 18000 },
    { "epoch": 0.51, "learning_rate": 0.0005935625, "loss": 3.757, "step": 19000 },
    { "epoch": 0.54, "learning_rate": 0.00062478125, "loss": 3.731, "step": 20000 },
    { "epoch": 0.56, "learning_rate": 0.0006560312499999999, "loss": 3.7135, "step": 21000 },
    { "epoch": 0.59, "learning_rate": 0.00068725, "loss": 3.6993, "step": 22000 },
    { "epoch": 0.62, "learning_rate": 0.00071846875, "loss": 3.6745, "step": 23000 },
    { "epoch": 0.65, "learning_rate": 0.00074971875, "loss": 3.668, "step": 24000 },
    { "epoch": 0.67, "learning_rate": 0.0007809375, "loss": 3.6522, "step": 25000 },
    { "epoch": 0.7, "learning_rate": 0.0008121875, "loss": 3.6422, "step": 26000 },
    { "epoch": 0.73, "learning_rate": 0.0008434062500000001, "loss": 3.6284, "step": 27000 },
    { "epoch": 0.75, "learning_rate": 0.00087465625, "loss": 3.6213, "step": 28000 },
    { "epoch": 0.78, "learning_rate": 0.00090590625, "loss": 3.6071, "step": 29000 },
    { "epoch": 0.81, "learning_rate": 0.000937125, "loss": 3.5975, "step": 30000 },
    { "epoch": 0.83, "learning_rate": 0.000968375, "loss": 3.5921, "step": 31000 },
    { "epoch": 0.86, "learning_rate": 0.00099959375, "loss": 3.5862, "step": 32000 },
    { "epoch": 0.89, "learning_rate": 0.0009986138029830622, "loss": 3.5652, "step": 33000 },
    { "epoch": 0.91, "learning_rate": 0.000997209348052021, "loss": 3.5558, "step": 34000 },
    { "epoch": 0.94, "learning_rate": 0.0009958048931209798, "loss": 3.5398, "step": 35000 },
    { "epoch": 0.97, "learning_rate": 0.0009944018426448695, "loss": 3.527, "step": 36000 },
    { "epoch": 0.99, "learning_rate": 0.0009929973877138283, "loss": 3.5148, "step": 37000 },
    { "epoch": 1.0, "eval_accuracy": 0.3670570705333534, "eval_loss": 3.7269980907440186, "eval_runtime": 147.4611, "eval_samples_per_second": 392.781, "eval_steps_per_second": 6.137, "step": 37201 },
    { "epoch": 1.02, "learning_rate": 0.0009915929327827871, "loss": 3.4761, "step": 38000 },
    { "epoch": 1.05, "learning_rate": 0.0009901898823066768, "loss": 3.4631, "step": 39000 },
    { "epoch": 1.08, "learning_rate": 0.0009887854273756356, "loss": 3.4614, "step": 40000 },
    { "epoch": 1.1, "learning_rate": 0.0009873809724445942, "loss": 3.4538, "step": 41000 },
    { "epoch": 1.13, "learning_rate": 0.000985976517513553, "loss": 3.4587, "step": 42000 },
    { "epoch": 1.16, "learning_rate": 0.0009845720625825118, "loss": 3.4417, "step": 43000 },
    { "epoch": 1.18, "learning_rate": 0.0009831690121064015, "loss": 3.4351, "step": 44000 },
    { "epoch": 1.21, "learning_rate": 0.0009817645571753603, "loss": 3.4312, "step": 45000 },
    { "epoch": 1.24, "learning_rate": 0.0009803601022443191, "loss": 3.4235, "step": 46000 },
    { "epoch": 1.26, "learning_rate": 0.0009789570517682088, "loss": 3.4191, "step": 47000 },
    { "epoch": 1.29, "learning_rate": 0.0009775525968371674, "loss": 3.4077, "step": 48000 },
    { "epoch": 1.32, "learning_rate": 0.0009761495463610572, "loss": 3.4119, "step": 49000 },
    { "epoch": 1.34, "learning_rate": 0.000974745091430016, "loss": 3.392, "step": 50000 },
    { "epoch": 1.37, "learning_rate": 0.0009733406364989748, "loss": 3.3985, "step": 51000 },
    { "epoch": 1.4, "learning_rate": 0.0009719361815679335, "loss": 3.3884, "step": 52000 },
    { "epoch": 1.42, "learning_rate": 0.0009705317266368922, "loss": 3.3792, "step": 53000 },
    { "epoch": 1.45, "learning_rate": 0.000969127271705851, "loss": 3.3734, "step": 54000 },
    { "epoch": 1.48, "learning_rate": 0.0009677242212297408, "loss": 3.3699, "step": 55000 },
    { "epoch": 1.51, "learning_rate": 0.0009663197662986994, "loss": 3.3721, "step": 56000 },
    { "epoch": 1.53, "learning_rate": 0.0009649167158225893, "loss": 3.3688, "step": 57000 },
    { "epoch": 1.56, "learning_rate": 0.000963512260891548, "loss": 3.3598, "step": 58000 },
    { "epoch": 1.59, "learning_rate": 0.0009621078059605067, "loss": 3.3608, "step": 59000 },
    { "epoch": 1.61, "learning_rate": 0.0009607047554843966, "loss": 3.3529, "step": 60000 },
    { "epoch": 1.64, "learning_rate": 0.0009593003005533553, "loss": 3.3448, "step": 61000 },
    { "epoch": 1.67, "learning_rate": 0.000957895845622314, "loss": 3.3475, "step": 62000 },
    { "epoch": 1.69, "learning_rate": 0.0009564927951462038, "loss": 3.3339, "step": 63000 },
    { "epoch": 1.72, "learning_rate": 0.0009550883402151626, "loss": 3.3342, "step": 64000 },
    { "epoch": 1.75, "learning_rate": 0.0009536852897390523, "loss": 3.3323, "step": 65000 },
    { "epoch": 1.77, "learning_rate": 0.0009522808348080109, "loss": 3.3332, "step": 66000 },
    { "epoch": 1.8, "learning_rate": 0.0009508763798769697, "loss": 3.32, "step": 67000 },
    { "epoch": 1.83, "learning_rate": 0.0009494733294008595, "loss": 3.3214, "step": 68000 },
    { "epoch": 1.85, "learning_rate": 0.0009480688744698182, "loss": 3.3226, "step": 69000 },
    { "epoch": 1.88, "learning_rate": 0.000946664419538777, "loss": 3.3158, "step": 70000 },
    { "epoch": 1.91, "learning_rate": 0.0009452613690626668, "loss": 3.3179, "step": 71000 },
    { "epoch": 1.94, "learning_rate": 0.0009438569141316255, "loss": 3.3089, "step": 72000 },
    { "epoch": 1.96, "learning_rate": 0.0009424524592005843, "loss": 3.3103, "step": 73000 },
    { "epoch": 1.99, "learning_rate": 0.0009410480042695429, "loss": 3.3074, "step": 74000 },
    { "epoch": 2.0, "eval_accuracy": 0.38968938775071477, "eval_loss": 3.484098196029663, "eval_runtime": 146.602, "eval_samples_per_second": 395.083, "eval_steps_per_second": 6.173, "step": 74402 },
    { "epoch": 2.02, "learning_rate": 0.0009396435493385018, "loss": 3.2586, "step": 75000 },
    { "epoch": 2.04, "learning_rate": 0.0009382419033173227, "loss": 3.2401, "step": 76000 },
    { "epoch": 2.07, "learning_rate": 0.0009368374483862813, "loss": 3.2462, "step": 77000 },
    { "epoch": 2.1, "learning_rate": 0.00093543299345524, "loss": 3.2422, "step": 78000 },
    { "epoch": 2.12, "learning_rate": 0.0009340285385241988, "loss": 3.2446, "step": 79000 },
    { "epoch": 2.15, "learning_rate": 0.0009326254880480886, "loss": 3.2477, "step": 80000 },
    { "epoch": 2.18, "learning_rate": 0.0009312224375719783, "loss": 3.2454, "step": 81000 },
    { "epoch": 2.2, "learning_rate": 0.000929817982640937, "loss": 3.2402, "step": 82000 },
    { "epoch": 2.23, "learning_rate": 0.0009284135277098959, "loss": 3.2414, "step": 83000 },
    { "epoch": 2.26, "learning_rate": 0.0009270104772337856, "loss": 3.2409, "step": 84000 },
    { "epoch": 2.28, "learning_rate": 0.0009256060223027443, "loss": 3.2413, "step": 85000 },
    { "epoch": 2.31, "learning_rate": 0.000924201567371703, "loss": 3.2392, "step": 86000 },
    { "epoch": 2.34, "learning_rate": 0.0009227985168955928, "loss": 3.2399, "step": 87000 },
    { "epoch": 2.37, "learning_rate": 0.0009213940619645515, "loss": 3.2404, "step": 88000 },
    { "epoch": 2.39, "learning_rate": 0.0009199896070335103, "loss": 3.2342, "step": 89000 },
    { "epoch": 2.42, "learning_rate": 0.000918585152102469, "loss": 3.2307, "step": 90000 },
    { "epoch": 2.45, "learning_rate": 0.0009171821016263588, "loss": 3.2342, "step": 91000 },
    { "epoch": 2.47, "learning_rate": 0.0009157790511502487, "loss": 3.2372, "step": 92000 },
    { "epoch": 2.5, "learning_rate": 0.0009143745962192074, "loss": 3.2274, "step": 93000 },
    { "epoch": 2.53, "learning_rate": 0.0009129715457430971, "loss": 3.2337, "step": 94000 },
    { "epoch": 2.55, "learning_rate": 0.000911567090812056, "loss": 3.2228, "step": 95000 },
    { "epoch": 2.58, "learning_rate": 0.0009101626358810146, "loss": 3.2285, "step": 96000 },
    { "epoch": 2.61, "learning_rate": 0.0009087581809499733, "loss": 3.2247, "step": 97000 },
    { "epoch": 2.63, "learning_rate": 0.000907355130473863, "loss": 3.2241, "step": 98000 },
    { "epoch": 2.66, "learning_rate": 0.0009059520799977529, "loss": 3.2243, "step": 99000 },
    { "epoch": 2.69, "learning_rate": 0.0009045476250667116, "loss": 3.221, "step": 100000 },
    { "epoch": 2.71, "learning_rate": 0.0009031431701356703, "loss": 3.2195, "step": 101000 },
    { "epoch": 2.74, "learning_rate": 0.0009017401196595602, "loss": 3.2168, "step": 102000 },
    { "epoch": 2.77, "learning_rate": 0.0009003356647285189, "loss": 3.2185, "step": 103000 },
    { "epoch": 2.8, "learning_rate": 0.0008989326142524087, "loss": 3.2177, "step": 104000 },
    { "epoch": 2.82, "learning_rate": 0.0008975281593213675, "loss": 3.2188, "step": 105000 },
    { "epoch": 2.85, "learning_rate": 0.0008961237043903261, "loss": 3.2152, "step": 106000 },
    { "epoch": 2.88, "learning_rate": 0.0008947192494592848, "loss": 3.2146, "step": 107000 },
    { "epoch": 2.9, "learning_rate": 0.0008933176034381056, "loss": 3.2083, "step": 108000 },
    { "epoch": 2.93, "learning_rate": 0.0008919131485070644, "loss": 3.2108, "step": 109000 },
    { "epoch": 2.96, "learning_rate": 0.0008905086935760231, "loss": 3.2138, "step": 110000 },
    { "epoch": 2.98, "learning_rate": 0.0008891056430999129, "loss": 3.1988, "step": 111000 },
    { "epoch": 3.0, "eval_accuracy": 0.3979050669647656, "eval_loss": 3.4299747943878174, "eval_runtime": 149.1667, "eval_samples_per_second": 388.29, "eval_steps_per_second": 6.067, "step": 111603 },
    { "epoch": 3.01, "learning_rate": 0.0008877011881688717, "loss": 3.1786, "step": 112000 },
    { "epoch": 3.04, "learning_rate": 0.0008862967332378304, "loss": 3.14, "step": 113000 },
    { "epoch": 3.06, "learning_rate": 0.0008848936827617202, "loss": 3.1408, "step": 114000 },
    { "epoch": 3.09, "learning_rate": 0.000883489227830679, "loss": 3.1491, "step": 115000 },
    { "epoch": 3.12, "learning_rate": 0.0008820847728996376, "loss": 3.1428, "step": 116000 },
    { "epoch": 3.15, "learning_rate": 0.0008806803179685963, "loss": 3.1515, "step": 117000 },
    { "epoch": 3.17, "learning_rate": 0.0008792772674924862, "loss": 3.1396, "step": 118000 },
    { "epoch": 3.2, "learning_rate": 0.0008778728125614449, "loss": 3.1518, "step": 119000 },
    { "epoch": 3.23, "learning_rate": 0.0008764683576304036, "loss": 3.1464, "step": 120000 },
    { "epoch": 3.25, "learning_rate": 0.0008750653071542935, "loss": 3.1562, "step": 121000 },
    { "epoch": 3.28, "learning_rate": 0.0008736608522232522, "loss": 3.1454, "step": 122000 },
    { "epoch": 3.31, "learning_rate": 0.0008722563972922109, "loss": 3.153, "step": 123000 },
    { "epoch": 3.33, "learning_rate": 0.0008708547512710317, "loss": 3.1608, "step": 124000 },
    { "epoch": 3.36, "learning_rate": 0.0008694502963399905, "loss": 3.1545, "step": 125000 },
    { "epoch": 3.39, "learning_rate": 0.0008680458414089491, "loss": 3.1477, "step": 126000 },
    { "epoch": 3.41, "learning_rate": 0.0008666427909328389, "loss": 3.1499, "step": 127000 },
    { "epoch": 3.44, "learning_rate": 0.0008652383360017977, "loss": 3.1532, "step": 128000 },
    { "epoch": 3.47, "learning_rate": 0.0008638338810707564, "loss": 3.1507, "step": 129000 },
    { "epoch": 3.49, "learning_rate": 0.0008624308305946462, "loss": 3.1545, "step": 130000 },
    { "epoch": 3.52, "learning_rate": 0.000861026375663605, "loss": 3.1478, "step": 131000 },
    { "epoch": 3.55, "learning_rate": 0.0008596233251874948, "loss": 3.157, "step": 132000 },
    { "epoch": 3.58, "learning_rate": 0.0008582188702564535, "loss": 3.1439, "step": 133000 },
    { "epoch": 3.6, "learning_rate": 0.0008568144153254123, "loss": 3.1461, "step": 134000 },
    { "epoch": 3.63, "learning_rate": 0.0008554099603943709, "loss": 3.1467, "step": 135000 },
    { "epoch": 3.66, "learning_rate": 0.0008540069099182607, "loss": 3.1486, "step": 136000 },
    { "epoch": 3.68, "learning_rate": 0.0008526024549872195, "loss": 3.1467, "step": 137000 },
    { "epoch": 3.71, "learning_rate": 0.0008511994045111093, "loss": 3.1482, "step": 138000 },
    { "epoch": 3.74, "learning_rate": 0.000849794949580068, "loss": 3.1508, "step": 139000 },
    { "epoch": 3.76, "learning_rate": 0.0008483904946490267, "loss": 3.1574, "step": 140000 },
    { "epoch": 3.79, "learning_rate": 0.0008469860397179855, "loss": 3.1437, "step": 141000 },
    { "epoch": 3.82, "learning_rate": 0.0008455815847869442, "loss": 3.1427, "step": 142000 },
    { "epoch": 3.84, "learning_rate": 0.000844178534310834, "loss": 3.15, "step": 143000 },
    { "epoch": 3.87, "learning_rate": 0.0008427740793797927, "loss": 3.1489, "step": 144000 },
    { "epoch": 3.9, "learning_rate": 0.0008413710289036824, "loss": 3.1449, "step": 145000 },
    { "epoch": 3.92, "learning_rate": 0.0008399665739726412, "loss": 3.1465, "step": 146000 },
    { "epoch": 3.95, "learning_rate": 0.0008385621190416, "loss": 3.1375, "step": 147000 },
    { "epoch": 3.98, "learning_rate": 0.0008371576641105587, "loss": 3.152, "step": 148000 },
    { "epoch": 4.0, "eval_accuracy": 0.4049755331384225, "eval_loss": 3.3773725032806396, "eval_runtime": 149.0965, "eval_samples_per_second": 388.473, "eval_steps_per_second": 6.07, "step": 148804 },
    { "epoch": 4.01, "learning_rate": 0.0008357546136344485, "loss": 3.1283, "step": 149000 },
    { "epoch": 4.03, "learning_rate": 0.0008343515631583383, "loss": 3.0733, "step": 150000 },
    { "epoch": 4.06, "learning_rate": 0.000832947108227297, "loss": 3.0775, "step": 151000 },
    { "epoch": 4.09, "learning_rate": 0.0008315426532962557, "loss": 3.0746, "step": 152000 },
    { "epoch": 4.11, "learning_rate": 0.0008301381983652145, "loss": 3.0835, "step": 153000 },
    { "epoch": 4.14, "learning_rate": 0.0008287351478891042, "loss": 3.0848, "step": 154000 },
    { "epoch": 4.17, "learning_rate": 0.0008273306929580629, "loss": 3.0851, "step": 155000 },
    { "epoch": 4.19, "learning_rate": 0.0008259276424819527, "loss": 3.0789, "step": 156000 },
    { "epoch": 4.22, "learning_rate": 0.0008245231875509115, "loss": 3.0933, "step": 157000 },
    { "epoch": 4.25, "learning_rate": 0.0008231201370748013, "loss": 3.0864, "step": 158000 },
    { "epoch": 4.27, "learning_rate": 0.00082171568214376, "loss": 3.0948, "step": 159000 },
    { "epoch": 4.3, "learning_rate": 0.0008203126316676498, "loss": 3.0914, "step": 160000 },
    { "epoch": 4.33, "learning_rate": 0.0008189081767366086, "loss": 3.0954, "step": 161000 },
    { "epoch": 4.35, "learning_rate": 0.0008175037218055673, "loss": 3.0923, "step": 162000 },
    { "epoch": 4.38, "learning_rate": 0.000816099266874526, "loss": 3.0964, "step": 163000 },
    { "epoch": 4.41, "learning_rate": 0.0008146962163984158, "loss": 3.0941, "step": 164000 },
    { "epoch": 4.44, "learning_rate": 0.0008132917614673745, "loss": 3.0902, "step": 165000 },
    { "epoch": 4.46, "learning_rate": 0.0008118887109912643, "loss": 3.0969, "step": 166000 },
    { "epoch": 4.49, "learning_rate": 0.000810484256060223, "loss": 3.0948, "step": 167000 },
    { "epoch": 4.52, "learning_rate": 0.0008090798011291817, "loss": 3.0874, "step": 168000 },
    { "epoch": 4.54, "learning_rate": 0.0008076767506530715, "loss": 3.0981, "step": 169000 },
    { "epoch": 4.57, "learning_rate": 0.0008062722957220303, "loss": 3.0934, "step": 170000 },
    { "epoch": 4.6, "learning_rate": 0.000804867840790989, "loss": 3.0974, "step": 171000 },
    { "epoch": 4.62, "learning_rate": 0.0008034633858599479, "loss": 3.0942, "step": 172000 },
    { "epoch": 4.65, "learning_rate": 0.0008020603353838376, "loss": 3.0917, "step": 173000 },
    { "epoch": 4.68, "learning_rate": 0.0008006558804527962, "loss": 3.0974, "step": 174000 },
    { "epoch": 4.7, "learning_rate": 0.000799252829976686, "loss": 3.0948, "step": 175000 },
    { "epoch": 4.73, "learning_rate": 0.0007978483750456448, "loss": 3.099, "step": 176000 },
    { "epoch": 4.76, "learning_rate": 0.0007964439201146035, "loss": 3.1001, "step": 177000 },
    { "epoch": 4.78, "learning_rate": 0.0007950394651835623, "loss": 3.0987, "step": 178000 },
    { "epoch": 4.81, "learning_rate": 0.0007936364147074521, "loss": 3.0925, "step": 179000 },
    { "epoch": 4.84, "learning_rate": 0.0007922319597764108, "loss": 3.0969, "step": 180000 },
    { "epoch": 4.87, "learning_rate": 0.0007908289093003006, "loss": 3.0986, "step": 181000 },
    { "epoch": 4.89, "learning_rate": 0.0007894244543692594, "loss": 3.095, "step": 182000 },
    { "epoch": 4.92, "learning_rate": 0.000788019999438218, "loss": 3.0932, "step": 183000 },
    { "epoch": 4.95, "learning_rate": 0.0007866169489621078, "loss": 3.0919, "step": 184000 },
    { "epoch": 4.97, "learning_rate": 0.0007852124940310666, "loss": 3.0978, "step": 185000 },
    { "epoch": 5.0, "learning_rate": 0.0007838080391000253, "loss": 3.0973, "step": 186000 },
    { "epoch": 5.0, "eval_accuracy": 0.40901955199174495, "eval_loss": 3.346210479736328, "eval_runtime": 147.9489, "eval_samples_per_second": 391.487, "eval_steps_per_second": 6.117, "step": 186005 },
    { "epoch": 5.03, "learning_rate": 0.000782403584168984, "loss": 3.021, "step": 187000 },
    { "epoch": 5.05, "learning_rate": 0.0007810005336928739, "loss": 3.0218, "step": 188000 },
    { "epoch": 5.08, "learning_rate": 0.0007795960787618326, "loss": 3.0321, "step": 189000 },
    { "epoch": 5.11, "learning_rate": 0.0007781930282857223, "loss": 3.0359, "step": 190000 },
    { "epoch": 5.13, "learning_rate": 0.0007767885733546812, "loss": 3.0365, "step": 191000 },
    { "epoch": 5.16, "learning_rate": 0.0007753855228785709, "loss": 3.0411, "step": 192000 },
    { "epoch": 5.19, "learning_rate": 0.0007739810679475295, "loss": 3.0414, "step": 193000 },
    { "epoch": 5.21, "learning_rate": 0.0007725766130164883, "loss": 3.0395, "step": 194000 },
    { "epoch": 5.24, "learning_rate": 0.0007711721580854471, "loss": 3.042, "step": 195000 },
    { "epoch": 5.27, "learning_rate": 0.0007697691076093368, "loss": 3.0454, "step": 196000 },
    { "epoch": 5.3, "learning_rate": 0.0007683646526782956, "loss": 3.0444, "step": 197000 },
    { "epoch": 5.32, "learning_rate": 0.0007669601977472544, "loss": 3.0452, "step": 198000 },
    { "epoch": 5.35, "learning_rate": 0.0007655557428162131, "loss": 3.0496, "step": 199000 },
    { "epoch": 5.38, "learning_rate": 0.0007641526923401028, "loss": 3.0454, "step": 200000 },
    { "epoch": 5.4, "learning_rate": 0.0007627482374090615, "loss": 3.048, "step": 201000 },
    { "epoch": 5.43, "learning_rate": 0.0007613451869329513, "loss": 3.0478, "step": 202000 },
    { "epoch": 5.46, "learning_rate": 0.00075994073200191, "loss": 3.0495, "step": 203000 },
    { "epoch": 5.48, "learning_rate": 0.0007585376815257999, "loss": 3.0498, "step": 204000 },
    { "epoch": 5.51, "learning_rate": 0.0007571332265947586, "loss": 3.0483, "step": 205000 },
    { "epoch": 5.54, "learning_rate": 0.0007557301761186483, "loss": 3.0534, "step": 206000 },
    { "epoch": 5.56, "learning_rate": 0.0007543257211876072, "loss": 3.0583, "step": 207000 },
    { "epoch": 5.59, "learning_rate": 0.0007529212662565659, "loss": 3.0512, "step": 208000 },
    { "epoch": 5.62, "learning_rate": 0.0007515182157804556, "loss": 3.0507, "step": 209000 },
    { "epoch": 5.65, "learning_rate": 0.0007501137608494145, "loss": 3.0499, "step": 210000 },
    { "epoch": 5.67, "learning_rate": 0.0007487107103733042, "loss": 3.0572, "step": 211000 },
    { "epoch": 5.7, "learning_rate": 0.0007473062554422628, "loss": 3.0519, "step": 212000 },
    { "epoch": 5.73, "learning_rate": 0.0007459018005112215, "loss": 3.0495, "step": 213000 },
    { "epoch": 5.75, "learning_rate": 0.0007444987500351114, "loss": 3.0612, "step": 214000 },
    { "epoch": 5.78, "learning_rate": 0.0007430956995590012, "loss": 3.0547, "step": 215000 },
    { "epoch": 5.81, "learning_rate": 0.0007416912446279599, "loss": 3.054, "step": 216000 },
    { "epoch": 5.83, "learning_rate": 0.0007402867896969187, "loss": 3.0547, "step": 217000 },
    { "epoch": 5.86, "learning_rate": 0.0007388823347658774, "loss": 3.0588, "step": 218000 },
    { "epoch": 5.89, "learning_rate": 0.0007374792842897672, "loss": 3.0531, "step": 219000 },
    { "epoch": 5.91, "learning_rate": 0.0007360762338136569, "loss": 3.0585, "step": 220000 },
    { "epoch": 5.94, "learning_rate": 0.0007346717788826157, "loss": 3.0522, "step": 221000 },
    { "epoch": 5.97, "learning_rate": 0.0007332673239515743, "loss": 3.0604, "step": 222000 },
    { "epoch": 5.99, "learning_rate": 0.0007318628690205332, "loss": 3.0543, "step": 223000 },
    { "epoch": 6.0, "eval_accuracy": 0.4064327960745534, "eval_loss": 3.3686516284942627, "eval_runtime": 149.1251, "eval_samples_per_second": 388.399, "eval_steps_per_second": 6.069, "step": 223206 },
    { "epoch": 6.02, "learning_rate": 0.0007304598185444229, "loss": 3.0023, "step": 224000 },
    { "epoch": 6.05, "learning_rate": 0.0007290553636133816, "loss": 2.9878, "step": 225000 },
    { "epoch": 6.08, "learning_rate": 0.0007276509086823405, "loss": 2.9862, "step": 226000 },
    { "epoch": 6.1, "learning_rate": 0.0007262464537512992, "loss": 2.993, "step": 227000 },
    { "epoch": 6.13, "learning_rate": 0.0007248434032751889, "loss": 2.9986, "step": 228000 },
    { "epoch": 6.16, "learning_rate": 0.0007234389483441476, "loss": 2.9984, "step": 229000 },
    { "epoch": 6.18, "learning_rate": 0.0007220344934131064, "loss": 2.9975, "step": 230000 },
    { "epoch": 6.21, "learning_rate": 0.0007206314429369961, "loss": 3.0058, "step": 231000 },
    { "epoch": 6.24, "learning_rate": 0.0007192269880059548, "loss": 3.003, "step": 232000 },
    { "epoch": 6.26, "learning_rate": 0.0007178239375298447, "loss": 3.0059, "step": 233000 },
    { "epoch": 6.29, "learning_rate": 0.0007164194825988034, "loss": 3.0085, "step": 234000 },
    { "epoch": 6.32, "learning_rate": 0.0007150150276677621, "loss": 3.0067, "step": 235000 },
    { "epoch": 6.34, "learning_rate": 0.000713611977191652, "loss": 3.0048, "step": 236000 },
    { "epoch": 6.37, "learning_rate": 0.0007122075222606107, "loss": 3.007, "step": 237000 },
    { "epoch": 6.4, "learning_rate": 0.0007108044717845005, "loss": 3.0163, "step": 238000 },
    { "epoch": 6.42, "learning_rate": 0.0007094000168534593, "loss": 3.0104, "step": 239000 },
    { "epoch": 6.45, "learning_rate": 0.0007079955619224179, "loss": 3.0145, "step": 240000 },
    { "epoch": 6.48, "learning_rate": 0.0007065939159012388, "loss": 3.0145, "step": 241000 },
    { "epoch": 6.51, "learning_rate": 0.0007051894609701974, "loss": 3.0127, "step": 242000 },
    { "epoch": 6.53, "learning_rate": 0.0007037850060391562, "loss": 3.0159, "step": 243000 },
    { "epoch": 6.56, "learning_rate": 0.0007023805511081149, "loss": 3.0151, "step": 244000 },
    { "epoch": 6.59, "learning_rate": 0.0007009760961770736, "loss": 3.02, "step": 245000 },
    { "epoch": 6.61, "learning_rate": 0.0006995730457009635, "loss": 3.019, "step": 246000 },
    { "epoch": 6.64, "learning_rate": 0.0006981685907699222, "loss": 3.0139, "step": 247000 },
    { "epoch": 6.67, "learning_rate": 0.000696765540293812, "loss": 3.0183, "step": 248000 },
    { "epoch": 6.69, "learning_rate": 0.0006953610853627708, "loss": 3.0203, "step": 249000 },
    { "epoch": 6.72, "learning_rate": 0.0006939566304317294, "loss": 3.024, "step": 250000 },
    { "epoch": 6.75, "learning_rate": 0.0006925535799556192, "loss": 3.0248, "step": 251000 },
    { "epoch": 6.77, "learning_rate": 0.000691149125024578, "loss": 3.0249, "step": 252000 },
    { "epoch": 6.8, "learning_rate": 0.0006897460745484677, "loss": 3.0181, "step": 253000 },
    { "epoch": 6.83, "learning_rate": 0.0006883416196174265, "loss": 3.0196, "step": 254000 },
    { "epoch": 6.85, "learning_rate": 0.0006869371646863853, "loss": 3.0244, "step": 255000 },
    { "epoch": 6.88, "learning_rate": 0.000685534114210275, "loss": 3.0251, "step": 256000 },
    { "epoch": 6.91, "learning_rate": 0.0006841296592792338, "loss": 3.0219, "step": 257000 },
    { "epoch": 6.94, "learning_rate": 0.0006827266088031235, "loss": 3.0203, "step": 258000 },
    { "epoch": 6.96, "learning_rate": 0.0006813221538720823, "loss": 3.0225, "step": 259000 },
    { "epoch": 6.99, "learning_rate": 0.0006799176989410409, "loss": 3.0161, "step": 260000 },
    { "epoch": 7.0, "eval_accuracy": 0.4113539808027173, "eval_loss": 3.339113473892212, "eval_runtime": 148.8983, "eval_samples_per_second": 388.99, "eval_steps_per_second": 6.078, "step": 260407 },
    { "epoch": 7.02, "learning_rate": 0.0006785132440099997, "loss": 2.9764, "step": 261000 },
    { "epoch": 7.04, "learning_rate": 0.0006771101935338895, "loss": 2.9576, "step": 262000 },
    { "epoch": 7.07, "learning_rate": 0.0006757057386028482, "loss": 2.9529, "step": 263000 },
    { "epoch": 7.1, "learning_rate": 0.0006743012836718069, "loss": 2.9603, "step": 264000 },
    { "epoch": 7.12, "learning_rate": 0.0006728968287407658, "loss": 2.9641, "step": 265000 },
    { "epoch": 7.15, "learning_rate": 0.0006714937782646555, "loss": 2.9675, "step": 266000 },
    { "epoch": 7.18, "learning_rate": 0.0006700907277885453, "loss": 2.9641, "step": 267000 },
    { "epoch": 7.2, "learning_rate": 0.0006686862728575041, "loss": 2.9672, "step": 268000 },
    { "epoch": 7.23, "learning_rate": 0.0006672818179264628, "loss": 2.9774, "step": 269000 },
    { "epoch": 7.26, "learning_rate": 0.0006658773629954214, "loss": 2.9753, "step": 270000 },
    { "epoch": 7.28, "learning_rate": 0.0006644743125193112, "loss": 2.9674, "step": 271000 },
    { "epoch": 7.31, "learning_rate": 0.00066306985758827, "loss": 2.9716, "step": 272000 },
    { "epoch": 7.34, "learning_rate": 0.0006616654026572287, "loss": 2.9788, "step": 273000 },
    { "epoch": 7.37, "learning_rate": 0.0006602623521811185, "loss": 2.9791, "step": 274000 },
    { "epoch": 7.39, "learning_rate": 0.0006588593017050083, "loss": 2.9821, "step": 275000 },
    { "epoch": 7.42, "learning_rate": 0.000657454846773967, "loss": 2.9828, "step": 276000 },
    { "epoch": 7.45, "learning_rate": 0.0006560503918429258, "loss": 2.9802, "step": 277000 },
    { "epoch": 7.47, "learning_rate": 0.0006546459369118846, "loss": 2.9858, "step": 278000 },
    { "epoch": 7.5, "learning_rate": 0.0006532428864357743, "loss": 2.9814, "step": 279000 },
    { "epoch": 7.53, "learning_rate": 0.0006518398359596641, "loss": 2.9865, "step": 280000 },
    { "epoch": 7.55, "learning_rate": 0.0006504353810286228, "loss": 2.9894, "step": 281000 },
    { "epoch": 7.58, "learning_rate": 0.0006490309260975815, "loss": 2.9832, "step": 282000 },
    { "epoch": 7.61, "learning_rate": 0.0006476264711665402, "loss": 2.986, "step": 283000 },
    { "epoch": 7.63, "learning_rate": 0.0006462234206904301, "loss": 2.9924, "step": 284000 },
    { "epoch": 7.66, "learning_rate": 0.0006448189657593888, "loss": 2.9838, "step": 285000 },
    { "epoch": 7.69, "learning_rate": 0.0006434145108283475, "loss": 2.99, "step": 286000 },
    { "epoch": 7.71, "learning_rate": 0.0006420100558973064, "loss": 2.9873, "step": 287000 },
    { "epoch": 7.74, "learning_rate": 0.0006406070054211961, "loss": 2.9866, "step": 288000 },
    { "epoch": 7.77, "learning_rate": 0.0006392025504901547, "loss": 2.9869, "step": 289000 },
    { "epoch": 7.8, "learning_rate": 0.0006377995000140445, "loss": 2.9881, "step": 290000 },
    { "epoch": 7.82, "learning_rate": 0.0006363964495379343, "loss": 2.9825, "step": 291000 },
    { "epoch": 7.85, "learning_rate": 0.000634991994606893, "loss": 2.9951, "step": 292000 },
    { "epoch": 7.88, "learning_rate": 0.0006335875396758518, "loss": 2.9958, "step": 293000 },
    { "epoch": 7.9, "learning_rate": 0.0006321830847448106, "loss": 2.997, "step": 294000 },
    { "epoch": 7.93, "learning_rate": 0.0006307800342687003, "loss": 2.9886, "step": 295000 },
    { "epoch": 7.96, "learning_rate": 0.0006293755793376591, "loss": 3.0001, "step": 296000 },
    { "epoch": 7.98, "learning_rate": 0.0006279725288615489, "loss": 2.9858, "step": 297000 },
    { "epoch": 8.0, "eval_accuracy": 0.4104764790291721, "eval_loss": 3.347707748413086, "eval_runtime": 148.9691, "eval_samples_per_second": 388.806, "eval_steps_per_second": 6.075, "step": 297608 },
    { "epoch": 8.01, "learning_rate": 0.0006265680739305076, "loss": 2.9621, "step": 298000 },
    { "epoch": 8.04, "learning_rate": 0.0006251650234543974, "loss": 2.9243, "step": 299000 },
    { "epoch": 8.06, "learning_rate": 0.000623760568523356, "loss": 2.9237, "step": 300000 },
    { "epoch": 8.09, "learning_rate": 0.0006223575180472459, "loss": 2.9296, "step": 301000 },
    { "epoch": 8.12, "learning_rate": 0.0006209530631162046, "loss": 2.9321, "step": 302000 },
    { "epoch": 8.14, "learning_rate": 0.0006195486081851633, "loss": 2.9411, "step": 303000 },
    { "epoch": 8.17, "learning_rate": 0.0006181441532541221, "loss": 2.9375, "step": 304000 },
    { "epoch": 8.2, "learning_rate": 0.0006167396983230808, "loss": 2.9463, "step": 305000 },
    { "epoch": 8.23, "learning_rate": 0.0006153366478469706, "loss": 2.9421, "step": 306000 },
    { "epoch": 8.25, "learning_rate": 0.0006139321929159294, "loss": 2.9412, "step": 307000 },
    { "epoch": 8.28, "learning_rate": 0.0006125291424398192, "loss": 2.9451, "step": 308000 },
    { "epoch": 8.31, "learning_rate": 0.0006111246875087778, "loss": 2.9487, "step": 309000 },
    { "epoch": 8.33, "learning_rate": 0.0006097216370326676, "loss": 2.9474, "step": 310000 },
    { "epoch": 8.36, "learning_rate": 0.0006083171821016264, "loss": 2.9504, "step": 311000 },
    { "epoch": 8.39, "learning_rate": 0.0006069127271705851, "loss": 2.9526, "step": 312000 },
    { "epoch": 8.41, "learning_rate": 0.0006055096766944749, "loss": 2.948, "step": 313000 },
    { "epoch": 8.44, "learning_rate": 0.0006041066262183647, "loss": 2.954, "step": 314000 },
    { "epoch": 8.47, "learning_rate": 0.0006027021712873234, "loss": 2.9529, "step": 315000 },
    { "epoch": 8.49, "learning_rate": 0.0006012977163562821, "loss": 2.9531, "step": 316000 },
    { "epoch": 8.52, "learning_rate": 0.0005998932614252409, "loss": 2.9572, "step": 317000 },
    { "epoch": 8.55, "learning_rate": 0.0005984902109491307, "loss": 2.9579, "step": 318000 },
    { "epoch": 8.58, "learning_rate": 0.0005970857560180893, "loss": 2.9599, "step": 319000 },
    { "epoch": 8.6, "learning_rate": 0.0005956827055419792, "loss": 2.9537, "step": 320000 },
    { "epoch": 8.63, "learning_rate": 0.0005942782506109379, "loss": 2.9554, "step": 321000 },
    { "epoch": 8.66, "learning_rate": 0.0005928737956798966, "loss": 2.9589, "step": 322000 },
    { "epoch": 8.68, "learning_rate": 0.0005914707452037865, "loss": 2.9607, "step": 323000 },
    { "epoch": 8.71, "learning_rate": 0.0005900662902727452, "loss": 2.958, "step": 324000 },
    { "epoch": 8.74, "learning_rate": 0.0005886632397966349, "loss": 2.9597, "step": 325000 },
    { "epoch": 8.76, "learning_rate": 0.0005872587848655937, "loss": 2.9666, "step": 326000 },
    { "epoch": 8.79, "learning_rate": 0.0005858557343894835, "loss": 2.9572, "step": 327000 },
    { "epoch": 8.82, "learning_rate": 0.0005844512794584422, "loss": 2.9654, "step": 328000 },
    { "epoch": 8.84, "learning_rate": 0.000583048228982332, "loss": 2.9631, "step": 329000 },
    { "epoch": 8.87, "learning_rate": 0.0005816437740512907, "loss": 2.9666, "step": 330000 },
    { "epoch": 8.9, "learning_rate": 0.0005802393191202494, "loss": 2.9719, "step": 331000 },
    { "epoch": 8.92, "learning_rate": 0.0005788348641892081, "loss": 2.9649, "step": 332000 },
    { "epoch": 8.95, "learning_rate": 0.0005774332181680289, "loss": 2.9659, "step": 333000 },
    { "epoch": 8.98, "learning_rate": 0.0005760287632369877, "loss": 2.9718, "step": 334000 },
    { "epoch": 9.0, "eval_accuracy": 0.41122177107294106, "eval_loss": 3.343648672103882, "eval_runtime": 147.5276, "eval_samples_per_second": 392.604, "eval_steps_per_second": 6.134, "step": 334809 },
    { "epoch": 9.01, "learning_rate": 0.0005746243083059465, "loss": 2.9542, "step": 335000 },
    { "epoch": 9.03, "learning_rate": 0.0005732212578298362, "loss": 2.891, "step": 336000 },
    { "epoch": 9.06, "learning_rate": 0.000571816802898795, "loss": 2.9009, "step": 337000 },
    { "epoch": 9.09, "learning_rate": 0.0005704123479677537, "loss": 2.8991, "step": 338000 },
    { "epoch": 9.11, "learning_rate": 0.0005690078930367125, "loss": 2.9084, "step": 339000 },
    { "epoch": 9.14, "learning_rate": 0.0005676048425606023, "loss": 2.91, "step": 340000 },
    { "epoch": 9.17, "learning_rate": 0.0005662003876295609, "loss": 2.9123, "step": 341000 },
    { "epoch": 9.19, "learning_rate": 0.0005647973371534507, "loss": 2.9128, "step": 342000 },
    { "epoch": 9.22, "learning_rate": 0.0005633942866773404, "loss": 2.9138, "step": 343000 },
    { "epoch": 9.25, "learning_rate": 0.0005619898317462993, "loss": 2.9208, "step": 344000 },
    { "epoch": 9.27, "learning_rate": 0.000560585376815258, "loss": 2.9197, "step": 345000 },
    { "epoch": 9.3, "learning_rate": 0.0005591809218842168, "loss": 2.919, "step": 346000 },
    { "epoch": 9.33, "learning_rate": 0.0005577778714081066, "loss": 2.9241, "step": 347000 },
    { "epoch": 9.35, "learning_rate": 0.0005563734164770653, "loss": 2.9236, "step": 348000 },
    { "epoch": 9.38, "learning_rate": 0.000554970366000955, "loss": 2.9224, "step": 349000 },
    { "epoch": 9.41, "learning_rate": 0.0005535673155248449, "loss": 2.9247, "step": 350000 },
    { "epoch": 9.44, "learning_rate": 0.0005521628605938036, "loss": 2.9262, "step": 351000 },
    { "epoch": 9.46, "learning_rate": 0.0005507584056627622, "loss": 2.9309, "step": 352000 },
    { "epoch": 9.49, "learning_rate": 0.000549353950731721, "loss": 2.9299, "step": 353000 },
    { "epoch": 9.52, "learning_rate": 0.0005479509002556108, "loss": 2.9349, "step": 354000 },
    { "epoch": 9.54, "learning_rate": 0.0005465464453245695, "loss": 2.9367, "step": 355000 },
    { "epoch": 9.57, "learning_rate": 0.0005451433948484594, "loss": 2.933, "step": 356000 },
    { "epoch": 9.6, "learning_rate": 0.0005437389399174181, "loss": 2.9336, "step": 357000 },
    { "epoch": 9.62, "learning_rate": 0.0005423344849863768, "loss": 2.9349, "step": 358000 },
    { "epoch": 9.65, "learning_rate": 0.0005409314345102666, "loss": 2.9415, "step": 359000 },
    { "epoch": 9.68, "learning_rate": 0.0005395269795792254, "loss": 2.9328, "step": 360000 },
    { "epoch": 9.7, "learning_rate": 0.0005381239291031151, "loss": 2.9346, "step": 361000 },
    { "epoch": 9.73, "learning_rate": 0.0005367194741720737, "loss": 2.9391, "step": 362000 },
    { "epoch": 9.76, "learning_rate": 0.0005353150192410326, "loss": 2.9393, "step": 363000 },
    { "epoch": 9.78, "learning_rate": 0.0005339119687649223, "loss": 2.9419, "step": 364000 },
    { "epoch": 9.81, "learning_rate": 0.000532507513833881, "loss": 2.9377, "step": 365000 },
    { "epoch": 9.84, "learning_rate": 0.0005311058678127018, "loss": 2.9373, "step": 366000 },
    { "epoch": 9.87, "learning_rate": 0.0005297014128816607, "loss": 2.9416, "step": 367000 },
    { "epoch": 9.89, "learning_rate": 0.0005282969579506194, "loss": 2.9433, "step": 368000 },
    { "epoch": 9.92, "learning_rate": 0.0005268925030195782, "loss": 2.9406, "step": 369000 },
    { "epoch": 9.95, "learning_rate": 0.000525489452543468, "loss": 2.9419, "step": 370000 },
    { "epoch": 9.97, "learning_rate": 0.0005240849976124267, "loss": 2.9411, "step": 371000 },
    { "epoch": 10.0, "learning_rate": 0.0005226805426813853, "loss": 2.9399, "step": 372000 },
    { "epoch": 10.0, "eval_accuracy": 0.41210269901326396, "eval_loss": 3.345149278640747, "eval_runtime": 148.3396, "eval_samples_per_second": 390.455, "eval_steps_per_second": 6.101, "step": 372010 },
    { "epoch": 10.03, "learning_rate": 0.0005212774922052751, "loss": 2.8712, "step": 373000 },
    { "epoch": 10.05, "learning_rate": 0.0005198730372742338, "loss": 2.8767, "step": 374000 },
    { "epoch": 10.08, "learning_rate": 0.0005184699867981236, "loss": 2.8784, "step": 375000 },
    { "epoch": 10.11, "learning_rate": 0.0005170655318670824, "loss": 2.8827, "step": 376000 },
    { "epoch": 10.13, "learning_rate": 0.0005156624813909722, "loss": 2.8869, "step": 377000 },
    { "epoch": 10.16, "learning_rate": 0.0005142594309148619, "loss": 2.8899, "step": 378000 },
    { "epoch": 10.19, "learning_rate": 0.0005128549759838207, "loss": 2.8921, "step": 379000 },
    { "epoch": 10.21, "learning_rate": 0.0005114505210527795, "loss": 2.8914, "step": 380000 },
    { "epoch": 10.24, "learning_rate": 0.0005100460661217382, "loss": 2.8919, "step": 381000 },
    { "epoch": 10.27, "learning_rate": 0.0005086430156456279, "loss": 2.9003, "step": 382000 },
    { "epoch": 10.3, "learning_rate": 0.0005072385607145867, "loss": 2.8972, "step": 383000 },
    { "epoch": 10.32, "learning_rate": 0.0005058355102384764, "loss": 2.8954, "step": 384000 },
    { "epoch": 10.35, "learning_rate": 0.0005044310553074351, "loss": 2.9014, "step": 385000 },
    { "epoch": 10.38, "learning_rate": 0.000503026600376394, "loss": 2.903, "step": 386000 },
    { "epoch": 10.4, "learning_rate": 0.0005016235499002837, "loss": 2.9039, "step": 387000 },
    { "epoch": 10.43, "learning_rate": 0.0005002190949692424, "loss": 2.8998, "step": 388000 },
    { "epoch": 10.46, "learning_rate": 0.0004988160444931322, "loss": 2.9079, "step": 389000 },
    { "epoch": 10.48, "learning_rate": 0.000497412994017022, "loss": 2.9058, "step": 390000 },
    { "epoch": 10.51, "learning_rate": 0.0004960085390859808, "loss": 2.9025, "step": 391000 },
    { "epoch": 10.54, "learning_rate": 0.0004946040841549395, "loss": 2.9084, "step": 392000 },
    { "epoch": 10.56, "learning_rate": 0.0004931996292238982, "loss": 2.9051, "step": 393000 },
    { "epoch": 10.59, "learning_rate": 0.0004917965787477879, "loss": 2.9119, "step": 394000 },
    { "epoch": 10.62, "learning_rate": 0.0004903921238167468, "loss": 2.909, "step": 395000 },
    { "epoch": 10.64, "learning_rate": 0.0004889876688857055, "loss": 2.9127, "step": 396000 },
    { "epoch": 10.67, "learning_rate": 0.00048758461840959523, "loss": 2.9129, "step": 397000 },
    { "epoch": 10.7, "learning_rate": 0.000486180163478554, "loss": 2.9112, "step": 398000 },
    { "epoch": 10.73, "learning_rate": 0.00048477711300244376, "loss": 2.9202, "step": 399000 },
    { "epoch": 10.75, "learning_rate": 0.0004833726580714025, "loss": 2.9161, "step": 400000 },
    { "epoch": 10.78, "learning_rate": 0.00048196820314036124, "loss": 2.9211, "step": 401000 },
    { "epoch": 10.81, "learning_rate": 0.00048056374820931995, "loss": 2.9192, "step": 402000 },
    { "epoch": 10.83, "learning_rate": 0.00047916069773320976, "loss": 2.9145, "step": 403000 },
    { "epoch": 10.86, "learning_rate": 0.00047775624280216853, "loss": 2.9171, "step": 404000 },
    { "epoch": 10.89, "learning_rate": 0.0004763517878711272, "loss": 2.9132, "step": 405000 },
    { "epoch": 10.91, "learning_rate": 0.00047495014184994805, "loss": 2.9178, "step": 406000 },
    { "epoch": 10.94, "learning_rate": 0.00047354568691890676, "loss": 2.9177, "step": 407000 },
    { "epoch": 10.97, "learning_rate": 0.00047214123198786553, "loss": 2.9154, "step": 408000 },
    { "epoch": 10.99, "learning_rate": 0.0004707367770568243, "loss": 2.9207, "step": 409000 },
    { "epoch": 11.0, "eval_accuracy": 0.4129671679171056, "eval_loss": 3.358556032180786, "eval_runtime": 148.219, "eval_samples_per_second": 390.773, "eval_steps_per_second": 6.106, "step": 409211 },
    { "epoch": 11.02, "learning_rate": 0.00046933232212578296, "loss": 2.8685, "step": 410000 },
    { "epoch": 11.05, "learning_rate": 0.00046792927164967277, "loss": 2.8573, "step": 411000 },
    { "epoch": 11.07, "learning_rate": 0.0004665248167186315, "loss": 2.8612, "step": 412000 },
    { "epoch": 11.1, "learning_rate": 0.0004651217662425213, "loss": 2.862, "step": 413000 },
    { "epoch": 11.13, "learning_rate": 0.00046371871576641105, "loss": 2.8641, "step": 414000 },
    { "epoch": 11.16, "learning_rate": 0.0004623142608353698, "loss": 2.8669, "step": 415000 },
    { "epoch": 11.18, "learning_rate": 0.00046090980590432853, "loss": 2.8682, "step": 416000 },
    { "epoch": 11.21, "learning_rate": 0.0004595053509732873, "loss": 2.8753, "step": 417000 },
    { "epoch": 11.24, "learning_rate": 0.00045810230049717705, "loss": 2.8688, "step": 418000 },
    { "epoch": 11.26, "learning_rate": 0.0004566978455661358, "loss": 2.8752, "step": 419000 },
    { "epoch": 11.29, "learning_rate": 0.0004552947950900256, "loss": 2.8755, "step": 420000 },
    { "epoch": 11.32, "learning_rate": 0.0004538903401589843, "loss": 2.8753, "step": 421000 },
    { "epoch": 11.34, "learning_rate": 0.00045248728968287405, "loss": 2.8777, "step": 422000 },
    { "epoch": 11.37, "learning_rate": 0.0004510828347518328, "loss": 2.8822, "step": 423000 },
    { "epoch": 11.4, "learning_rate": 0.00044968118873065367, "loss": 2.8774, "step": 424000 },
    { "epoch": 11.42, "learning_rate": 0.0004482767337996124, "loss": 2.8873, "step": 425000 },
    { "epoch": 11.45, "learning_rate": 0.0004468722788685711, "loss": 2.8866, "step": 426000 },
    { "epoch": 11.48, "learning_rate": 0.00044546782393752987, "loss": 2.8853, "step": 427000 },
    { "epoch": 11.51, "learning_rate": 0.0004440647734614197, "loss": 2.8821, "step": 428000 },
    { "epoch": 11.53, "learning_rate": 0.00044266031853037834, "loss": 2.8884, "step": 429000 },
    { "epoch": 11.56, "learning_rate": 0.0004412558635993371, "loss": 2.8865, "step": 430000 },
    { "epoch": 11.59, "learning_rate": 0.0004398528131232269, "loss": 2.8887, "step": 431000 },
    { "epoch": 11.61, "learning_rate": 0.00043844835819218563, "loss": 2.8915, "step": 432000 },
    { "epoch": 11.64, "learning_rate": 0.00043704530771607544, "loss": 2.8884, "step": 433000 },
    { "epoch": 11.67, "learning_rate": 0.0004356408527850341, "loss": 2.8851, "step": 434000 },
    { "epoch": 11.69, "learning_rate": 0.0004342378023089239, "loss": 2.8882, "step": 435000 },
    { "epoch": 11.72, "learning_rate": 0.0004328333473778827, "loss": 2.8893, "step": 436000 },
    { "epoch": 11.75, "learning_rate": 0.00043143029690177244, "loss": 2.8898, "step": 437000 },
    { "epoch": 11.77, "learning_rate": 0.0004300258419707312, "loss": 2.8875, "step": 438000 },
    { "epoch": 11.8, "learning_rate": 0.0004286213870396899, "loss": 2.8952, "step": 439000 },
    { "epoch": 11.83, "learning_rate": 0.0004272183365635797, "loss": 2.8938, "step": 440000 },
    { "epoch": 11.85, "learning_rate": 0.00042581388163253844, "loss": 2.8965, "step": 441000 },
    { "epoch": 11.88, "learning_rate": 0.00042440942670149716, "loss": 2.8929, "step": 442000 },
    { "epoch": 11.91, "learning_rate": 0.00042300637622538697, "loss": 2.9017, "step": 443000 },
    { "epoch": 11.94, "learning_rate": 0.0004216019212943457, "loss": 2.895, "step": 444000 },
    { "epoch": 11.96, "learning_rate": 0.00042019887081823544, "loss": 2.892, "step": 445000 },
    { "epoch": 11.99, "learning_rate": 0.0004187944158871942, "loss": 2.8987, "step": 446000 },
    { "epoch": 12.0, "eval_accuracy": 0.4122773663391878, "eval_loss": 3.355417013168335, "eval_runtime": 147.56, "eval_samples_per_second": 392.518, "eval_steps_per_second": 6.133, "step": 446412 },
    { "epoch": 12.02, "learning_rate": 0.000417389960956153, "loss": 2.8605, "step": 447000 },
    { "epoch": 12.04, "learning_rate": 0.0004159883149349737, "loss": 2.8334, "step": 448000 },
    { "epoch": 12.07, "learning_rate": 0.0004145838600039325, "loss": 2.8413, "step": 449000 },
    { "epoch": 12.1, "learning_rate": 0.0004131794050728912, "loss": 2.8457, "step": 450000 },
    { "epoch": 12.12, "learning_rate": 0.00041177495014184997, "loss": 2.8426, "step": 451000 },
    { "epoch": 12.15, "learning_rate": 0.0004103718996657397, "loss": 2.8498, "step": 452000 },
    { "epoch": 12.18, "learning_rate": 0.0004089674447346985, "loss": 2.8513, "step": 453000 },
    { "epoch": 12.2, "learning_rate": 0.0004075629898036572, "loss": 2.8469, "step": 454000 },
    { "epoch": 12.23, "learning_rate": 0.0004061585348726159, "loss": 2.8472, "step": 455000 },
    { "epoch": 12.26, "learning_rate": 0.00040475548439650573, "loss": 2.8486, "step": 456000 },
    { "epoch": 12.28, "learning_rate": 0.0004033510294654645, "loss": 2.8561, "step": 457000 },
    { "epoch": 12.31, "learning_rate": 0.00040194797898935426, "loss": 2.8519, "step": 458000 },
    { "epoch": 12.34, "learning_rate": 0.00040054352405831297, "loss": 2.8522, "step": 459000 },
    { "epoch": 12.37, "learning_rate": 0.00039913906912727174, "loss": 2.8585, "step": 460000 },
    { "epoch": 12.39, "learning_rate": 0.00039773461419623045, "loss": 2.8596, "step": 461000 },
    { "epoch": 12.42, "learning_rate": 0.00039633156372012026, "loss": 2.8608, "step": 462000 },
    { "epoch": 12.45, "learning_rate": 0.00039492851324401, "loss": 2.8572, "step": 463000 },
    { "epoch": 12.47, "learning_rate": 0.00039352405831296873, "loss": 2.8622, "step": 464000 },
    { "epoch": 12.5, "learning_rate": 0.0003921196033819275, "loss": 2.8632, "step": 465000 },
    { "epoch": 12.53, "learning_rate": 0.00039071655290581726, "loss": 2.8635, "step": 466000 },
    { "epoch": 12.55, "learning_rate": 0.000389312097974776, "loss": 2.8693, "step": 467000 },
    { "epoch": 12.58, "learning_rate": 0.00038790764304373474, "loss": 2.8685, "step": 468000 },
    { "epoch": 12.61, "learning_rate": 0.00038650318811269345, "loss": 2.863, "step": 469000 },
    { "epoch": 12.63, "learning_rate": 0.00038510013763658327, "loss": 2.8722, "step": 470000 },
    { "epoch": 12.66, "learning_rate": 0.000383695682705542, "loss": 2.8671, "step": 471000 },
    { "epoch": 12.69, "learning_rate": 0.0003822912277745007, "loss": 2.8703, "step": 472000 },
    { "epoch": 12.71, "learning_rate": 0.0003808881772983905, "loss": 2.8754, "step": 473000 },
    { "epoch": 12.74, "learning_rate": 0.0003794837223673492, "loss": 2.8687, "step": 474000 },
    { "epoch": 12.77, "learning_rate": 0.00037808067189123903, "loss": 2.8734, "step": 475000 },
    { "epoch": 12.8, "learning_rate": 0.0003766762169601978, "loss": 2.8748, "step": 476000 },
    { "epoch": 12.82, "learning_rate": 0.00037527316648408755, "loss": 2.8773, "step": 477000 },
    { "epoch": 12.85, "learning_rate": 0.00037386871155304627, "loss": 2.8739, "step": 478000 },
    { "epoch": 12.88, "learning_rate": 0.000372465661076936, "loss": 2.8759, "step": 479000 },
    { "epoch": 12.9, "learning_rate": 0.0003710612061458948, "loss": 2.8762, "step": 480000 },
    { "epoch": 12.93, "learning_rate": 0.00036965675121485356, "loss": 2.8766, "step": 481000 },
    { "epoch": 12.96, "learning_rate": 0.0003682522962838122, "loss": 2.8768, "step": 482000 },
    { "epoch": 12.98, "learning_rate": 0.000366847841352771, "loss": 2.8779, "step": 483000 },
    { "epoch": 13.0, "eval_accuracy": 0.41304986617795647, "eval_loss": 3.3615658283233643, "eval_runtime": 148.2115, "eval_samples_per_second": 390.793, "eval_steps_per_second": 6.106, "step": 483613 },
    { "epoch": 13.01, "learning_rate": 0.00036544479087666074, "loss": 2.8498, "step": 484000 },
    { "epoch": 13.04, "learning_rate": 0.00036404174040055055, "loss": 2.8192, "step": 485000 },
    { "epoch": 13.06, "learning_rate": 0.0003626372854695093, "loss": 2.8181, "step": 486000 },
    { "epoch": 13.09, "learning_rate": 0.000361232830538468, "loss": 2.8195, "step": 487000 },
    { "epoch": 13.12, "learning_rate": 0.00035982837560742675, "loss": 2.8275, "step": 488000 },
    { "epoch": 13.14, "learning_rate": 0.0003584239206763855, "loss": 2.8255, "step": 489000 },
    { "epoch": 13.17, "learning_rate": 0.0003570208702002753, "loss": 2.8286, "step": 490000 },
    { "epoch": 13.2, "learning_rate": 0.000355616415269234, "loss": 2.8343, "step": 491000 },
    { "epoch": 13.23, "learning_rate": 0.00035421336479312375, "loss": 2.8334, "step": 492000 },
    { "epoch": 13.25, "learning_rate": 0.0003528089098620825, "loss": 2.8299, "step": 493000 },
    { "epoch": 13.28, "learning_rate": 0.0003514044549310413, "loss": 2.8365, "step": 494000 },
    { "epoch": 13.31, "learning_rate": 0.00035, "loss": 2.8353, "step": 495000 },
    { "epoch": 13.33, "learning_rate": 0.00034859694952388975, "loss": 2.8377, "step": 496000 },
    { "epoch": 13.36, "learning_rate": 0.0003471924945928485, "loss": 2.8407, "step": 497000 },
    { "epoch": 13.39, "learning_rate": 0.0003457880396618073, "loss": 2.8428, "step": 498000 },
    { "epoch": 13.41, "learning_rate": 0.00034438498918569705, "loss": 2.8405, "step": 499000 },
    { "epoch": 13.44, "learning_rate": 0.00034298053425465576,
"loss": 2.8419, | |
"step": 500000 | |
}, | |
{ | |
"epoch": 13.47, | |
"learning_rate": 0.0003415774837785455, | |
"loss": 2.8469, | |
"step": 501000 | |
}, | |
{ | |
"epoch": 13.49, | |
"learning_rate": 0.0003401730288475043, | |
"loss": 2.8488, | |
"step": 502000 | |
}, | |
{ | |
"epoch": 13.52, | |
"learning_rate": 0.00033876997837139404, | |
"loss": 2.8434, | |
"step": 503000 | |
}, | |
{ | |
"epoch": 13.55, | |
"learning_rate": 0.0003373655234403528, | |
"loss": 2.8472, | |
"step": 504000 | |
}, | |
{ | |
"epoch": 13.57, | |
"learning_rate": 0.0003359610685093115, | |
"loss": 2.8471, | |
"step": 505000 | |
}, | |
{ | |
"epoch": 13.6, | |
"learning_rate": 0.0003345566135782703, | |
"loss": 2.8512, | |
"step": 506000 | |
}, | |
{ | |
"epoch": 13.63, | |
"learning_rate": 0.00033315496755709114, | |
"loss": 2.8477, | |
"step": 507000 | |
}, | |
{ | |
"epoch": 13.66, | |
"learning_rate": 0.0003317505126260498, | |
"loss": 2.8482, | |
"step": 508000 | |
}, | |
{ | |
"epoch": 13.68, | |
"learning_rate": 0.00033034605769500857, | |
"loss": 2.8487, | |
"step": 509000 | |
}, | |
{ | |
"epoch": 13.71, | |
"learning_rate": 0.0003289416027639673, | |
"loss": 2.8496, | |
"step": 510000 | |
}, | |
{ | |
"epoch": 13.74, | |
"learning_rate": 0.0003275385522878571, | |
"loss": 2.8543, | |
"step": 511000 | |
}, | |
{ | |
"epoch": 13.76, | |
"learning_rate": 0.0003261355018117469, | |
"loss": 2.851, | |
"step": 512000 | |
}, | |
{ | |
"epoch": 13.79, | |
"learning_rate": 0.00032473104688070557, | |
"loss": 2.853, | |
"step": 513000 | |
}, | |
{ | |
"epoch": 13.82, | |
"learning_rate": 0.0003233279964045954, | |
"loss": 2.8558, | |
"step": 514000 | |
}, | |
{ | |
"epoch": 13.84, | |
"learning_rate": 0.00032192354147355415, | |
"loss": 2.8448, | |
"step": 515000 | |
}, | |
{ | |
"epoch": 13.87, | |
"learning_rate": 0.00032051908654251286, | |
"loss": 2.8517, | |
"step": 516000 | |
}, | |
{ | |
"epoch": 13.9, | |
"learning_rate": 0.0003191146316114716, | |
"loss": 2.8543, | |
"step": 517000 | |
}, | |
{ | |
"epoch": 13.92, | |
"learning_rate": 0.0003177115811353614, | |
"loss": 2.8519, | |
"step": 518000 | |
}, | |
{ | |
"epoch": 13.95, | |
"learning_rate": 0.0003163071262043201, | |
"loss": 2.8479, | |
"step": 519000 | |
}, | |
{ | |
"epoch": 13.98, | |
"learning_rate": 0.00031490267127327887, | |
"loss": 2.8519, | |
"step": 520000 | |
}, | |
{ | |
"epoch": 14.0, | |
"eval_accuracy": 0.41285833673710687, | |
"eval_loss": 3.369619846343994, | |
"eval_runtime": 147.964, | |
"eval_samples_per_second": 391.446, | |
"eval_steps_per_second": 6.116, | |
"step": 520814 | |
}, | |
{ | |
"epoch": 14.0, | |
"learning_rate": 0.0003134982163422376, | |
"loss": 2.8511, | |
"step": 521000 | |
}, | |
{ | |
"epoch": 14.03, | |
"learning_rate": 0.00031209376141119635, | |
"loss": 2.802, | |
"step": 522000 | |
}, | |
{ | |
"epoch": 14.06, | |
"learning_rate": 0.0003106907109350861, | |
"loss": 2.7993, | |
"step": 523000 | |
}, | |
{ | |
"epoch": 14.09, | |
"learning_rate": 0.0003092862560040449, | |
"loss": 2.8106, | |
"step": 524000 | |
}, | |
{ | |
"epoch": 14.11, | |
"learning_rate": 0.0003078818010730036, | |
"loss": 2.8026, | |
"step": 525000 | |
}, | |
{ | |
"epoch": 14.14, | |
"learning_rate": 0.00030647875059689334, | |
"loss": 2.8074, | |
"step": 526000 | |
}, | |
{ | |
"epoch": 14.17, | |
"learning_rate": 0.0003050742956658521, | |
"loss": 2.8076, | |
"step": 527000 | |
}, | |
{ | |
"epoch": 14.19, | |
"learning_rate": 0.00030367124518974187, | |
"loss": 2.8154, | |
"step": 528000 | |
}, | |
{ | |
"epoch": 14.22, | |
"learning_rate": 0.00030226679025870064, | |
"loss": 2.8121, | |
"step": 529000 | |
}, | |
{ | |
"epoch": 14.25, | |
"learning_rate": 0.0003008637397825904, | |
"loss": 2.816, | |
"step": 530000 | |
}, | |
{ | |
"epoch": 14.27, | |
"learning_rate": 0.0002994592848515491, | |
"loss": 2.8158, | |
"step": 531000 | |
}, | |
{ | |
"epoch": 14.3, | |
"learning_rate": 0.00029805623437543886, | |
"loss": 2.8182, | |
"step": 532000 | |
}, | |
{ | |
"epoch": 14.33, | |
"learning_rate": 0.0002966531838993287, | |
"loss": 2.8169, | |
"step": 533000 | |
}, | |
{ | |
"epoch": 14.35, | |
"learning_rate": 0.00029524872896828744, | |
"loss": 2.8197, | |
"step": 534000 | |
}, | |
{ | |
"epoch": 14.38, | |
"learning_rate": 0.00029384427403724616, | |
"loss": 2.818, | |
"step": 535000 | |
}, | |
{ | |
"epoch": 14.41, | |
"learning_rate": 0.00029243981910620487, | |
"loss": 2.821, | |
"step": 536000 | |
}, | |
{ | |
"epoch": 14.44, | |
"learning_rate": 0.0002910367686300946, | |
"loss": 2.8227, | |
"step": 537000 | |
}, | |
{ | |
"epoch": 14.46, | |
"learning_rate": 0.0002896323136990534, | |
"loss": 2.8222, | |
"step": 538000 | |
}, | |
{ | |
"epoch": 14.49, | |
"learning_rate": 0.0002882292632229432, | |
"loss": 2.8308, | |
"step": 539000 | |
}, | |
{ | |
"epoch": 14.52, | |
"learning_rate": 0.0002868248082919019, | |
"loss": 2.8315, | |
"step": 540000 | |
}, | |
{ | |
"epoch": 14.54, | |
"learning_rate": 0.00028542035336086063, | |
"loss": 2.8244, | |
"step": 541000 | |
}, | |
{ | |
"epoch": 14.57, | |
"learning_rate": 0.0002840173028847504, | |
"loss": 2.8245, | |
"step": 542000 | |
}, | |
{ | |
"epoch": 14.6, | |
"learning_rate": 0.00028261284795370916, | |
"loss": 2.8289, | |
"step": 543000 | |
}, | |
{ | |
"epoch": 14.62, | |
"learning_rate": 0.00028120979747759897, | |
"loss": 2.8252, | |
"step": 544000 | |
}, | |
{ | |
"epoch": 14.65, | |
"learning_rate": 0.0002798053425465577, | |
"loss": 2.8265, | |
"step": 545000 | |
}, | |
{ | |
"epoch": 14.68, | |
"learning_rate": 0.0002784022920704475, | |
"loss": 2.8309, | |
"step": 546000 | |
}, | |
{ | |
"epoch": 14.7, | |
"learning_rate": 0.00027699924159433725, | |
"loss": 2.8286, | |
"step": 547000 | |
}, | |
{ | |
"epoch": 14.73, | |
"learning_rate": 0.00027559478666329596, | |
"loss": 2.8289, | |
"step": 548000 | |
}, | |
{ | |
"epoch": 14.76, | |
"learning_rate": 0.00027419033173225473, | |
"loss": 2.8297, | |
"step": 549000 | |
}, | |
{ | |
"epoch": 14.78, | |
"learning_rate": 0.00027278587680121345, | |
"loss": 2.8295, | |
"step": 550000 | |
}, | |
{ | |
"epoch": 14.81, | |
"learning_rate": 0.00027138282632510326, | |
"loss": 2.8369, | |
"step": 551000 | |
}, | |
{ | |
"epoch": 14.84, | |
"learning_rate": 0.000269979775848993, | |
"loss": 2.8354, | |
"step": 552000 | |
}, | |
{ | |
"epoch": 14.87, | |
"learning_rate": 0.00026857532091795173, | |
"loss": 2.8305, | |
"step": 553000 | |
}, | |
{ | |
"epoch": 14.89, | |
"learning_rate": 0.0002671708659869105, | |
"loss": 2.8355, | |
"step": 554000 | |
}, | |
{ | |
"epoch": 14.92, | |
"learning_rate": 0.00026576641105586926, | |
"loss": 2.8353, | |
"step": 555000 | |
}, | |
{ | |
"epoch": 14.95, | |
"learning_rate": 0.000264363360579759, | |
"loss": 2.8427, | |
"step": 556000 | |
}, | |
{ | |
"epoch": 14.97, | |
"learning_rate": 0.00026295890564871773, | |
"loss": 2.8361, | |
"step": 557000 | |
}, | |
{ | |
"epoch": 15.0, | |
"learning_rate": 0.00026155445071767645, | |
"loss": 2.8395, | |
"step": 558000 | |
}, | |
{ | |
"epoch": 15.0, | |
"eval_accuracy": 0.41281084066040374, | |
"eval_loss": 3.3729231357574463, | |
"eval_runtime": 147.9664, | |
"eval_samples_per_second": 391.44, | |
"eval_steps_per_second": 6.116, | |
"step": 558015 | |
}, | |
{ | |
"epoch": 15.03, | |
"learning_rate": 0.00026015140024156626, | |
"loss": 2.7847, | |
"step": 559000 | |
}, | |
{ | |
"epoch": 15.05, | |
"learning_rate": 0.000258746945310525, | |
"loss": 2.7891, | |
"step": 560000 | |
}, | |
{ | |
"epoch": 15.08, | |
"learning_rate": 0.0002573424903794837, | |
"loss": 2.788, | |
"step": 561000 | |
}, | |
{ | |
"epoch": 15.11, | |
"learning_rate": 0.0002559394399033735, | |
"loss": 2.7885, | |
"step": 562000 | |
}, | |
{ | |
"epoch": 15.13, | |
"learning_rate": 0.00025453498497233227, | |
"loss": 2.7939, | |
"step": 563000 | |
}, | |
{ | |
"epoch": 15.16, | |
"learning_rate": 0.000253133338951153, | |
"loss": 2.7933, | |
"step": 564000 | |
}, | |
{ | |
"epoch": 15.19, | |
"learning_rate": 0.0002517288840201118, | |
"loss": 2.7946, | |
"step": 565000 | |
}, | |
{ | |
"epoch": 15.21, | |
"learning_rate": 0.00025032442908907055, | |
"loss": 2.7977, | |
"step": 566000 | |
}, | |
{ | |
"epoch": 15.24, | |
"learning_rate": 0.0002489213786129603, | |
"loss": 2.7946, | |
"step": 567000 | |
}, | |
{ | |
"epoch": 15.27, | |
"learning_rate": 0.00024751692368191907, | |
"loss": 2.7985, | |
"step": 568000 | |
}, | |
{ | |
"epoch": 15.3, | |
"learning_rate": 0.0002461124687508778, | |
"loss": 2.7984, | |
"step": 569000 | |
}, | |
{ | |
"epoch": 15.32, | |
"learning_rate": 0.00024470801381983655, | |
"loss": 2.7972, | |
"step": 570000 | |
}, | |
{ | |
"epoch": 15.35, | |
"learning_rate": 0.0002433049633437263, | |
"loss": 2.7978, | |
"step": 571000 | |
}, | |
{ | |
"epoch": 15.38, | |
"learning_rate": 0.00024190050841268505, | |
"loss": 2.8039, | |
"step": 572000 | |
}, | |
{ | |
"epoch": 15.4, | |
"learning_rate": 0.00024049605348164377, | |
"loss": 2.8002, | |
"step": 573000 | |
}, | |
{ | |
"epoch": 15.43, | |
"learning_rate": 0.00023909300300553355, | |
"loss": 2.8051, | |
"step": 574000 | |
}, | |
{ | |
"epoch": 15.46, | |
"learning_rate": 0.00023768854807449232, | |
"loss": 2.8069, | |
"step": 575000 | |
}, | |
{ | |
"epoch": 15.48, | |
"learning_rate": 0.00023628409314345103, | |
"loss": 2.8039, | |
"step": 576000 | |
}, | |
{ | |
"epoch": 15.51, | |
"learning_rate": 0.00023488104266734081, | |
"loss": 2.8068, | |
"step": 577000 | |
}, | |
{ | |
"epoch": 15.54, | |
"learning_rate": 0.00023347658773629953, | |
"loss": 2.8093, | |
"step": 578000 | |
}, | |
{ | |
"epoch": 15.56, | |
"learning_rate": 0.0002320735372601893, | |
"loss": 2.8067, | |
"step": 579000 | |
}, | |
{ | |
"epoch": 15.59, | |
"learning_rate": 0.00023066908232914808, | |
"loss": 2.8073, | |
"step": 580000 | |
}, | |
{ | |
"epoch": 15.62, | |
"learning_rate": 0.0002292646273981068, | |
"loss": 2.8129, | |
"step": 581000 | |
}, | |
{ | |
"epoch": 15.64, | |
"learning_rate": 0.00022786017246706554, | |
"loss": 2.8102, | |
"step": 582000 | |
}, | |
{ | |
"epoch": 15.67, | |
"learning_rate": 0.00022645712199095532, | |
"loss": 2.812, | |
"step": 583000 | |
}, | |
{ | |
"epoch": 15.7, | |
"learning_rate": 0.00022505266705991406, | |
"loss": 2.8093, | |
"step": 584000 | |
}, | |
{ | |
"epoch": 15.73, | |
"learning_rate": 0.00022364961658380384, | |
"loss": 2.8139, | |
"step": 585000 | |
}, | |
{ | |
"epoch": 15.75, | |
"learning_rate": 0.00022224516165276256, | |
"loss": 2.8115, | |
"step": 586000 | |
}, | |
{ | |
"epoch": 15.78, | |
"learning_rate": 0.0002208407067217213, | |
"loss": 2.8157, | |
"step": 587000 | |
}, | |
{ | |
"epoch": 15.81, | |
"learning_rate": 0.00021943765624561108, | |
"loss": 2.8138, | |
"step": 588000 | |
}, | |
{ | |
"epoch": 15.83, | |
"learning_rate": 0.00021803320131456982, | |
"loss": 2.8146, | |
"step": 589000 | |
}, | |
{ | |
"epoch": 15.86, | |
"learning_rate": 0.0002166301508384596, | |
"loss": 2.8138, | |
"step": 590000 | |
}, | |
{ | |
"epoch": 15.89, | |
"learning_rate": 0.00021522569590741835, | |
"loss": 2.8195, | |
"step": 591000 | |
}, | |
{ | |
"epoch": 15.91, | |
"learning_rate": 0.0002138226454313081, | |
"loss": 2.8192, | |
"step": 592000 | |
}, | |
{ | |
"epoch": 15.94, | |
"learning_rate": 0.00021241819050026685, | |
"loss": 2.8169, | |
"step": 593000 | |
}, | |
{ | |
"epoch": 15.97, | |
"learning_rate": 0.00021101373556922559, | |
"loss": 2.8174, | |
"step": 594000 | |
}, | |
{ | |
"epoch": 15.99, | |
"learning_rate": 0.00020961068509311537, | |
"loss": 2.8151, | |
"step": 595000 | |
}, | |
{ | |
"epoch": 16.0, | |
"eval_accuracy": 0.4140438576219447, | |
"eval_loss": 3.3717539310455322, | |
"eval_runtime": 148.2583, | |
"eval_samples_per_second": 390.669, | |
"eval_steps_per_second": 6.104, | |
"step": 595216 | |
}, | |
{ | |
"epoch": 16.02, | |
"learning_rate": 0.0002082062301620741, | |
"loss": 2.7799, | |
"step": 596000 | |
}, | |
{ | |
"epoch": 16.05, | |
"learning_rate": 0.00020680177523103282, | |
"loss": 2.7671, | |
"step": 597000 | |
}, | |
{ | |
"epoch": 16.07, | |
"learning_rate": 0.0002053987247549226, | |
"loss": 2.772, | |
"step": 598000 | |
}, | |
{ | |
"epoch": 16.1, | |
"learning_rate": 0.00020399426982388135, | |
"loss": 2.7732, | |
"step": 599000 | |
}, | |
{ | |
"epoch": 16.13, | |
"learning_rate": 0.00020259121934777113, | |
"loss": 2.7791, | |
"step": 600000 | |
}, | |
{ | |
"epoch": 16.16, | |
"learning_rate": 0.00020118676441672987, | |
"loss": 2.7742, | |
"step": 601000 | |
}, | |
{ | |
"epoch": 16.18, | |
"learning_rate": 0.0001997823094856886, | |
"loss": 2.7786, | |
"step": 602000 | |
}, | |
{ | |
"epoch": 16.21, | |
"learning_rate": 0.0001983792590095784, | |
"loss": 2.7834, | |
"step": 603000 | |
}, | |
{ | |
"epoch": 16.24, | |
"learning_rate": 0.00019697620853346818, | |
"loss": 2.7824, | |
"step": 604000 | |
}, | |
{ | |
"epoch": 16.26, | |
"learning_rate": 0.0001955717536024269, | |
"loss": 2.7857, | |
"step": 605000 | |
}, | |
{ | |
"epoch": 16.29, | |
"learning_rate": 0.00019416729867138564, | |
"loss": 2.7824, | |
"step": 606000 | |
}, | |
{ | |
"epoch": 16.32, | |
"learning_rate": 0.00019276284374034438, | |
"loss": 2.7849, | |
"step": 607000 | |
}, | |
{ | |
"epoch": 16.34, | |
"learning_rate": 0.00019135838880930312, | |
"loss": 2.7853, | |
"step": 608000 | |
}, | |
{ | |
"epoch": 16.37, | |
"learning_rate": 0.0001899553383331929, | |
"loss": 2.7886, | |
"step": 609000 | |
}, | |
{ | |
"epoch": 16.4, | |
"learning_rate": 0.00018855088340215162, | |
"loss": 2.7843, | |
"step": 610000 | |
}, | |
{ | |
"epoch": 16.42, | |
"learning_rate": 0.0001871478329260414, | |
"loss": 2.7929, | |
"step": 611000 | |
}, | |
{ | |
"epoch": 16.45, | |
"learning_rate": 0.00018574337799500017, | |
"loss": 2.7879, | |
"step": 612000 | |
}, | |
{ | |
"epoch": 16.48, | |
"learning_rate": 0.00018434032751888993, | |
"loss": 2.7893, | |
"step": 613000 | |
}, | |
{ | |
"epoch": 16.5, | |
"learning_rate": 0.0001829372770427797, | |
"loss": 2.791, | |
"step": 614000 | |
}, | |
{ | |
"epoch": 16.53, | |
"learning_rate": 0.00018153282211173845, | |
"loss": 2.7879, | |
"step": 615000 | |
}, | |
{ | |
"epoch": 16.56, | |
"learning_rate": 0.0001801297716356282, | |
"loss": 2.7904, | |
"step": 616000 | |
}, | |
{ | |
"epoch": 16.59, | |
"learning_rate": 0.00017872531670458695, | |
"loss": 2.7892, | |
"step": 617000 | |
}, | |
{ | |
"epoch": 16.61, | |
"learning_rate": 0.0001773208617735457, | |
"loss": 2.7929, | |
"step": 618000 | |
}, | |
{ | |
"epoch": 16.64, | |
"learning_rate": 0.00017591640684250443, | |
"loss": 2.7952, | |
"step": 619000 | |
}, | |
{ | |
"epoch": 16.67, | |
"learning_rate": 0.00017451195191146317, | |
"loss": 2.7835, | |
"step": 620000 | |
}, | |
{ | |
"epoch": 16.69, | |
"learning_rate": 0.00017311030589028397, | |
"loss": 2.793, | |
"step": 621000 | |
}, | |
{ | |
"epoch": 16.72, | |
"learning_rate": 0.0001717058509592427, | |
"loss": 2.7948, | |
"step": 622000 | |
}, | |
{ | |
"epoch": 16.75, | |
"learning_rate": 0.00017030139602820148, | |
"loss": 2.7917, | |
"step": 623000 | |
}, | |
{ | |
"epoch": 16.77, | |
"learning_rate": 0.0001688969410971602, | |
"loss": 2.7938, | |
"step": 624000 | |
}, | |
{ | |
"epoch": 16.8, | |
"learning_rate": 0.00016749248616611893, | |
"loss": 2.7978, | |
"step": 625000 | |
}, | |
{ | |
"epoch": 16.83, | |
"learning_rate": 0.0001660894356900087, | |
"loss": 2.7945, | |
"step": 626000 | |
}, | |
{ | |
"epoch": 16.85, | |
"learning_rate": 0.00016468498075896746, | |
"loss": 2.7943, | |
"step": 627000 | |
}, | |
{ | |
"epoch": 16.88, | |
"learning_rate": 0.00016328193028285724, | |
"loss": 2.7918, | |
"step": 628000 | |
}, | |
{ | |
"epoch": 16.91, | |
"learning_rate": 0.00016187747535181596, | |
"loss": 2.7988, | |
"step": 629000 | |
}, | |
{ | |
"epoch": 16.94, | |
"learning_rate": 0.00016047442487570574, | |
"loss": 2.7968, | |
"step": 630000 | |
}, | |
{ | |
"epoch": 16.96, | |
"learning_rate": 0.00015906996994466445, | |
"loss": 2.7929, | |
"step": 631000 | |
}, | |
{ | |
"epoch": 16.99, | |
"learning_rate": 0.00015766551501362322, | |
"loss": 2.798, | |
"step": 632000 | |
}, | |
{ | |
"epoch": 17.0, | |
"eval_accuracy": 0.41277355590429304, | |
"eval_loss": 3.385791063308716, | |
"eval_runtime": 147.8055, | |
"eval_samples_per_second": 391.866, | |
"eval_steps_per_second": 6.123, | |
"step": 632417 | |
}, | |
{ | |
"epoch": 17.02, | |
"learning_rate": 0.000156262464537513, | |
"loss": 2.7792, | |
"step": 633000 | |
}, | |
{ | |
"epoch": 17.04, | |
"learning_rate": 0.00015485800960647172, | |
"loss": 2.7608, | |
"step": 634000 | |
}, | |
{ | |
"epoch": 17.07, | |
"learning_rate": 0.0001534549591303615, | |
"loss": 2.7645, | |
"step": 635000 | |
}, | |
{ | |
"epoch": 17.1, | |
"learning_rate": 0.00015205050419932024, | |
"loss": 2.7656, | |
"step": 636000 | |
}, | |
{ | |
"epoch": 17.12, | |
"learning_rate": 0.00015064604926827898, | |
"loss": 2.7604, | |
"step": 637000 | |
}, | |
{ | |
"epoch": 17.15, | |
"learning_rate": 0.00014924299879216877, | |
"loss": 2.7683, | |
"step": 638000 | |
}, | |
{ | |
"epoch": 17.18, | |
"learning_rate": 0.00014783854386112748, | |
"loss": 2.7652, | |
"step": 639000 | |
}, | |
{ | |
"epoch": 17.2, | |
"learning_rate": 0.00014643408893008622, | |
"loss": 2.7589, | |
"step": 640000 | |
}, | |
{ | |
"epoch": 17.23, | |
"learning_rate": 0.000145029633999045, | |
"loss": 2.7709, | |
"step": 641000 | |
}, | |
{ | |
"epoch": 17.26, | |
"learning_rate": 0.00014362658352293475, | |
"loss": 2.7638, | |
"step": 642000 | |
}, | |
{ | |
"epoch": 17.28, | |
"learning_rate": 0.00014222353304682453, | |
"loss": 2.7667, | |
"step": 643000 | |
}, | |
{ | |
"epoch": 17.31, | |
"learning_rate": 0.00014081907811578327, | |
"loss": 2.7637, | |
"step": 644000 | |
}, | |
{ | |
"epoch": 17.34, | |
"learning_rate": 0.00013941462318474201, | |
"loss": 2.7752, | |
"step": 645000 | |
}, | |
{ | |
"epoch": 17.37, | |
"learning_rate": 0.0001380115727086318, | |
"loss": 2.7716, | |
"step": 646000 | |
}, | |
{ | |
"epoch": 17.39, | |
"learning_rate": 0.0001366071177775905, | |
"loss": 2.7665, | |
"step": 647000 | |
}, | |
{ | |
"epoch": 17.42, | |
"learning_rate": 0.00013520266284654925, | |
"loss": 2.7669, | |
"step": 648000 | |
}, | |
{ | |
"epoch": 17.45, | |
"learning_rate": 0.00013379820791550802, | |
"loss": 2.7743, | |
"step": 649000 | |
}, | |
{ | |
"epoch": 17.47, | |
"learning_rate": 0.00013239375298446673, | |
"loss": 2.7733, | |
"step": 650000 | |
}, | |
{ | |
"epoch": 17.5, | |
"learning_rate": 0.00013099070250835652, | |
"loss": 2.7713, | |
"step": 651000 | |
}, | |
{ | |
"epoch": 17.53, | |
"learning_rate": 0.0001295876520322463, | |
"loss": 2.7694, | |
"step": 652000 | |
}, | |
{ | |
"epoch": 17.55, | |
"learning_rate": 0.00012818319710120502, | |
"loss": 2.767, | |
"step": 653000 | |
}, | |
{ | |
"epoch": 17.58, | |
"learning_rate": 0.00012677874217016378, | |
"loss": 2.7736, | |
"step": 654000 | |
}, | |
{ | |
"epoch": 17.61, | |
"learning_rate": 0.0001253742872391225, | |
"loss": 2.7743, | |
"step": 655000 | |
}, | |
{ | |
"epoch": 17.63, | |
"learning_rate": 0.00012396983230808124, | |
"loss": 2.7739, | |
"step": 656000 | |
}, | |
{ | |
"epoch": 17.66, | |
"learning_rate": 0.000122566781831971, | |
"loss": 2.7722, | |
"step": 657000 | |
}, | |
{ | |
"epoch": 17.69, | |
"learning_rate": 0.00012116232690092975, | |
"loss": 2.776, | |
"step": 658000 | |
}, | |
{ | |
"epoch": 17.71, | |
"learning_rate": 0.00011975787196988849, | |
"loss": 2.7807, | |
"step": 659000 | |
}, | |
{ | |
"epoch": 17.74, | |
"learning_rate": 0.00011835482149377827, | |
"loss": 2.7719, | |
"step": 660000 | |
}, | |
{ | |
"epoch": 17.77, | |
"learning_rate": 0.000116950366562737, | |
"loss": 2.7747, | |
"step": 661000 | |
}, | |
{ | |
"epoch": 17.8, | |
"learning_rate": 0.00011554731608662679, | |
"loss": 2.7782, | |
"step": 662000 | |
}, | |
{ | |
"epoch": 17.82, | |
"learning_rate": 0.00011414286115558551, | |
"loss": 2.7738, | |
"step": 663000 | |
}, | |
{ | |
"epoch": 17.85, | |
"learning_rate": 0.0001127398106794753, | |
"loss": 2.7756, | |
"step": 664000 | |
}, | |
{ | |
"epoch": 17.88, | |
"learning_rate": 0.00011133535574843404, | |
"loss": 2.7715, | |
"step": 665000 | |
}, | |
{ | |
"epoch": 17.9, | |
"learning_rate": 0.00010993090081739278, | |
"loss": 2.7809, | |
"step": 666000 | |
}, | |
{ | |
"epoch": 17.93, | |
"learning_rate": 0.00010852785034128255, | |
"loss": 2.7813, | |
"step": 667000 | |
}, | |
{ | |
"epoch": 17.96, | |
"learning_rate": 0.00010712339541024129, | |
"loss": 2.7748, | |
"step": 668000 | |
}, | |
{ | |
"epoch": 17.98, | |
"learning_rate": 0.00010571894047920003, | |
"loss": 2.7738, | |
"step": 669000 | |
}, | |
{ | |
"epoch": 18.0, | |
"eval_accuracy": 0.41297422178988324, | |
"eval_loss": 3.407961368560791, | |
"eval_runtime": 148.1896, | |
"eval_samples_per_second": 390.851, | |
"eval_steps_per_second": 6.107, | |
"step": 669618 | |
}, | |
{ | |
"epoch": 18.01, | |
"learning_rate": 0.0001043158900030898, | |
"loss": 2.7651, | |
"step": 670000 | |
}, | |
{ | |
"epoch": 18.04, | |
"learning_rate": 0.00010291143507204854, | |
"loss": 2.7487, | |
"step": 671000 | |
}, | |
{ | |
"epoch": 18.06, | |
"learning_rate": 0.00010150838459593833, | |
"loss": 2.7489, | |
"step": 672000 | |
}, | |
{ | |
"epoch": 18.09, | |
"learning_rate": 0.00010010392966489705, | |
"loss": 2.7467, | |
"step": 673000 | |
}, | |
{ | |
"epoch": 18.12, | |
"learning_rate": 9.870087918878684e-05, | |
"loss": 2.7491, | |
"step": 674000 | |
}, | |
{ | |
"epoch": 18.14, | |
"learning_rate": 9.729642425774556e-05, | |
"loss": 2.7511, | |
"step": 675000 | |
}, | |
{ | |
"epoch": 18.17, | |
"learning_rate": 9.58919693267043e-05, | |
"loss": 2.745, | |
"step": 676000 | |
}, | |
{ | |
"epoch": 18.2, | |
"learning_rate": 9.448891885059409e-05, | |
"loss": 2.7478, | |
"step": 677000 | |
}, | |
{ | |
"epoch": 18.23, | |
"learning_rate": 9.308446391955282e-05, | |
"loss": 2.755, | |
"step": 678000 | |
}, | |
{ | |
"epoch": 18.25, | |
"learning_rate": 9.168000898851156e-05, | |
"loss": 2.7538, | |
"step": 679000 | |
}, | |
{ | |
"epoch": 18.28, | |
"learning_rate": 9.027695851240134e-05, | |
"loss": 2.755, | |
"step": 680000 | |
}, | |
{ | |
"epoch": 18.31, | |
"learning_rate": 8.887250358136008e-05, | |
"loss": 2.7577, | |
"step": 681000 | |
}, | |
{ | |
"epoch": 18.33, | |
"learning_rate": 8.746945310524985e-05, | |
"loss": 2.7505, | |
"step": 682000 | |
}, | |
{ | |
"epoch": 18.36, | |
"learning_rate": 8.60649981742086e-05, | |
"loss": 2.7591, | |
"step": 683000 | |
}, | |
{ | |
"epoch": 18.39, | |
"learning_rate": 8.466194769809838e-05, | |
"loss": 2.7601, | |
"step": 684000 | |
}, | |
{ | |
"epoch": 18.41, | |
"learning_rate": 8.32574927670571e-05, | |
"loss": 2.7567, | |
"step": 685000 | |
}, | |
{ | |
"epoch": 18.44, | |
"learning_rate": 8.185444229094687e-05, | |
"loss": 2.7547, | |
"step": 686000 | |
}, | |
{ | |
"epoch": 18.47, | |
"learning_rate": 8.044998735990562e-05, | |
"loss": 2.7584, | |
"step": 687000 | |
}, | |
{ | |
"epoch": 18.49, | |
"learning_rate": 7.904553242886437e-05, | |
"loss": 2.7554, | |
"step": 688000 | |
}, | |
{ | |
"epoch": 18.52, | |
"learning_rate": 7.764388640768517e-05, | |
"loss": 2.756, | |
"step": 689000 | |
}, | |
{ | |
"epoch": 18.55, | |
"learning_rate": 7.623943147664391e-05, | |
"loss": 2.7581, | |
"step": 690000 | |
}, | |
{ | |
"epoch": 18.57, | |
"learning_rate": 7.483497654560266e-05, | |
"loss": 2.7593, | |
"step": 691000 | |
}, | |
{ | |
"epoch": 18.6, | |
"learning_rate": 7.343192606949243e-05, | |
"loss": 2.7549, | |
"step": 692000 | |
}, | |
{ | |
"epoch": 18.63, | |
"learning_rate": 7.202747113845116e-05, | |
"loss": 2.761, | |
"step": 693000 | |
}, | |
{ | |
"epoch": 18.66, | |
"learning_rate": 7.06230162074099e-05, | |
"loss": 2.7556, | |
"step": 694000 | |
}, | |
{ | |
"epoch": 18.68, | |
"learning_rate": 6.921856127636864e-05, | |
"loss": 2.7513, | |
"step": 695000 | |
}, | |
{ | |
"epoch": 18.71, | |
"learning_rate": 6.781410634532737e-05, | |
"loss": 2.7577, | |
"step": 696000 | |
}, | |
{ | |
"epoch": 18.74, | |
"learning_rate": 6.641105586921716e-05, | |
"loss": 2.759, | |
"step": 697000 | |
}, | |
{ | |
"epoch": 18.76, | |
"learning_rate": 6.50066009381759e-05, | |
"loss": 2.7603, | |
"step": 698000 | |
}, | |
{ | |
"epoch": 18.79, | |
"learning_rate": 6.360355046206568e-05, | |
"loss": 2.7598, | |
"step": 699000 | |
}, | |
{ | |
"epoch": 18.82, | |
"learning_rate": 6.219909553102441e-05, | |
"loss": 2.7545, | |
"step": 700000 | |
}, | |
{ | |
"epoch": 18.84, | |
"learning_rate": 6.079464059998314e-05, | |
"loss": 2.7603, | |
"step": 701000 | |
}, | |
{ | |
"epoch": 18.87, | |
"learning_rate": 5.939018566894189e-05, | |
"loss": 2.7558, | |
"step": 702000 | |
}, | |
{ | |
"epoch": 18.9, | |
"learning_rate": 5.798713519283167e-05, | |
"loss": 2.7559, | |
"step": 703000 | |
}, | |
{ | |
"epoch": 18.92, | |
"learning_rate": 5.65826802617904e-05, | |
"loss": 2.7584, | |
"step": 704000 | |
}, | |
{ | |
"epoch": 18.95, | |
"learning_rate": 5.5178225330749135e-05, | |
"loss": 2.7614, | |
"step": 705000 | |
}, | |
{ | |
"epoch": 18.98, | |
"learning_rate": 5.377517485463892e-05, | |
"loss": 2.7555, | |
"step": 706000 | |
}, | |
{ | |
"epoch": 19.0, | |
"eval_accuracy": 0.41307418524410433, | |
"eval_loss": 3.4066617488861084, | |
"eval_runtime": 148.4411, | |
"eval_samples_per_second": 390.189, | |
"eval_steps_per_second": 6.097, | |
"step": 706819 | |
}, | |
{ | |
"epoch": 19.0, | |
"learning_rate": 5.237071992359765e-05, | |
"loss": 2.7536, | |
"step": 707000 | |
}, | |
{ | |
"epoch": 19.03, | |
"learning_rate": 5.096766944748743e-05, | |
"loss": 2.7341, | |
"step": 708000 | |
}, | |
{ | |
"epoch": 19.06, | |
"learning_rate": 4.956461897137721e-05, | |
"loss": 2.7413, | |
"step": 709000 | |
}, | |
{ | |
"epoch": 19.09, | |
"learning_rate": 4.816016404033595e-05, | |
"loss": 2.7406, | |
"step": 710000 | |
}, | |
{ | |
"epoch": 19.11, | |
"learning_rate": 4.675570910929468e-05, | |
"loss": 2.7417, | |
"step": 711000 | |
}, | |
{ | |
"epoch": 19.14, | |
"learning_rate": 4.535265863318446e-05, | |
"loss": 2.7401, | |
"step": 712000 | |
}, | |
{ | |
"epoch": 19.17, | |
"learning_rate": 4.39482037021432e-05, | |
"loss": 2.741, | |
"step": 713000 | |
}, | |
{ | |
"epoch": 19.19, | |
"learning_rate": 4.254374877110194e-05, | |
"loss": 2.7379, | |
"step": 714000 | |
}, | |
{ | |
"epoch": 19.22, | |
"learning_rate": 4.1139293840060675e-05, | |
"loss": 2.7369, | |
"step": 715000 | |
}, | |
{ | |
"epoch": 19.25, | |
"learning_rate": 3.973483890901941e-05, | |
"loss": 2.7397, | |
"step": 716000 | |
}, | |
{ | |
"epoch": 19.27, | |
"learning_rate": 3.8330383977978144e-05, | |
"loss": 2.7431, | |
"step": 717000 | |
}, | |
{ | |
"epoch": 19.3, | |
"learning_rate": 3.692873795679897e-05, | |
"loss": 2.7404, | |
"step": 718000 | |
}, | |
{ | |
"epoch": 19.33, | |
"learning_rate": 3.5524283025757704e-05, | |
"loss": 2.7413, | |
"step": 719000 | |
}, | |
{ | |
"epoch": 19.35, | |
"learning_rate": 3.411982809471644e-05, | |
"loss": 2.7379, | |
"step": 720000 | |
}, | |
{ | |
"epoch": 19.38, | |
"learning_rate": 3.271537316367518e-05, | |
"loss": 2.7436, | |
"step": 721000 | |
}, | |
{ | |
"epoch": 19.41, | |
"learning_rate": 3.1312322687564956e-05, | |
"loss": 2.7363, | |
"step": 722000 | |
}, | |
{ | |
"epoch": 19.43, | |
"learning_rate": 2.9907867756523694e-05, | |
"loss": 2.7387, | |
"step": 723000 | |
}, | |
{ | |
"epoch": 19.46, | |
"learning_rate": 2.850481728041347e-05, | |
"loss": 2.7431, | |
"step": 724000 | |
}, | |
{ | |
"epoch": 19.49, | |
"learning_rate": 2.710036234937221e-05, | |
"loss": 2.7458, | |
"step": 725000 | |
}, | |
{ | |
"epoch": 19.52, | |
"learning_rate": 2.569731187326199e-05, | |
"loss": 2.7419, | |
"step": 726000 | |
}, | |
{ | |
"epoch": 19.54, | |
"learning_rate": 2.4292856942220723e-05, | |
"loss": 2.7415, | |
"step": 727000 | |
}, | |
{ | |
"epoch": 19.57, | |
"learning_rate": 2.2888402011179464e-05, | |
"loss": 2.7392, | |
"step": 728000 | |
}, | |
{ | |
"epoch": 19.6, | |
"learning_rate": 2.1485351535069238e-05, | |
"loss": 2.7467, | |
"step": 729000 | |
}, | |
{ | |
"epoch": 19.62, | |
"learning_rate": 2.008089660402798e-05, | |
"loss": 2.7393, | |
"step": 730000 | |
}, | |
{ | |
"epoch": 19.65, | |
"learning_rate": 1.8677846127917755e-05, | |
"loss": 2.7378, | |
"step": 731000 | |
}, | |
{ | |
"epoch": 19.68, | |
"learning_rate": 1.7273391196876493e-05, | |
"loss": 2.747, | |
"step": 732000 | |
}, | |
{ | |
"epoch": 19.7, | |
"learning_rate": 1.587034072076627e-05, | |
"loss": 2.7419, | |
"step": 733000 | |
}, | |
{ | |
"epoch": 19.73, | |
"learning_rate": 1.4465885789725008e-05, | |
"loss": 2.7466, | |
"step": 734000 | |
}, | |
{ | |
"epoch": 19.76, | |
"learning_rate": 1.3062835313614786e-05, | |
"loss": 2.7354, | |
"step": 735000 | |
}, | |
{ | |
"epoch": 19.78, | |
"learning_rate": 1.1658380382573524e-05, | |
"loss": 2.7457, | |
"step": 736000 | |
}, | |
{ | |
"epoch": 19.81, | |
"learning_rate": 1.025392545153226e-05, | |
"loss": 2.7398, | |
"step": 737000 | |
}, | |
{ | |
"epoch": 19.84, | |
"learning_rate": 8.850874975422038e-06, | |
"loss": 2.7415, | |
"step": 738000 | |
}, | |
{ | |
"epoch": 19.87, | |
"learning_rate": 7.446420044380776e-06, | |
"loss": 2.7381, | |
"step": 739000 | |
}, | |
{ | |
"epoch": 19.89, | |
"learning_rate": 6.043369568270554e-06, | |
"loss": 2.7407, | |
"step": 740000 | |
}, | |
{ | |
"epoch": 19.92, | |
"learning_rate": 4.638914637229291e-06, | |
"loss": 2.7397, | |
"step": 741000 | |
}, | |
{ | |
"epoch": 19.95, | |
"learning_rate": 3.2344597061880285e-06, | |
"loss": 2.7412, | |
"step": 742000 | |
}, | |
{ | |
"epoch": 19.97, | |
"learning_rate": 1.831409230077807e-06, | |
"loss": 2.7409, | |
"step": 743000 | |
}, | |
{ | |
"epoch": 20.0, | |
"learning_rate": 4.2695429903654394e-07, | |
"loss": 2.7434, | |
"step": 744000 | |
}, | |
{ | |
"epoch": 20.0, | |
"eval_accuracy": 0.41252109443859236, | |
"eval_loss": 3.417576313018799, | |
"eval_runtime": 148.878, | |
"eval_samples_per_second": 389.043, | |
"eval_steps_per_second": 6.079, | |
"step": 744020 | |
}, | |
{ | |
"epoch": 20.0, | |
"step": 744020, | |
"total_flos": 1.56740238729216e+18, | |
"train_loss": 2.994195082282441, | |
"train_runtime": 55239.7487, | |
"train_samples_per_second": 215.503, | |
"train_steps_per_second": 13.469 | |
} | |
], | |
"logging_steps": 1000, | |
"max_steps": 744020, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 20, | |
"save_steps": 5000, | |
"total_flos": 1.56740238729216e+18, | |
"train_batch_size": 16, | |
"trial_name": null, | |
"trial_params": null | |
} | |