{
"best_metric": 1.1968414783477783,
"best_model_checkpoint": "./results/models/checkpoint-307584",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 307584,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.0019989596337910945,
"loss": 2.0581,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 0.001997919267582189,
"loss": 1.5829,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 0.0019968789013732834,
"loss": 1.5037,
"step": 1500
},
{
"epoch": 0.1,
"learning_rate": 0.001995838535164378,
"loss": 1.4638,
"step": 2000
},
{
"epoch": 0.13,
"learning_rate": 0.0019947981689554723,
"loss": 1.4347,
"step": 2500
},
{
"epoch": 0.16,
"learning_rate": 0.001993757802746567,
"loss": 1.4135,
"step": 3000
},
{
"epoch": 0.18,
"learning_rate": 0.0019927174365376612,
"loss": 1.3959,
"step": 3500
},
{
"epoch": 0.21,
"learning_rate": 0.0019916770703287557,
"loss": 1.3816,
"step": 4000
},
{
"epoch": 0.23,
"learning_rate": 0.00199063670411985,
"loss": 1.3715,
"step": 4500
},
{
"epoch": 0.26,
"learning_rate": 0.0019895963379109446,
"loss": 1.3634,
"step": 5000
},
{
"epoch": 0.29,
"learning_rate": 0.001988555971702039,
"loss": 1.354,
"step": 5500
},
{
"epoch": 0.31,
"learning_rate": 0.0019875156054931335,
"loss": 1.3475,
"step": 6000
},
{
"epoch": 0.34,
"learning_rate": 0.001986475239284228,
"loss": 1.3392,
"step": 6500
},
{
"epoch": 0.36,
"learning_rate": 0.0019854348730753224,
"loss": 1.3336,
"step": 7000
},
{
"epoch": 0.39,
"learning_rate": 0.001984394506866417,
"loss": 1.3283,
"step": 7500
},
{
"epoch": 0.42,
"learning_rate": 0.0019833541406575114,
"loss": 1.3234,
"step": 8000
},
{
"epoch": 0.44,
"learning_rate": 0.001982313774448606,
"loss": 1.321,
"step": 8500
},
{
"epoch": 0.47,
"learning_rate": 0.0019812734082397003,
"loss": 1.3154,
"step": 9000
},
{
"epoch": 0.49,
"learning_rate": 0.0019802330420307947,
"loss": 1.3126,
"step": 9500
},
{
"epoch": 0.52,
"learning_rate": 0.0019791926758218896,
"loss": 1.3087,
"step": 10000
},
{
"epoch": 0.55,
"learning_rate": 0.001978152309612984,
"loss": 1.3058,
"step": 10500
},
{
"epoch": 0.57,
"learning_rate": 0.001977111943404078,
"loss": 1.3022,
"step": 11000
},
{
"epoch": 0.6,
"learning_rate": 0.0019760715771951726,
"loss": 1.3002,
"step": 11500
},
{
"epoch": 0.62,
"learning_rate": 0.001975031210986267,
"loss": 1.2965,
"step": 12000
},
{
"epoch": 0.65,
"learning_rate": 0.0019739908447773615,
"loss": 1.2948,
"step": 12500
},
{
"epoch": 0.68,
"learning_rate": 0.0019729504785684564,
"loss": 1.2932,
"step": 13000
},
{
"epoch": 0.7,
"learning_rate": 0.001971910112359551,
"loss": 1.289,
"step": 13500
},
{
"epoch": 0.73,
"learning_rate": 0.0019708697461506453,
"loss": 1.2894,
"step": 14000
},
{
"epoch": 0.75,
"learning_rate": 0.0019698293799417393,
"loss": 1.2862,
"step": 14500
},
{
"epoch": 0.78,
"learning_rate": 0.0019687890137328337,
"loss": 1.2827,
"step": 15000
},
{
"epoch": 0.81,
"learning_rate": 0.0019677486475239286,
"loss": 1.2818,
"step": 15500
},
{
"epoch": 0.83,
"learning_rate": 0.001966708281315023,
"loss": 1.2813,
"step": 16000
},
{
"epoch": 0.86,
"learning_rate": 0.0019656679151061176,
"loss": 1.279,
"step": 16500
},
{
"epoch": 0.88,
"learning_rate": 0.001964627548897212,
"loss": 1.276,
"step": 17000
},
{
"epoch": 0.91,
"learning_rate": 0.0019635871826883065,
"loss": 1.2761,
"step": 17500
},
{
"epoch": 0.94,
"learning_rate": 0.0019625468164794005,
"loss": 1.2737,
"step": 18000
},
{
"epoch": 0.96,
"learning_rate": 0.0019615064502704954,
"loss": 1.2729,
"step": 18500
},
{
"epoch": 0.99,
"learning_rate": 0.00196046608406159,
"loss": 1.2708,
"step": 19000
},
{
"epoch": 1.0,
"eval_loss": 1.28058660030365,
"eval_runtime": 0.6137,
"eval_samples_per_second": 1629.563,
"eval_steps_per_second": 3.259,
"step": 19224
},
{
"epoch": 1.01,
"learning_rate": 0.0019594257178526843,
"loss": 1.2682,
"step": 19500
},
{
"epoch": 1.04,
"learning_rate": 0.0019583853516437788,
"loss": 1.2641,
"step": 20000
},
{
"epoch": 1.07,
"learning_rate": 0.001957344985434873,
"loss": 1.2635,
"step": 20500
},
{
"epoch": 1.09,
"learning_rate": 0.0019563046192259677,
"loss": 1.265,
"step": 21000
},
{
"epoch": 1.12,
"learning_rate": 0.001955264253017062,
"loss": 1.2636,
"step": 21500
},
{
"epoch": 1.14,
"learning_rate": 0.0019542238868081566,
"loss": 1.2617,
"step": 22000
},
{
"epoch": 1.17,
"learning_rate": 0.001953183520599251,
"loss": 1.2605,
"step": 22500
},
{
"epoch": 1.2,
"learning_rate": 0.0019521431543903455,
"loss": 1.2585,
"step": 23000
},
{
"epoch": 1.22,
"learning_rate": 0.0019511027881814397,
"loss": 1.2583,
"step": 23500
},
{
"epoch": 1.25,
"learning_rate": 0.0019500624219725344,
"loss": 1.2565,
"step": 24000
},
{
"epoch": 1.27,
"learning_rate": 0.0019490220557636289,
"loss": 1.2558,
"step": 24500
},
{
"epoch": 1.3,
"learning_rate": 0.0019479816895547233,
"loss": 1.2523,
"step": 25000
},
{
"epoch": 1.33,
"learning_rate": 0.0019469413233458178,
"loss": 1.2553,
"step": 25500
},
{
"epoch": 1.35,
"learning_rate": 0.0019459009571369122,
"loss": 1.2539,
"step": 26000
},
{
"epoch": 1.38,
"learning_rate": 0.0019448605909280067,
"loss": 1.2521,
"step": 26500
},
{
"epoch": 1.4,
"learning_rate": 0.0019438202247191011,
"loss": 1.2511,
"step": 27000
},
{
"epoch": 1.43,
"learning_rate": 0.0019427798585101956,
"loss": 1.2517,
"step": 27500
},
{
"epoch": 1.46,
"learning_rate": 0.00194173949230129,
"loss": 1.2497,
"step": 28000
},
{
"epoch": 1.48,
"learning_rate": 0.0019406991260923845,
"loss": 1.2495,
"step": 28500
},
{
"epoch": 1.51,
"learning_rate": 0.001939658759883479,
"loss": 1.2488,
"step": 29000
},
{
"epoch": 1.53,
"learning_rate": 0.0019386183936745737,
"loss": 1.248,
"step": 29500
},
{
"epoch": 1.56,
"learning_rate": 0.001937578027465668,
"loss": 1.2471,
"step": 30000
},
{
"epoch": 1.59,
"learning_rate": 0.0019365376612567623,
"loss": 1.248,
"step": 30500
},
{
"epoch": 1.61,
"learning_rate": 0.0019354972950478568,
"loss": 1.2446,
"step": 31000
},
{
"epoch": 1.64,
"learning_rate": 0.0019344569288389513,
"loss": 1.2447,
"step": 31500
},
{
"epoch": 1.66,
"learning_rate": 0.0019334165626300457,
"loss": 1.2439,
"step": 32000
},
{
"epoch": 1.69,
"learning_rate": 0.0019323761964211404,
"loss": 1.2442,
"step": 32500
},
{
"epoch": 1.72,
"learning_rate": 0.0019313358302122348,
"loss": 1.2444,
"step": 33000
},
{
"epoch": 1.74,
"learning_rate": 0.0019302954640033293,
"loss": 1.2417,
"step": 33500
},
{
"epoch": 1.77,
"learning_rate": 0.0019292550977944235,
"loss": 1.2429,
"step": 34000
},
{
"epoch": 1.79,
"learning_rate": 0.001928214731585518,
"loss": 1.2398,
"step": 34500
},
{
"epoch": 1.82,
"learning_rate": 0.0019271743653766125,
"loss": 1.2419,
"step": 35000
},
{
"epoch": 1.85,
"learning_rate": 0.0019261339991677071,
"loss": 1.2393,
"step": 35500
},
{
"epoch": 1.87,
"learning_rate": 0.0019250936329588016,
"loss": 1.2396,
"step": 36000
},
{
"epoch": 1.9,
"learning_rate": 0.001924053266749896,
"loss": 1.2374,
"step": 36500
},
{
"epoch": 1.92,
"learning_rate": 0.0019230129005409905,
"loss": 1.24,
"step": 37000
},
{
"epoch": 1.95,
"learning_rate": 0.001921972534332085,
"loss": 1.2377,
"step": 37500
},
{
"epoch": 1.98,
"learning_rate": 0.0019209321681231794,
"loss": 1.2387,
"step": 38000
},
{
"epoch": 2.0,
"eval_loss": 1.2544126510620117,
"eval_runtime": 0.5912,
"eval_samples_per_second": 1691.485,
"eval_steps_per_second": 3.383,
"step": 38448
},
{
"epoch": 2.0,
"learning_rate": 0.0019198918019142739,
"loss": 1.2349,
"step": 38500
},
{
"epoch": 2.03,
"learning_rate": 0.0019188514357053683,
"loss": 1.2314,
"step": 39000
},
{
"epoch": 2.05,
"learning_rate": 0.0019178110694964628,
"loss": 1.2342,
"step": 39500
},
{
"epoch": 2.08,
"learning_rate": 0.0019167707032875572,
"loss": 1.2318,
"step": 40000
},
{
"epoch": 2.11,
"learning_rate": 0.0019157303370786517,
"loss": 1.231,
"step": 40500
},
{
"epoch": 2.13,
"learning_rate": 0.0019146899708697464,
"loss": 1.2321,
"step": 41000
},
{
"epoch": 2.16,
"learning_rate": 0.0019136496046608406,
"loss": 1.2315,
"step": 41500
},
{
"epoch": 2.18,
"learning_rate": 0.001912609238451935,
"loss": 1.2308,
"step": 42000
},
{
"epoch": 2.21,
"learning_rate": 0.0019115688722430295,
"loss": 1.2303,
"step": 42500
},
{
"epoch": 2.24,
"learning_rate": 0.001910528506034124,
"loss": 1.2297,
"step": 43000
},
{
"epoch": 2.26,
"learning_rate": 0.0019094881398252184,
"loss": 1.2295,
"step": 43500
},
{
"epoch": 2.29,
"learning_rate": 0.0019084477736163131,
"loss": 1.2271,
"step": 44000
},
{
"epoch": 2.31,
"learning_rate": 0.0019074074074074076,
"loss": 1.2275,
"step": 44500
},
{
"epoch": 2.34,
"learning_rate": 0.0019063670411985018,
"loss": 1.2284,
"step": 45000
},
{
"epoch": 2.37,
"learning_rate": 0.0019053266749895963,
"loss": 1.2327,
"step": 45500
},
{
"epoch": 2.39,
"learning_rate": 0.0019042863087806907,
"loss": 1.2286,
"step": 46000
},
{
"epoch": 2.42,
"learning_rate": 0.0019032459425717854,
"loss": 1.2293,
"step": 46500
},
{
"epoch": 2.44,
"learning_rate": 0.0019022055763628799,
"loss": 1.2263,
"step": 47000
},
{
"epoch": 2.47,
"learning_rate": 0.0019011652101539743,
"loss": 1.2297,
"step": 47500
},
{
"epoch": 2.5,
"learning_rate": 0.0019001248439450688,
"loss": 1.227,
"step": 48000
},
{
"epoch": 2.52,
"learning_rate": 0.001899084477736163,
"loss": 1.2263,
"step": 48500
},
{
"epoch": 2.55,
"learning_rate": 0.0018980441115272575,
"loss": 1.2274,
"step": 49000
},
{
"epoch": 2.57,
"learning_rate": 0.0018970037453183521,
"loss": 1.2271,
"step": 49500
},
{
"epoch": 2.6,
"learning_rate": 0.0018959633791094466,
"loss": 1.2262,
"step": 50000
},
{
"epoch": 2.63,
"learning_rate": 0.001894923012900541,
"loss": 1.2256,
"step": 50500
},
{
"epoch": 2.65,
"learning_rate": 0.0018938826466916355,
"loss": 1.2256,
"step": 51000
},
{
"epoch": 2.68,
"learning_rate": 0.00189284228048273,
"loss": 1.225,
"step": 51500
},
{
"epoch": 2.7,
"learning_rate": 0.0018918019142738244,
"loss": 1.2254,
"step": 52000
},
{
"epoch": 2.73,
"learning_rate": 0.0018907615480649189,
"loss": 1.2249,
"step": 52500
},
{
"epoch": 2.76,
"learning_rate": 0.0018897211818560133,
"loss": 1.2247,
"step": 53000
},
{
"epoch": 2.78,
"learning_rate": 0.0018886808156471078,
"loss": 1.2231,
"step": 53500
},
{
"epoch": 2.81,
"learning_rate": 0.0018876404494382023,
"loss": 1.2226,
"step": 54000
},
{
"epoch": 2.83,
"learning_rate": 0.0018866000832292967,
"loss": 1.2239,
"step": 54500
},
{
"epoch": 2.86,
"learning_rate": 0.0018855597170203914,
"loss": 1.2226,
"step": 55000
},
{
"epoch": 2.89,
"learning_rate": 0.0018845193508114856,
"loss": 1.2234,
"step": 55500
},
{
"epoch": 2.91,
"learning_rate": 0.00188347898460258,
"loss": 1.221,
"step": 56000
},
{
"epoch": 2.94,
"learning_rate": 0.0018824386183936745,
"loss": 1.2227,
"step": 56500
},
{
"epoch": 2.97,
"learning_rate": 0.001881398252184769,
"loss": 1.2199,
"step": 57000
},
{
"epoch": 2.99,
"learning_rate": 0.0018803578859758635,
"loss": 1.2195,
"step": 57500
},
{
"epoch": 3.0,
"eval_loss": 1.233520746231079,
"eval_runtime": 0.6051,
"eval_samples_per_second": 1652.629,
"eval_steps_per_second": 3.305,
"step": 57672
},
{
"epoch": 3.02,
"learning_rate": 0.0018793175197669581,
"loss": 1.2162,
"step": 58000
},
{
"epoch": 3.04,
"learning_rate": 0.0018782771535580526,
"loss": 1.2161,
"step": 58500
},
{
"epoch": 3.07,
"learning_rate": 0.001877236787349147,
"loss": 1.2146,
"step": 59000
},
{
"epoch": 3.1,
"learning_rate": 0.0018761964211402413,
"loss": 1.2163,
"step": 59500
},
{
"epoch": 3.12,
"learning_rate": 0.0018751560549313357,
"loss": 1.2163,
"step": 60000
},
{
"epoch": 3.15,
"learning_rate": 0.0018741156887224304,
"loss": 1.2152,
"step": 60500
},
{
"epoch": 3.17,
"learning_rate": 0.0018730753225135249,
"loss": 1.2164,
"step": 61000
},
{
"epoch": 3.2,
"learning_rate": 0.0018720349563046193,
"loss": 1.2157,
"step": 61500
},
{
"epoch": 3.23,
"learning_rate": 0.0018709945900957138,
"loss": 1.2176,
"step": 62000
},
{
"epoch": 3.25,
"learning_rate": 0.0018699542238868082,
"loss": 1.2168,
"step": 62500
},
{
"epoch": 3.28,
"learning_rate": 0.0018689138576779025,
"loss": 1.2159,
"step": 63000
},
{
"epoch": 3.3,
"learning_rate": 0.0018678734914689972,
"loss": 1.2145,
"step": 63500
},
{
"epoch": 3.33,
"learning_rate": 0.0018668331252600916,
"loss": 1.2148,
"step": 64000
},
{
"epoch": 3.36,
"learning_rate": 0.001865792759051186,
"loss": 1.2151,
"step": 64500
},
{
"epoch": 3.38,
"learning_rate": 0.0018647523928422805,
"loss": 1.2145,
"step": 65000
},
{
"epoch": 3.41,
"learning_rate": 0.001863712026633375,
"loss": 1.216,
"step": 65500
},
{
"epoch": 3.43,
"learning_rate": 0.0018626716604244697,
"loss": 1.2152,
"step": 66000
},
{
"epoch": 3.46,
"learning_rate": 0.001861631294215564,
"loss": 1.2132,
"step": 66500
},
{
"epoch": 3.49,
"learning_rate": 0.0018605909280066584,
"loss": 1.2137,
"step": 67000
},
{
"epoch": 3.51,
"learning_rate": 0.0018595505617977528,
"loss": 1.2145,
"step": 67500
},
{
"epoch": 3.54,
"learning_rate": 0.0018585101955888473,
"loss": 1.2141,
"step": 68000
},
{
"epoch": 3.56,
"learning_rate": 0.0018574698293799417,
"loss": 1.2148,
"step": 68500
},
{
"epoch": 3.59,
"learning_rate": 0.0018564294631710364,
"loss": 1.2125,
"step": 69000
},
{
"epoch": 3.62,
"learning_rate": 0.0018553890969621309,
"loss": 1.2132,
"step": 69500
},
{
"epoch": 3.64,
"learning_rate": 0.001854348730753225,
"loss": 1.2124,
"step": 70000
},
{
"epoch": 3.67,
"learning_rate": 0.0018533083645443196,
"loss": 1.2148,
"step": 70500
},
{
"epoch": 3.69,
"learning_rate": 0.001852267998335414,
"loss": 1.2135,
"step": 71000
},
{
"epoch": 3.72,
"learning_rate": 0.0018512276321265085,
"loss": 1.2132,
"step": 71500
},
{
"epoch": 3.75,
"learning_rate": 0.0018501872659176031,
"loss": 1.2133,
"step": 72000
},
{
"epoch": 3.77,
"learning_rate": 0.0018491468997086976,
"loss": 1.2144,
"step": 72500
},
{
"epoch": 3.8,
"learning_rate": 0.001848106533499792,
"loss": 1.2101,
"step": 73000
},
{
"epoch": 3.82,
"learning_rate": 0.0018470661672908863,
"loss": 1.212,
"step": 73500
},
{
"epoch": 3.85,
"learning_rate": 0.0018460258010819808,
"loss": 1.2119,
"step": 74000
},
{
"epoch": 3.88,
"learning_rate": 0.0018449854348730754,
"loss": 1.2117,
"step": 74500
},
{
"epoch": 3.9,
"learning_rate": 0.0018439450686641699,
"loss": 1.2106,
"step": 75000
},
{
"epoch": 3.93,
"learning_rate": 0.0018429047024552643,
"loss": 1.2125,
"step": 75500
},
{
"epoch": 3.95,
"learning_rate": 0.0018418643362463588,
"loss": 1.2106,
"step": 76000
},
{
"epoch": 3.98,
"learning_rate": 0.0018408239700374533,
"loss": 1.2108,
"step": 76500
},
{
"epoch": 4.0,
"eval_loss": 1.2261288166046143,
"eval_runtime": 0.8323,
"eval_samples_per_second": 1201.497,
"eval_steps_per_second": 2.403,
"step": 76896
},
{
"epoch": 4.01,
"learning_rate": 0.0018397836038285475,
"loss": 1.2091,
"step": 77000
},
{
"epoch": 4.03,
"learning_rate": 0.0018387432376196422,
"loss": 1.2055,
"step": 77500
},
{
"epoch": 4.06,
"learning_rate": 0.0018377028714107366,
"loss": 1.2061,
"step": 78000
},
{
"epoch": 4.08,
"learning_rate": 0.001836662505201831,
"loss": 1.2047,
"step": 78500
},
{
"epoch": 4.11,
"learning_rate": 0.0018356221389929255,
"loss": 1.2077,
"step": 79000
},
{
"epoch": 4.14,
"learning_rate": 0.00183458177278402,
"loss": 1.2069,
"step": 79500
},
{
"epoch": 4.16,
"learning_rate": 0.0018335414065751145,
"loss": 1.2078,
"step": 80000
},
{
"epoch": 4.19,
"learning_rate": 0.001832501040366209,
"loss": 1.2058,
"step": 80500
},
{
"epoch": 4.21,
"learning_rate": 0.0018314606741573034,
"loss": 1.2075,
"step": 81000
},
{
"epoch": 4.24,
"learning_rate": 0.0018304203079483978,
"loss": 1.2064,
"step": 81500
},
{
"epoch": 4.27,
"learning_rate": 0.0018293799417394923,
"loss": 1.2059,
"step": 82000
},
{
"epoch": 4.29,
"learning_rate": 0.0018283395755305867,
"loss": 1.2075,
"step": 82500
},
{
"epoch": 4.32,
"learning_rate": 0.0018272992093216814,
"loss": 1.2042,
"step": 83000
},
{
"epoch": 4.34,
"learning_rate": 0.0018262588431127759,
"loss": 1.2051,
"step": 83500
},
{
"epoch": 4.37,
"learning_rate": 0.0018252184769038703,
"loss": 1.2066,
"step": 84000
},
{
"epoch": 4.4,
"learning_rate": 0.0018241781106949646,
"loss": 1.2081,
"step": 84500
},
{
"epoch": 4.42,
"learning_rate": 0.001823137744486059,
"loss": 1.2072,
"step": 85000
},
{
"epoch": 4.45,
"learning_rate": 0.0018220973782771535,
"loss": 1.2052,
"step": 85500
},
{
"epoch": 4.47,
"learning_rate": 0.0018210570120682482,
"loss": 1.2042,
"step": 86000
},
{
"epoch": 4.5,
"learning_rate": 0.0018200166458593426,
"loss": 1.2058,
"step": 86500
},
{
"epoch": 4.53,
"learning_rate": 0.001818976279650437,
"loss": 1.2064,
"step": 87000
},
{
"epoch": 4.55,
"learning_rate": 0.0018179359134415315,
"loss": 1.2032,
"step": 87500
},
{
"epoch": 4.58,
"learning_rate": 0.0018168955472326258,
"loss": 1.2059,
"step": 88000
},
{
"epoch": 4.6,
"learning_rate": 0.0018158551810237204,
"loss": 1.2058,
"step": 88500
},
{
"epoch": 4.63,
"learning_rate": 0.001814814814814815,
"loss": 1.2041,
"step": 89000
},
{
"epoch": 4.66,
"learning_rate": 0.0018137744486059093,
"loss": 1.2057,
"step": 89500
},
{
"epoch": 4.68,
"learning_rate": 0.0018127340823970038,
"loss": 1.2031,
"step": 90000
},
{
"epoch": 4.71,
"learning_rate": 0.0018116937161880983,
"loss": 1.2062,
"step": 90500
},
{
"epoch": 4.73,
"learning_rate": 0.0018106533499791927,
"loss": 1.2051,
"step": 91000
},
{
"epoch": 4.76,
"learning_rate": 0.0018096129837702872,
"loss": 1.2037,
"step": 91500
},
{
"epoch": 4.79,
"learning_rate": 0.0018085726175613816,
"loss": 1.2053,
"step": 92000
},
{
"epoch": 4.81,
"learning_rate": 0.001807532251352476,
"loss": 1.2046,
"step": 92500
},
{
"epoch": 4.84,
"learning_rate": 0.0018064918851435705,
"loss": 1.2023,
"step": 93000
},
{
"epoch": 4.86,
"learning_rate": 0.001805451518934665,
"loss": 1.2045,
"step": 93500
},
{
"epoch": 4.89,
"learning_rate": 0.0018044111527257595,
"loss": 1.204,
"step": 94000
},
{
"epoch": 4.92,
"learning_rate": 0.0018033707865168541,
"loss": 1.2037,
"step": 94500
},
{
"epoch": 4.94,
"learning_rate": 0.0018023304203079484,
"loss": 1.204,
"step": 95000
},
{
"epoch": 4.97,
"learning_rate": 0.0018012900540990428,
"loss": 1.2044,
"step": 95500
},
{
"epoch": 4.99,
"learning_rate": 0.0018002496878901373,
"loss": 1.2022,
"step": 96000
},
{
"epoch": 5.0,
"eval_loss": 1.2207547426223755,
"eval_runtime": 0.6112,
"eval_samples_per_second": 1636.066,
"eval_steps_per_second": 3.272,
"step": 96120
},
{
"epoch": 5.02,
"learning_rate": 0.0017992093216812317,
"loss": 1.2001,
"step": 96500
},
{
"epoch": 5.05,
"learning_rate": 0.0017981689554723264,
"loss": 1.1996,
"step": 97000
},
{
"epoch": 5.07,
"learning_rate": 0.0017971285892634209,
"loss": 1.1989,
"step": 97500
},
{
"epoch": 5.1,
"learning_rate": 0.0017960882230545153,
"loss": 1.1998,
"step": 98000
},
{
"epoch": 5.12,
"learning_rate": 0.0017950478568456096,
"loss": 1.1989,
"step": 98500
},
{
"epoch": 5.15,
"learning_rate": 0.001794007490636704,
"loss": 1.1984,
"step": 99000
},
{
"epoch": 5.18,
"learning_rate": 0.0017929671244277985,
"loss": 1.1991,
"step": 99500
},
{
"epoch": 5.2,
"learning_rate": 0.0017919267582188932,
"loss": 1.1993,
"step": 100000
},
{
"epoch": 5.23,
"learning_rate": 0.0017908863920099876,
"loss": 1.1996,
"step": 100500
},
{
"epoch": 5.25,
"learning_rate": 0.001789846025801082,
"loss": 1.1995,
"step": 101000
},
{
"epoch": 5.28,
"learning_rate": 0.0017888056595921765,
"loss": 1.1987,
"step": 101500
},
{
"epoch": 5.31,
"learning_rate": 0.0017877652933832708,
"loss": 1.1971,
"step": 102000
},
{
"epoch": 5.33,
"learning_rate": 0.0017867249271743654,
"loss": 1.1984,
"step": 102500
},
{
"epoch": 5.36,
"learning_rate": 0.00178568456096546,
"loss": 1.2003,
"step": 103000
},
{
"epoch": 5.38,
"learning_rate": 0.0017846441947565544,
"loss": 1.2011,
"step": 103500
},
{
"epoch": 5.41,
"learning_rate": 0.0017836038285476488,
"loss": 1.1994,
"step": 104000
},
{
"epoch": 5.44,
"learning_rate": 0.0017825634623387433,
"loss": 1.1989,
"step": 104500
},
{
"epoch": 5.46,
"learning_rate": 0.0017815230961298377,
"loss": 1.1996,
"step": 105000
},
{
"epoch": 5.49,
"learning_rate": 0.0017804827299209324,
"loss": 1.1982,
"step": 105500
},
{
"epoch": 5.51,
"learning_rate": 0.0017794423637120266,
"loss": 1.1971,
"step": 106000
},
{
"epoch": 5.54,
"learning_rate": 0.001778401997503121,
"loss": 1.1988,
"step": 106500
},
{
"epoch": 5.57,
"learning_rate": 0.0017773616312942156,
"loss": 1.1996,
"step": 107000
},
{
"epoch": 5.59,
"learning_rate": 0.00177632126508531,
"loss": 1.1972,
"step": 107500
},
{
"epoch": 5.62,
"learning_rate": 0.0017752808988764045,
"loss": 1.1991,
"step": 108000
},
{
"epoch": 5.64,
"learning_rate": 0.0017742405326674991,
"loss": 1.1987,
"step": 108500
},
{
"epoch": 5.67,
"learning_rate": 0.0017732001664585936,
"loss": 1.1983,
"step": 109000
},
{
"epoch": 5.7,
"learning_rate": 0.0017721598002496878,
"loss": 1.1993,
"step": 109500
},
{
"epoch": 5.72,
"learning_rate": 0.0017711194340407823,
"loss": 1.1979,
"step": 110000
},
{
"epoch": 5.75,
"learning_rate": 0.0017700790678318768,
"loss": 1.1989,
"step": 110500
},
{
"epoch": 5.77,
"learning_rate": 0.0017690387016229714,
"loss": 1.2003,
"step": 111000
},
{
"epoch": 5.8,
"learning_rate": 0.0017679983354140659,
"loss": 1.1987,
"step": 111500
},
{
"epoch": 5.83,
"learning_rate": 0.0017669579692051603,
"loss": 1.1987,
"step": 112000
},
{
"epoch": 5.85,
"learning_rate": 0.0017659176029962548,
"loss": 1.1976,
"step": 112500
},
{
"epoch": 5.88,
"learning_rate": 0.001764877236787349,
"loss": 1.1983,
"step": 113000
},
{
"epoch": 5.9,
"learning_rate": 0.0017638368705784435,
"loss": 1.1981,
"step": 113500
},
{
"epoch": 5.93,
"learning_rate": 0.0017627965043695382,
"loss": 1.1989,
"step": 114000
},
{
"epoch": 5.96,
"learning_rate": 0.0017617561381606326,
"loss": 1.1963,
"step": 114500
},
{
"epoch": 5.98,
"learning_rate": 0.001760715771951727,
"loss": 1.1983,
"step": 115000
},
{
"epoch": 6.0,
"eval_loss": 1.216284155845642,
"eval_runtime": 0.6159,
"eval_samples_per_second": 1623.524,
"eval_steps_per_second": 3.247,
"step": 115344
},
{
"epoch": 6.01,
"learning_rate": 0.0017596754057428215,
"loss": 1.1979,
"step": 115500
},
{
"epoch": 6.03,
"learning_rate": 0.001758635039533916,
"loss": 1.1933,
"step": 116000
},
{
"epoch": 6.06,
"learning_rate": 0.0017575946733250102,
"loss": 1.1942,
"step": 116500
},
{
"epoch": 6.09,
"learning_rate": 0.001756554307116105,
"loss": 1.1937,
"step": 117000
},
{
"epoch": 6.11,
"learning_rate": 0.0017555139409071994,
"loss": 1.1937,
"step": 117500
},
{
"epoch": 6.14,
"learning_rate": 0.0017544735746982938,
"loss": 1.1958,
"step": 118000
},
{
"epoch": 6.16,
"learning_rate": 0.0017534332084893883,
"loss": 1.1936,
"step": 118500
},
{
"epoch": 6.19,
"learning_rate": 0.0017523928422804827,
"loss": 1.1952,
"step": 119000
},
{
"epoch": 6.22,
"learning_rate": 0.0017513524760715774,
"loss": 1.1959,
"step": 119500
},
{
"epoch": 6.24,
"learning_rate": 0.0017503121098626717,
"loss": 1.1943,
"step": 120000
},
{
"epoch": 6.27,
"learning_rate": 0.0017492717436537661,
"loss": 1.1945,
"step": 120500
},
{
"epoch": 6.29,
"learning_rate": 0.0017482313774448606,
"loss": 1.1952,
"step": 121000
},
{
"epoch": 6.32,
"learning_rate": 0.001747191011235955,
"loss": 1.1972,
"step": 121500
},
{
"epoch": 6.35,
"learning_rate": 0.0017461506450270495,
"loss": 1.1933,
"step": 122000
},
{
"epoch": 6.37,
"learning_rate": 0.0017451102788181442,
"loss": 1.1949,
"step": 122500
},
{
"epoch": 6.4,
"learning_rate": 0.0017440699126092386,
"loss": 1.1947,
"step": 123000
},
{
"epoch": 6.42,
"learning_rate": 0.0017430295464003329,
"loss": 1.1935,
"step": 123500
},
{
"epoch": 6.45,
"learning_rate": 0.0017419891801914273,
"loss": 1.1954,
"step": 124000
},
{
"epoch": 6.48,
"learning_rate": 0.0017409488139825218,
"loss": 1.193,
"step": 124500
},
{
"epoch": 6.5,
"learning_rate": 0.0017399084477736164,
"loss": 1.1938,
"step": 125000
},
{
"epoch": 6.53,
"learning_rate": 0.001738868081564711,
"loss": 1.1939,
"step": 125500
},
{
"epoch": 6.55,
"learning_rate": 0.0017378277153558054,
"loss": 1.1948,
"step": 126000
},
{
"epoch": 6.58,
"learning_rate": 0.0017367873491468998,
"loss": 1.1926,
"step": 126500
},
{
"epoch": 6.61,
"learning_rate": 0.001735746982937994,
"loss": 1.1936,
"step": 127000
},
{
"epoch": 6.63,
"learning_rate": 0.0017347066167290885,
"loss": 1.1933,
"step": 127500
},
{
"epoch": 6.66,
"learning_rate": 0.0017336662505201832,
"loss": 1.1947,
"step": 128000
},
{
"epoch": 6.68,
"learning_rate": 0.0017326258843112776,
"loss": 1.1931,
"step": 128500
},
{
"epoch": 6.71,
"learning_rate": 0.001731585518102372,
"loss": 1.1931,
"step": 129000
},
{
"epoch": 6.74,
"learning_rate": 0.0017305451518934666,
"loss": 1.1938,
"step": 129500
},
{
"epoch": 6.76,
"learning_rate": 0.001729504785684561,
"loss": 1.1939,
"step": 130000
},
{
"epoch": 6.79,
"learning_rate": 0.0017284644194756553,
"loss": 1.1923,
"step": 130500
},
{
"epoch": 6.81,
"learning_rate": 0.00172742405326675,
"loss": 1.1932,
"step": 131000
},
{
"epoch": 6.84,
"learning_rate": 0.0017263836870578444,
"loss": 1.1926,
"step": 131500
},
{
"epoch": 6.87,
"learning_rate": 0.0017253433208489388,
"loss": 1.1929,
"step": 132000
},
{
"epoch": 6.89,
"learning_rate": 0.0017243029546400333,
"loss": 1.1932,
"step": 132500
},
{
"epoch": 6.92,
"learning_rate": 0.0017232625884311278,
"loss": 1.1932,
"step": 133000
},
{
"epoch": 6.94,
"learning_rate": 0.0017222222222222224,
"loss": 1.1952,
"step": 133500
},
{
"epoch": 6.97,
"learning_rate": 0.0017211818560133169,
"loss": 1.1924,
"step": 134000
},
{
"epoch": 7.0,
"learning_rate": 0.0017201414898044111,
"loss": 1.1927,
"step": 134500
},
{
"epoch": 7.0,
"eval_loss": 1.2103557586669922,
"eval_runtime": 0.6147,
"eval_samples_per_second": 1626.936,
"eval_steps_per_second": 3.254,
"step": 134568
},
{
"epoch": 7.02,
"learning_rate": 0.0017191011235955056,
"loss": 1.1869,
"step": 135000
},
{
"epoch": 7.05,
"learning_rate": 0.0017180607573866,
"loss": 1.1872,
"step": 135500
},
{
"epoch": 7.07,
"learning_rate": 0.0017170203911776945,
"loss": 1.1898,
"step": 136000
},
{
"epoch": 7.1,
"learning_rate": 0.0017159800249687892,
"loss": 1.1895,
"step": 136500
},
{
"epoch": 7.13,
"learning_rate": 0.0017149396587598836,
"loss": 1.1902,
"step": 137000
},
{
"epoch": 7.15,
"learning_rate": 0.001713899292550978,
"loss": 1.1901,
"step": 137500
},
{
"epoch": 7.18,
"learning_rate": 0.0017128589263420723,
"loss": 1.1892,
"step": 138000
},
{
"epoch": 7.2,
"learning_rate": 0.0017118185601331668,
"loss": 1.1902,
"step": 138500
},
{
"epoch": 7.23,
"learning_rate": 0.0017107781939242615,
"loss": 1.1906,
"step": 139000
},
{
"epoch": 7.26,
"learning_rate": 0.001709737827715356,
"loss": 1.1904,
"step": 139500
},
{
"epoch": 7.28,
"learning_rate": 0.0017086974615064504,
"loss": 1.1898,
"step": 140000
},
{
"epoch": 7.31,
"learning_rate": 0.0017076570952975448,
"loss": 1.1917,
"step": 140500
},
{
"epoch": 7.33,
"learning_rate": 0.0017066167290886393,
"loss": 1.1914,
"step": 141000
},
{
"epoch": 7.36,
"learning_rate": 0.0017055763628797335,
"loss": 1.1905,
"step": 141500
},
{
"epoch": 7.39,
"learning_rate": 0.0017045359966708282,
"loss": 1.1921,
"step": 142000
},
{
"epoch": 7.41,
"learning_rate": 0.0017034956304619227,
"loss": 1.1899,
"step": 142500
},
{
"epoch": 7.44,
"learning_rate": 0.001702455264253017,
"loss": 1.19,
"step": 143000
},
{
"epoch": 7.46,
"learning_rate": 0.0017014148980441116,
"loss": 1.1883,
"step": 143500
},
{
"epoch": 7.49,
"learning_rate": 0.001700374531835206,
"loss": 1.191,
"step": 144000
},
{
"epoch": 7.52,
"learning_rate": 0.0016993341656263005,
"loss": 1.1896,
"step": 144500
},
{
"epoch": 7.54,
"learning_rate": 0.001698293799417395,
"loss": 1.1893,
"step": 145000
},
{
"epoch": 7.57,
"learning_rate": 0.0016972534332084894,
"loss": 1.1892,
"step": 145500
},
{
"epoch": 7.59,
"learning_rate": 0.0016962130669995838,
"loss": 1.1887,
"step": 146000
},
{
"epoch": 7.62,
"learning_rate": 0.0016951727007906783,
"loss": 1.1913,
"step": 146500
},
{
"epoch": 7.65,
"learning_rate": 0.0016941323345817728,
"loss": 1.1895,
"step": 147000
},
{
"epoch": 7.67,
"learning_rate": 0.0016930919683728674,
"loss": 1.1891,
"step": 147500
},
{
"epoch": 7.7,
"learning_rate": 0.001692051602163962,
"loss": 1.189,
"step": 148000
},
{
"epoch": 7.72,
"learning_rate": 0.0016910112359550561,
"loss": 1.1907,
"step": 148500
},
{
"epoch": 7.75,
"learning_rate": 0.0016899708697461506,
"loss": 1.1911,
"step": 149000
},
{
"epoch": 7.78,
"learning_rate": 0.001688930503537245,
"loss": 1.1881,
"step": 149500
},
{
"epoch": 7.8,
"learning_rate": 0.0016878901373283395,
"loss": 1.1893,
"step": 150000
},
{
"epoch": 7.83,
"learning_rate": 0.0016868497711194342,
"loss": 1.1902,
"step": 150500
},
{
"epoch": 7.85,
"learning_rate": 0.0016858094049105286,
"loss": 1.1912,
"step": 151000
},
{
"epoch": 7.88,
"learning_rate": 0.001684769038701623,
"loss": 1.1907,
"step": 151500
},
{
"epoch": 7.91,
"learning_rate": 0.0016837286724927173,
"loss": 1.1909,
"step": 152000
},
{
"epoch": 7.93,
"learning_rate": 0.0016826883062838118,
"loss": 1.1875,
"step": 152500
},
{
"epoch": 7.96,
"learning_rate": 0.0016816479400749065,
"loss": 1.19,
"step": 153000
},
{
"epoch": 7.98,
"learning_rate": 0.001680607573866001,
"loss": 1.1881,
"step": 153500
},
{
"epoch": 8.0,
"eval_loss": 1.2096730470657349,
"eval_runtime": 0.6211,
"eval_samples_per_second": 1609.947,
"eval_steps_per_second": 3.22,
"step": 153792
},
{
"epoch": 8.01,
"learning_rate": 0.0016795672076570954,
"loss": 1.1874,
"step": 154000
},
{
"epoch": 8.04,
"learning_rate": 0.0016785268414481898,
"loss": 1.1844,
"step": 154500
},
{
"epoch": 8.06,
"learning_rate": 0.0016774864752392843,
"loss": 1.1845,
"step": 155000
},
{
"epoch": 8.09,
"learning_rate": 0.0016764461090303787,
"loss": 1.186,
"step": 155500
},
{
"epoch": 8.11,
"learning_rate": 0.0016754057428214732,
"loss": 1.1851,
"step": 156000
},
{
"epoch": 8.14,
"learning_rate": 0.0016743653766125677,
"loss": 1.1872,
"step": 156500
},
{
"epoch": 8.17,
"learning_rate": 0.0016733250104036621,
"loss": 1.1872,
"step": 157000
},
{
"epoch": 8.19,
"learning_rate": 0.0016722846441947566,
"loss": 1.1862,
"step": 157500
},
{
"epoch": 8.22,
"learning_rate": 0.001671244277985851,
"loss": 1.1867,
"step": 158000
},
{
"epoch": 8.24,
"learning_rate": 0.0016702039117769455,
"loss": 1.186,
"step": 158500
},
{
"epoch": 8.27,
"learning_rate": 0.0016691635455680402,
"loss": 1.1858,
"step": 159000
},
{
"epoch": 8.3,
"learning_rate": 0.0016681231793591344,
"loss": 1.1843,
"step": 159500
},
{
"epoch": 8.32,
"learning_rate": 0.0016670828131502289,
"loss": 1.1857,
"step": 160000
},
{
"epoch": 8.35,
"learning_rate": 0.0016660424469413233,
"loss": 1.1872,
"step": 160500
},
{
"epoch": 8.37,
"learning_rate": 0.0016650020807324178,
"loss": 1.1859,
"step": 161000
},
{
"epoch": 8.4,
"learning_rate": 0.0016639617145235124,
"loss": 1.1865,
"step": 161500
},
{
"epoch": 8.43,
"learning_rate": 0.001662921348314607,
"loss": 1.1871,
"step": 162000
},
{
"epoch": 8.45,
"learning_rate": 0.0016618809821057014,
"loss": 1.1871,
"step": 162500
},
{
"epoch": 8.48,
"learning_rate": 0.0016608406158967956,
"loss": 1.1876,
"step": 163000
},
{
"epoch": 8.5,
"learning_rate": 0.00165980024968789,
"loss": 1.1876,
"step": 163500
},
{
"epoch": 8.53,
"learning_rate": 0.0016587598834789845,
"loss": 1.1874,
"step": 164000
},
{
"epoch": 8.56,
"learning_rate": 0.0016577195172700792,
"loss": 1.186,
"step": 164500
},
{
"epoch": 8.58,
"learning_rate": 0.0016566791510611736,
"loss": 1.1869,
"step": 165000
},
{
"epoch": 8.61,
"learning_rate": 0.001655638784852268,
"loss": 1.1865,
"step": 165500
},
{
"epoch": 8.64,
"learning_rate": 0.0016545984186433626,
"loss": 1.1862,
"step": 166000
},
{
"epoch": 8.66,
"learning_rate": 0.0016535580524344568,
"loss": 1.1881,
"step": 166500
},
{
"epoch": 8.69,
"learning_rate": 0.0016525176862255513,
"loss": 1.1875,
"step": 167000
},
{
"epoch": 8.71,
"learning_rate": 0.001651477320016646,
"loss": 1.1866,
"step": 167500
},
{
"epoch": 8.74,
"learning_rate": 0.0016504369538077404,
"loss": 1.1868,
"step": 168000
},
{
"epoch": 8.77,
"learning_rate": 0.0016493965875988348,
"loss": 1.1867,
"step": 168500
},
{
"epoch": 8.79,
"learning_rate": 0.0016483562213899293,
"loss": 1.1858,
"step": 169000
},
{
"epoch": 8.82,
"learning_rate": 0.0016473158551810238,
"loss": 1.1869,
"step": 169500
},
{
"epoch": 8.84,
"learning_rate": 0.0016462754889721182,
"loss": 1.1861,
"step": 170000
},
{
"epoch": 8.87,
"learning_rate": 0.0016452351227632127,
"loss": 1.1849,
"step": 170500
},
{
"epoch": 8.9,
"learning_rate": 0.0016441947565543071,
"loss": 1.1857,
"step": 171000
},
{
"epoch": 8.92,
"learning_rate": 0.0016431543903454016,
"loss": 1.1883,
"step": 171500
},
{
"epoch": 8.95,
"learning_rate": 0.001642114024136496,
"loss": 1.1862,
"step": 172000
},
{
"epoch": 8.97,
"learning_rate": 0.0016410736579275905,
"loss": 1.1859,
"step": 172500
},
{
"epoch": 9.0,
"learning_rate": 0.0016400332917186852,
"loss": 1.1865,
"step": 173000
},
{
"epoch": 9.0,
"eval_loss": 1.2050005197525024,
"eval_runtime": 0.6154,
"eval_samples_per_second": 1624.869,
"eval_steps_per_second": 3.25,
"step": 173016
},
{
"epoch": 9.03,
"learning_rate": 0.0016389929255097794,
"loss": 1.181,
"step": 173500
},
{
"epoch": 9.05,
"learning_rate": 0.0016379525593008739,
"loss": 1.1819,
"step": 174000
},
{
"epoch": 9.08,
"learning_rate": 0.0016369121930919683,
"loss": 1.1832,
"step": 174500
},
{
"epoch": 9.1,
"learning_rate": 0.0016358718268830628,
"loss": 1.1833,
"step": 175000
},
{
"epoch": 9.13,
"learning_rate": 0.0016348314606741575,
"loss": 1.1814,
"step": 175500
},
{
"epoch": 9.16,
"learning_rate": 0.001633791094465252,
"loss": 1.1824,
"step": 176000
},
{
"epoch": 9.18,
"learning_rate": 0.0016327507282563464,
"loss": 1.1836,
"step": 176500
},
{
"epoch": 9.21,
"learning_rate": 0.0016317103620474408,
"loss": 1.1824,
"step": 177000
},
{
"epoch": 9.23,
"learning_rate": 0.001630669995838535,
"loss": 1.1837,
"step": 177500
},
{
"epoch": 9.26,
"learning_rate": 0.0016296296296296295,
"loss": 1.1859,
"step": 178000
},
{
"epoch": 9.29,
"learning_rate": 0.0016285892634207242,
"loss": 1.1834,
"step": 178500
},
{
"epoch": 9.31,
"learning_rate": 0.0016275488972118187,
"loss": 1.1829,
"step": 179000
},
{
"epoch": 9.34,
"learning_rate": 0.0016265085310029131,
"loss": 1.1827,
"step": 179500
},
{
"epoch": 9.36,
"learning_rate": 0.0016254681647940076,
"loss": 1.1845,
"step": 180000
},
{
"epoch": 9.39,
"learning_rate": 0.001624427798585102,
"loss": 1.1836,
"step": 180500
},
{
"epoch": 9.42,
"learning_rate": 0.0016233874323761963,
"loss": 1.1821,
"step": 181000
},
{
"epoch": 9.44,
"learning_rate": 0.001622347066167291,
"loss": 1.1833,
"step": 181500
},
{
"epoch": 9.47,
"learning_rate": 0.0016213066999583854,
"loss": 1.1843,
"step": 182000
},
{
"epoch": 9.49,
"learning_rate": 0.0016202663337494799,
"loss": 1.1845,
"step": 182500
},
{
"epoch": 9.52,
"learning_rate": 0.0016192259675405743,
"loss": 1.1837,
"step": 183000
},
{
"epoch": 9.55,
"learning_rate": 0.0016181856013316688,
"loss": 1.1837,
"step": 183500
},
{
"epoch": 9.57,
"learning_rate": 0.0016171452351227634,
"loss": 1.184,
"step": 184000
},
{
"epoch": 9.6,
"learning_rate": 0.0016161048689138577,
"loss": 1.1835,
"step": 184500
},
{
"epoch": 9.62,
"learning_rate": 0.0016150645027049521,
"loss": 1.1826,
"step": 185000
},
{
"epoch": 9.65,
"learning_rate": 0.0016140241364960466,
"loss": 1.1846,
"step": 185500
},
{
"epoch": 9.68,
"learning_rate": 0.001612983770287141,
"loss": 1.1833,
"step": 186000
},
{
"epoch": 9.7,
"learning_rate": 0.0016119434040782355,
"loss": 1.1829,
"step": 186500
},
{
"epoch": 9.73,
"learning_rate": 0.0016109030378693302,
"loss": 1.183,
"step": 187000
},
{
"epoch": 9.75,
"learning_rate": 0.0016098626716604246,
"loss": 1.184,
"step": 187500
},
{
"epoch": 9.78,
"learning_rate": 0.0016088223054515189,
"loss": 1.1831,
"step": 188000
},
{
"epoch": 9.81,
"learning_rate": 0.0016077819392426133,
"loss": 1.185,
"step": 188500
},
{
"epoch": 9.83,
"learning_rate": 0.0016067415730337078,
"loss": 1.183,
"step": 189000
},
{
"epoch": 9.86,
"learning_rate": 0.0016057012068248025,
"loss": 1.183,
"step": 189500
},
{
"epoch": 9.88,
"learning_rate": 0.001604660840615897,
"loss": 1.1837,
"step": 190000
},
{
"epoch": 9.91,
"learning_rate": 0.0016036204744069914,
"loss": 1.1809,
"step": 190500
},
{
"epoch": 9.94,
"learning_rate": 0.0016025801081980858,
"loss": 1.1838,
"step": 191000
},
{
"epoch": 9.96,
"learning_rate": 0.00160153974198918,
"loss": 1.1831,
"step": 191500
},
{
"epoch": 9.99,
"learning_rate": 0.0016004993757802745,
"loss": 1.1846,
"step": 192000
},
{
"epoch": 10.0,
"eval_loss": 1.2037365436553955,
"eval_runtime": 0.6098,
"eval_samples_per_second": 1639.906,
"eval_steps_per_second": 3.28,
"step": 192240
},
{
"epoch": 10.01,
"learning_rate": 0.0015994590095713692,
"loss": 1.181,
"step": 192500
},
{
"epoch": 10.04,
"learning_rate": 0.0015984186433624637,
"loss": 1.1792,
"step": 193000
},
{
"epoch": 10.07,
"learning_rate": 0.0015973782771535581,
"loss": 1.1796,
"step": 193500
},
{
"epoch": 10.09,
"learning_rate": 0.0015963379109446526,
"loss": 1.1806,
"step": 194000
},
{
"epoch": 10.12,
"learning_rate": 0.001595297544735747,
"loss": 1.1803,
"step": 194500
},
{
"epoch": 10.14,
"learning_rate": 0.0015942571785268413,
"loss": 1.1801,
"step": 195000
},
{
"epoch": 10.17,
"learning_rate": 0.001593216812317936,
"loss": 1.1799,
"step": 195500
},
{
"epoch": 10.2,
"learning_rate": 0.0015921764461090304,
"loss": 1.1812,
"step": 196000
},
{
"epoch": 10.22,
"learning_rate": 0.0015911360799001249,
"loss": 1.1795,
"step": 196500
},
{
"epoch": 10.25,
"learning_rate": 0.0015900957136912193,
"loss": 1.1812,
"step": 197000
},
{
"epoch": 10.27,
"learning_rate": 0.0015890553474823138,
"loss": 1.1803,
"step": 197500
},
{
"epoch": 10.3,
"learning_rate": 0.0015880149812734085,
"loss": 1.1818,
"step": 198000
},
{
"epoch": 10.33,
"learning_rate": 0.0015869746150645027,
"loss": 1.1802,
"step": 198500
},
{
"epoch": 10.35,
"learning_rate": 0.0015859342488555972,
"loss": 1.1805,
"step": 199000
},
{
"epoch": 10.38,
"learning_rate": 0.0015848938826466916,
"loss": 1.1802,
"step": 199500
},
{
"epoch": 10.4,
"learning_rate": 0.001583853516437786,
"loss": 1.1812,
"step": 200000
},
{
"epoch": 10.43,
"learning_rate": 0.0015828131502288805,
"loss": 1.1817,
"step": 200500
},
{
"epoch": 10.46,
"learning_rate": 0.0015817727840199752,
"loss": 1.1828,
"step": 201000
},
{
"epoch": 10.48,
"learning_rate": 0.0015807324178110697,
"loss": 1.1798,
"step": 201500
},
{
"epoch": 10.51,
"learning_rate": 0.0015796920516021641,
"loss": 1.1817,
"step": 202000
},
{
"epoch": 10.53,
"learning_rate": 0.0015786516853932584,
"loss": 1.181,
"step": 202500
},
{
"epoch": 10.56,
"learning_rate": 0.0015776113191843528,
"loss": 1.1814,
"step": 203000
},
{
"epoch": 10.59,
"learning_rate": 0.0015765709529754473,
"loss": 1.1798,
"step": 203500
},
{
"epoch": 10.61,
"learning_rate": 0.001575530586766542,
"loss": 1.1819,
"step": 204000
},
{
"epoch": 10.64,
"learning_rate": 0.0015744902205576364,
"loss": 1.1818,
"step": 204500
},
{
"epoch": 10.66,
"learning_rate": 0.0015734498543487309,
"loss": 1.182,
"step": 205000
},
{
"epoch": 10.69,
"learning_rate": 0.0015724094881398253,
"loss": 1.1821,
"step": 205500
},
{
"epoch": 10.72,
"learning_rate": 0.0015713691219309195,
"loss": 1.1819,
"step": 206000
},
{
"epoch": 10.74,
"learning_rate": 0.0015703287557220142,
"loss": 1.1809,
"step": 206500
},
{
"epoch": 10.77,
"learning_rate": 0.0015692883895131087,
"loss": 1.1806,
"step": 207000
},
{
"epoch": 10.79,
"learning_rate": 0.0015682480233042031,
"loss": 1.1814,
"step": 207500
},
{
"epoch": 10.82,
"learning_rate": 0.0015672076570952976,
"loss": 1.181,
"step": 208000
},
{
"epoch": 10.85,
"learning_rate": 0.001566167290886392,
"loss": 1.183,
"step": 208500
},
{
"epoch": 10.87,
"learning_rate": 0.0015651269246774865,
"loss": 1.1812,
"step": 209000
},
{
"epoch": 10.9,
"learning_rate": 0.001564086558468581,
"loss": 1.1803,
"step": 209500
},
{
"epoch": 10.92,
"learning_rate": 0.0015630461922596754,
"loss": 1.1817,
"step": 210000
},
{
"epoch": 10.95,
"learning_rate": 0.0015620058260507699,
"loss": 1.1781,
"step": 210500
},
{
"epoch": 10.98,
"learning_rate": 0.0015609654598418643,
"loss": 1.1806,
"step": 211000
},
{
"epoch": 11.0,
"eval_loss": 1.2047163248062134,
"eval_runtime": 0.6153,
"eval_samples_per_second": 1625.195,
"eval_steps_per_second": 3.25,
"step": 211464
},
{
"epoch": 11.0,
"learning_rate": 0.0015599250936329588,
"loss": 1.1819,
"step": 211500
},
{
"epoch": 11.03,
"learning_rate": 0.0015588847274240535,
"loss": 1.1753,
"step": 212000
},
{
"epoch": 11.05,
"learning_rate": 0.001557844361215148,
"loss": 1.1781,
"step": 212500
},
{
"epoch": 11.08,
"learning_rate": 0.0015568039950062422,
"loss": 1.1788,
"step": 213000
},
{
"epoch": 11.11,
"learning_rate": 0.0015557636287973366,
"loss": 1.1768,
"step": 213500
},
{
"epoch": 11.13,
"learning_rate": 0.001554723262588431,
"loss": 1.1775,
"step": 214000
},
{
"epoch": 11.16,
"learning_rate": 0.0015536828963795255,
"loss": 1.1782,
"step": 214500
},
{
"epoch": 11.18,
"learning_rate": 0.0015526425301706202,
"loss": 1.1771,
"step": 215000
},
{
"epoch": 11.21,
"learning_rate": 0.0015516021639617147,
"loss": 1.1778,
"step": 215500
},
{
"epoch": 11.24,
"learning_rate": 0.0015505617977528091,
"loss": 1.1767,
"step": 216000
},
{
"epoch": 11.26,
"learning_rate": 0.0015495214315439034,
"loss": 1.1781,
"step": 216500
},
{
"epoch": 11.29,
"learning_rate": 0.0015484810653349978,
"loss": 1.1781,
"step": 217000
},
{
"epoch": 11.31,
"learning_rate": 0.0015474406991260923,
"loss": 1.179,
"step": 217500
},
{
"epoch": 11.34,
"learning_rate": 0.001546400332917187,
"loss": 1.1775,
"step": 218000
},
{
"epoch": 11.37,
"learning_rate": 0.0015453599667082814,
"loss": 1.1799,
"step": 218500
},
{
"epoch": 11.39,
"learning_rate": 0.0015443196004993759,
"loss": 1.1773,
"step": 219000
},
{
"epoch": 11.42,
"learning_rate": 0.0015432792342904703,
"loss": 1.1786,
"step": 219500
},
{
"epoch": 11.44,
"learning_rate": 0.0015422388680815646,
"loss": 1.1766,
"step": 220000
},
{
"epoch": 11.47,
"learning_rate": 0.0015411985018726592,
"loss": 1.1793,
"step": 220500
},
{
"epoch": 11.5,
"learning_rate": 0.0015401581356637537,
"loss": 1.1785,
"step": 221000
},
{
"epoch": 11.52,
"learning_rate": 0.0015391177694548481,
"loss": 1.1787,
"step": 221500
},
{
"epoch": 11.55,
"learning_rate": 0.0015380774032459426,
"loss": 1.1786,
"step": 222000
},
{
"epoch": 11.57,
"learning_rate": 0.001537037037037037,
"loss": 1.1796,
"step": 222500
},
{
"epoch": 11.6,
"learning_rate": 0.0015359966708281315,
"loss": 1.1785,
"step": 223000
},
{
"epoch": 11.63,
"learning_rate": 0.0015349563046192262,
"loss": 1.1789,
"step": 223500
},
{
"epoch": 11.65,
"learning_rate": 0.0015339159384103204,
"loss": 1.1775,
"step": 224000
},
{
"epoch": 11.68,
"learning_rate": 0.0015328755722014149,
"loss": 1.181,
"step": 224500
},
{
"epoch": 11.7,
"learning_rate": 0.0015318352059925093,
"loss": 1.1781,
"step": 225000
},
{
"epoch": 11.73,
"learning_rate": 0.0015307948397836038,
"loss": 1.1791,
"step": 225500
},
{
"epoch": 11.76,
"learning_rate": 0.0015297544735746985,
"loss": 1.1797,
"step": 226000
},
{
"epoch": 11.78,
"learning_rate": 0.001528714107365793,
"loss": 1.1779,
"step": 226500
},
{
"epoch": 11.81,
"learning_rate": 0.0015276737411568874,
"loss": 1.1773,
"step": 227000
},
{
"epoch": 11.83,
"learning_rate": 0.0015266333749479816,
"loss": 1.178,
"step": 227500
},
{
"epoch": 11.86,
"learning_rate": 0.001525593008739076,
"loss": 1.179,
"step": 228000
},
{
"epoch": 11.89,
"learning_rate": 0.0015245526425301705,
"loss": 1.1782,
"step": 228500
},
{
"epoch": 11.91,
"learning_rate": 0.0015235122763212652,
"loss": 1.1796,
"step": 229000
},
{
"epoch": 11.94,
"learning_rate": 0.0015224719101123597,
"loss": 1.1771,
"step": 229500
},
{
"epoch": 11.96,
"learning_rate": 0.0015214315439034541,
"loss": 1.179,
"step": 230000
},
{
"epoch": 11.99,
"learning_rate": 0.0015203911776945486,
"loss": 1.1791,
"step": 230500
},
{
"epoch": 12.0,
"eval_loss": 1.1990782022476196,
"eval_runtime": 0.5976,
"eval_samples_per_second": 1673.372,
"eval_steps_per_second": 3.347,
"step": 230688
},
{
"epoch": 12.02,
"learning_rate": 0.0015193508114856428,
"loss": 1.1745,
"step": 231000
},
{
"epoch": 12.04,
"learning_rate": 0.0015183104452767373,
"loss": 1.1744,
"step": 231500
},
{
"epoch": 12.07,
"learning_rate": 0.001517270079067832,
"loss": 1.1743,
"step": 232000
},
{
"epoch": 12.09,
"learning_rate": 0.0015162297128589264,
"loss": 1.1758,
"step": 232500
},
{
"epoch": 12.12,
"learning_rate": 0.0015151893466500209,
"loss": 1.1738,
"step": 233000
},
{
"epoch": 12.15,
"learning_rate": 0.0015141489804411153,
"loss": 1.1752,
"step": 233500
},
{
"epoch": 12.17,
"learning_rate": 0.0015131086142322098,
"loss": 1.1753,
"step": 234000
},
{
"epoch": 12.2,
"learning_rate": 0.0015120682480233042,
"loss": 1.1764,
"step": 234500
},
{
"epoch": 12.22,
"learning_rate": 0.0015110278818143987,
"loss": 1.174,
"step": 235000
},
{
"epoch": 12.25,
"learning_rate": 0.0015099875156054932,
"loss": 1.176,
"step": 235500
},
{
"epoch": 12.28,
"learning_rate": 0.0015089471493965876,
"loss": 1.176,
"step": 236000
},
{
"epoch": 12.3,
"learning_rate": 0.001507906783187682,
"loss": 1.1758,
"step": 236500
},
{
"epoch": 12.33,
"learning_rate": 0.0015068664169787765,
"loss": 1.1747,
"step": 237000
},
{
"epoch": 12.35,
"learning_rate": 0.0015058260507698712,
"loss": 1.1752,
"step": 237500
},
{
"epoch": 12.38,
"learning_rate": 0.0015047856845609654,
"loss": 1.1761,
"step": 238000
},
{
"epoch": 12.41,
"learning_rate": 0.00150374531835206,
"loss": 1.1771,
"step": 238500
},
{
"epoch": 12.43,
"learning_rate": 0.0015027049521431544,
"loss": 1.1764,
"step": 239000
},
{
"epoch": 12.46,
"learning_rate": 0.0015016645859342488,
"loss": 1.1754,
"step": 239500
},
{
"epoch": 12.48,
"learning_rate": 0.0015006242197253433,
"loss": 1.1763,
"step": 240000
},
{
"epoch": 12.51,
"learning_rate": 0.001499583853516438,
"loss": 1.1751,
"step": 240500
},
{
"epoch": 12.54,
"learning_rate": 0.0014985434873075324,
"loss": 1.177,
"step": 241000
},
{
"epoch": 12.56,
"learning_rate": 0.0014975031210986266,
"loss": 1.178,
"step": 241500
},
{
"epoch": 12.59,
"learning_rate": 0.001496462754889721,
"loss": 1.178,
"step": 242000
},
{
"epoch": 12.61,
"learning_rate": 0.0014954223886808156,
"loss": 1.1761,
"step": 242500
},
{
"epoch": 12.64,
"learning_rate": 0.0014943820224719102,
"loss": 1.1763,
"step": 243000
},
{
"epoch": 12.67,
"learning_rate": 0.0014933416562630047,
"loss": 1.1781,
"step": 243500
},
{
"epoch": 12.69,
"learning_rate": 0.0014923012900540991,
"loss": 1.1773,
"step": 244000
},
{
"epoch": 12.72,
"learning_rate": 0.0014912609238451936,
"loss": 1.176,
"step": 244500
},
{
"epoch": 12.74,
"learning_rate": 0.0014902205576362878,
"loss": 1.1762,
"step": 245000
},
{
"epoch": 12.77,
"learning_rate": 0.0014891801914273823,
"loss": 1.1776,
"step": 245500
},
{
"epoch": 12.8,
"learning_rate": 0.001488139825218477,
"loss": 1.1774,
"step": 246000
},
{
"epoch": 12.82,
"learning_rate": 0.0014870994590095714,
"loss": 1.1759,
"step": 246500
},
{
"epoch": 12.85,
"learning_rate": 0.0014860590928006659,
"loss": 1.1783,
"step": 247000
},
{
"epoch": 12.87,
"learning_rate": 0.0014850187265917603,
"loss": 1.1757,
"step": 247500
},
{
"epoch": 12.9,
"learning_rate": 0.0014839783603828548,
"loss": 1.1769,
"step": 248000
},
{
"epoch": 12.93,
"learning_rate": 0.0014829379941739495,
"loss": 1.1749,
"step": 248500
},
{
"epoch": 12.95,
"learning_rate": 0.0014818976279650437,
"loss": 1.1751,
"step": 249000
},
{
"epoch": 12.98,
"learning_rate": 0.0014808572617561382,
"loss": 1.1757,
"step": 249500
},
{
"epoch": 13.0,
"eval_loss": 1.198763132095337,
"eval_runtime": 0.6114,
"eval_samples_per_second": 1635.549,
"eval_steps_per_second": 3.271,
"step": 249912
},
{
"epoch": 13.0,
"learning_rate": 0.0014798168955472326,
"loss": 1.1756,
"step": 250000
},
{
"epoch": 13.03,
"learning_rate": 0.001478776529338327,
"loss": 1.1712,
"step": 250500
},
{
"epoch": 13.06,
"learning_rate": 0.0014777361631294215,
"loss": 1.1714,
"step": 251000
},
{
"epoch": 13.08,
"learning_rate": 0.0014766957969205162,
"loss": 1.1724,
"step": 251500
},
{
"epoch": 13.11,
"learning_rate": 0.0014756554307116107,
"loss": 1.1736,
"step": 252000
},
{
"epoch": 13.13,
"learning_rate": 0.001474615064502705,
"loss": 1.1739,
"step": 252500
},
{
"epoch": 13.16,
"learning_rate": 0.0014735746982937994,
"loss": 1.1737,
"step": 253000
},
{
"epoch": 13.19,
"learning_rate": 0.0014725343320848938,
"loss": 1.1732,
"step": 253500
},
{
"epoch": 13.21,
"learning_rate": 0.0014714939658759883,
"loss": 1.1725,
"step": 254000
},
{
"epoch": 13.24,
"learning_rate": 0.001470453599667083,
"loss": 1.1737,
"step": 254500
},
{
"epoch": 13.26,
"learning_rate": 0.0014694132334581774,
"loss": 1.1737,
"step": 255000
},
{
"epoch": 13.29,
"learning_rate": 0.0014683728672492719,
"loss": 1.1733,
"step": 255500
},
{
"epoch": 13.32,
"learning_rate": 0.0014673325010403661,
"loss": 1.1729,
"step": 256000
},
{
"epoch": 13.34,
"learning_rate": 0.0014662921348314606,
"loss": 1.1742,
"step": 256500
},
{
"epoch": 13.37,
"learning_rate": 0.0014652517686225552,
"loss": 1.174,
"step": 257000
},
{
"epoch": 13.39,
"learning_rate": 0.0014642114024136497,
"loss": 1.1749,
"step": 257500
},
{
"epoch": 13.42,
"learning_rate": 0.0014631710362047442,
"loss": 1.1735,
"step": 258000
},
{
"epoch": 13.45,
"learning_rate": 0.0014621306699958386,
"loss": 1.1724,
"step": 258500
},
{
"epoch": 13.47,
"learning_rate": 0.001461090303786933,
"loss": 1.1743,
"step": 259000
},
{
"epoch": 13.5,
"learning_rate": 0.0014600499375780273,
"loss": 1.1755,
"step": 259500
},
{
"epoch": 13.52,
"learning_rate": 0.001459009571369122,
"loss": 1.1759,
"step": 260000
},
{
"epoch": 13.55,
"learning_rate": 0.0014579692051602164,
"loss": 1.1749,
"step": 260500
},
{
"epoch": 13.58,
"learning_rate": 0.001456928838951311,
"loss": 1.1756,
"step": 261000
},
{
"epoch": 13.6,
"learning_rate": 0.0014558884727424054,
"loss": 1.1747,
"step": 261500
},
{
"epoch": 13.63,
"learning_rate": 0.0014548481065334998,
"loss": 1.1745,
"step": 262000
},
{
"epoch": 13.65,
"learning_rate": 0.0014538077403245945,
"loss": 1.1736,
"step": 262500
},
{
"epoch": 13.68,
"learning_rate": 0.0014527673741156887,
"loss": 1.1749,
"step": 263000
},
{
"epoch": 13.71,
"learning_rate": 0.0014517270079067832,
"loss": 1.1747,
"step": 263500
},
{
"epoch": 13.73,
"learning_rate": 0.0014506866416978776,
"loss": 1.1735,
"step": 264000
},
{
"epoch": 13.76,
"learning_rate": 0.001449646275488972,
"loss": 1.1736,
"step": 264500
},
{
"epoch": 13.78,
"learning_rate": 0.0014486059092800666,
"loss": 1.1741,
"step": 265000
},
{
"epoch": 13.81,
"learning_rate": 0.0014475655430711612,
"loss": 1.1756,
"step": 265500
},
{
"epoch": 13.84,
"learning_rate": 0.0014465251768622557,
"loss": 1.1746,
"step": 266000
},
{
"epoch": 13.86,
"learning_rate": 0.00144548481065335,
"loss": 1.176,
"step": 266500
},
{
"epoch": 13.89,
"learning_rate": 0.0014444444444444444,
"loss": 1.1746,
"step": 267000
},
{
"epoch": 13.91,
"learning_rate": 0.0014434040782355388,
"loss": 1.1746,
"step": 267500
},
{
"epoch": 13.94,
"learning_rate": 0.0014423637120266333,
"loss": 1.1741,
"step": 268000
},
{
"epoch": 13.97,
"learning_rate": 0.001441323345817728,
"loss": 1.173,
"step": 268500
},
{
"epoch": 13.99,
"learning_rate": 0.0014402829796088224,
"loss": 1.1741,
"step": 269000
},
{
"epoch": 14.0,
"eval_loss": 1.199006199836731,
"eval_runtime": 0.8396,
"eval_samples_per_second": 1191.026,
"eval_steps_per_second": 2.382,
"step": 269136
},
{
"epoch": 14.02,
"learning_rate": 0.0014392426133999169,
"loss": 1.1715,
"step": 269500
},
{
"epoch": 14.04,
"learning_rate": 0.0014382022471910111,
"loss": 1.1707,
"step": 270000
},
{
"epoch": 14.07,
"learning_rate": 0.0014371618809821056,
"loss": 1.171,
"step": 270500
},
{
"epoch": 14.1,
"learning_rate": 0.0014361215147732003,
"loss": 1.1708,
"step": 271000
},
{
"epoch": 14.12,
"learning_rate": 0.0014350811485642947,
"loss": 1.1715,
"step": 271500
},
{
"epoch": 14.15,
"learning_rate": 0.0014340407823553892,
"loss": 1.171,
"step": 272000
},
{
"epoch": 14.17,
"learning_rate": 0.0014330004161464836,
"loss": 1.1721,
"step": 272500
},
{
"epoch": 14.2,
"learning_rate": 0.001431960049937578,
"loss": 1.1717,
"step": 273000
},
{
"epoch": 14.23,
"learning_rate": 0.0014309196837286725,
"loss": 1.1715,
"step": 273500
},
{
"epoch": 14.25,
"learning_rate": 0.001429879317519767,
"loss": 1.171,
"step": 274000
},
{
"epoch": 14.28,
"learning_rate": 0.0014288389513108614,
"loss": 1.1712,
"step": 274500
},
{
"epoch": 14.31,
"learning_rate": 0.001427798585101956,
"loss": 1.1726,
"step": 275000
},
{
"epoch": 14.33,
"learning_rate": 0.0014267582188930504,
"loss": 1.173,
"step": 275500
},
{
"epoch": 14.36,
"learning_rate": 0.0014257178526841448,
"loss": 1.1709,
"step": 276000
},
{
"epoch": 14.38,
"learning_rate": 0.0014246774864752393,
"loss": 1.1728,
"step": 276500
},
{
"epoch": 14.41,
"learning_rate": 0.001423637120266334,
"loss": 1.1721,
"step": 277000
},
{
"epoch": 14.44,
"learning_rate": 0.0014225967540574282,
"loss": 1.1732,
"step": 277500
},
{
"epoch": 14.46,
"learning_rate": 0.0014215563878485226,
"loss": 1.1718,
"step": 278000
},
{
"epoch": 14.49,
"learning_rate": 0.001420516021639617,
"loss": 1.1716,
"step": 278500
},
{
"epoch": 14.51,
"learning_rate": 0.0014194756554307116,
"loss": 1.1735,
"step": 279000
},
{
"epoch": 14.54,
"learning_rate": 0.0014184352892218062,
"loss": 1.1731,
"step": 279500
},
{
"epoch": 14.57,
"learning_rate": 0.0014173949230129007,
"loss": 1.1724,
"step": 280000
},
{
"epoch": 14.59,
"learning_rate": 0.0014163545568039951,
"loss": 1.1717,
"step": 280500
},
{
"epoch": 14.62,
"learning_rate": 0.0014153141905950894,
"loss": 1.1714,
"step": 281000
},
{
"epoch": 14.64,
"learning_rate": 0.0014142738243861838,
"loss": 1.1725,
"step": 281500
},
{
"epoch": 14.67,
"learning_rate": 0.0014132334581772783,
"loss": 1.1726,
"step": 282000
},
{
"epoch": 14.7,
"learning_rate": 0.001412193091968373,
"loss": 1.1731,
"step": 282500
},
{
"epoch": 14.72,
"learning_rate": 0.0014111527257594674,
"loss": 1.1727,
"step": 283000
},
{
"epoch": 14.75,
"learning_rate": 0.0014101123595505619,
"loss": 1.1724,
"step": 283500
},
{
"epoch": 14.77,
"learning_rate": 0.0014090719933416563,
"loss": 1.1715,
"step": 284000
},
{
"epoch": 14.8,
"learning_rate": 0.0014080316271327506,
"loss": 1.173,
"step": 284500
},
{
"epoch": 14.83,
"learning_rate": 0.0014069912609238453,
"loss": 1.1727,
"step": 285000
},
{
"epoch": 14.85,
"learning_rate": 0.0014059508947149397,
"loss": 1.1744,
"step": 285500
},
{
"epoch": 14.88,
"learning_rate": 0.0014049105285060342,
"loss": 1.1709,
"step": 286000
},
{
"epoch": 14.9,
"learning_rate": 0.0014038701622971286,
"loss": 1.1727,
"step": 286500
},
{
"epoch": 14.93,
"learning_rate": 0.001402829796088223,
"loss": 1.1733,
"step": 287000
},
{
"epoch": 14.96,
"learning_rate": 0.0014017894298793175,
"loss": 1.1736,
"step": 287500
},
{
"epoch": 14.98,
"learning_rate": 0.001400749063670412,
"loss": 1.1734,
"step": 288000
},
{
"epoch": 15.0,
"eval_loss": 1.1976137161254883,
"eval_runtime": 0.6131,
"eval_samples_per_second": 1631.003,
"eval_steps_per_second": 3.262,
"step": 288360
},
{
"epoch": 15.01,
"learning_rate": 0.0013997086974615065,
"loss": 1.1707,
"step": 288500
},
{
"epoch": 15.03,
"learning_rate": 0.001398668331252601,
"loss": 1.1673,
"step": 289000
},
{
"epoch": 15.06,
"learning_rate": 0.0013976279650436954,
"loss": 1.1696,
"step": 289500
},
{
"epoch": 15.09,
"learning_rate": 0.0013965875988347898,
"loss": 1.169,
"step": 290000
},
{
"epoch": 15.11,
"learning_rate": 0.0013955472326258843,
"loss": 1.1689,
"step": 290500
},
{
"epoch": 15.14,
"learning_rate": 0.001394506866416979,
"loss": 1.1702,
"step": 291000
},
{
"epoch": 15.16,
"learning_rate": 0.0013934665002080732,
"loss": 1.1687,
"step": 291500
},
{
"epoch": 15.19,
"learning_rate": 0.0013924261339991677,
"loss": 1.1688,
"step": 292000
},
{
"epoch": 15.22,
"learning_rate": 0.0013913857677902621,
"loss": 1.1693,
"step": 292500
},
{
"epoch": 15.24,
"learning_rate": 0.0013903454015813566,
"loss": 1.1703,
"step": 293000
},
{
"epoch": 15.27,
"learning_rate": 0.0013893050353724512,
"loss": 1.1719,
"step": 293500
},
{
"epoch": 15.29,
"learning_rate": 0.0013882646691635457,
"loss": 1.1701,
"step": 294000
},
{
"epoch": 15.32,
"learning_rate": 0.0013872243029546402,
"loss": 1.1707,
"step": 294500
},
{
"epoch": 15.35,
"learning_rate": 0.0013861839367457346,
"loss": 1.1708,
"step": 295000
},
{
"epoch": 15.37,
"learning_rate": 0.0013851435705368289,
"loss": 1.1716,
"step": 295500
},
{
"epoch": 15.4,
"learning_rate": 0.0013841032043279233,
"loss": 1.1716,
"step": 296000
},
{
"epoch": 15.42,
"learning_rate": 0.001383062838119018,
"loss": 1.1707,
"step": 296500
},
{
"epoch": 15.45,
"learning_rate": 0.0013820224719101124,
"loss": 1.1708,
"step": 297000
},
{
"epoch": 15.48,
"learning_rate": 0.001380982105701207,
"loss": 1.1691,
"step": 297500
},
{
"epoch": 15.5,
"learning_rate": 0.0013799417394923014,
"loss": 1.1725,
"step": 298000
},
{
"epoch": 15.53,
"learning_rate": 0.0013789013732833958,
"loss": 1.1697,
"step": 298500
},
{
"epoch": 15.55,
"learning_rate": 0.0013778610070744903,
"loss": 1.1715,
"step": 299000
},
{
"epoch": 15.58,
"learning_rate": 0.0013768206408655847,
"loss": 1.1713,
"step": 299500
},
{
"epoch": 15.61,
"learning_rate": 0.0013757802746566792,
"loss": 1.1708,
"step": 300000
},
{
"epoch": 15.63,
"learning_rate": 0.0013747399084477736,
"loss": 1.1705,
"step": 300500
},
{
"epoch": 15.66,
"learning_rate": 0.001373699542238868,
"loss": 1.1712,
"step": 301000
},
{
"epoch": 15.68,
"learning_rate": 0.0013726591760299626,
"loss": 1.1712,
"step": 301500
},
{
"epoch": 15.71,
"learning_rate": 0.0013716188098210572,
"loss": 1.1715,
"step": 302000
},
{
"epoch": 15.74,
"learning_rate": 0.0013705784436121515,
"loss": 1.1709,
"step": 302500
},
{
"epoch": 15.76,
"learning_rate": 0.001369538077403246,
"loss": 1.1722,
"step": 303000
},
{
"epoch": 15.79,
"learning_rate": 0.0013684977111943404,
"loss": 1.1718,
"step": 303500
},
{
"epoch": 15.81,
"learning_rate": 0.0013674573449854348,
"loss": 1.1708,
"step": 304000
},
{
"epoch": 15.84,
"learning_rate": 0.0013664169787765293,
"loss": 1.1723,
"step": 304500
},
{
"epoch": 15.87,
"learning_rate": 0.001365376612567624,
"loss": 1.1702,
"step": 305000
},
{
"epoch": 15.89,
"learning_rate": 0.0013643362463587184,
"loss": 1.1714,
"step": 305500
},
{
"epoch": 15.92,
"learning_rate": 0.0013632958801498127,
"loss": 1.1701,
"step": 306000
},
{
"epoch": 15.94,
"learning_rate": 0.0013622555139409071,
"loss": 1.1709,
"step": 306500
},
{
"epoch": 15.97,
"learning_rate": 0.0013612151477320016,
"loss": 1.1697,
"step": 307000
},
{
"epoch": 16.0,
"learning_rate": 0.0013601747815230963,
"loss": 1.1709,
"step": 307500
},
{
"epoch": 16.0,
"eval_loss": 1.1968414783477783,
"eval_runtime": 0.6109,
"eval_samples_per_second": 1636.861,
"eval_steps_per_second": 3.274,
"step": 307584
}
],
"logging_steps": 500,
"max_steps": 961200,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 1.2378919133169423e+18,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}