{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.10634328358208955,
  "eval_steps": 38,
  "global_step": 114,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009328358208955224,
      "grad_norm": 2.931856870651245,
      "learning_rate": 4e-05,
      "loss": 3.1183,
      "step": 1
    },
    {
      "epoch": 0.0009328358208955224,
      "eval_loss": 0.7989240884780884,
      "eval_runtime": 61.068,
      "eval_samples_per_second": 7.402,
      "eval_steps_per_second": 3.701,
      "step": 1
    },
    {
      "epoch": 0.0018656716417910447,
      "grad_norm": 3.0857083797454834,
      "learning_rate": 8e-05,
      "loss": 3.3967,
      "step": 2
    },
    {
      "epoch": 0.002798507462686567,
      "grad_norm": 2.9495201110839844,
      "learning_rate": 0.00012,
      "loss": 3.4361,
      "step": 3
    },
    {
      "epoch": 0.0037313432835820895,
      "grad_norm": 2.4459776878356934,
      "learning_rate": 0.00016,
      "loss": 3.2275,
      "step": 4
    },
    {
      "epoch": 0.0046641791044776115,
      "grad_norm": 2.410581588745117,
      "learning_rate": 0.0002,
      "loss": 3.0069,
      "step": 5
    },
    {
      "epoch": 0.005597014925373134,
      "grad_norm": 1.99094820022583,
      "learning_rate": 0.00024,
      "loss": 2.6997,
      "step": 6
    },
    {
      "epoch": 0.0065298507462686565,
      "grad_norm": 1.850408911705017,
      "learning_rate": 0.00028,
      "loss": 2.6469,
      "step": 7
    },
    {
      "epoch": 0.007462686567164179,
      "grad_norm": 2.972104072570801,
      "learning_rate": 0.00032,
      "loss": 3.351,
      "step": 8
    },
    {
      "epoch": 0.008395522388059701,
      "grad_norm": 2.8936173915863037,
      "learning_rate": 0.00036,
      "loss": 2.8041,
      "step": 9
    },
    {
      "epoch": 0.009328358208955223,
      "grad_norm": 2.354464530944824,
      "learning_rate": 0.0004,
      "loss": 2.7652,
      "step": 10
    },
    {
      "epoch": 0.010261194029850746,
      "grad_norm": 2.024070978164673,
      "learning_rate": 0.0003999496469885013,
      "loss": 2.8786,
      "step": 11
    },
    {
      "epoch": 0.011194029850746268,
      "grad_norm": 1.8775830268859863,
      "learning_rate": 0.00039979861330826294,
      "loss": 2.0356,
      "step": 12
    },
    {
      "epoch": 0.012126865671641791,
      "grad_norm": 1.7130846977233887,
      "learning_rate": 0.0003995469750092912,
      "loss": 2.6772,
      "step": 13
    },
    {
      "epoch": 0.013059701492537313,
      "grad_norm": 1.9945520162582397,
      "learning_rate": 0.00039919485879904784,
      "loss": 2.8959,
      "step": 14
    },
    {
      "epoch": 0.013992537313432836,
      "grad_norm": 1.6973563432693481,
      "learning_rate": 0.00039874244197864856,
      "loss": 2.3531,
      "step": 15
    },
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 1.5936486721038818,
      "learning_rate": 0.00039818995235358696,
      "loss": 1.9481,
      "step": 16
    },
    {
      "epoch": 0.01585820895522388,
      "grad_norm": 1.828995943069458,
      "learning_rate": 0.00039753766811902755,
      "loss": 3.6787,
      "step": 17
    },
    {
      "epoch": 0.016791044776119403,
      "grad_norm": 1.553421139717102,
      "learning_rate": 0.0003967859177197259,
      "loss": 2.2696,
      "step": 18
    },
    {
      "epoch": 0.017723880597014924,
      "grad_norm": 1.2893372774124146,
      "learning_rate": 0.00039593507968464716,
      "loss": 1.9452,
      "step": 19
    },
    {
      "epoch": 0.018656716417910446,
      "grad_norm": 1.674631953239441,
      "learning_rate": 0.0003949855824363647,
      "loss": 2.2916,
      "step": 20
    },
    {
      "epoch": 0.01958955223880597,
      "grad_norm": 1.4569449424743652,
      "learning_rate": 0.0003939379040753374,
      "loss": 2.1701,
      "step": 21
    },
    {
      "epoch": 0.020522388059701493,
      "grad_norm": 1.438828468322754,
      "learning_rate": 0.00039279257213917066,
      "loss": 2.5621,
      "step": 22
    },
    {
      "epoch": 0.021455223880597014,
      "grad_norm": 1.3770123720169067,
      "learning_rate": 0.0003915501633369861,
      "loss": 2.1366,
      "step": 23
    },
    {
      "epoch": 0.022388059701492536,
      "grad_norm": 1.4712828397750854,
      "learning_rate": 0.00039021130325903074,
      "loss": 2.319,
      "step": 24
    },
    {
      "epoch": 0.02332089552238806,
      "grad_norm": 1.465248465538025,
      "learning_rate": 0.00038877666606167355,
      "loss": 2.3959,
      "step": 25
    },
    {
      "epoch": 0.024253731343283583,
      "grad_norm": 1.4384740591049194,
      "learning_rate": 0.00038724697412794747,
      "loss": 2.097,
      "step": 26
    },
    {
      "epoch": 0.025186567164179104,
      "grad_norm": 1.3539812564849854,
      "learning_rate": 0.0003856229977038078,
      "loss": 2.2313,
      "step": 27
    },
    {
      "epoch": 0.026119402985074626,
      "grad_norm": 1.3618801832199097,
      "learning_rate": 0.0003839055545102902,
      "loss": 2.1054,
      "step": 28
    },
    {
      "epoch": 0.027052238805970148,
      "grad_norm": 1.422633409500122,
      "learning_rate": 0.00038209550933176323,
      "loss": 2.276,
      "step": 29
    },
    {
      "epoch": 0.027985074626865673,
      "grad_norm": 1.4232622385025024,
      "learning_rate": 0.0003801937735804838,
      "loss": 2.1735,
      "step": 30
    },
    {
      "epoch": 0.028917910447761194,
      "grad_norm": 1.4555679559707642,
      "learning_rate": 0.0003782013048376736,
      "loss": 2.2706,
      "step": 31
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 1.2929563522338867,
      "learning_rate": 0.0003761191063713476,
      "loss": 1.9037,
      "step": 32
    },
    {
      "epoch": 0.030783582089552237,
      "grad_norm": 1.2687627077102661,
      "learning_rate": 0.0003739482266311391,
      "loss": 2.1032,
      "step": 33
    },
    {
      "epoch": 0.03171641791044776,
      "grad_norm": 1.2993357181549072,
      "learning_rate": 0.00037168975872037323,
      "loss": 2.0062,
      "step": 34
    },
    {
      "epoch": 0.03264925373134328,
      "grad_norm": 1.3507018089294434,
      "learning_rate": 0.00036934483984565685,
      "loss": 2.1522,
      "step": 35
    },
    {
      "epoch": 0.033582089552238806,
      "grad_norm": 1.4183921813964844,
      "learning_rate": 0.00036691465074426054,
      "loss": 1.845,
      "step": 36
    },
    {
      "epoch": 0.03451492537313433,
      "grad_norm": 1.3370906114578247,
      "learning_rate": 0.00036440041508958203,
      "loss": 1.9448,
      "step": 37
    },
    {
      "epoch": 0.03544776119402985,
      "grad_norm": 1.4249347448349,
      "learning_rate": 0.0003618033988749895,
      "loss": 2.4594,
      "step": 38
    },
    {
      "epoch": 0.03544776119402985,
      "eval_loss": 0.5035107135772705,
      "eval_runtime": 60.2427,
      "eval_samples_per_second": 7.503,
      "eval_steps_per_second": 3.751,
      "step": 38
    },
    {
      "epoch": 0.036380597014925374,
      "grad_norm": 1.4865802526474,
      "learning_rate": 0.00035912490977635625,
      "loss": 2.2656,
      "step": 39
    },
    {
      "epoch": 0.03731343283582089,
      "grad_norm": 1.3488837480545044,
      "learning_rate": 0.000356366296493606,
      "loss": 1.9688,
      "step": 40
    },
    {
      "epoch": 0.03824626865671642,
      "grad_norm": 1.4332904815673828,
      "learning_rate": 0.0003535289480716022,
      "loss": 2.2615,
      "step": 41
    },
    {
      "epoch": 0.03917910447761194,
      "grad_norm": 1.516402006149292,
      "learning_rate": 0.00035061429320072223,
      "loss": 2.059,
      "step": 42
    },
    {
      "epoch": 0.04011194029850746,
      "grad_norm": 1.4039419889450073,
      "learning_rate": 0.00034762379949746815,
      "loss": 2.1295,
      "step": 43
    },
    {
      "epoch": 0.041044776119402986,
      "grad_norm": 1.2742645740509033,
      "learning_rate": 0.0003445589727654783,
      "loss": 2.1964,
      "step": 44
    },
    {
      "epoch": 0.04197761194029851,
      "grad_norm": 1.115033507347107,
      "learning_rate": 0.0003414213562373095,
      "loss": 1.8306,
      "step": 45
    },
    {
      "epoch": 0.04291044776119403,
      "grad_norm": 1.1708894968032837,
      "learning_rate": 0.00033821252979737297,
      "loss": 1.9652,
      "step": 46
    },
    {
      "epoch": 0.043843283582089554,
      "grad_norm": 1.185393214225769,
      "learning_rate": 0.0003349341091864149,
      "loss": 1.8172,
      "step": 47
    },
    {
      "epoch": 0.04477611940298507,
      "grad_norm": 1.1099275350570679,
      "learning_rate": 0.00033158774518794254,
      "loss": 1.7576,
      "step": 48
    },
    {
      "epoch": 0.0457089552238806,
      "grad_norm": 1.0703908205032349,
      "learning_rate": 0.0003281751227970048,
      "loss": 1.9036,
      "step": 49
    },
    {
      "epoch": 0.04664179104477612,
      "grad_norm": 1.1421232223510742,
      "learning_rate": 0.00032469796037174674,
      "loss": 1.9759,
      "step": 50
    },
    {
      "epoch": 0.04757462686567164,
      "grad_norm": 1.1319009065628052,
      "learning_rate": 0.000321158008768164,
      "loss": 1.7476,
      "step": 51
    },
    {
      "epoch": 0.048507462686567165,
      "grad_norm": 1.1899311542510986,
      "learning_rate": 0.00031755705045849464,
      "loss": 1.8308,
      "step": 52
    },
    {
      "epoch": 0.049440298507462684,
      "grad_norm": 1.246800184249878,
      "learning_rate": 0.0003138968986336904,
      "loss": 1.9183,
      "step": 53
    },
    {
      "epoch": 0.05037313432835821,
      "grad_norm": 1.207138180732727,
      "learning_rate": 0.0003101793962904205,
      "loss": 1.8768,
      "step": 54
    },
    {
      "epoch": 0.051305970149253734,
      "grad_norm": 0.9686072468757629,
      "learning_rate": 0.00030640641530306733,
      "loss": 1.6107,
      "step": 55
    },
    {
      "epoch": 0.05223880597014925,
      "grad_norm": 1.1268424987792969,
      "learning_rate": 0.00030257985548118126,
      "loss": 1.9169,
      "step": 56
    },
    {
      "epoch": 0.05317164179104478,
      "grad_norm": 1.3243463039398193,
      "learning_rate": 0.0002987016436128694,
      "loss": 2.1651,
      "step": 57
    },
    {
      "epoch": 0.054104477611940295,
      "grad_norm": 1.230141043663025,
      "learning_rate": 0.0002947737324945997,
      "loss": 2.0871,
      "step": 58
    },
    {
      "epoch": 0.05503731343283582,
      "grad_norm": 1.0468418598175049,
      "learning_rate": 0.00029079809994790937,
      "loss": 1.7348,
      "step": 59
    },
    {
      "epoch": 0.055970149253731345,
      "grad_norm": 1.3394778966903687,
      "learning_rate": 0.00028677674782351165,
      "loss": 2.3597,
      "step": 60
    },
    {
      "epoch": 0.05690298507462686,
      "grad_norm": 1.2126855850219727,
      "learning_rate": 0.00028271170099330415,
      "loss": 2.0063,
      "step": 61
    },
    {
      "epoch": 0.05783582089552239,
      "grad_norm": 1.2102502584457397,
      "learning_rate": 0.00027860500633078477,
      "loss": 2.0048,
      "step": 62
    },
    {
      "epoch": 0.058768656716417914,
      "grad_norm": 1.2987444400787354,
      "learning_rate": 0.00027445873168038907,
      "loss": 2.6436,
      "step": 63
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 1.1055057048797607,
      "learning_rate": 0.0002702749648162686,
      "loss": 1.8015,
      "step": 64
    },
    {
      "epoch": 0.06063432835820896,
      "grad_norm": 1.1894230842590332,
      "learning_rate": 0.00026605581239103347,
      "loss": 2.0075,
      "step": 65
    },
    {
      "epoch": 0.061567164179104475,
      "grad_norm": 0.9544552564620972,
      "learning_rate": 0.00026180339887498953,
      "loss": 1.5043,
      "step": 66
    },
    {
      "epoch": 0.0625,
      "grad_norm": 1.2134445905685425,
      "learning_rate": 0.00025751986548640346,
      "loss": 1.9537,
      "step": 67
    },
    {
      "epoch": 0.06343283582089553,
      "grad_norm": 1.061187505722046,
      "learning_rate": 0.00025320736911333503,
      "loss": 1.6231,
      "step": 68
    },
    {
      "epoch": 0.06436567164179105,
      "grad_norm": 1.1641594171524048,
      "learning_rate": 0.0002488680812275788,
      "loss": 1.6016,
      "step": 69
    },
    {
      "epoch": 0.06529850746268656,
      "grad_norm": 1.1650375127792358,
      "learning_rate": 0.0002445041867912629,
      "loss": 1.8811,
      "step": 70
    },
    {
      "epoch": 0.06623134328358209,
      "grad_norm": 1.319548487663269,
      "learning_rate": 0.00024011788315665458,
      "loss": 1.7969,
      "step": 71
    },
    {
      "epoch": 0.06716417910447761,
      "grad_norm": 1.009516954421997,
      "learning_rate": 0.00023571137895972733,
      "loss": 1.4261,
      "step": 72
    },
    {
      "epoch": 0.06809701492537314,
      "grad_norm": 1.1219674348831177,
      "learning_rate": 0.0002312868930080462,
      "loss": 1.5247,
      "step": 73
    },
    {
      "epoch": 0.06902985074626866,
      "grad_norm": 1.0498907566070557,
      "learning_rate": 0.0002268466531635311,
      "loss": 1.7131,
      "step": 74
    },
    {
      "epoch": 0.06996268656716417,
      "grad_norm": 1.0986140966415405,
      "learning_rate": 0.00022239289522066157,
      "loss": 1.7584,
      "step": 75
    },
    {
      "epoch": 0.0708955223880597,
      "grad_norm": 1.2214365005493164,
      "learning_rate": 0.00021792786178068672,
      "loss": 1.7782,
      "step": 76
    },
    {
      "epoch": 0.0708955223880597,
      "eval_loss": 0.47117629647254944,
      "eval_runtime": 60.2201,
      "eval_samples_per_second": 7.506,
      "eval_steps_per_second": 3.753,
      "step": 76
    },
    {
      "epoch": 0.07182835820895522,
      "grad_norm": 1.2228177785873413,
      "learning_rate": 0.00021345380112240797,
      "loss": 2.1149,
      "step": 77
    },
    {
      "epoch": 0.07276119402985075,
      "grad_norm": 1.0541841983795166,
      "learning_rate": 0.00020897296607010301,
      "loss": 1.6306,
      "step": 78
    },
    {
      "epoch": 0.07369402985074627,
      "grad_norm": 1.1272865533828735,
      "learning_rate": 0.00020448761285916104,
      "loss": 1.9262,
      "step": 79
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 1.1912484169006348,
      "learning_rate": 0.0002,
      "loss": 2.0926,
      "step": 80
    },
    {
      "epoch": 0.07555970149253731,
      "grad_norm": 1.2342007160186768,
      "learning_rate": 0.00019551238714083903,
      "loss": 2.3557,
      "step": 81
    },
    {
      "epoch": 0.07649253731343283,
      "grad_norm": 1.068264365196228,
      "learning_rate": 0.00019102703392989709,
      "loss": 1.9768,
      "step": 82
    },
    {
      "epoch": 0.07742537313432836,
      "grad_norm": 1.035170555114746,
      "learning_rate": 0.00018654619887759207,
      "loss": 1.6881,
      "step": 83
    },
    {
      "epoch": 0.07835820895522388,
      "grad_norm": 1.2881474494934082,
      "learning_rate": 0.00018207213821931333,
      "loss": 2.1289,
      "step": 84
    },
    {
      "epoch": 0.07929104477611941,
      "grad_norm": 1.1358542442321777,
      "learning_rate": 0.00017760710477933845,
      "loss": 2.2829,
      "step": 85
    },
    {
      "epoch": 0.08022388059701492,
      "grad_norm": 1.2520966529846191,
      "learning_rate": 0.00017315334683646897,
      "loss": 1.552,
      "step": 86
    },
    {
      "epoch": 0.08115671641791045,
      "grad_norm": 0.9834119081497192,
      "learning_rate": 0.00016871310699195379,
      "loss": 1.5413,
      "step": 87
    },
    {
      "epoch": 0.08208955223880597,
      "grad_norm": 1.043672800064087,
      "learning_rate": 0.00016428862104027268,
      "loss": 1.7337,
      "step": 88
    },
    {
      "epoch": 0.0830223880597015,
      "grad_norm": 1.0788074731826782,
      "learning_rate": 0.00015988211684334546,
      "loss": 1.3079,
      "step": 89
    },
    {
      "epoch": 0.08395522388059702,
      "grad_norm": 1.0924961566925049,
      "learning_rate": 0.00015549581320873715,
      "loss": 1.7402,
      "step": 90
    },
    {
      "epoch": 0.08488805970149253,
      "grad_norm": 1.1585627794265747,
      "learning_rate": 0.00015113191877242117,
      "loss": 1.7314,
      "step": 91
    },
    {
      "epoch": 0.08582089552238806,
      "grad_norm": 1.0373111963272095,
      "learning_rate": 0.00014679263088666499,
      "loss": 1.7103,
      "step": 92
    },
    {
      "epoch": 0.08675373134328358,
      "grad_norm": 1.178009271621704,
      "learning_rate": 0.00014248013451359656,
      "loss": 1.8889,
      "step": 93
    },
    {
      "epoch": 0.08768656716417911,
      "grad_norm": 1.110259771347046,
      "learning_rate": 0.00013819660112501054,
      "loss": 1.685,
      "step": 94
    },
    {
      "epoch": 0.08861940298507463,
      "grad_norm": 1.0290873050689697,
      "learning_rate": 0.00013394418760896666,
      "loss": 1.6533,
      "step": 95
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 1.0740941762924194,
      "learning_rate": 0.00012972503518373144,
      "loss": 1.8436,
      "step": 96
    },
    {
      "epoch": 0.09048507462686567,
      "grad_norm": 1.1851738691329956,
      "learning_rate": 0.00012554126831961098,
      "loss": 1.8081,
      "step": 97
    },
    {
      "epoch": 0.0914179104477612,
      "grad_norm": 1.0965279340744019,
      "learning_rate": 0.0001213949936692153,
      "loss": 1.8133,
      "step": 98
    },
    {
      "epoch": 0.09235074626865672,
      "grad_norm": 1.0990545749664307,
      "learning_rate": 0.00011728829900669591,
      "loss": 1.7193,
      "step": 99
    },
    {
      "epoch": 0.09328358208955224,
      "grad_norm": 1.0958396196365356,
      "learning_rate": 0.00011322325217648839,
      "loss": 1.3802,
      "step": 100
    },
    {
      "epoch": 0.09421641791044776,
      "grad_norm": 1.0536209344863892,
      "learning_rate": 0.00010920190005209065,
      "loss": 1.8329,
      "step": 101
    },
    {
      "epoch": 0.09514925373134328,
      "grad_norm": 1.0651001930236816,
      "learning_rate": 0.00010522626750540028,
      "loss": 1.6494,
      "step": 102
    },
    {
      "epoch": 0.0960820895522388,
      "grad_norm": 1.1998820304870605,
      "learning_rate": 0.00010129835638713063,
      "loss": 2.0276,
      "step": 103
    },
    {
      "epoch": 0.09701492537313433,
      "grad_norm": 0.9894289374351501,
      "learning_rate": 9.74201445188188e-05,
      "loss": 1.5171,
      "step": 104
    },
    {
      "epoch": 0.09794776119402986,
      "grad_norm": 1.2795695066452026,
      "learning_rate": 9.359358469693271e-05,
      "loss": 1.7987,
      "step": 105
    },
    {
      "epoch": 0.09888059701492537,
      "grad_norm": 1.0911823511123657,
      "learning_rate": 8.982060370957952e-05,
      "loss": 1.6949,
      "step": 106
    },
    {
      "epoch": 0.09981343283582089,
      "grad_norm": 0.9669733047485352,
      "learning_rate": 8.610310136630962e-05,
      "loss": 1.405,
      "step": 107
    },
    {
      "epoch": 0.10074626865671642,
      "grad_norm": 1.1521021127700806,
      "learning_rate": 8.24429495415054e-05,
      "loss": 1.8475,
      "step": 108
    },
    {
      "epoch": 0.10167910447761194,
      "grad_norm": 1.085671305656433,
      "learning_rate": 7.884199123183605e-05,
      "loss": 1.5308,
      "step": 109
    },
    {
      "epoch": 0.10261194029850747,
      "grad_norm": 1.141276240348816,
      "learning_rate": 7.530203962825331e-05,
      "loss": 1.8675,
      "step": 110
    },
    {
      "epoch": 0.10354477611940298,
      "grad_norm": 1.0789172649383545,
      "learning_rate": 7.182487720299517e-05,
      "loss": 1.7724,
      "step": 111
    },
    {
      "epoch": 0.1044776119402985,
      "grad_norm": 1.0939713716506958,
      "learning_rate": 6.841225481205749e-05,
      "loss": 1.5574,
      "step": 112
    },
    {
      "epoch": 0.10541044776119403,
      "grad_norm": 1.2875022888183594,
      "learning_rate": 6.506589081358514e-05,
      "loss": 1.7821,
      "step": 113
    },
    {
      "epoch": 0.10634328358208955,
      "grad_norm": 1.1060545444488525,
      "learning_rate": 6.178747020262707e-05,
      "loss": 1.543,
      "step": 114
    },
    {
      "epoch": 0.10634328358208955,
      "eval_loss": 0.44875675439834595,
      "eval_runtime": 61.3854,
      "eval_samples_per_second": 7.363,
      "eval_steps_per_second": 3.682,
      "step": 114
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 38,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.494599973470208e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}