|
{ |
|
"best_metric": 1.2696720361709595, |
|
"best_model_checkpoint": "/home/datta0/models/lora_final/Meta-Llama-3-8B_magiccoder_default/checkpoint-152", |
|
"epoch": 0.9846153846153847, |
|
"eval_steps": 4, |
|
"global_step": 152, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006477732793522267, |
|
"grad_norm": 7.975759983062744, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3889, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012955465587044534, |
|
"grad_norm": 8.645442008972168, |
|
"learning_rate": 0.00015, |
|
"loss": 1.5051, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"grad_norm": 8.133455276489258, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2592, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"eval_loss": 1.4263103008270264, |
|
"eval_runtime": 26.5522, |
|
"eval_samples_per_second": 18.643, |
|
"eval_steps_per_second": 2.335, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.038866396761133605, |
|
"grad_norm": 15.554954528808594, |
|
"learning_rate": 0.00029986842451482874, |
|
"loss": 1.4659, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"grad_norm": 11.2620849609375, |
|
"learning_rate": 0.0002994739288874256, |
|
"loss": 1.4281, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"eval_loss": 1.4063184261322021, |
|
"eval_runtime": 26.5276, |
|
"eval_samples_per_second": 18.66, |
|
"eval_steps_per_second": 2.337, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06477732793522267, |
|
"grad_norm": 7.065455913543701, |
|
"learning_rate": 0.0002988172051971717, |
|
"loss": 1.3404, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"grad_norm": 9.748723030090332, |
|
"learning_rate": 0.0002978994055605757, |
|
"loss": 1.3795, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"eval_loss": 1.382373332977295, |
|
"eval_runtime": 26.5171, |
|
"eval_samples_per_second": 18.667, |
|
"eval_steps_per_second": 2.338, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09068825910931175, |
|
"grad_norm": 12.217620849609375, |
|
"learning_rate": 0.0002967221401100708, |
|
"loss": 1.4444, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"grad_norm": 8.278894424438477, |
|
"learning_rate": 0.00029528747416929463, |
|
"loss": 1.3751, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"eval_loss": 1.3937216997146606, |
|
"eval_runtime": 26.4559, |
|
"eval_samples_per_second": 18.71, |
|
"eval_steps_per_second": 2.344, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11659919028340081, |
|
"grad_norm": 14.924971580505371, |
|
"learning_rate": 0.00029359792462981004, |
|
"loss": 1.3393, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"grad_norm": 7.198514461517334, |
|
"learning_rate": 0.00029165645553562214, |
|
"loss": 1.4053, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"eval_loss": 1.3523073196411133, |
|
"eval_runtime": 26.4173, |
|
"eval_samples_per_second": 18.738, |
|
"eval_steps_per_second": 2.347, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14251012145748987, |
|
"grad_norm": 6.342569828033447, |
|
"learning_rate": 0.00028946647288323766, |
|
"loss": 1.3715, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"grad_norm": 5.877941131591797, |
|
"learning_rate": 0.0002870318186463901, |
|
"loss": 1.2927, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"eval_loss": 1.3473957777023315, |
|
"eval_runtime": 26.2462, |
|
"eval_samples_per_second": 18.86, |
|
"eval_steps_per_second": 2.362, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 4.236348628997803, |
|
"learning_rate": 0.0002843567640359119, |
|
"loss": 1.2866, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"grad_norm": 8.370125770568848, |
|
"learning_rate": 0.0002814460020065795, |
|
"loss": 1.3619, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"eval_loss": 1.3529084920883179, |
|
"eval_runtime": 58.5577, |
|
"eval_samples_per_second": 8.453, |
|
"eval_steps_per_second": 1.059, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19433198380566802, |
|
"grad_norm": 5.792184352874756, |
|
"learning_rate": 0.000278304639024076, |
|
"loss": 1.3633, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"grad_norm": 12.429760932922363, |
|
"learning_rate": 0.00027493818610651487, |
|
"loss": 1.3533, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"eval_loss": 1.3628934621810913, |
|
"eval_runtime": 58.1834, |
|
"eval_samples_per_second": 8.508, |
|
"eval_steps_per_second": 1.066, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2202429149797571, |
|
"grad_norm": 8.379545211791992, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 1.4074, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"grad_norm": 10.356365203857422, |
|
"learning_rate": 0.00026755401859887595, |
|
"loss": 1.3627, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"eval_loss": 1.3635836839675903, |
|
"eval_runtime": 58.1828, |
|
"eval_samples_per_second": 8.508, |
|
"eval_steps_per_second": 1.066, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 7.12795352935791, |
|
"learning_rate": 0.00026354925834776345, |
|
"loss": 1.2767, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"grad_norm": 7.771018981933594, |
|
"learning_rate": 0.0002593452941132117, |
|
"loss": 1.4408, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"eval_loss": 1.3530882596969604, |
|
"eval_runtime": 58.4466, |
|
"eval_samples_per_second": 8.469, |
|
"eval_steps_per_second": 1.061, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2720647773279352, |
|
"grad_norm": 4.957456588745117, |
|
"learning_rate": 0.0002549495010770048, |
|
"loss": 1.3767, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"grad_norm": 5.10299825668335, |
|
"learning_rate": 0.0002503695909538287, |
|
"loss": 1.3744, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"eval_loss": 1.3395272493362427, |
|
"eval_runtime": 58.5125, |
|
"eval_samples_per_second": 8.46, |
|
"eval_steps_per_second": 1.06, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2979757085020243, |
|
"grad_norm": 5.048585891723633, |
|
"learning_rate": 0.0002456135984623034, |
|
"loss": 1.3295, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"grad_norm": 3.550816059112549, |
|
"learning_rate": 0.00024068986722935624, |
|
"loss": 1.2658, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"eval_loss": 1.3364324569702148, |
|
"eval_runtime": 58.5182, |
|
"eval_samples_per_second": 8.459, |
|
"eval_steps_per_second": 1.06, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.32388663967611336, |
|
"grad_norm": 9.427572250366211, |
|
"learning_rate": 0.00023560703515266478, |
|
"loss": 1.383, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 6.952998638153076, |
|
"learning_rate": 0.00023037401924684946, |
|
"loss": 1.3364, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"eval_loss": 1.340024709701538, |
|
"eval_runtime": 58.6238, |
|
"eval_samples_per_second": 8.444, |
|
"eval_steps_per_second": 1.058, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3497975708502024, |
|
"grad_norm": 3.620708465576172, |
|
"learning_rate": 0.000225, |
|
"loss": 1.299, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"grad_norm": 5.74739408493042, |
|
"learning_rate": 0.00021949440526797926, |
|
"loss": 1.3765, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"eval_loss": 1.3390815258026123, |
|
"eval_runtime": 58.1389, |
|
"eval_samples_per_second": 8.514, |
|
"eval_steps_per_second": 1.066, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3757085020242915, |
|
"grad_norm": 3.6690967082977295, |
|
"learning_rate": 0.00021386689373476087, |
|
"loss": 1.2833, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"grad_norm": 4.156944274902344, |
|
"learning_rate": 0.00020812733796781542, |
|
"loss": 1.3427, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"eval_loss": 1.3370171785354614, |
|
"eval_runtime": 58.6352, |
|
"eval_samples_per_second": 8.442, |
|
"eval_steps_per_second": 1.057, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40161943319838056, |
|
"grad_norm": 4.592350959777832, |
|
"learning_rate": 0.00020228580709827227, |
|
"loss": 1.3145, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"grad_norm": 3.865915060043335, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 1.3975, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"eval_loss": 1.332935094833374, |
|
"eval_runtime": 58.8373, |
|
"eval_samples_per_second": 8.413, |
|
"eval_steps_per_second": 1.054, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.42753036437246966, |
|
"grad_norm": 3.824434757232666, |
|
"learning_rate": 0.00019033797309228983, |
|
"loss": 1.3048, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"grad_norm": 4.484032154083252, |
|
"learning_rate": 0.00018425263051659836, |
|
"loss": 1.2595, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"eval_loss": 1.3324816226959229, |
|
"eval_runtime": 26.5744, |
|
"eval_samples_per_second": 18.627, |
|
"eval_steps_per_second": 2.333, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4534412955465587, |
|
"grad_norm": 5.382876873016357, |
|
"learning_rate": 0.0001781071971878587, |
|
"loss": 1.3215, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"grad_norm": 5.109270095825195, |
|
"learning_rate": 0.00017191245428436173, |
|
"loss": 1.3291, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"eval_loss": 1.3312028646469116, |
|
"eval_runtime": 26.5491, |
|
"eval_samples_per_second": 18.645, |
|
"eval_steps_per_second": 2.335, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.47935222672064776, |
|
"grad_norm": 3.7885613441467285, |
|
"learning_rate": 0.000165679269490148, |
|
"loss": 1.2995, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 4.146183967590332, |
|
"learning_rate": 0.000159418577929397, |
|
"loss": 1.2702, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"eval_loss": 1.3323029279708862, |
|
"eval_runtime": 26.5527, |
|
"eval_samples_per_second": 18.642, |
|
"eval_steps_per_second": 2.335, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 4.090492248535156, |
|
"learning_rate": 0.00015314136298250354, |
|
"loss": 1.3247, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"grad_norm": 4.541670322418213, |
|
"learning_rate": 0.00014685863701749646, |
|
"loss": 1.3527, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"eval_loss": 1.321340799331665, |
|
"eval_runtime": 26.4896, |
|
"eval_samples_per_second": 18.687, |
|
"eval_steps_per_second": 2.341, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5311740890688259, |
|
"grad_norm": 3.778151750564575, |
|
"learning_rate": 0.000140581422070603, |
|
"loss": 1.3419, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"grad_norm": 3.073720693588257, |
|
"learning_rate": 0.000134320730509852, |
|
"loss": 1.2799, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"eval_loss": 1.3154258728027344, |
|
"eval_runtime": 26.4775, |
|
"eval_samples_per_second": 18.695, |
|
"eval_steps_per_second": 2.342, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.557085020242915, |
|
"grad_norm": 3.3625338077545166, |
|
"learning_rate": 0.00012808754571563827, |
|
"loss": 1.2982, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"grad_norm": 3.183030605316162, |
|
"learning_rate": 0.00012189280281214126, |
|
"loss": 1.3082, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"eval_loss": 1.30991792678833, |
|
"eval_runtime": 26.3464, |
|
"eval_samples_per_second": 18.788, |
|
"eval_steps_per_second": 2.353, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.582995951417004, |
|
"grad_norm": 3.021327257156372, |
|
"learning_rate": 0.00011574736948340163, |
|
"loss": 1.2957, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"grad_norm": 3.598100423812866, |
|
"learning_rate": 0.00010966202690771014, |
|
"loss": 1.4042, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"eval_loss": 1.3089451789855957, |
|
"eval_runtime": 26.2013, |
|
"eval_samples_per_second": 18.892, |
|
"eval_steps_per_second": 2.366, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6089068825910932, |
|
"grad_norm": 3.076206922531128, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 1.2443, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"grad_norm": 3.216519832611084, |
|
"learning_rate": 9.771419290172773e-05, |
|
"loss": 1.2221, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"eval_loss": 1.3048464059829712, |
|
"eval_runtime": 58.5931, |
|
"eval_samples_per_second": 8.448, |
|
"eval_steps_per_second": 1.058, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6348178137651822, |
|
"grad_norm": 3.831418514251709, |
|
"learning_rate": 9.187266203218456e-05, |
|
"loss": 1.3533, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"grad_norm": 3.0627291202545166, |
|
"learning_rate": 8.613310626523909e-05, |
|
"loss": 1.3079, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"eval_loss": 1.3017206192016602, |
|
"eval_runtime": 58.4051, |
|
"eval_samples_per_second": 8.475, |
|
"eval_steps_per_second": 1.062, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6607287449392713, |
|
"grad_norm": 3.7238402366638184, |
|
"learning_rate": 8.050559473202077e-05, |
|
"loss": 1.3298, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"grad_norm": 3.6340792179107666, |
|
"learning_rate": 7.500000000000002e-05, |
|
"loss": 1.2165, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"eval_loss": 1.2970125675201416, |
|
"eval_runtime": 58.3679, |
|
"eval_samples_per_second": 8.481, |
|
"eval_steps_per_second": 1.062, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6866396761133603, |
|
"grad_norm": 3.414825677871704, |
|
"learning_rate": 6.962598075315046e-05, |
|
"loss": 1.3263, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"grad_norm": 3.9352622032165527, |
|
"learning_rate": 6.439296484733525e-05, |
|
"loss": 1.239, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"eval_loss": 1.2941410541534424, |
|
"eval_runtime": 58.4064, |
|
"eval_samples_per_second": 8.475, |
|
"eval_steps_per_second": 1.062, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7125506072874493, |
|
"grad_norm": 2.6827166080474854, |
|
"learning_rate": 5.931013277064377e-05, |
|
"loss": 1.2469, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"grad_norm": 3.790194034576416, |
|
"learning_rate": 5.4386401537696536e-05, |
|
"loss": 1.2528, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"eval_loss": 1.2877105474472046, |
|
"eval_runtime": 58.6006, |
|
"eval_samples_per_second": 8.447, |
|
"eval_steps_per_second": 1.058, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 3.4227898120880127, |
|
"learning_rate": 4.963040904617131e-05, |
|
"loss": 1.2605, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"grad_norm": 3.921224355697632, |
|
"learning_rate": 4.5050498922995166e-05, |
|
"loss": 1.2932, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"eval_loss": 1.2858980894088745, |
|
"eval_runtime": 58.8103, |
|
"eval_samples_per_second": 8.417, |
|
"eval_steps_per_second": 1.054, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7643724696356275, |
|
"grad_norm": 3.2218077182769775, |
|
"learning_rate": 4.06547058867883e-05, |
|
"loss": 1.2063, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"grad_norm": 2.803480863571167, |
|
"learning_rate": 3.645074165223655e-05, |
|
"loss": 1.2762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"eval_loss": 1.2803733348846436, |
|
"eval_runtime": 58.4885, |
|
"eval_samples_per_second": 8.463, |
|
"eval_steps_per_second": 1.06, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7902834008097166, |
|
"grad_norm": 2.5244181156158447, |
|
"learning_rate": 3.2445981401124035e-05, |
|
"loss": 1.2421, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"grad_norm": 3.004887819290161, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 1.2914, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"eval_loss": 1.2790910005569458, |
|
"eval_runtime": 58.6109, |
|
"eval_samples_per_second": 8.446, |
|
"eval_steps_per_second": 1.058, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8161943319838056, |
|
"grad_norm": 3.6300973892211914, |
|
"learning_rate": 2.5061813893485085e-05, |
|
"loss": 1.3242, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"grad_norm": 2.6950979232788086, |
|
"learning_rate": 2.169536097592401e-05, |
|
"loss": 1.2835, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"eval_loss": 1.2755361795425415, |
|
"eval_runtime": 58.9713, |
|
"eval_samples_per_second": 8.394, |
|
"eval_steps_per_second": 1.051, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 3.0187506675720215, |
|
"learning_rate": 1.8553997993420495e-05, |
|
"loss": 1.2856, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"grad_norm": 2.8952746391296387, |
|
"learning_rate": 1.5643235964088064e-05, |
|
"loss": 1.2735, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"eval_loss": 1.2730661630630493, |
|
"eval_runtime": 58.4909, |
|
"eval_samples_per_second": 8.463, |
|
"eval_steps_per_second": 1.06, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8680161943319838, |
|
"grad_norm": 2.607783555984497, |
|
"learning_rate": 1.2968181353609852e-05, |
|
"loss": 1.265, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"grad_norm": 3.7643980979919434, |
|
"learning_rate": 1.0533527116762296e-05, |
|
"loss": 1.2264, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"eval_loss": 1.272207498550415, |
|
"eval_runtime": 26.5562, |
|
"eval_samples_per_second": 18.64, |
|
"eval_steps_per_second": 2.335, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8939271255060729, |
|
"grad_norm": 2.4194986820220947, |
|
"learning_rate": 8.343544464377849e-06, |
|
"loss": 1.2562, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"grad_norm": 2.965425491333008, |
|
"learning_rate": 6.402075370189913e-06, |
|
"loss": 1.2637, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"eval_loss": 1.271282434463501, |
|
"eval_runtime": 26.5644, |
|
"eval_samples_per_second": 18.634, |
|
"eval_steps_per_second": 2.334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9198380566801619, |
|
"grad_norm": 2.7396891117095947, |
|
"learning_rate": 4.712525830705338e-06, |
|
"loss": 1.2302, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"grad_norm": 2.365996837615967, |
|
"learning_rate": 3.2778598899291465e-06, |
|
"loss": 1.2133, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"eval_loss": 1.2704302072525024, |
|
"eval_runtime": 26.54, |
|
"eval_samples_per_second": 18.651, |
|
"eval_steps_per_second": 2.336, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.945748987854251, |
|
"grad_norm": 2.8826277256011963, |
|
"learning_rate": 2.100594439424269e-06, |
|
"loss": 1.2561, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"grad_norm": 2.938122034072876, |
|
"learning_rate": 1.1827948028283352e-06, |
|
"loss": 1.2379, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"eval_loss": 1.2698720693588257, |
|
"eval_runtime": 26.5066, |
|
"eval_samples_per_second": 18.675, |
|
"eval_steps_per_second": 2.339, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.97165991902834, |
|
"grad_norm": 3.354945659637451, |
|
"learning_rate": 5.260711125743444e-07, |
|
"loss": 1.216, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"grad_norm": 2.604530096054077, |
|
"learning_rate": 1.315754851712425e-07, |
|
"loss": 1.2131, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"eval_loss": 1.2696720361709595, |
|
"eval_runtime": 26.4168, |
|
"eval_samples_per_second": 18.738, |
|
"eval_steps_per_second": 2.347, |
|
"step": 152 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 154, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.140047273385001e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|