{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 18720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.516245487364621, "eval_loss": 0.37735894322395325, "eval_runtime": 4.3147, "eval_samples_per_second": 64.199, "eval_steps_per_second": 8.112, "step": 312 }, { "best_epoch": 0, "best_eval_accuracy": 0.516245487364621, "epoch": 1.0, "step": 312 }, { "epoch": 1.6, "learning_rate": 0.003893162393162393, "loss": 0.5343, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5018050541516246, "eval_loss": 0.35060495138168335, "eval_runtime": 4.3354, "eval_samples_per_second": 63.893, "eval_steps_per_second": 8.073, "step": 624 }, { "best_epoch": 0, "best_eval_accuracy": 0.516245487364621, "epoch": 2.0, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.45752400159835815, "eval_runtime": 4.3375, "eval_samples_per_second": 63.862, "eval_steps_per_second": 8.069, "step": 936 }, { "best_epoch": 0, "best_eval_accuracy": 0.516245487364621, "epoch": 3.0, "step": 936 }, { "epoch": 3.21, "learning_rate": 0.0037863247863247863, "loss": 0.4659, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5306859205776173, "eval_loss": 0.3758547008037567, "eval_runtime": 4.3393, "eval_samples_per_second": 63.836, "eval_steps_per_second": 8.066, "step": 1248 }, { "best_epoch": 3, "best_eval_accuracy": 0.5306859205776173, "epoch": 4.0, "step": 1248 }, { "epoch": 4.81, "learning_rate": 0.0036794871794871794, "loss": 0.4691, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5812274368231047, "eval_loss": 0.3500436246395111, "eval_runtime": 4.3557, "eval_samples_per_second": 63.595, "eval_steps_per_second": 8.035, "step": 1560 }, { "best_epoch": 4, "best_eval_accuracy": 0.5812274368231047, "epoch": 5.0, "step": 1560 }, { "epoch": 6.0, "eval_accuracy": 0.5992779783393501, "eval_loss": 0.345672607421875, "eval_runtime": 4.342, "eval_samples_per_second": 63.796, "eval_steps_per_second": 8.061, "step": 1872 }, { "best_epoch": 5, "best_eval_accuracy": 0.5992779783393501, "epoch": 6.0, "step": 1872 }, { "epoch": 6.41, "learning_rate": 0.003572649572649573, "loss": 0.4442, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.6101083032490975, "eval_loss": 0.34997570514678955, "eval_runtime": 4.338, "eval_samples_per_second": 63.854, "eval_steps_per_second": 8.068, "step": 2184 }, { "best_epoch": 6, "best_eval_accuracy": 0.6101083032490975, "epoch": 7.0, "step": 2184 }, { "epoch": 8.0, "eval_accuracy": 0.6173285198555957, "eval_loss": 0.3403359055519104, "eval_runtime": 4.3408, "eval_samples_per_second": 63.814, "eval_steps_per_second": 8.063, "step": 2496 }, { "best_epoch": 7, "best_eval_accuracy": 0.6173285198555957, "epoch": 8.0, "step": 2496 }, { "epoch": 8.01, "learning_rate": 0.003465811965811966, "loss": 0.4366, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5776173285198556, "eval_loss": 0.38398295640945435, "eval_runtime": 4.3382, "eval_samples_per_second": 63.852, "eval_steps_per_second": 8.068, "step": 2808 }, { "best_epoch": 7, "best_eval_accuracy": 0.6173285198555957, "epoch": 9.0, "step": 2808 }, { "epoch": 9.62, "learning_rate": 0.003358974358974359, "loss": 0.4097, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.5487364620938628, "eval_loss": 0.43909069895744324, "eval_runtime": 4.3443, "eval_samples_per_second": 63.762, "eval_steps_per_second": 8.057, "step": 3120 }, { "best_epoch": 7, "best_eval_accuracy": 0.6173285198555957, "epoch": 10.0, "step": 3120 }, { "epoch": 11.0, "eval_accuracy": 0.6028880866425993, "eval_loss": 0.35843461751937866, "eval_runtime": 4.3422, "eval_samples_per_second": 63.793, "eval_steps_per_second": 8.06, "step": 3432 }, { "best_epoch": 7, "best_eval_accuracy": 0.6173285198555957, "epoch": 11.0, "step": 3432 }, { "epoch": 11.22, "learning_rate": 0.0032521367521367523, "loss": 0.3922, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.6498194945848376, "eval_loss": 0.33557796478271484, "eval_runtime": 4.3425, "eval_samples_per_second": 63.788, "eval_steps_per_second": 8.06, "step": 3744 }, { "best_epoch": 11, "best_eval_accuracy": 0.6498194945848376, "epoch": 12.0, "step": 3744 }, { "epoch": 12.82, "learning_rate": 0.0031452991452991454, "loss": 0.3564, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.32750093936920166, "eval_runtime": 4.3335, "eval_samples_per_second": 63.921, "eval_steps_per_second": 8.077, "step": 4056 }, { "best_epoch": 12, "best_eval_accuracy": 0.6931407942238267, "epoch": 13.0, "step": 4056 }, { "epoch": 14.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.32828277349472046, "eval_runtime": 4.3334, "eval_samples_per_second": 63.922, "eval_steps_per_second": 8.077, "step": 4368 }, { "best_epoch": 13, "best_eval_accuracy": 0.7075812274368231, "epoch": 14.0, "step": 4368 }, { "epoch": 14.42, "learning_rate": 0.0030384615384615385, "loss": 0.3343, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.3377060890197754, "eval_runtime": 4.3243, "eval_samples_per_second": 64.056, "eval_steps_per_second": 8.094, "step": 4680 }, { "best_epoch": 13, "best_eval_accuracy": 0.7075812274368231, "epoch": 15.0, "step": 4680 }, { "epoch": 16.0, "eval_accuracy": 0.6389891696750902, "eval_loss": 0.35497620701789856, "eval_runtime": 4.328, "eval_samples_per_second": 64.002, "eval_steps_per_second": 8.087, "step": 4992 }, { "best_epoch": 13, "best_eval_accuracy": 0.7075812274368231, "epoch": 16.0, "step": 4992 }, { "epoch": 16.03, "learning_rate": 0.0029316239316239316, "loss": 0.335, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.3370417058467865, "eval_runtime": 4.3279, "eval_samples_per_second": 64.003, "eval_steps_per_second": 8.087, "step": 5304 }, { "best_epoch": 13, "best_eval_accuracy": 0.7075812274368231, "epoch": 17.0, "step": 5304 }, { "epoch": 17.63, "learning_rate": 0.002824786324786325, "loss": 0.3233, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.32560163736343384, "eval_runtime": 4.3346, "eval_samples_per_second": 63.904, "eval_steps_per_second": 8.075, "step": 5616 }, { "best_epoch": 13, "best_eval_accuracy": 0.7075812274368231, "epoch": 18.0, "step": 5616 }, { "epoch": 19.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.31737250089645386, "eval_runtime": 4.3417, "eval_samples_per_second": 63.8, "eval_steps_per_second": 8.061, "step": 5928 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 19.0, "step": 5928 }, { "epoch": 19.23, "learning_rate": 0.0027179487179487182, "loss": 0.3232, "step": 6000 }, { "epoch": 20.0, "eval_accuracy": 0.6642599277978339, "eval_loss": 0.34402281045913696, "eval_runtime": 4.3368, "eval_samples_per_second": 63.872, "eval_steps_per_second": 8.07, "step": 6240 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 20.0, "step": 6240 }, { "epoch": 20.83, "learning_rate": 0.0026111111111111114, "loss": 0.3102, "step": 6500 }, { "epoch": 21.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.3374693691730499, "eval_runtime": 4.3363, "eval_samples_per_second": 63.879, "eval_steps_per_second": 8.071, "step": 6552 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 21.0, "step": 6552 }, { "epoch": 22.0, "eval_accuracy": 0.6787003610108303, "eval_loss": 0.3433131277561188, "eval_runtime": 4.3244, "eval_samples_per_second": 64.055, "eval_steps_per_second": 8.094, "step": 6864 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 22.0, "step": 6864 }, { "epoch": 22.44, "learning_rate": 0.0025042735042735045, "loss": 0.3064, "step": 7000 }, { "epoch": 23.0, "eval_accuracy": 0.6714801444043321, "eval_loss": 0.36901581287384033, "eval_runtime": 4.3278, "eval_samples_per_second": 64.005, "eval_steps_per_second": 8.087, "step": 7176 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 23.0, "step": 7176 }, { "epoch": 24.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.3393624722957611, "eval_runtime": 4.332, "eval_samples_per_second": 63.943, "eval_steps_per_second": 8.079, "step": 7488 }, { "best_epoch": 18, "best_eval_accuracy": 0.7111913357400722, "epoch": 24.0, "step": 7488 }, { "epoch": 24.04, "learning_rate": 0.0023974358974358976, "loss": 0.3004, "step": 7500 }, { "epoch": 25.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3376837372779846, "eval_runtime": 4.3275, "eval_samples_per_second": 64.009, "eval_steps_per_second": 8.088, "step": 7800 }, { "best_epoch": 24, "best_eval_accuracy": 0.7256317689530686, "epoch": 25.0, "step": 7800 }, { "epoch": 25.64, "learning_rate": 0.0022905982905982907, "loss": 0.2962, "step": 8000 }, { "epoch": 26.0, "eval_accuracy": 0.6750902527075813, "eval_loss": 0.34352612495422363, "eval_runtime": 4.336, "eval_samples_per_second": 63.884, "eval_steps_per_second": 8.072, "step": 8112 }, { "best_epoch": 24, "best_eval_accuracy": 0.7256317689530686, "epoch": 26.0, "step": 8112 }, { "epoch": 27.0, "eval_accuracy": 0.7328519855595668, "eval_loss": 0.31822633743286133, "eval_runtime": 4.336, "eval_samples_per_second": 63.884, "eval_steps_per_second": 8.072, "step": 8424 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 27.0, "step": 8424 }, { "epoch": 27.24, "learning_rate": 0.002183760683760684, "loss": 0.2937, "step": 8500 }, { "epoch": 28.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.3305753767490387, "eval_runtime": 4.3337, "eval_samples_per_second": 63.918, "eval_steps_per_second": 8.076, "step": 8736 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 28.0, "step": 8736 }, { "epoch": 28.85, "learning_rate": 0.0020769230769230773, "loss": 0.2905, "step": 9000 }, { "epoch": 29.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3361673057079315, "eval_runtime": 4.3378, "eval_samples_per_second": 63.857, "eval_steps_per_second": 8.069, "step": 9048 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 29.0, "step": 9048 }, { "epoch": 30.0, "eval_accuracy": 0.6750902527075813, "eval_loss": 0.36750420928001404, "eval_runtime": 4.3383, "eval_samples_per_second": 63.85, "eval_steps_per_second": 8.068, "step": 9360 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 30.0, "step": 9360 }, { "epoch": 30.45, "learning_rate": 0.0019700854700854704, "loss": 0.2865, "step": 9500 }, { "epoch": 31.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.34060072898864746, "eval_runtime": 4.3406, "eval_samples_per_second": 63.816, "eval_steps_per_second": 8.063, "step": 9672 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 31.0, "step": 9672 }, { "epoch": 32.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.334277480840683, "eval_runtime": 4.3332, "eval_samples_per_second": 63.924, "eval_steps_per_second": 8.077, "step": 9984 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 32.0, "step": 9984 }, { "epoch": 32.05, "learning_rate": 0.0018632478632478633, "loss": 0.2812, "step": 10000 }, { "epoch": 33.0, "eval_accuracy": 0.6859205776173285, "eval_loss": 0.34722989797592163, "eval_runtime": 4.3283, "eval_samples_per_second": 63.998, "eval_steps_per_second": 8.086, "step": 10296 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 33.0, "step": 10296 }, { "epoch": 33.65, "learning_rate": 0.0017564102564102564, "loss": 0.2727, "step": 10500 }, { "epoch": 34.0, "eval_accuracy": 0.7292418772563177, "eval_loss": 0.337159126996994, "eval_runtime": 4.3326, "eval_samples_per_second": 63.933, "eval_steps_per_second": 8.078, "step": 10608 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 34.0, "step": 10608 }, { "epoch": 35.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.35748186707496643, "eval_runtime": 4.3343, "eval_samples_per_second": 63.909, "eval_steps_per_second": 8.075, "step": 10920 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 35.0, "step": 10920 }, { "epoch": 35.26, "learning_rate": 0.0016495726495726495, "loss": 0.2735, "step": 11000 }, { "epoch": 36.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3299775719642639, "eval_runtime": 4.3421, "eval_samples_per_second": 63.794, "eval_steps_per_second": 8.061, "step": 11232 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 36.0, "step": 11232 }, { "epoch": 36.86, "learning_rate": 0.0015427350427350429, "loss": 0.2701, "step": 11500 }, { "epoch": 37.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.3585418164730072, "eval_runtime": 4.3353, "eval_samples_per_second": 63.894, "eval_steps_per_second": 8.073, "step": 11544 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 37.0, "step": 11544 }, { "epoch": 38.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.3422161340713501, "eval_runtime": 4.3273, "eval_samples_per_second": 64.012, "eval_steps_per_second": 8.088, "step": 11856 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 38.0, "step": 11856 }, { "epoch": 38.46, "learning_rate": 0.001435897435897436, "loss": 0.2688, "step": 12000 }, { "epoch": 39.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.3579197824001312, "eval_runtime": 4.3267, "eval_samples_per_second": 64.021, "eval_steps_per_second": 8.089, "step": 12168 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 39.0, "step": 12168 }, { "epoch": 40.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.33261609077453613, "eval_runtime": 4.3229, "eval_samples_per_second": 64.078, "eval_steps_per_second": 8.097, "step": 12480 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 40.0, "step": 12480 }, { "epoch": 40.06, "learning_rate": 0.001329059829059829, "loss": 0.2644, "step": 12500 }, { "epoch": 41.0, "eval_accuracy": 0.7256317689530686, "eval_loss": 0.3463744819164276, "eval_runtime": 4.3279, "eval_samples_per_second": 64.003, "eval_steps_per_second": 8.087, "step": 12792 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 41.0, "step": 12792 }, { "epoch": 41.67, "learning_rate": 0.0012222222222222224, "loss": 0.2637, "step": 13000 }, { "epoch": 42.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.35785871744155884, "eval_runtime": 4.3221, "eval_samples_per_second": 64.089, "eval_steps_per_second": 8.098, "step": 13104 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 42.0, "step": 13104 }, { "epoch": 43.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3489207327365875, "eval_runtime": 4.3156, "eval_samples_per_second": 64.186, "eval_steps_per_second": 8.11, "step": 13416 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 43.0, "step": 13416 }, { "epoch": 43.27, "learning_rate": 0.0011153846153846155, "loss": 0.26, "step": 13500 }, { "epoch": 44.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.3438562750816345, "eval_runtime": 4.3177, "eval_samples_per_second": 64.154, "eval_steps_per_second": 8.106, "step": 13728 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 44.0, "step": 13728 }, { "epoch": 44.87, "learning_rate": 0.0010085470085470086, "loss": 0.2582, "step": 14000 }, { "epoch": 45.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.3585074543952942, "eval_runtime": 4.3123, "eval_samples_per_second": 64.235, "eval_steps_per_second": 8.116, "step": 14040 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 45.0, "step": 14040 }, { "epoch": 46.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.35347476601600647, "eval_runtime": 4.3136, "eval_samples_per_second": 64.215, "eval_steps_per_second": 8.114, "step": 14352 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 46.0, "step": 14352 }, { "epoch": 46.47, "learning_rate": 0.0009017094017094017, "loss": 0.2533, "step": 14500 }, { "epoch": 47.0, "eval_accuracy": 0.7148014440433214, "eval_loss": 0.34402725100517273, "eval_runtime": 4.315, "eval_samples_per_second": 64.194, "eval_steps_per_second": 8.111, "step": 14664 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 47.0, "step": 14664 }, { "epoch": 48.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3505990207195282, "eval_runtime": 4.3143, "eval_samples_per_second": 64.206, "eval_steps_per_second": 8.113, "step": 14976 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 48.0, "step": 14976 }, { "epoch": 48.08, "learning_rate": 0.0007948717948717948, "loss": 0.2535, "step": 15000 }, { "epoch": 49.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.35185110569000244, "eval_runtime": 4.3151, "eval_samples_per_second": 64.193, "eval_steps_per_second": 8.111, "step": 15288 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 49.0, "step": 15288 }, { "epoch": 49.68, "learning_rate": 0.0006880341880341881, "loss": 0.2498, "step": 15500 }, { "epoch": 50.0, "eval_accuracy": 0.6931407942238267, "eval_loss": 0.3456619083881378, "eval_runtime": 4.3131, "eval_samples_per_second": 64.223, "eval_steps_per_second": 8.115, "step": 15600 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 50.0, "step": 15600 }, { "epoch": 51.0, "eval_accuracy": 0.7111913357400722, "eval_loss": 0.34943750500679016, "eval_runtime": 4.314, "eval_samples_per_second": 64.21, "eval_steps_per_second": 8.113, "step": 15912 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 51.0, "step": 15912 }, { "epoch": 51.28, "learning_rate": 0.0005811965811965813, "loss": 0.2504, "step": 16000 }, { "epoch": 52.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.3430747985839844, "eval_runtime": 4.3163, "eval_samples_per_second": 64.176, "eval_steps_per_second": 8.109, "step": 16224 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 52.0, "step": 16224 }, { "epoch": 52.88, "learning_rate": 0.00047435897435897434, "loss": 0.2499, "step": 16500 }, { "epoch": 53.0, "eval_accuracy": 0.703971119133574, "eval_loss": 0.34503889083862305, "eval_runtime": 4.3129, "eval_samples_per_second": 64.226, "eval_steps_per_second": 8.115, "step": 16536 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 53.0, "step": 16536 }, { "epoch": 54.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.3484850227832794, "eval_runtime": 4.3131, "eval_samples_per_second": 64.223, "eval_steps_per_second": 8.115, "step": 16848 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 54.0, "step": 16848 }, { "epoch": 54.49, "learning_rate": 0.00036752136752136755, "loss": 0.2488, "step": 17000 }, { "epoch": 55.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.3436543345451355, "eval_runtime": 4.3132, "eval_samples_per_second": 64.222, "eval_steps_per_second": 8.115, "step": 17160 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 55.0, "step": 17160 }, { "epoch": 56.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.3464801013469696, "eval_runtime": 4.3155, "eval_samples_per_second": 64.187, "eval_steps_per_second": 8.11, "step": 17472 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 56.0, "step": 17472 }, { "epoch": 56.09, "learning_rate": 0.0002606837606837607, "loss": 0.2479, "step": 17500 }, { "epoch": 57.0, "eval_accuracy": 0.6895306859205776, "eval_loss": 0.34788957238197327, "eval_runtime": 4.3164, "eval_samples_per_second": 64.173, "eval_steps_per_second": 8.109, "step": 17784 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 57.0, "step": 17784 }, { "epoch": 57.69, "learning_rate": 0.00015384615384615385, "loss": 0.247, "step": 18000 }, { "epoch": 58.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.34465810656547546, "eval_runtime": 4.3158, "eval_samples_per_second": 64.183, "eval_steps_per_second": 8.11, "step": 18096 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 58.0, "step": 18096 }, { "epoch": 59.0, "eval_accuracy": 0.7003610108303249, "eval_loss": 0.3520893454551697, "eval_runtime": 4.3122, "eval_samples_per_second": 64.236, "eval_steps_per_second": 8.116, "step": 18408 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 59.0, "step": 18408 }, { "epoch": 59.29, "learning_rate": 4.700854700854701e-05, "loss": 0.2468, "step": 18500 }, { "epoch": 60.0, "eval_accuracy": 0.6967509025270758, "eval_loss": 0.3493305444717407, "eval_runtime": 4.3274, "eval_samples_per_second": 64.011, "eval_steps_per_second": 8.088, "step": 18720 }, { "best_epoch": 26, "best_eval_accuracy": 0.7328519855595668, "epoch": 60.0, "step": 18720 }, { "epoch": 60.0, "step": 18720, "total_flos": 6.96152728406016e+16, "train_loss": 0.3118535759102585, "train_runtime": 3964.0197, "train_samples_per_second": 37.689, "train_steps_per_second": 4.722 } ], "max_steps": 18720, "num_train_epochs": 60, "total_flos": 6.96152728406016e+16, "trial_name": null, "trial_params": null }