{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 2.88e-05, "loss": 0.3202, "step": 100 }, { "epoch": 0.4, "eval_accuracy": 0.8813009961455675, "eval_f1": 0.16261325703385787, "eval_loss": 0.24328218400478363, "eval_precision": 0.2519394163280384, "eval_recall": 0.12004928709734201, "eval_runtime": 4.054, "eval_samples_per_second": 123.334, "eval_steps_per_second": 30.833, "step": 100 }, { "epoch": 0.8, "learning_rate": 2.7600000000000003e-05, "loss": 0.23, "step": 200 }, { "epoch": 0.8, "eval_accuracy": 0.9034639835811182, "eval_f1": 0.49651100375738055, "eval_loss": 0.21028906106948853, "eval_precision": 0.5048208113516464, "eval_recall": 0.48847033972892095, "eval_runtime": 4.2572, "eval_samples_per_second": 117.449, "eval_steps_per_second": 29.362, "step": 200 }, { "epoch": 1.2, "learning_rate": 2.64e-05, "loss": 0.2013, "step": 300 }, { "epoch": 1.2, "eval_accuracy": 0.8792611503228713, "eval_f1": 0.5627305035874741, "eval_loss": 0.2621181607246399, "eval_precision": 0.4545060658578856, "eval_recall": 0.7386023587396585, "eval_runtime": 4.1357, "eval_samples_per_second": 120.897, "eval_steps_per_second": 30.224, "step": 300 }, { "epoch": 1.6, "learning_rate": 2.52e-05, "loss": 0.1874, "step": 400 }, { "epoch": 1.6, "eval_accuracy": 0.893452470340892, "eval_f1": 0.5619149696320114, "eval_loss": 0.2326020449399948, "eval_precision": 0.47293721433726243, "eval_recall": 0.6921316669600422, "eval_runtime": 4.241, "eval_samples_per_second": 117.897, "eval_steps_per_second": 29.474, "step": 400 }, { "epoch": 2.0, "learning_rate": 2.4e-05, "loss": 0.1847, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9060920058066777, "eval_f1": 0.5619039721369932, "eval_loss": 0.20794397592544556, "eval_precision": 0.5312010034493572, "eval_recall": 0.5963738778384088, "eval_runtime": 4.124, "eval_samples_per_second": 121.24, "eval_steps_per_second": 30.31, "step": 500 }, { "epoch": 2.4, "learning_rate": 2.2800000000000002e-05, "loss": 0.1567, "step": 600 }, { "epoch": 2.4, "eval_accuracy": 0.9071932722631025, "eval_f1": 0.503008186211658, "eval_loss": 0.23015139997005463, "eval_precision": 0.5720053835800808, "eval_recall": 0.4488646365076571, "eval_runtime": 4.1201, "eval_samples_per_second": 121.355, "eval_steps_per_second": 30.339, "step": 600 }, { "epoch": 2.8, "learning_rate": 2.16e-05, "loss": 0.1484, "step": 700 }, { "epoch": 2.8, "eval_accuracy": 0.9038269009360765, "eval_f1": 0.5580642412882338, "eval_loss": 0.22997109591960907, "eval_precision": 0.540785997357992, "eval_recall": 0.5764830135539518, "eval_runtime": 4.134, "eval_samples_per_second": 120.948, "eval_steps_per_second": 30.237, "step": 700 }, { "epoch": 3.2, "learning_rate": 2.04e-05, "loss": 0.1388, "step": 800 }, { "epoch": 3.2, "eval_accuracy": 0.9075561896180607, "eval_f1": 0.5364304509572634, "eval_loss": 0.2365296632051468, "eval_precision": 0.5535580524344569, "eval_recall": 0.5203309276535821, "eval_runtime": 4.1587, "eval_samples_per_second": 120.229, "eval_steps_per_second": 30.057, "step": 800 }, { "epoch": 3.6, "learning_rate": 1.9200000000000003e-05, "loss": 0.1191, "step": 900 }, { "epoch": 3.6, "eval_accuracy": 0.9067302397757421, "eval_f1": 0.574726200505476, "eval_loss": 0.26086461544036865, "eval_precision": 0.5511391177896268, "eval_recall": 0.6004224608343601, "eval_runtime": 4.1544, "eval_samples_per_second": 120.354, "eval_steps_per_second": 30.088, "step": 900 }, { "epoch": 4.0, "learning_rate": 1.8e-05, "loss": 0.1193, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.9059543474996246, "eval_f1": 0.5809305373525557, "eval_loss": 0.25283825397491455, "eval_precision": 0.543281752719473, "eval_recall": 0.6241858827671185, "eval_runtime": 4.1474, "eval_samples_per_second": 120.557, "eval_steps_per_second": 30.139, "step": 1000 }, { "epoch": 4.4, "learning_rate": 1.6800000000000002e-05, "loss": 0.088, "step": 1100 }, { "epoch": 4.4, "eval_accuracy": 0.9037142714121239, "eval_f1": 0.5845009103142563, "eval_loss": 0.2839806079864502, "eval_precision": 0.5310701956271576, "eval_recall": 0.6498855835240275, "eval_runtime": 4.1556, "eval_samples_per_second": 120.318, "eval_steps_per_second": 30.08, "step": 1100 }, { "epoch": 4.8, "learning_rate": 1.56e-05, "loss": 0.0924, "step": 1200 }, { "epoch": 4.8, "eval_accuracy": 0.9085197977674325, "eval_f1": 0.5776627856834843, "eval_loss": 0.27629220485687256, "eval_precision": 0.5662833953331079, "eval_recall": 0.5895088892800563, "eval_runtime": 4.1675, "eval_samples_per_second": 119.975, "eval_steps_per_second": 29.994, "step": 1200 }, { "epoch": 5.2, "learning_rate": 1.44e-05, "loss": 0.0834, "step": 1300 }, { "epoch": 5.2, "eval_accuracy": 0.9037267858036743, "eval_f1": 0.5866475003992974, "eval_loss": 0.332010954618454, "eval_precision": 0.5369098085075281, "eval_recall": 0.6465411019186763, "eval_runtime": 4.1738, "eval_samples_per_second": 119.795, "eval_steps_per_second": 29.949, "step": 1300 }, { "epoch": 5.6, "learning_rate": 1.32e-05, "loss": 0.0654, "step": 1400 }, { "epoch": 5.6, "eval_accuracy": 0.9057416028432698, "eval_f1": 0.574710687542546, "eval_loss": 0.32423922419548035, "eval_precision": 0.5562510294844342, "eval_recall": 0.5944375990142581, "eval_runtime": 4.1539, "eval_samples_per_second": 120.368, "eval_steps_per_second": 30.092, "step": 1400 }, { "epoch": 6.0, "learning_rate": 1.2e-05, "loss": 0.0689, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.9046403363868448, "eval_f1": 0.5581112750629285, "eval_loss": 0.31789475679397583, "eval_precision": 0.550513698630137, "eval_recall": 0.5659214926949481, "eval_runtime": 4.1716, "eval_samples_per_second": 119.859, "eval_steps_per_second": 29.965, "step": 1500 }, { "epoch": 6.4, "learning_rate": 1.08e-05, "loss": 0.0498, "step": 1600 }, { "epoch": 6.4, "eval_accuracy": 0.9053661710967613, "eval_f1": 0.5820808768579258, "eval_loss": 0.38915345072746277, "eval_precision": 0.5509273813266269, "eval_recall": 0.6169688435134659, "eval_runtime": 4.1814, "eval_samples_per_second": 119.577, "eval_steps_per_second": 29.894, "step": 1600 }, { "epoch": 6.8, "learning_rate": 9.600000000000001e-06, "loss": 0.0528, "step": 1700 }, { "epoch": 6.8, "eval_accuracy": 0.9048155378685488, "eval_f1": 0.5776866283839212, "eval_loss": 0.3601633608341217, "eval_precision": 0.5409433092640958, "eval_recall": 0.619785249075867, "eval_runtime": 4.1473, "eval_samples_per_second": 120.56, "eval_steps_per_second": 30.14, "step": 1700 }, { "epoch": 7.2, "learning_rate": 8.400000000000001e-06, "loss": 0.0474, "step": 1800 }, { "epoch": 7.2, "eval_accuracy": 0.9040396455924313, "eval_f1": 0.5793253173012691, "eval_loss": 0.39758625626564026, "eval_precision": 0.5510722795869738, "eval_recall": 0.6106319309980637, "eval_runtime": 4.1737, "eval_samples_per_second": 119.798, "eval_steps_per_second": 29.949, "step": 1800 }, { "epoch": 7.6, "learning_rate": 7.2e-06, "loss": 0.039, "step": 1900 }, { "epoch": 7.6, "eval_accuracy": 0.9035766131050709, "eval_f1": 0.5778368499750789, "eval_loss": 0.4138449728488922, "eval_precision": 0.5471134182790625, "eval_recall": 0.6122161591269143, "eval_runtime": 4.1525, "eval_samples_per_second": 120.408, "eval_steps_per_second": 30.102, "step": 1900 }, { "epoch": 8.0, "learning_rate": 6e-06, "loss": 0.0446, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.9039520448515793, "eval_f1": 0.5882447535579319, "eval_loss": 0.408151775598526, "eval_precision": 0.5414446417998816, "eval_recall": 0.6439007217039253, "eval_runtime": 4.1562, "eval_samples_per_second": 120.303, "eval_steps_per_second": 30.076, "step": 2000 }, { "epoch": 8.4, "learning_rate": 4.800000000000001e-06, "loss": 0.0333, "step": 2100 }, { "epoch": 8.4, "eval_accuracy": 0.9046528507783952, "eval_f1": 0.5720617062984743, "eval_loss": 0.4318484365940094, "eval_precision": 0.5545274289491078, "eval_recall": 0.5907410667136067, "eval_runtime": 4.1724, "eval_samples_per_second": 119.834, "eval_steps_per_second": 29.959, "step": 2100 }, { "epoch": 8.8, "learning_rate": 3.6e-06, "loss": 0.0327, "step": 2200 }, { "epoch": 8.8, "eval_accuracy": 0.9054913150122641, "eval_f1": 0.5734657499363381, "eval_loss": 0.4232546091079712, "eval_precision": 0.5537704918032786, "eval_recall": 0.5946136243619081, "eval_runtime": 4.1536, "eval_samples_per_second": 120.378, "eval_steps_per_second": 30.095, "step": 2200 }, { "epoch": 9.2, "learning_rate": 2.4000000000000003e-06, "loss": 0.03, "step": 2300 }, { "epoch": 9.2, "eval_accuracy": 0.9049782249587025, "eval_f1": 0.5769523005487548, "eval_loss": 0.44003215432167053, "eval_precision": 0.5543478260869565, "eval_recall": 0.6014786129202605, "eval_runtime": 4.2605, "eval_samples_per_second": 117.358, "eval_steps_per_second": 29.339, "step": 2300 }, { "epoch": 9.6, "learning_rate": 1.2000000000000002e-06, "loss": 0.0286, "step": 2400 }, { "epoch": 9.6, "eval_accuracy": 0.9048280522600991, "eval_f1": 0.5807528586929305, "eval_loss": 0.4442707598209381, "eval_precision": 0.5522222222222222, "eval_recall": 0.6123921844745643, "eval_runtime": 4.3652, "eval_samples_per_second": 114.542, "eval_steps_per_second": 28.636, "step": 2400 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0261, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.9050407969164539, "eval_f1": 0.5811535881958416, "eval_loss": 0.4490407407283783, "eval_precision": 0.5548263166319833, "eval_recall": 0.6101038549551135, "eval_runtime": 4.2364, "eval_samples_per_second": 118.025, "eval_steps_per_second": 29.506, "step": 2500 }, { "epoch": 10.0, "step": 2500, "total_flos": 2612991191040000.0, "train_loss": 0.10352115373611451, "train_runtime": 364.2147, "train_samples_per_second": 27.456, "train_steps_per_second": 6.864 } ], "max_steps": 2500, "num_train_epochs": 10, "total_flos": 2612991191040000.0, "trial_name": null, "trial_params": null }