{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 3510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "eval_f1": 0.3722687284743791, "eval_loss": 1.3361328840255737, "eval_runtime": 3.2985, "eval_samples_per_second": 29.71, "eval_steps_per_second": 29.71, "step": 100 }, { "epoch": 0.28, "eval_f1": 0.4344699929794222, "eval_loss": 1.1836130619049072, "eval_runtime": 3.274, "eval_samples_per_second": 29.933, "eval_steps_per_second": 29.933, "step": 200 }, { "epoch": 0.43, "eval_f1": 0.3996935122704078, "eval_loss": 1.1635534763336182, "eval_runtime": 3.2772, "eval_samples_per_second": 29.904, "eval_steps_per_second": 29.904, "step": 300 }, { "epoch": 0.57, "eval_f1": 0.5028054395723479, "eval_loss": 1.353499174118042, "eval_runtime": 3.2926, "eval_samples_per_second": 29.764, "eval_steps_per_second": 29.764, "step": 400 }, { "epoch": 0.71, "learning_rate": 2.572649572649573e-05, "loss": 1.2064, "step": 500 }, { "epoch": 0.71, "eval_f1": 0.47071964928887344, "eval_loss": 1.2940737009048462, "eval_runtime": 3.3744, "eval_samples_per_second": 29.042, "eval_steps_per_second": 29.042, "step": 500 }, { "epoch": 0.85, "eval_f1": 0.49370416278560797, "eval_loss": 1.2891041040420532, "eval_runtime": 3.3094, "eval_samples_per_second": 29.613, "eval_steps_per_second": 29.613, "step": 600 }, { "epoch": 1.0, "eval_f1": 0.47736617527595926, "eval_loss": 1.2047343254089355, "eval_runtime": 3.3145, "eval_samples_per_second": 29.567, "eval_steps_per_second": 29.567, "step": 700 }, { "epoch": 1.14, "eval_f1": 0.4943830125990876, "eval_loss": 1.2190661430358887, "eval_runtime": 3.3377, "eval_samples_per_second": 29.362, "eval_steps_per_second": 29.362, "step": 800 }, { "epoch": 1.28, "eval_f1": 0.4777863203104454, "eval_loss": 1.174961805343628, "eval_runtime": 3.3391, "eval_samples_per_second": 29.349, "eval_steps_per_second": 29.349, "step": 900 }, { "epoch": 1.42, "learning_rate": 2.1452991452991456e-05, "loss": 0.9207, "step": 1000 }, { "epoch": 1.42, "eval_f1": 0.49086129207075757, "eval_loss": 1.3087153434753418, "eval_runtime": 3.4003, "eval_samples_per_second": 28.821, "eval_steps_per_second": 28.821, "step": 1000 }, { "epoch": 1.57, "eval_f1": 0.49757882395260544, "eval_loss": 1.2435556650161743, "eval_runtime": 3.4366, "eval_samples_per_second": 28.517, "eval_steps_per_second": 28.517, "step": 1100 }, { "epoch": 1.71, "eval_f1": 0.503327058221218, "eval_loss": 1.1465363502502441, "eval_runtime": 3.2944, "eval_samples_per_second": 29.748, "eval_steps_per_second": 29.748, "step": 1200 }, { "epoch": 1.85, "eval_f1": 0.5141589868888157, "eval_loss": 1.113364577293396, "eval_runtime": 3.3642, "eval_samples_per_second": 29.131, "eval_steps_per_second": 29.131, "step": 1300 }, { "epoch": 1.99, "eval_f1": 0.5383469405673188, "eval_loss": 1.1939647197723389, "eval_runtime": 3.3033, "eval_samples_per_second": 29.668, "eval_steps_per_second": 29.668, "step": 1400 }, { "epoch": 2.14, "learning_rate": 1.7179487179487178e-05, "loss": 0.8149, "step": 1500 }, { "epoch": 2.14, "eval_f1": 0.5291030100787034, "eval_loss": 1.2552497386932373, "eval_runtime": 3.7541, "eval_samples_per_second": 26.105, "eval_steps_per_second": 26.105, "step": 1500 }, { "epoch": 2.28, "eval_f1": 0.5259736412492381, "eval_loss": 1.3746747970581055, "eval_runtime": 4.9995, "eval_samples_per_second": 19.602, "eval_steps_per_second": 19.602, "step": 1600 }, { "epoch": 2.42, "eval_f1": 0.5329388682083431, "eval_loss": 1.3680145740509033, "eval_runtime": 5.1597, "eval_samples_per_second": 18.993, "eval_steps_per_second": 18.993, "step": 1700 }, { "epoch": 2.56, "eval_f1": 0.5189920532535025, "eval_loss": 1.27865469455719, "eval_runtime": 5.0223, "eval_samples_per_second": 19.513, "eval_steps_per_second": 19.513, "step": 1800 }, { "epoch": 2.71, "eval_f1": 0.5409205239275264, "eval_loss": 1.3888845443725586, "eval_runtime": 3.3132, "eval_samples_per_second": 29.579, "eval_steps_per_second": 29.579, "step": 1900 }, { "epoch": 2.85, "learning_rate": 1.2905982905982905e-05, "loss": 0.6152, "step": 2000 }, { "epoch": 2.85, "eval_f1": 0.543504294934508, "eval_loss": 1.3602004051208496, "eval_runtime": 3.3336, "eval_samples_per_second": 29.398, "eval_steps_per_second": 29.398, "step": 2000 }, { "epoch": 2.99, "eval_f1": 0.5467811408362643, "eval_loss": 1.3174574375152588, "eval_runtime": 5.0569, "eval_samples_per_second": 19.379, "eval_steps_per_second": 19.379, "step": 2100 }, { "epoch": 3.13, "eval_f1": 0.5365057187973831, "eval_loss": 1.5886870622634888, "eval_runtime": 4.5058, "eval_samples_per_second": 21.75, "eval_steps_per_second": 21.75, "step": 2200 }, { "epoch": 3.28, "eval_f1": 0.5563382534701277, "eval_loss": 1.517231822013855, "eval_runtime": 4.5269, "eval_samples_per_second": 21.648, "eval_steps_per_second": 21.648, "step": 2300 }, { "epoch": 3.42, "eval_f1": 0.5661390498930103, "eval_loss": 1.5470443964004517, "eval_runtime": 3.6905, "eval_samples_per_second": 26.555, "eval_steps_per_second": 26.555, "step": 2400 }, { "epoch": 3.56, "learning_rate": 8.632478632478633e-06, "loss": 0.4719, "step": 2500 }, { "epoch": 3.56, "eval_f1": 0.521216772952552, "eval_loss": 1.4928430318832397, "eval_runtime": 3.3155, "eval_samples_per_second": 29.558, "eval_steps_per_second": 29.558, "step": 2500 }, { "epoch": 3.7, "eval_f1": 0.5356457612585566, "eval_loss": 1.6497721672058105, "eval_runtime": 4.8518, "eval_samples_per_second": 20.199, "eval_steps_per_second": 20.199, "step": 2600 }, { "epoch": 3.85, "eval_f1": 0.5596834952223371, "eval_loss": 1.4976708889007568, "eval_runtime": 4.6972, "eval_samples_per_second": 20.863, "eval_steps_per_second": 20.863, "step": 2700 }, { "epoch": 3.99, "eval_f1": 0.5470066167039311, "eval_loss": 1.471981167793274, "eval_runtime": 4.5019, "eval_samples_per_second": 21.769, "eval_steps_per_second": 21.769, "step": 2800 }, { "epoch": 4.13, "eval_f1": 0.5492964393504802, "eval_loss": 1.5796676874160767, "eval_runtime": 4.7041, "eval_samples_per_second": 20.833, "eval_steps_per_second": 20.833, "step": 2900 }, { "epoch": 4.27, "learning_rate": 4.358974358974359e-06, "loss": 0.372, "step": 3000 }, { "epoch": 4.27, "eval_f1": 0.5445354826532323, "eval_loss": 1.6874395608901978, "eval_runtime": 3.9793, "eval_samples_per_second": 24.628, "eval_steps_per_second": 24.628, "step": 3000 }, { "epoch": 4.42, "eval_f1": 0.5544723066439012, "eval_loss": 1.6702477931976318, "eval_runtime": 4.7443, "eval_samples_per_second": 20.656, "eval_steps_per_second": 20.656, "step": 3100 }, { "epoch": 4.56, "eval_f1": 0.5469058666319371, "eval_loss": 1.7671833038330078, "eval_runtime": 4.6665, "eval_samples_per_second": 21.001, "eval_steps_per_second": 21.001, "step": 3200 }, { "epoch": 4.7, "eval_f1": 0.5485370297299399, "eval_loss": 1.7351080179214478, "eval_runtime": 4.8851, "eval_samples_per_second": 20.061, "eval_steps_per_second": 20.061, "step": 3300 }, { "epoch": 4.84, "eval_f1": 0.5497797755164764, "eval_loss": 1.7282612323760986, "eval_runtime": 5.1791, "eval_samples_per_second": 18.922, "eval_steps_per_second": 18.922, "step": 3400 }, { "epoch": 4.99, "learning_rate": 8.547008547008547e-08, "loss": 0.2944, "step": 3500 }, { "epoch": 4.99, "eval_f1": 0.5503861750639598, "eval_loss": 1.698703408241272, "eval_runtime": 5.6701, "eval_samples_per_second": 17.284, "eval_steps_per_second": 17.284, "step": 3500 }, { "epoch": 5.0, "step": 3510, "total_flos": 2890172619430200.0, "train_loss": 0.6695007115008145, "train_runtime": 916.4977, "train_samples_per_second": 3.83, "train_steps_per_second": 3.83 } ], "max_steps": 3510, "num_train_epochs": 5, "total_flos": 2890172619430200.0, "trial_name": null, "trial_params": null }