{ "best_metric": 0.8807787895202637, "best_model_checkpoint": "org_org_a/org_aug_a/checkpoint-400", "epoch": 0.24615384615384617, "eval_steps": 25, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015384615384615385, "grad_norm": 99.7617416381836, "learning_rate": 9.375e-05, "loss": 2.2195, "step": 25 }, { "epoch": 0.015384615384615385, "eval_f1_macro": 0.5521299126846324, "eval_f1_micro": 0.5691194856224325, "eval_f1_weighted": 0.569423288164649, "eval_loss": 1.520703673362732, "eval_runtime": 1286.7359, "eval_samples_per_second": 8.703, "eval_steps_per_second": 0.272, "step": 25 }, { "epoch": 0.03076923076923077, "grad_norm": 40.433555603027344, "learning_rate": 8.75e-05, "loss": 1.4371, "step": 50 }, { "epoch": 0.03076923076923077, "eval_f1_macro": 0.5857301862067549, "eval_f1_micro": 0.6089480264332917, "eval_f1_weighted": 0.6051732662908408, "eval_loss": 1.2746953964233398, "eval_runtime": 1302.368, "eval_samples_per_second": 8.598, "eval_steps_per_second": 0.269, "step": 50 }, { "epoch": 0.046153846153846156, "grad_norm": 50.680335998535156, "learning_rate": 8.125000000000001e-05, "loss": 1.2556, "step": 75 }, { "epoch": 0.046153846153846156, "eval_f1_macro": 0.6036315452406847, "eval_f1_micro": 0.6303804250759064, "eval_f1_weighted": 0.6240449220829647, "eval_loss": 1.1545159816741943, "eval_runtime": 1308.8563, "eval_samples_per_second": 8.556, "eval_steps_per_second": 0.267, "step": 75 }, { "epoch": 0.06153846153846154, "grad_norm": 24.990671157836914, "learning_rate": 7.500000000000001e-05, "loss": 1.2415, "step": 100 }, { "epoch": 0.06153846153846154, "eval_f1_macro": 0.6131831448419165, "eval_f1_micro": 0.6319878549741025, "eval_f1_weighted": 0.6300913896752308, "eval_loss": 1.0690622329711914, "eval_runtime": 1302.0106, "eval_samples_per_second": 8.601, "eval_steps_per_second": 0.269, "step": 100 }, { "epoch": 0.07692307692307693, "grad_norm": 20.941816329956055, "learning_rate": 6.875e-05, "loss": 0.9864, "step": 125 }, { "epoch": 0.07692307692307693, "eval_f1_macro": 0.627800207605976, "eval_f1_micro": 0.6399357028040722, "eval_f1_weighted": 0.6411240450638264, "eval_loss": 1.0263742208480835, "eval_runtime": 1310.3949, "eval_samples_per_second": 8.546, "eval_steps_per_second": 0.267, "step": 125 }, { "epoch": 0.09230769230769231, "grad_norm": 17.243314743041992, "learning_rate": 6.25e-05, "loss": 1.0647, "step": 150 }, { "epoch": 0.09230769230769231, "eval_f1_macro": 0.6265827051287185, "eval_f1_micro": 0.6510091087694231, "eval_f1_weighted": 0.6455165794591529, "eval_loss": 0.9917964339256287, "eval_runtime": 1292.6103, "eval_samples_per_second": 8.663, "eval_steps_per_second": 0.271, "step": 150 }, { "epoch": 0.1076923076923077, "grad_norm": 20.114173889160156, "learning_rate": 5.6250000000000005e-05, "loss": 0.9849, "step": 175 }, { "epoch": 0.1076923076923077, "eval_f1_macro": 0.6317476736951155, "eval_f1_micro": 0.6576174316842294, "eval_f1_weighted": 0.6510976948325254, "eval_loss": 0.9679338932037354, "eval_runtime": 1305.0812, "eval_samples_per_second": 8.58, "eval_steps_per_second": 0.268, "step": 175 }, { "epoch": 0.12307692307692308, "grad_norm": 39.2221565246582, "learning_rate": 5e-05, "loss": 1.0067, "step": 200 }, { "epoch": 0.12307692307692308, "eval_f1_macro": 0.6383959350585475, "eval_f1_micro": 0.6501160921593142, "eval_f1_weighted": 0.6513020604373679, "eval_loss": 0.9382981061935425, "eval_runtime": 1283.0843, "eval_samples_per_second": 8.727, "eval_steps_per_second": 0.273, "step": 200 }, { "epoch": 0.13846153846153847, "grad_norm": 26.992185592651367, "learning_rate": 4.375e-05, "loss": 0.8928, "step": 225 }, { "epoch": 0.13846153846153847, "eval_f1_macro": 0.640450740779414, "eval_f1_micro": 0.6619932130737631, "eval_f1_weighted": 0.6578984928748007, "eval_loss": 0.9242791533470154, "eval_runtime": 1276.3685, "eval_samples_per_second": 8.773, "eval_steps_per_second": 0.274, "step": 225 }, { "epoch": 0.15384615384615385, "grad_norm": 71.61570739746094, "learning_rate": 3.7500000000000003e-05, "loss": 0.9858, "step": 250 }, { "epoch": 0.15384615384615385, "eval_f1_macro": 0.640469116958249, "eval_f1_micro": 0.6627076263618503, "eval_f1_weighted": 0.6581960436641718, "eval_loss": 0.9131789803504944, "eval_runtime": 1285.3671, "eval_samples_per_second": 8.712, "eval_steps_per_second": 0.272, "step": 250 }, { "epoch": 0.16923076923076924, "grad_norm": 51.381019592285156, "learning_rate": 3.125e-05, "loss": 0.9085, "step": 275 }, { "epoch": 0.16923076923076924, "eval_f1_macro": 0.6446344740224741, "eval_f1_micro": 0.6575281300232184, "eval_f1_weighted": 0.6580540316041209, "eval_loss": 0.9010853171348572, "eval_runtime": 1307.6049, "eval_samples_per_second": 8.564, "eval_steps_per_second": 0.268, "step": 275 }, { "epoch": 0.18461538461538463, "grad_norm": 25.47317886352539, "learning_rate": 2.5e-05, "loss": 1.0059, "step": 300 }, { "epoch": 0.18461538461538463, "eval_f1_macro": 0.6435798595814134, "eval_f1_micro": 0.6686015359885694, "eval_f1_weighted": 0.662301841928527, "eval_loss": 0.9018191695213318, "eval_runtime": 1330.8319, "eval_samples_per_second": 8.414, "eval_steps_per_second": 0.263, "step": 300 }, { "epoch": 0.2, "grad_norm": 16.64508628845215, "learning_rate": 1.8750000000000002e-05, "loss": 0.8939, "step": 325 }, { "epoch": 0.2, "eval_f1_macro": 0.6448244249080287, "eval_f1_micro": 0.6681550276835149, "eval_f1_weighted": 0.6628710932570111, "eval_loss": 0.892798125743866, "eval_runtime": 1305.0822, "eval_samples_per_second": 8.58, "eval_steps_per_second": 0.268, "step": 325 }, { "epoch": 0.2153846153846154, "grad_norm": 32.9316520690918, "learning_rate": 1.25e-05, "loss": 0.864, "step": 350 }, { "epoch": 0.2153846153846154, "eval_f1_macro": 0.6477608498148076, "eval_f1_micro": 0.6621718163957849, "eval_f1_weighted": 0.6618855058014609, "eval_loss": 0.8832775950431824, "eval_runtime": 1344.9811, "eval_samples_per_second": 8.326, "eval_steps_per_second": 0.26, "step": 350 }, { "epoch": 0.23076923076923078, "grad_norm": 63.84526824951172, "learning_rate": 6.25e-06, "loss": 0.9499, "step": 375 }, { "epoch": 0.23076923076923078, "eval_f1_macro": 0.646307301425064, "eval_f1_micro": 0.6585104482943382, "eval_f1_weighted": 0.659331813949001, "eval_loss": 0.8836826682090759, "eval_runtime": 1287.5574, "eval_samples_per_second": 8.697, "eval_steps_per_second": 0.272, "step": 375 }, { "epoch": 0.24615384615384617, "grad_norm": 21.614879608154297, "learning_rate": 0.0, "loss": 0.9721, "step": 400 }, { "epoch": 0.24615384615384617, "eval_f1_macro": 0.6475943444675745, "eval_f1_micro": 0.6614574031076978, "eval_f1_weighted": 0.6614511612983146, "eval_loss": 0.8807787895202637, "eval_runtime": 1313.0891, "eval_samples_per_second": 8.528, "eval_steps_per_second": 0.267, "step": 400 }, { "epoch": 0.24615384615384617, "step": 400, "total_flos": 1.690284412871639e+17, "train_loss": 1.104334650039673, "train_runtime": 22485.3021, "train_samples_per_second": 0.569, "train_steps_per_second": 0.018 }, { "epoch": 0.24615384615384617, "eval_f1_macro": 0.6475943444675745, "eval_f1_micro": 0.6614574031076978, "eval_f1_weighted": 0.6614511612983146, "eval_loss": 0.8807787895202637, "eval_runtime": 1299.8097, "eval_samples_per_second": 8.615, "eval_steps_per_second": 0.269, "step": 400 } ], "logging_steps": 25, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "total_flos": 1.690284412871639e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }