{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.431522846221924, "learning_rate": 4.75e-05, "loss": 0.5653, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6333236742973052, "eval_loss": 0.5245500802993774, "eval_precision": 0.6540233301136786, "eval_recall": 0.6256592107655937, "eval_runtime": 4.6575, "eval_samples_per_second": 85.668, "eval_steps_per_second": 10.735, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.8715531826019287, "learning_rate": 4.5e-05, "loss": 0.5167, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.6854598540145985, "eval_loss": 0.521543025970459, "eval_precision": 0.6804511278195489, "eval_recall": 0.6934897254046191, "eval_runtime": 5.0566, "eval_samples_per_second": 78.907, "eval_steps_per_second": 9.888, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.720804691314697, "learning_rate": 4.25e-05, "loss": 0.4984, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7443609022556391, "eval_f1": 0.6916257501363885, "eval_loss": 0.49747392535209656, "eval_precision": 0.6916257501363885, "eval_recall": 0.6916257501363885, "eval_runtime": 5.0553, "eval_samples_per_second": 78.927, "eval_steps_per_second": 9.891, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.2620692253112793, "learning_rate": 4e-05, "loss": 0.4765, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7418546365914787, "eval_f1": 0.6619339448031918, "eval_loss": 0.4854496121406555, "eval_precision": 0.6836634025138848, "eval_recall": 0.6523458810692853, "eval_runtime": 5.0451, "eval_samples_per_second": 79.087, "eval_steps_per_second": 9.911, "step": 488 }, { "epoch": 5.0, "grad_norm": 4.330729007720947, "learning_rate": 3.7500000000000003e-05, "loss": 0.4797, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7719298245614035, "eval_f1": 0.7320072332730561, "eval_loss": 0.485201358795166, "eval_precision": 0.7269805119926199, "eval_recall": 0.7386342971449354, "eval_runtime": 5.0467, "eval_samples_per_second": 79.062, "eval_steps_per_second": 9.907, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.9019949436187744, "learning_rate": 3.5e-05, "loss": 0.4668, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.7669172932330827, "eval_f1": 0.7195344091304183, "eval_loss": 0.47377943992614746, "eval_precision": 0.7189969238192895, "eval_recall": 0.7200854700854701, "eval_runtime": 5.0582, "eval_samples_per_second": 78.882, "eval_steps_per_second": 9.885, "step": 732 }, { "epoch": 7.0, "grad_norm": 14.114830017089844, "learning_rate": 3.2500000000000004e-05, "loss": 0.4622, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.7719298245614035, "eval_f1": 0.7295157072938161, "eval_loss": 0.47686928510665894, "eval_precision": 0.7260557184750733, "eval_recall": 0.7336333878887071, "eval_runtime": 5.045, "eval_samples_per_second": 79.088, "eval_steps_per_second": 9.911, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.223310947418213, "learning_rate": 3e-05, "loss": 0.4621, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.7493734335839599, "eval_f1": 0.6685826300750881, "eval_loss": 0.46253928542137146, "eval_precision": 0.6949044585987261, "eval_recall": 0.6576650300054556, "eval_runtime": 5.052, "eval_samples_per_second": 78.979, "eval_steps_per_second": 9.897, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.2552032470703125, "learning_rate": 2.7500000000000004e-05, "loss": 0.4561, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.7769423558897243, "eval_f1": 0.7199470035725271, "eval_loss": 0.4609311521053314, "eval_precision": 0.7310853634383045, "eval_recall": 0.7121749408983451, "eval_runtime": 5.0567, "eval_samples_per_second": 78.905, "eval_steps_per_second": 9.888, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.524580240249634, "learning_rate": 2.5e-05, "loss": 0.4519, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.7669172932330827, "eval_f1": 0.6822078533807219, "eval_loss": 0.4608353078365326, "eval_precision": 0.7252321981424149, "eval_recall": 0.6675759228950718, "eval_runtime": 5.0558, "eval_samples_per_second": 78.919, "eval_steps_per_second": 9.89, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.3073718547821045, "learning_rate": 2.25e-05, "loss": 0.4413, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.7694235588972431, "eval_f1": 0.7079992363497518, "eval_loss": 0.4543740451335907, "eval_precision": 0.7214646464646465, "eval_recall": 0.6993544280778323, "eval_runtime": 5.0512, "eval_samples_per_second": 78.992, "eval_steps_per_second": 9.899, "step": 1342 }, { "epoch": 12.0, "grad_norm": 6.039628982543945, "learning_rate": 2e-05, "loss": 0.4449, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.7412841546534773, "eval_loss": 0.4569094777107239, "eval_precision": 0.7401260504201681, "eval_recall": 0.7424986361156574, "eval_runtime": 5.0491, "eval_samples_per_second": 79.023, "eval_steps_per_second": 9.903, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.8723957538604736, "learning_rate": 1.75e-05, "loss": 0.4506, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.7644110275689223, "eval_f1": 0.6821309919316564, "eval_loss": 0.4527250826358795, "eval_precision": 0.7196598101265823, "eval_recall": 0.6683033278777959, "eval_runtime": 5.0532, "eval_samples_per_second": 78.959, "eval_steps_per_second": 9.895, "step": 1586 }, { "epoch": 14.0, "grad_norm": 2.8429393768310547, "learning_rate": 1.5e-05, "loss": 0.4446, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.7794486215538847, "eval_f1": 0.7120834426659669, "eval_loss": 0.4487856924533844, "eval_precision": 0.7379122870605291, "eval_recall": 0.69894526277505, "eval_runtime": 5.0594, "eval_samples_per_second": 78.864, "eval_steps_per_second": 9.883, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.688943386077881, "learning_rate": 1.25e-05, "loss": 0.4426, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.7869674185463659, "eval_f1": 0.7355039968804835, "eval_loss": 0.44907739758491516, "eval_precision": 0.7435604353145727, "eval_recall": 0.7292689579923622, "eval_runtime": 5.0481, "eval_samples_per_second": 79.039, "eval_steps_per_second": 9.905, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.842677593231201, "learning_rate": 1e-05, "loss": 0.4409, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.7719298245614035, "eval_f1": 0.7068474127421138, "eval_loss": 0.44651278853416443, "eval_precision": 0.725706313219393, "eval_recall": 0.696126568466994, "eval_runtime": 5.0391, "eval_samples_per_second": 79.18, "eval_steps_per_second": 9.922, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.7264163494110107, "learning_rate": 7.5e-06, "loss": 0.4348, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.7869674185463659, "eval_f1": 0.7355039968804835, "eval_loss": 0.4473975598812103, "eval_precision": 0.7435604353145727, "eval_recall": 0.7292689579923622, "eval_runtime": 5.0762, "eval_samples_per_second": 78.602, "eval_steps_per_second": 9.85, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.3701038360595703, "learning_rate": 5e-06, "loss": 0.4478, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.7301509908776345, "eval_loss": 0.4460136294364929, "eval_precision": 0.7407832589871425, "eval_recall": 0.7224949990907438, "eval_runtime": 5.1002, "eval_samples_per_second": 78.233, "eval_steps_per_second": 9.804, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.103198289871216, "learning_rate": 2.5e-06, "loss": 0.4382, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.7869674185463659, "eval_f1": 0.7309977236133474, "eval_loss": 0.44484180212020874, "eval_precision": 0.7447157190635452, "eval_recall": 0.7217675941080197, "eval_runtime": 5.061, "eval_samples_per_second": 78.838, "eval_steps_per_second": 9.879, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.061123847961426, "learning_rate": 0.0, "loss": 0.4313, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.7869674185463659, "eval_f1": 0.7325336550973572, "eval_loss": 0.445127934217453, "eval_precision": 0.7442562883739354, "eval_recall": 0.7242680487361338, "eval_runtime": 5.049, "eval_samples_per_second": 79.025, "eval_steps_per_second": 9.903, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7588990440528000.0, "train_loss": 0.4626342241881324, "train_runtime": 1944.0727, "train_samples_per_second": 37.427, "train_steps_per_second": 1.255 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7588990440528000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }