{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.217932224273682, "learning_rate": 4.75e-05, "loss": 0.5643, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7042606516290727, "eval_f1": 0.5981941694544958, "eval_loss": 0.5218127369880676, "eval_precision": 0.6240824392998306, "eval_recall": 0.5932442262229496, "eval_runtime": 1.7249, "eval_samples_per_second": 231.323, "eval_steps_per_second": 28.988, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.5043983459472656, "learning_rate": 4.5e-05, "loss": 0.5086, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.6868131868131868, "eval_loss": 0.5021316409111023, "eval_precision": 0.6814393939393939, "eval_recall": 0.6959901800327333, "eval_runtime": 1.7316, "eval_samples_per_second": 230.424, "eval_steps_per_second": 28.875, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.2947137355804443, "learning_rate": 4.25e-05, "loss": 0.4652, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7894736842105263, "eval_f1": 0.7407327186088248, "eval_loss": 0.444990336894989, "eval_precision": 0.7463777285669092, "eval_recall": 0.7360429168939807, "eval_runtime": 1.7474, "eval_samples_per_second": 228.342, "eval_steps_per_second": 28.614, "step": 366 }, { "epoch": 4.0, "grad_norm": 4.659485816955566, "learning_rate": 4e-05, "loss": 0.4248, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7917273014868713, "eval_loss": 0.39323920011520386, "eval_precision": 0.8074456774536514, "eval_recall": 0.780460083651573, "eval_runtime": 1.7487, "eval_samples_per_second": 228.174, "eval_steps_per_second": 28.593, "step": 488 }, { "epoch": 5.0, "grad_norm": 5.222157955169678, "learning_rate": 3.7500000000000003e-05, "loss": 0.3812, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8118502107020158, "eval_loss": 0.37043145298957825, "eval_precision": 0.8083091673078061, "eval_recall": 0.8157846881251136, "eval_runtime": 1.756, "eval_samples_per_second": 227.224, "eval_steps_per_second": 28.474, "step": 610 }, { "epoch": 6.0, "grad_norm": 6.741718769073486, "learning_rate": 3.5e-05, "loss": 0.3506, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8289446964056049, "eval_loss": 0.35664382576942444, "eval_precision": 0.8266129032258065, "eval_recall": 0.8314238952536825, "eval_runtime": 1.751, "eval_samples_per_second": 227.875, "eval_steps_per_second": 28.556, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.737555503845215, "learning_rate": 3.2500000000000004e-05, "loss": 0.3323, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.820640215771169, "eval_loss": 0.3437691330909729, "eval_precision": 0.8364527629233511, "eval_recall": 0.8089198036006546, "eval_runtime": 1.756, "eval_samples_per_second": 227.216, "eval_steps_per_second": 28.473, "step": 854 }, { "epoch": 8.0, "grad_norm": 6.206509113311768, "learning_rate": 3e-05, "loss": 0.3108, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8279052989013229, "eval_loss": 0.33262690901756287, "eval_precision": 0.8414113428943938, "eval_recall": 0.8174668121476631, "eval_runtime": 1.7517, "eval_samples_per_second": 227.78, "eval_steps_per_second": 28.544, "step": 976 }, { "epoch": 9.0, "grad_norm": 3.5029311180114746, "learning_rate": 2.7500000000000004e-05, "loss": 0.2998, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8385441718775052, "eval_loss": 0.32504233717918396, "eval_precision": 0.8412280701754387, "eval_recall": 0.8360156392071285, "eval_runtime": 1.7542, "eval_samples_per_second": 227.46, "eval_steps_per_second": 28.504, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.729393005371094, "learning_rate": 2.5e-05, "loss": 0.2923, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.31815892457962036, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 1.7579, "eval_samples_per_second": 226.98, "eval_steps_per_second": 28.444, "step": 1220 }, { "epoch": 11.0, "grad_norm": 11.397634506225586, "learning_rate": 2.25e-05, "loss": 0.2887, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8438308224802573, "eval_loss": 0.31445056200027466, "eval_precision": 0.8485409407665505, "eval_recall": 0.8395617384979087, "eval_runtime": 1.7554, "eval_samples_per_second": 227.301, "eval_steps_per_second": 28.484, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.077719688415527, "learning_rate": 2e-05, "loss": 0.2716, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8430067043674827, "eval_loss": 0.30924713611602783, "eval_precision": 0.8498269896193772, "eval_recall": 0.8370612838697945, "eval_runtime": 1.7521, "eval_samples_per_second": 227.724, "eval_steps_per_second": 28.537, "step": 1464 }, { "epoch": 13.0, "grad_norm": 3.046454906463623, "learning_rate": 1.75e-05, "loss": 0.2598, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8458135188208294, "eval_loss": 0.30985820293426514, "eval_precision": 0.8627946127946129, "eval_recall": 0.8331060192762321, "eval_runtime": 1.754, "eval_samples_per_second": 227.479, "eval_steps_per_second": 28.506, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.8712358474731445, "learning_rate": 1.5e-05, "loss": 0.2722, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8491633041962088, "eval_loss": 0.3003251254558563, "eval_precision": 0.85610254797106, "eval_recall": 0.8431078377886888, "eval_runtime": 1.7541, "eval_samples_per_second": 227.468, "eval_steps_per_second": 28.505, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.7105746269226074, "learning_rate": 1.25e-05, "loss": 0.2536, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8491633041962088, "eval_loss": 0.29776889085769653, "eval_precision": 0.85610254797106, "eval_recall": 0.8431078377886888, "eval_runtime": 1.7521, "eval_samples_per_second": 227.726, "eval_steps_per_second": 28.537, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.3939132690429688, "learning_rate": 1e-05, "loss": 0.2536, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.2970341145992279, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 1.757, "eval_samples_per_second": 227.091, "eval_steps_per_second": 28.457, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.2166881561279297, "learning_rate": 7.5e-06, "loss": 0.2479, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8545433391506689, "eval_loss": 0.29783862829208374, "eval_precision": 0.8639270714012982, "eval_recall": 0.846653937079469, "eval_runtime": 1.7549, "eval_samples_per_second": 227.365, "eval_steps_per_second": 28.492, "step": 2074 }, { "epoch": 18.0, "grad_norm": 7.031806945800781, "learning_rate": 5e-06, "loss": 0.2487, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8491633041962088, "eval_loss": 0.29700446128845215, "eval_precision": 0.85610254797106, "eval_recall": 0.8431078377886888, "eval_runtime": 1.7527, "eval_samples_per_second": 227.643, "eval_steps_per_second": 28.527, "step": 2196 }, { "epoch": 19.0, "grad_norm": 5.178986549377441, "learning_rate": 2.5e-06, "loss": 0.2457, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8421640488656195, "eval_loss": 0.2947324812412262, "eval_precision": 0.8512313860252005, "eval_recall": 0.8345608292416803, "eval_runtime": 1.7547, "eval_samples_per_second": 227.393, "eval_steps_per_second": 28.495, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.935147285461426, "learning_rate": 0.0, "loss": 0.2499, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8421640488656195, "eval_loss": 0.29513633251190186, "eval_precision": 0.8512313860252005, "eval_recall": 0.8345608292416803, "eval_runtime": 1.7593, "eval_samples_per_second": 226.794, "eval_steps_per_second": 28.42, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.32608639607664014, "train_runtime": 629.3107, "train_samples_per_second": 115.619, "train_steps_per_second": 3.877 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }