{ "best_metric": 0.9596994535519126, "best_model_checkpoint": "train_authorship/train_outputs/05-25-2024_21:30:31/checkpoint-560", "epoch": 8.115942028985508, "eval_steps": 35, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.51, "grad_norm": 9.12179946899414, "learning_rate": 2.5362318840579714e-05, "loss": 0.9599, "step": 35 }, { "epoch": 0.51, "eval_acc_product": 0.522295927646477, "eval_fitzgerald_acc": 0.6823770491803278, "eval_hemingway_acc": 0.9651639344262295, "eval_loss": 0.4738686978816986, "eval_overall_acc": 0.8135245901639344, "eval_runtime": 13.1203, "eval_samples_per_second": 111.583, "eval_steps_per_second": 1.753, "eval_woolf_acc": 0.7930327868852459, "step": 35 }, { "epoch": 1.01, "grad_norm": 18.4027099609375, "learning_rate": 4.99194847020934e-05, "loss": 0.3839, "step": 70 }, { "epoch": 1.01, "eval_acc_product": 0.7210372836134964, "eval_fitzgerald_acc": 0.764344262295082, "eval_hemingway_acc": 0.9815573770491803, "eval_loss": 0.24899350106716156, "eval_overall_acc": 0.9023224043715847, "eval_runtime": 13.1386, "eval_samples_per_second": 111.428, "eval_steps_per_second": 1.751, "eval_woolf_acc": 0.9610655737704918, "step": 70 }, { "epoch": 1.52, "grad_norm": 7.836585998535156, "learning_rate": 4.710144927536232e-05, "loss": 0.2387, "step": 105 }, { "epoch": 1.52, "eval_acc_product": 0.8025821475696203, "eval_fitzgerald_acc": 0.9631147540983607, "eval_hemingway_acc": 0.9221311475409836, "eval_loss": 0.20742054283618927, "eval_overall_acc": 0.9296448087431693, "eval_runtime": 13.1443, "eval_samples_per_second": 111.379, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9036885245901639, "step": 105 }, { "epoch": 2.03, "grad_norm": 10.438405990600586, "learning_rate": 4.428341384863124e-05, "loss": 0.1938, "step": 140 }, { "epoch": 2.03, "eval_acc_product": 0.8229529244050163, "eval_fitzgerald_acc": 0.9528688524590164, "eval_hemingway_acc": 0.9733606557377049, "eval_loss": 0.2009282410144806, "eval_overall_acc": 0.9378415300546448, "eval_runtime": 13.1419, "eval_samples_per_second": 111.399, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.8872950819672131, "step": 140 }, { "epoch": 2.54, "grad_norm": 11.882472038269043, "learning_rate": 4.146537842190016e-05, "loss": 0.1373, "step": 175 }, { "epoch": 2.54, "eval_acc_product": 0.8087266940845269, "eval_fitzgerald_acc": 0.9467213114754098, "eval_hemingway_acc": 0.8831967213114754, "eval_loss": 0.20678555965423584, "eval_overall_acc": 0.9323770491803278, "eval_runtime": 13.1478, "eval_samples_per_second": 111.35, "eval_steps_per_second": 1.749, "eval_woolf_acc": 0.9672131147540983, "step": 175 }, { "epoch": 3.04, "grad_norm": 3.76464581489563, "learning_rate": 3.864734299516908e-05, "loss": 0.0751, "step": 210 }, { "epoch": 3.04, "eval_acc_product": 0.8355915183980156, "eval_fitzgerald_acc": 0.8913934426229508, "eval_hemingway_acc": 0.9774590163934426, "eval_loss": 0.21373361349105835, "eval_overall_acc": 0.9426229508196722, "eval_runtime": 13.1464, "eval_samples_per_second": 111.361, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9590163934426229, "step": 210 }, { "epoch": 3.55, "grad_norm": 7.145068168640137, "learning_rate": 3.5829307568438e-05, "loss": 0.0799, "step": 245 }, { "epoch": 3.55, "eval_acc_product": 0.8595802071539027, "eval_fitzgerald_acc": 0.9467213114754098, "eval_hemingway_acc": 0.9508196721311475, "eval_loss": 0.19885893166065216, "eval_overall_acc": 0.9508196721311475, "eval_runtime": 13.143, "eval_samples_per_second": 111.39, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9549180327868853, "step": 245 }, { "epoch": 4.06, "grad_norm": 19.213457107543945, "learning_rate": 3.301127214170693e-05, "loss": 0.0689, "step": 280 }, { "epoch": 4.06, "eval_acc_product": 0.720976809113428, "eval_fitzgerald_acc": 0.985655737704918, "eval_hemingway_acc": 0.9036885245901639, "eval_loss": 0.44272100925445557, "eval_overall_acc": 0.8995901639344263, "eval_runtime": 13.1467, "eval_samples_per_second": 111.359, "eval_steps_per_second": 1.749, "eval_woolf_acc": 0.8094262295081968, "step": 280 }, { "epoch": 4.57, "grad_norm": 18.969886779785156, "learning_rate": 3.0193236714975848e-05, "loss": 0.0514, "step": 315 }, { "epoch": 4.57, "eval_acc_product": 0.8326357712760099, "eval_fitzgerald_acc": 0.9139344262295082, "eval_hemingway_acc": 0.9836065573770492, "eval_loss": 0.3214350938796997, "eval_overall_acc": 0.9412568306010929, "eval_runtime": 13.1454, "eval_samples_per_second": 111.37, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9262295081967213, "step": 315 }, { "epoch": 5.07, "grad_norm": 3.789048433303833, "learning_rate": 2.7375201288244768e-05, "loss": 0.0247, "step": 350 }, { "epoch": 5.07, "eval_acc_product": 0.8625536629442553, "eval_fitzgerald_acc": 0.9200819672131147, "eval_hemingway_acc": 0.9651639344262295, "eval_loss": 0.282697856426239, "eval_overall_acc": 0.9521857923497268, "eval_runtime": 13.1482, "eval_samples_per_second": 111.346, "eval_steps_per_second": 1.749, "eval_woolf_acc": 0.9713114754098361, "step": 350 }, { "epoch": 5.58, "grad_norm": 0.13263003528118134, "learning_rate": 2.455716586151369e-05, "loss": 0.0061, "step": 385 }, { "epoch": 5.58, "eval_acc_product": 0.870603844594922, "eval_fitzgerald_acc": 0.9508196721311475, "eval_hemingway_acc": 0.9692622950819673, "eval_loss": 0.28144514560699463, "eval_overall_acc": 0.9549180327868853, "eval_runtime": 13.1424, "eval_samples_per_second": 111.395, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.944672131147541, "step": 385 }, { "epoch": 6.09, "grad_norm": 0.01614902913570404, "learning_rate": 2.173913043478261e-05, "loss": 0.017, "step": 420 }, { "epoch": 6.09, "eval_acc_product": 0.8501492828694912, "eval_fitzgerald_acc": 0.9528688524590164, "eval_hemingway_acc": 0.930327868852459, "eval_loss": 0.34684956073760986, "eval_overall_acc": 0.9474043715846995, "eval_runtime": 13.1477, "eval_samples_per_second": 111.35, "eval_steps_per_second": 1.749, "eval_woolf_acc": 0.9590163934426229, "step": 420 }, { "epoch": 6.59, "grad_norm": 0.007705519441515207, "learning_rate": 1.892109500805153e-05, "loss": 0.0108, "step": 455 }, { "epoch": 6.59, "eval_acc_product": 0.8705621113386143, "eval_fitzgerald_acc": 0.9385245901639344, "eval_hemingway_acc": 0.9651639344262295, "eval_loss": 0.26387327909469604, "eval_overall_acc": 0.9549180327868853, "eval_runtime": 13.1463, "eval_samples_per_second": 111.362, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9610655737704918, "step": 455 }, { "epoch": 7.1, "grad_norm": 0.04329540580511093, "learning_rate": 1.610305958132045e-05, "loss": 0.0237, "step": 490 }, { "epoch": 7.1, "eval_acc_product": 0.8607751722611143, "eval_fitzgerald_acc": 0.9426229508196722, "eval_hemingway_acc": 0.9815573770491803, "eval_loss": 0.32457903027534485, "eval_overall_acc": 0.9515027322404371, "eval_runtime": 13.1416, "eval_samples_per_second": 111.402, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.930327868852459, "step": 490 }, { "epoch": 7.61, "grad_norm": 18.448688507080078, "learning_rate": 1.3285024154589374e-05, "loss": 0.0109, "step": 525 }, { "epoch": 7.61, "eval_acc_product": 0.8574545818262321, "eval_fitzgerald_acc": 0.9569672131147541, "eval_hemingway_acc": 0.9631147540983607, "eval_loss": 0.32199960947036743, "eval_overall_acc": 0.950136612021858, "eval_runtime": 13.141, "eval_samples_per_second": 111.407, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.930327868852459, "step": 525 }, { "epoch": 8.12, "grad_norm": 0.07037464529275894, "learning_rate": 1.0466988727858294e-05, "loss": 0.0108, "step": 560 }, { "epoch": 8.12, "eval_acc_product": 0.8835792216639279, "eval_fitzgerald_acc": 0.9385245901639344, "eval_hemingway_acc": 0.9692622950819673, "eval_loss": 0.27515432238578796, "eval_overall_acc": 0.9596994535519126, "eval_runtime": 13.1404, "eval_samples_per_second": 111.412, "eval_steps_per_second": 1.75, "eval_woolf_acc": 0.9713114754098361, "step": 560 } ], "logging_steps": 35, "max_steps": 690, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 35, "total_flos": 1.4725988675974908e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }