{ "best_metric": 0.7947742682321812, "best_model_checkpoint": "./XLM-V_64-multi-outputs/checkpoint-15000", "epoch": 14.8619957537155, "eval_steps": 1000, "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7077140835102619, "grad_norm": 1.493615746498108, "learning_rate": 9.433962264150943e-07, "loss": 0.6936, "step": 1000 }, { "epoch": 0.7077140835102619, "eval_accuracy": 0.5659663447177138, "eval_f1": 0.6995657867530498, "eval_loss": 0.682445764541626, "eval_precision": 0.5400659785037778, "eval_recall": 0.9927621283255086, "eval_runtime": 57.8197, "eval_samples_per_second": 173.695, "eval_steps_per_second": 2.715, "step": 1000 }, { "epoch": 1.4154281670205238, "grad_norm": 3.4982151985168457, "learning_rate": 1.8867924528301887e-06, "loss": 0.672, "step": 2000 }, { "epoch": 1.4154281670205238, "eval_accuracy": 0.6204321417903017, "eval_f1": 0.7176714560805807, "eval_loss": 0.6340225338935852, "eval_precision": 0.5774731823599524, "eval_recall": 0.9477699530516432, "eval_runtime": 57.5505, "eval_samples_per_second": 174.508, "eval_steps_per_second": 2.728, "step": 2000 }, { "epoch": 2.1231422505307855, "grad_norm": 12.846685409545898, "learning_rate": 1.907732634338139e-06, "loss": 0.6537, "step": 3000 }, { "epoch": 2.1231422505307855, "eval_accuracy": 0.6434332370805537, "eval_f1": 0.7224676431837557, "eval_loss": 0.6225757002830505, "eval_precision": 0.5982543960980619, "eval_recall": 0.9117762128325508, "eval_runtime": 57.5731, "eval_samples_per_second": 174.439, "eval_steps_per_second": 2.727, "step": 3000 }, { "epoch": 2.8308563340410475, "grad_norm": 4.933294296264648, "learning_rate": 1.8028833551769331e-06, "loss": 0.6499, "step": 4000 }, { "epoch": 2.8308563340410475, "eval_accuracy": 0.6594643034949716, "eval_f1": 0.743858597962852, "eval_loss": 0.604403555393219, "eval_precision": 0.6026699029126213, "eval_recall": 0.9714397496087637, "eval_runtime": 57.5924, "eval_samples_per_second": 174.381, "eval_steps_per_second": 2.726, "step": 4000 }, { "epoch": 3.538570417551309, "grad_norm": 9.257452011108398, "learning_rate": 1.6980340760157273e-06, "loss": 0.6389, "step": 5000 }, { "epoch": 3.538570417551309, "eval_accuracy": 0.660460021905805, "eval_f1": 0.6269146608315098, "eval_loss": 0.6047727465629578, "eval_precision": 0.7112711022840119, "eval_recall": 0.5604460093896714, "eval_runtime": 57.6043, "eval_samples_per_second": 174.344, "eval_steps_per_second": 2.725, "step": 5000 }, { "epoch": 4.246284501061571, "grad_norm": 8.192702293395996, "learning_rate": 1.5931847968545215e-06, "loss": 0.6187, "step": 6000 }, { "epoch": 4.246284501061571, "eval_accuracy": 0.6978990341531415, "eval_f1": 0.7157046476761619, "eval_loss": 0.5651090741157532, "eval_precision": 0.6868705035971223, "eval_recall": 0.7470657276995305, "eval_runtime": 57.5852, "eval_samples_per_second": 174.402, "eval_steps_per_second": 2.726, "step": 6000 }, { "epoch": 4.953998584571833, "grad_norm": 49.664493560791016, "learning_rate": 1.488335517693316e-06, "loss": 0.5962, "step": 7000 }, { "epoch": 4.953998584571833, "eval_accuracy": 0.7021806233197252, "eval_f1": 0.7534009398961168, "eval_loss": 0.5552804470062256, "eval_precision": 0.6511329628046174, "eval_recall": 0.8937793427230047, "eval_runtime": 57.4997, "eval_samples_per_second": 174.662, "eval_steps_per_second": 2.73, "step": 7000 }, { "epoch": 5.661712668082095, "grad_norm": 7.591745376586914, "learning_rate": 1.38348623853211e-06, "loss": 0.5716, "step": 8000 }, { "epoch": 5.661712668082095, "eval_accuracy": 0.7096485114009758, "eval_f1": 0.7601184600197434, "eval_loss": 0.5378949046134949, "eval_precision": 0.65587734241908, "eval_recall": 0.903755868544601, "eval_runtime": 57.5168, "eval_samples_per_second": 174.61, "eval_steps_per_second": 2.73, "step": 8000 }, { "epoch": 6.369426751592357, "grad_norm": 7.424753189086914, "learning_rate": 1.2786369593709043e-06, "loss": 0.531, "step": 9000 }, { "epoch": 6.369426751592357, "eval_accuracy": 0.7241860001991437, "eval_f1": 0.746800731261426, "eval_loss": 0.4708074629306793, "eval_precision": 0.7009265614275909, "eval_recall": 0.7991001564945227, "eval_runtime": 57.6158, "eval_samples_per_second": 174.31, "eval_steps_per_second": 2.725, "step": 9000 }, { "epoch": 7.077140835102618, "grad_norm": 9.683096885681152, "learning_rate": 1.1737876802096983e-06, "loss": 0.4858, "step": 10000 }, { "epoch": 7.077140835102618, "eval_accuracy": 0.7385243453151449, "eval_f1": 0.7664947536902009, "eval_loss": 0.44091591238975525, "eval_precision": 0.7026410172807304, "eval_recall": 0.843114241001565, "eval_runtime": 57.5121, "eval_samples_per_second": 174.624, "eval_steps_per_second": 2.73, "step": 10000 }, { "epoch": 7.78485491861288, "grad_norm": 25.252531051635742, "learning_rate": 1.0689384010484928e-06, "loss": 0.4577, "step": 11000 }, { "epoch": 7.78485491861288, "eval_accuracy": 0.7428059344817286, "eval_f1": 0.7718399434678915, "eval_loss": 0.42118868231773376, "eval_precision": 0.7036559832501208, "eval_recall": 0.8546557120500783, "eval_runtime": 57.4963, "eval_samples_per_second": 174.672, "eval_steps_per_second": 2.731, "step": 11000 }, { "epoch": 8.492569002123142, "grad_norm": 9.067980766296387, "learning_rate": 9.64089121887287e-07, "loss": 0.4404, "step": 12000 }, { "epoch": 8.492569002123142, "eval_accuracy": 0.7539579806830629, "eval_f1": 0.7588091752074183, "eval_loss": 0.40373048186302185, "eval_precision": 0.75725696473797, "eval_recall": 0.7603677621283255, "eval_runtime": 57.7852, "eval_samples_per_second": 173.799, "eval_steps_per_second": 2.717, "step": 12000 }, { "epoch": 9.200283085633403, "grad_norm": 6.412005424499512, "learning_rate": 8.592398427260812e-07, "loss": 0.42, "step": 13000 }, { "epoch": 9.200283085633403, "eval_accuracy": 0.7534601214776461, "eval_f1": 0.7862938028655274, "eval_loss": 0.394222617149353, "eval_precision": 0.7035835650293482, "eval_recall": 0.8910406885758998, "eval_runtime": 57.6787, "eval_samples_per_second": 174.12, "eval_steps_per_second": 2.722, "step": 13000 }, { "epoch": 9.907997169143666, "grad_norm": 9.248723983764648, "learning_rate": 7.543905635648754e-07, "loss": 0.4061, "step": 14000 }, { "epoch": 9.907997169143666, "eval_accuracy": 0.7524644030668127, "eval_f1": 0.7793752218672346, "eval_loss": 0.3883645236492157, "eval_precision": 0.7132878492527616, "eval_recall": 0.8589593114241002, "eval_runtime": 57.5171, "eval_samples_per_second": 174.609, "eval_steps_per_second": 2.73, "step": 14000 }, { "epoch": 10.615711252653927, "grad_norm": 8.524171829223633, "learning_rate": 6.495412844036698e-07, "loss": 0.4007, "step": 15000 }, { "epoch": 10.615711252653927, "eval_accuracy": 0.7528626904311461, "eval_f1": 0.7947742682321812, "eval_loss": 0.38538095355033875, "eval_precision": 0.6883414494414208, "eval_recall": 0.9401408450704225, "eval_runtime": 57.8102, "eval_samples_per_second": 173.724, "eval_steps_per_second": 2.716, "step": 15000 }, { "epoch": 11.32342533616419, "grad_norm": 4.774406909942627, "learning_rate": 5.44692005242464e-07, "loss": 0.3936, "step": 16000 }, { "epoch": 11.32342533616419, "eval_accuracy": 0.7545554117295629, "eval_f1": 0.793083186434987, "eval_loss": 0.38386669754981995, "eval_precision": 0.6946037347448905, "eval_recall": 0.9241001564945227, "eval_runtime": 57.8533, "eval_samples_per_second": 173.594, "eval_steps_per_second": 2.714, "step": 16000 }, { "epoch": 12.031139419674451, "grad_norm": 4.318003177642822, "learning_rate": 4.398427260812582e-07, "loss": 0.3915, "step": 17000 }, { "epoch": 12.031139419674451, "eval_accuracy": 0.7562481330279797, "eval_f1": 0.7931034482758621, "eval_loss": 0.3860309422016144, "eval_precision": 0.6982142857142857, "eval_recall": 0.9178403755868545, "eval_runtime": 57.8089, "eval_samples_per_second": 173.728, "eval_steps_per_second": 2.716, "step": 17000 }, { "epoch": 12.738853503184714, "grad_norm": 15.07564926147461, "learning_rate": 3.3499344692005245e-07, "loss": 0.3888, "step": 18000 }, { "epoch": 12.738853503184714, "eval_accuracy": 0.7568455640744798, "eval_f1": 0.7816523605150214, "eval_loss": 0.3812848627567291, "eval_precision": 0.7198616600790514, "eval_recall": 0.8550469483568075, "eval_runtime": 57.8126, "eval_samples_per_second": 173.716, "eval_steps_per_second": 2.716, "step": 18000 }, { "epoch": 13.446567586694975, "grad_norm": 10.069337844848633, "learning_rate": 2.3014416775884665e-07, "loss": 0.3832, "step": 19000 }, { "epoch": 13.446567586694975, "eval_accuracy": 0.7569451359155631, "eval_f1": 0.7900576244947106, "eval_loss": 0.37964996695518494, "eval_precision": 0.7049884881043745, "eval_recall": 0.8984741784037559, "eval_runtime": 57.6196, "eval_samples_per_second": 174.298, "eval_steps_per_second": 2.725, "step": 19000 }, { "epoch": 14.154281670205236, "grad_norm": 8.055023193359375, "learning_rate": 1.252948885976409e-07, "loss": 0.383, "step": 20000 }, { "epoch": 14.154281670205236, "eval_accuracy": 0.7573434232798965, "eval_f1": 0.7904016513288037, "eval_loss": 0.3802996873855591, "eval_precision": 0.7052954719877207, "eval_recall": 0.8988654147104851, "eval_runtime": 57.777, "eval_samples_per_second": 173.824, "eval_steps_per_second": 2.717, "step": 20000 }, { "epoch": 14.8619957537155, "grad_norm": 2.661689281463623, "learning_rate": 2.0445609436435123e-08, "loss": 0.3823, "step": 21000 }, { "epoch": 14.8619957537155, "eval_accuracy": 0.7576421388031465, "eval_f1": 0.7906056434962148, "eval_loss": 0.37897399067878723, "eval_precision": 0.7056203931203932, "eval_recall": 0.8988654147104851, "eval_runtime": 57.6891, "eval_samples_per_second": 174.088, "eval_steps_per_second": 2.721, "step": 21000 } ], "logging_steps": 1000, "max_steps": 21195, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.173409489802339e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }