{ "best_metric": 0.5068484544754028, "best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/zh/rank4_lr5e-5/checkpoint-4000", "epoch": 0.24445893089960888, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020371577574967405, "grad_norm": 2.87671160697937, "learning_rate": 4.166666666666667e-05, "loss": 1.201, "step": 500 }, { "epoch": 0.020371577574967405, "eval_accuracy": 0.3718875502008032, "eval_f1": 0.3144388665500335, "eval_loss": 1.0995594263076782, "eval_runtime": 413.743, "eval_samples_per_second": 6.018, "eval_steps_per_second": 0.377, "step": 500 }, { "epoch": 0.04074315514993481, "grad_norm": 9.780874252319336, "learning_rate": 4.62962962962963e-05, "loss": 1.0225, "step": 1000 }, { "epoch": 0.04074315514993481, "eval_accuracy": 0.6277108433734939, "eval_f1": 0.6294051779002606, "eval_loss": 0.9010089039802551, "eval_runtime": 411.7026, "eval_samples_per_second": 6.048, "eval_steps_per_second": 0.379, "step": 1000 }, { "epoch": 0.06111473272490222, "grad_norm": 54.40706253051758, "learning_rate": 4.166666666666667e-05, "loss": 0.7434, "step": 1500 }, { "epoch": 0.06111473272490222, "eval_accuracy": 0.748995983935743, "eval_f1": 0.7514237869702255, "eval_loss": 0.6406136155128479, "eval_runtime": 411.3183, "eval_samples_per_second": 6.054, "eval_steps_per_second": 0.379, "step": 1500 }, { "epoch": 0.08148631029986962, "grad_norm": 11.607203483581543, "learning_rate": 3.7037037037037037e-05, "loss": 0.6288, "step": 2000 }, { "epoch": 0.08148631029986962, "eval_accuracy": 0.7738955823293173, "eval_f1": 0.7758546821907705, "eval_loss": 0.5777280926704407, "eval_runtime": 411.4602, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.379, "step": 2000 }, { "epoch": 0.10185788787483703, "grad_norm": 5.107800483703613, "learning_rate": 3.240740740740741e-05, "loss": 0.6186, "step": 2500 }, { "epoch": 0.10185788787483703, "eval_accuracy": 0.7847389558232932, "eval_f1": 0.7871483289805999, "eval_loss": 0.5597745180130005, "eval_runtime": 411.7146, "eval_samples_per_second": 6.048, "eval_steps_per_second": 0.379, "step": 2500 }, { "epoch": 0.12222946544980444, "grad_norm": 7.775627136230469, "learning_rate": 2.777777777777778e-05, "loss": 0.5909, "step": 3000 }, { "epoch": 0.12222946544980444, "eval_accuracy": 0.7963855421686747, "eval_f1": 0.7956904592219637, "eval_loss": 0.5284361839294434, "eval_runtime": 411.1781, "eval_samples_per_second": 6.056, "eval_steps_per_second": 0.379, "step": 3000 }, { "epoch": 0.14260104302477183, "grad_norm": 18.797571182250977, "learning_rate": 2.314814814814815e-05, "loss": 0.58, "step": 3500 }, { "epoch": 0.14260104302477183, "eval_accuracy": 0.7891566265060241, "eval_f1": 0.7909172071907363, "eval_loss": 0.5231069922447205, "eval_runtime": 412.124, "eval_samples_per_second": 6.042, "eval_steps_per_second": 0.379, "step": 3500 }, { "epoch": 0.16297262059973924, "grad_norm": 5.233774185180664, "learning_rate": 1.8518518518518518e-05, "loss": 0.5752, "step": 4000 }, { "epoch": 0.16297262059973924, "eval_accuracy": 0.8028112449799196, "eval_f1": 0.8033343776283127, "eval_loss": 0.5068484544754028, "eval_runtime": 411.2741, "eval_samples_per_second": 6.054, "eval_steps_per_second": 0.379, "step": 4000 }, { "epoch": 0.18334419817470665, "grad_norm": 5.612044334411621, "learning_rate": 1.388888888888889e-05, "loss": 0.5495, "step": 4500 }, { "epoch": 0.18334419817470665, "eval_accuracy": 0.8016064257028113, "eval_f1": 0.8027178095880526, "eval_loss": 0.5132325887680054, "eval_runtime": 411.4311, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.379, "step": 4500 }, { "epoch": 0.20371577574967406, "grad_norm": 5.75107479095459, "learning_rate": 9.259259259259259e-06, "loss": 0.5594, "step": 5000 }, { "epoch": 0.20371577574967406, "eval_accuracy": 0.8016064257028113, "eval_f1": 0.802067784242174, "eval_loss": 0.52597576379776, "eval_runtime": 412.0059, "eval_samples_per_second": 6.044, "eval_steps_per_second": 0.379, "step": 5000 }, { "epoch": 0.22408735332464147, "grad_norm": 4.854472637176514, "learning_rate": 4.6296296296296296e-06, "loss": 0.5561, "step": 5500 }, { "epoch": 0.22408735332464147, "eval_accuracy": 0.8016064257028113, "eval_f1": 0.8025734250007549, "eval_loss": 0.5094338059425354, "eval_runtime": 411.5334, "eval_samples_per_second": 6.051, "eval_steps_per_second": 0.379, "step": 5500 }, { "epoch": 0.24445893089960888, "grad_norm": 6.345026016235352, "learning_rate": 0.0, "loss": 0.5454, "step": 6000 }, { "epoch": 0.24445893089960888, "eval_accuracy": 0.804417670682731, "eval_f1": 0.8051855538804084, "eval_loss": 0.5121804475784302, "eval_runtime": 411.4571, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.379, "step": 6000 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 8.03166870528e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }