{ "best_metric": 78.2193331519916, "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_large_model/checkpoint-2760", "epoch": 5.0, "eval_steps": 500, "global_step": 3450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 690, "train_exact_match": 56.043956043956044, "train_f1": 73.10238632897224, "train_runtime": 28.967, "train_samples_per_second": 43.567, "train_steps_per_second": 1.588 }, { "epoch": 1.0, "grad_norm": 34.386253356933594, "learning_rate": 1e-05, "loss": 3.1538, "step": 690 }, { "epoch": 1.0, "eval_exact_match": 53.28125, "eval_f1": 68.90784509260168, "eval_runtime": 92.5664, "eval_samples_per_second": 43.774, "eval_steps_per_second": 1.566, "step": 690 }, { "epoch": 2.0, "step": 1380, "train_exact_match": 71.22877122877122, "train_f1": 84.717686864544, "train_runtime": 29.1704, "train_samples_per_second": 43.64, "train_steps_per_second": 1.577 }, { "epoch": 2.0, "grad_norm": 118.70819854736328, "learning_rate": 7.500000000000001e-06, "loss": 1.105, "step": 1380 }, { "epoch": 2.0, "eval_exact_match": 63.1875, "eval_f1": 77.04679698006159, "eval_runtime": 92.8422, "eval_samples_per_second": 43.644, "eval_steps_per_second": 1.562, "step": 1380 }, { "epoch": 3.0, "step": 2070, "train_exact_match": 76.62337662337663, "train_f1": 88.54201062497397, "train_runtime": 29.7625, "train_samples_per_second": 43.511, "train_steps_per_second": 1.579 }, { "epoch": 3.0, "grad_norm": 41.994937896728516, "learning_rate": 5e-06, "loss": 0.7535, "step": 2070 }, { "epoch": 3.0, "eval_exact_match": 64.125, "eval_f1": 77.84371511436862, "eval_runtime": 92.7563, "eval_samples_per_second": 43.684, "eval_steps_per_second": 1.563, "step": 2070 }, { "epoch": 4.0, "step": 2760, "train_exact_match": 82.01798201798202, "train_f1": 91.71025821891249, "train_runtime": 29.7394, "train_samples_per_second": 43.545, "train_steps_per_second": 1.58 }, { "epoch": 4.0, "grad_norm": 416.8191223144531, "learning_rate": 2.5e-06, "loss": 0.5495, "step": 2760 }, { "epoch": 4.0, "eval_exact_match": 65.03125, "eval_f1": 78.2193331519916, "eval_runtime": 92.5461, "eval_samples_per_second": 43.784, "eval_steps_per_second": 1.567, "step": 2760 }, { "epoch": 5.0, "step": 3450, "train_exact_match": 83.71628371628371, "train_f1": 92.89072366821784, "train_runtime": 28.6071, "train_samples_per_second": 43.311, "train_steps_per_second": 1.573 }, { "epoch": 5.0, "grad_norm": 5.402435779571533, "learning_rate": 0.0, "loss": 0.418, "step": 3450 }, { "epoch": 5.0, "eval_exact_match": 64.875, "eval_f1": 78.00354149557015, "eval_runtime": 92.9043, "eval_samples_per_second": 43.615, "eval_steps_per_second": 1.561, "step": 3450 }, { "epoch": 5.0, "step": 3450, "total_flos": 6.719078999672064e+16, "train_loss": 1.1959592426687047, "train_runtime": 6180.983, "train_samples_per_second": 15.607, "train_steps_per_second": 0.558 } ], "logging_steps": 500, "max_steps": 3450, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 6.719078999672064e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }