{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 20, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2e-05, "loss": 0.9626, "step": 5 }, { "epoch": 0.17, "learning_rate": 2e-05, "loss": 0.7551, "step": 10 }, { "epoch": 0.25, "learning_rate": 2e-05, "loss": 0.6737, "step": 15 }, { "epoch": 0.33, "learning_rate": 2e-05, "loss": 0.5923, "step": 20 }, { "epoch": 0.33, "eval_f1": 0.2973817897616256, "eval_loss": 0.6075014472007751, "eval_runtime": 2.0329, "eval_samples_per_second": 464.848, "eval_steps_per_second": 14.757, "step": 20 }, { "epoch": 0.42, "learning_rate": 2e-05, "loss": 0.5935, "step": 25 }, { "epoch": 0.5, "learning_rate": 2e-05, "loss": 0.6499, "step": 30 }, { "epoch": 0.58, "learning_rate": 2e-05, "loss": 0.5639, "step": 35 }, { "epoch": 0.67, "learning_rate": 2e-05, "loss": 0.6694, "step": 40 }, { "epoch": 0.67, "eval_f1": 0.2973817897616256, "eval_loss": 0.5937851071357727, "eval_runtime": 2.0028, "eval_samples_per_second": 471.842, "eval_steps_per_second": 14.979, "step": 40 }, { "epoch": 0.75, "learning_rate": 2e-05, "loss": 0.5923, "step": 45 }, { "epoch": 0.83, "learning_rate": 2e-05, "loss": 0.6133, "step": 50 }, { "epoch": 0.92, "learning_rate": 2e-05, "loss": 0.5702, "step": 55 }, { "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.5225, "step": 60 }, { "epoch": 1.0, "eval_f1": 0.3024223872822884, "eval_loss": 0.534581184387207, "eval_runtime": 2.0063, "eval_samples_per_second": 471.005, "eval_steps_per_second": 14.953, "step": 60 }, { "epoch": 1.08, "learning_rate": 2e-05, "loss": 0.4961, "step": 65 }, { "epoch": 1.17, "learning_rate": 2e-05, "loss": 0.4879, "step": 70 }, { "epoch": 1.25, "learning_rate": 2e-05, "loss": 0.5475, "step": 75 }, { "epoch": 1.33, "learning_rate": 2e-05, "loss": 0.5458, "step": 80 }, { "epoch": 1.33, "eval_f1": 0.4681950604946577, "eval_loss": 0.5171502828598022, "eval_runtime": 2.0079, "eval_samples_per_second": 470.651, "eval_steps_per_second": 14.941, "step": 80 }, { "epoch": 1.42, "learning_rate": 2e-05, "loss": 0.5224, "step": 85 }, { "epoch": 1.5, "learning_rate": 2e-05, "loss": 0.5344, "step": 90 }, { "epoch": 1.58, "learning_rate": 2e-05, "loss": 0.4791, "step": 95 }, { "epoch": 1.67, "learning_rate": 2e-05, "loss": 0.5052, "step": 100 }, { "epoch": 1.67, "eval_f1": 0.43614802647951817, "eval_loss": 0.5550346970558167, "eval_runtime": 2.0143, "eval_samples_per_second": 469.148, "eval_steps_per_second": 14.894, "step": 100 }, { "epoch": 1.75, "learning_rate": 2e-05, "loss": 0.5821, "step": 105 }, { "epoch": 1.83, "learning_rate": 2e-05, "loss": 0.4842, "step": 110 }, { "epoch": 1.92, "learning_rate": 2e-05, "loss": 0.4613, "step": 115 }, { "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.3689, "step": 120 }, { "epoch": 2.0, "eval_f1": 0.4394838882203503, "eval_loss": 0.5187910795211792, "eval_runtime": 2.0147, "eval_samples_per_second": 469.048, "eval_steps_per_second": 14.89, "step": 120 }, { "epoch": 2.08, "learning_rate": 2e-05, "loss": 0.4172, "step": 125 }, { "epoch": 2.17, "learning_rate": 2e-05, "loss": 0.4645, "step": 130 }, { "epoch": 2.25, "learning_rate": 2e-05, "loss": 0.4628, "step": 135 }, { "epoch": 2.33, "learning_rate": 2e-05, "loss": 0.4097, "step": 140 }, { "epoch": 2.33, "eval_f1": 0.4676343432076015, "eval_loss": 0.4918379485607147, "eval_runtime": 2.021, "eval_samples_per_second": 467.58, "eval_steps_per_second": 14.844, "step": 140 }, { "epoch": 2.42, "learning_rate": 2e-05, "loss": 0.4716, "step": 145 }, { "epoch": 2.5, "learning_rate": 2e-05, "loss": 0.4947, "step": 150 }, { "epoch": 2.58, "learning_rate": 2e-05, "loss": 0.4288, "step": 155 }, { "epoch": 2.67, "learning_rate": 2e-05, "loss": 0.4904, "step": 160 }, { "epoch": 2.67, "eval_f1": 0.4640191084468284, "eval_loss": 0.4755867123603821, "eval_runtime": 2.0186, "eval_samples_per_second": 468.144, "eval_steps_per_second": 14.862, "step": 160 }, { "epoch": 2.75, "learning_rate": 2e-05, "loss": 0.4391, "step": 165 }, { "epoch": 2.83, "learning_rate": 2e-05, "loss": 0.408, "step": 170 }, { "epoch": 2.92, "learning_rate": 2e-05, "loss": 0.4724, "step": 175 }, { "epoch": 3.0, "learning_rate": 2e-05, "loss": 0.3696, "step": 180 }, { "epoch": 3.0, "eval_f1": 0.46883968839688395, "eval_loss": 0.47884294390678406, "eval_runtime": 2.0223, "eval_samples_per_second": 467.285, "eval_steps_per_second": 14.834, "step": 180 }, { "epoch": 3.08, "learning_rate": 2e-05, "loss": 0.4094, "step": 185 }, { "epoch": 3.17, "learning_rate": 2e-05, "loss": 0.4162, "step": 190 }, { "epoch": 3.25, "learning_rate": 2e-05, "loss": 0.3975, "step": 195 }, { "epoch": 3.33, "learning_rate": 2e-05, "loss": 0.3631, "step": 200 }, { "epoch": 3.33, "eval_f1": 0.4735017608176137, "eval_loss": 0.4751051366329193, "eval_runtime": 2.0238, "eval_samples_per_second": 466.941, "eval_steps_per_second": 14.824, "step": 200 }, { "epoch": 3.42, "learning_rate": 2e-05, "loss": 0.3953, "step": 205 }, { "epoch": 3.5, "learning_rate": 2e-05, "loss": 0.3781, "step": 210 }, { "epoch": 3.58, "learning_rate": 2e-05, "loss": 0.4447, "step": 215 }, { "epoch": 3.67, "learning_rate": 2e-05, "loss": 0.4345, "step": 220 }, { "epoch": 3.67, "eval_f1": 0.4796518931606489, "eval_loss": 0.4609261453151703, "eval_runtime": 2.0228, "eval_samples_per_second": 467.18, "eval_steps_per_second": 14.831, "step": 220 }, { "epoch": 3.75, "learning_rate": 2e-05, "loss": 0.3724, "step": 225 }, { "epoch": 3.83, "learning_rate": 2e-05, "loss": 0.366, "step": 230 }, { "epoch": 3.92, "learning_rate": 2e-05, "loss": 0.3763, "step": 235 }, { "epoch": 4.0, "learning_rate": 2e-05, "loss": 0.3398, "step": 240 }, { "epoch": 4.0, "eval_f1": 0.5672850456534463, "eval_loss": 0.46373221278190613, "eval_runtime": 2.0191, "eval_samples_per_second": 468.022, "eval_steps_per_second": 14.858, "step": 240 }, { "epoch": 4.08, "learning_rate": 2e-05, "loss": 0.3861, "step": 245 }, { "epoch": 4.17, "learning_rate": 2e-05, "loss": 0.315, "step": 250 }, { "epoch": 4.25, "learning_rate": 2e-05, "loss": 0.3501, "step": 255 }, { "epoch": 4.33, "learning_rate": 2e-05, "loss": 0.2985, "step": 260 }, { "epoch": 4.33, "eval_f1": 0.5091622097836045, "eval_loss": 0.4849531948566437, "eval_runtime": 2.0286, "eval_samples_per_second": 465.831, "eval_steps_per_second": 14.788, "step": 260 }, { "epoch": 4.42, "learning_rate": 2e-05, "loss": 0.3734, "step": 265 }, { "epoch": 4.5, "learning_rate": 2e-05, "loss": 0.3658, "step": 270 }, { "epoch": 4.58, "learning_rate": 2e-05, "loss": 0.3268, "step": 275 }, { "epoch": 4.67, "learning_rate": 2e-05, "loss": 0.3248, "step": 280 }, { "epoch": 4.67, "eval_f1": 0.6056820231448513, "eval_loss": 0.508940577507019, "eval_runtime": 2.0241, "eval_samples_per_second": 466.871, "eval_steps_per_second": 14.821, "step": 280 }, { "epoch": 4.75, "learning_rate": 2e-05, "loss": 0.3573, "step": 285 }, { "epoch": 4.83, "learning_rate": 2e-05, "loss": 0.3298, "step": 290 }, { "epoch": 4.92, "learning_rate": 2e-05, "loss": 0.2909, "step": 295 }, { "epoch": 5.0, "learning_rate": 2e-05, "loss": 0.3323, "step": 300 }, { "epoch": 5.0, "eval_f1": 0.6159893659893659, "eval_loss": 0.46433570981025696, "eval_runtime": 2.0165, "eval_samples_per_second": 468.623, "eval_steps_per_second": 14.877, "step": 300 }, { "epoch": 5.0, "step": 300, "total_flos": 9950949458472960.0, "train_loss": 0.4641021112600962, "train_runtime": 274.9964, "train_samples_per_second": 137.529, "train_steps_per_second": 1.091 } ], "logging_steps": 5, "max_steps": 300, "num_train_epochs": 5, "save_steps": 20, "total_flos": 9950949458472960.0, "trial_name": null, "trial_params": null }