{ "best_metric": 60.099999999999994, "best_model_checkpoint": "outputs/soft_prompt/t5-base/sst2/checkpoint-5800", "epoch": 3.0, "global_step": 6222, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_accuracy": 0.0, "eval_average_metrics": 0.0, "eval_loss": 1.9322336912155151, "eval_runtime": 9.7016, "eval_samples_per_second": 103.076, "step": 200 }, { "epoch": 0.19, "eval_accuracy": 51.0, "eval_average_metrics": 51.0, "eval_loss": 0.7396745681762695, "eval_runtime": 3.3022, "eval_samples_per_second": 302.832, "step": 400 }, { "epoch": 0.24, "learning_rate": 0.00027589199614271937, "loss": 2.5059, "step": 500 }, { "epoch": 0.29, "eval_accuracy": 55.2, "eval_average_metrics": 55.2, "eval_loss": 0.5560445785522461, "eval_runtime": 3.2155, "eval_samples_per_second": 310.993, "step": 600 }, { "epoch": 0.39, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.463209867477417, "eval_runtime": 3.1314, "eval_samples_per_second": 319.345, "step": 800 }, { "epoch": 0.48, "learning_rate": 0.0002517839922854387, "loss": 0.7329, "step": 1000 }, { "epoch": 0.48, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.4116092920303345, "eval_runtime": 3.3786, "eval_samples_per_second": 295.977, "step": 1000 }, { "epoch": 0.58, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.38179224729537964, "eval_runtime": 3.2857, "eval_samples_per_second": 304.351, "step": 1200 }, { "epoch": 0.68, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.35022690892219543, "eval_runtime": 2.8293, "eval_samples_per_second": 353.443, "step": 1400 }, { "epoch": 0.72, "learning_rate": 0.00022767598842815813, "loss": 0.4828, "step": 1500 }, { "epoch": 0.77, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.3595990538597107, "eval_runtime": 3.2567, "eval_samples_per_second": 307.055, "step": 1600 }, { "epoch": 0.87, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.3511555790901184, "eval_runtime": 3.0816, "eval_samples_per_second": 324.506, "step": 1800 }, { "epoch": 0.96, "learning_rate": 0.00020356798457087753, "loss": 0.421, "step": 2000 }, { "epoch": 0.96, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.3314475417137146, "eval_runtime": 2.522, "eval_samples_per_second": 396.513, "step": 2000 }, { "epoch": 1.06, "eval_accuracy": 55.300000000000004, "eval_average_metrics": 55.300000000000004, "eval_loss": 0.3108561336994171, "eval_runtime": 2.7063, "eval_samples_per_second": 369.512, "step": 2200 }, { "epoch": 1.16, "eval_accuracy": 55.7, "eval_average_metrics": 55.7, "eval_loss": 0.2974016070365906, "eval_runtime": 3.5599, "eval_samples_per_second": 280.91, "step": 2400 }, { "epoch": 1.21, "learning_rate": 0.0001794599807135969, "loss": 0.3931, "step": 2500 }, { "epoch": 1.25, "eval_accuracy": 55.7, "eval_average_metrics": 55.7, "eval_loss": 0.3047039210796356, "eval_runtime": 3.0837, "eval_samples_per_second": 324.282, "step": 2600 }, { "epoch": 1.35, "eval_accuracy": 55.900000000000006, "eval_average_metrics": 55.900000000000006, "eval_loss": 0.2895060181617737, "eval_runtime": 3.9994, "eval_samples_per_second": 250.039, "step": 2800 }, { "epoch": 1.45, "learning_rate": 0.00015535197685631627, "loss": 0.3777, "step": 3000 }, { "epoch": 1.45, "eval_accuracy": 56.2, "eval_average_metrics": 56.2, "eval_loss": 0.28209826350212097, "eval_runtime": 3.5924, "eval_samples_per_second": 278.366, "step": 3000 }, { "epoch": 1.54, "eval_accuracy": 57.4, "eval_average_metrics": 57.4, "eval_loss": 0.2806238532066345, "eval_runtime": 3.1947, "eval_samples_per_second": 313.019, "step": 3200 }, { "epoch": 1.64, "eval_accuracy": 56.699999999999996, "eval_average_metrics": 56.699999999999996, "eval_loss": 0.2809857428073883, "eval_runtime": 3.3046, "eval_samples_per_second": 302.612, "step": 3400 }, { "epoch": 1.69, "learning_rate": 0.00013124397299903566, "loss": 0.372, "step": 3500 }, { "epoch": 1.74, "eval_accuracy": 56.89999999999999, "eval_average_metrics": 56.89999999999999, "eval_loss": 0.2799268066883087, "eval_runtime": 3.5652, "eval_samples_per_second": 280.491, "step": 3600 }, { "epoch": 1.83, "eval_accuracy": 56.599999999999994, "eval_average_metrics": 56.599999999999994, "eval_loss": 0.2795256972312927, "eval_runtime": 3.5541, "eval_samples_per_second": 281.363, "step": 3800 }, { "epoch": 1.93, "learning_rate": 0.00010713596914175504, "loss": 0.3629, "step": 4000 }, { "epoch": 1.93, "eval_accuracy": 57.099999999999994, "eval_average_metrics": 57.099999999999994, "eval_loss": 0.27900928258895874, "eval_runtime": 3.42, "eval_samples_per_second": 292.396, "step": 4000 }, { "epoch": 2.03, "eval_accuracy": 57.699999999999996, "eval_average_metrics": 57.699999999999996, "eval_loss": 0.27878034114837646, "eval_runtime": 3.38, "eval_samples_per_second": 295.859, "step": 4200 }, { "epoch": 2.12, "eval_accuracy": 57.699999999999996, "eval_average_metrics": 57.699999999999996, "eval_loss": 0.27947279810905457, "eval_runtime": 3.1427, "eval_samples_per_second": 318.196, "step": 4400 }, { "epoch": 2.17, "learning_rate": 8.302796528447444e-05, "loss": 0.3597, "step": 4500 }, { "epoch": 2.22, "eval_accuracy": 57.99999999999999, "eval_average_metrics": 57.99999999999999, "eval_loss": 0.2773045599460602, "eval_runtime": 3.1388, "eval_samples_per_second": 318.593, "step": 4600 }, { "epoch": 2.31, "eval_accuracy": 59.8, "eval_average_metrics": 59.8, "eval_loss": 0.2791491448879242, "eval_runtime": 3.8832, "eval_samples_per_second": 257.519, "step": 4800 }, { "epoch": 2.41, "learning_rate": 5.891996142719383e-05, "loss": 0.3555, "step": 5000 }, { "epoch": 2.41, "eval_accuracy": 59.4, "eval_average_metrics": 59.4, "eval_loss": 0.2792259454727173, "eval_runtime": 3.3994, "eval_samples_per_second": 294.169, "step": 5000 }, { "epoch": 2.51, "eval_accuracy": 59.3, "eval_average_metrics": 59.3, "eval_loss": 0.27831217646598816, "eval_runtime": 3.2184, "eval_samples_per_second": 310.718, "step": 5200 }, { "epoch": 2.6, "eval_accuracy": 59.199999999999996, "eval_average_metrics": 59.199999999999996, "eval_loss": 0.2772988975048065, "eval_runtime": 3.2951, "eval_samples_per_second": 303.485, "step": 5400 }, { "epoch": 2.65, "learning_rate": 3.481195756991321e-05, "loss": 0.3528, "step": 5500 }, { "epoch": 2.7, "eval_accuracy": 59.9, "eval_average_metrics": 59.9, "eval_loss": 0.2777373790740967, "eval_runtime": 3.3995, "eval_samples_per_second": 294.161, "step": 5600 }, { "epoch": 2.8, "eval_accuracy": 60.099999999999994, "eval_average_metrics": 60.099999999999994, "eval_loss": 0.2783910036087036, "eval_runtime": 3.8322, "eval_samples_per_second": 260.948, "step": 5800 }, { "epoch": 2.89, "learning_rate": 1.0703953712632592e-05, "loss": 0.3503, "step": 6000 }, { "epoch": 2.89, "eval_accuracy": 59.699999999999996, "eval_average_metrics": 59.699999999999996, "eval_loss": 0.27744776010513306, "eval_runtime": 3.3726, "eval_samples_per_second": 296.511, "step": 6000 }, { "epoch": 2.99, "eval_accuracy": 59.9, "eval_average_metrics": 59.9, "eval_loss": 0.27743667364120483, "eval_runtime": 3.6434, "eval_samples_per_second": 274.47, "step": 6200 }, { "epoch": 3.0, "step": 6222, "total_flos": 1.316605032958464e+16, "train_loss": 0.5804944922785328, "train_runtime": 1254.7674, "train_samples_per_second": 158.633, "train_steps_per_second": 4.959 } ], "max_steps": 6222, "num_train_epochs": 3, "total_flos": 1.316605032958464e+16, "trial_name": null, "trial_params": null }