|
{ |
|
"best_metric": 60.099999999999994, |
|
"best_model_checkpoint": "outputs/soft_prompt/t5-base/sst2/checkpoint-5800", |
|
"epoch": 3.0, |
|
"global_step": 6222, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.0, |
|
"eval_average_metrics": 0.0, |
|
"eval_loss": 1.9322336912155151, |
|
"eval_runtime": 9.7016, |
|
"eval_samples_per_second": 103.076, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 51.0, |
|
"eval_average_metrics": 51.0, |
|
"eval_loss": 0.7396745681762695, |
|
"eval_runtime": 3.3022, |
|
"eval_samples_per_second": 302.832, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027589199614271937, |
|
"loss": 2.5059, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 55.2, |
|
"eval_average_metrics": 55.2, |
|
"eval_loss": 0.5560445785522461, |
|
"eval_runtime": 3.2155, |
|
"eval_samples_per_second": 310.993, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.463209867477417, |
|
"eval_runtime": 3.1314, |
|
"eval_samples_per_second": 319.345, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002517839922854387, |
|
"loss": 0.7329, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.4116092920303345, |
|
"eval_runtime": 3.3786, |
|
"eval_samples_per_second": 295.977, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.38179224729537964, |
|
"eval_runtime": 3.2857, |
|
"eval_samples_per_second": 304.351, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.35022690892219543, |
|
"eval_runtime": 2.8293, |
|
"eval_samples_per_second": 353.443, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022767598842815813, |
|
"loss": 0.4828, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.3595990538597107, |
|
"eval_runtime": 3.2567, |
|
"eval_samples_per_second": 307.055, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.3511555790901184, |
|
"eval_runtime": 3.0816, |
|
"eval_samples_per_second": 324.506, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020356798457087753, |
|
"loss": 0.421, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.3314475417137146, |
|
"eval_runtime": 2.522, |
|
"eval_samples_per_second": 396.513, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 55.300000000000004, |
|
"eval_average_metrics": 55.300000000000004, |
|
"eval_loss": 0.3108561336994171, |
|
"eval_runtime": 2.7063, |
|
"eval_samples_per_second": 369.512, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 55.7, |
|
"eval_average_metrics": 55.7, |
|
"eval_loss": 0.2974016070365906, |
|
"eval_runtime": 3.5599, |
|
"eval_samples_per_second": 280.91, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001794599807135969, |
|
"loss": 0.3931, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 55.7, |
|
"eval_average_metrics": 55.7, |
|
"eval_loss": 0.3047039210796356, |
|
"eval_runtime": 3.0837, |
|
"eval_samples_per_second": 324.282, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 55.900000000000006, |
|
"eval_average_metrics": 55.900000000000006, |
|
"eval_loss": 0.2895060181617737, |
|
"eval_runtime": 3.9994, |
|
"eval_samples_per_second": 250.039, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015535197685631627, |
|
"loss": 0.3777, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 56.2, |
|
"eval_average_metrics": 56.2, |
|
"eval_loss": 0.28209826350212097, |
|
"eval_runtime": 3.5924, |
|
"eval_samples_per_second": 278.366, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 57.4, |
|
"eval_average_metrics": 57.4, |
|
"eval_loss": 0.2806238532066345, |
|
"eval_runtime": 3.1947, |
|
"eval_samples_per_second": 313.019, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 56.699999999999996, |
|
"eval_average_metrics": 56.699999999999996, |
|
"eval_loss": 0.2809857428073883, |
|
"eval_runtime": 3.3046, |
|
"eval_samples_per_second": 302.612, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013124397299903566, |
|
"loss": 0.372, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 56.89999999999999, |
|
"eval_average_metrics": 56.89999999999999, |
|
"eval_loss": 0.2799268066883087, |
|
"eval_runtime": 3.5652, |
|
"eval_samples_per_second": 280.491, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 56.599999999999994, |
|
"eval_average_metrics": 56.599999999999994, |
|
"eval_loss": 0.2795256972312927, |
|
"eval_runtime": 3.5541, |
|
"eval_samples_per_second": 281.363, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010713596914175504, |
|
"loss": 0.3629, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 57.099999999999994, |
|
"eval_average_metrics": 57.099999999999994, |
|
"eval_loss": 0.27900928258895874, |
|
"eval_runtime": 3.42, |
|
"eval_samples_per_second": 292.396, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 57.699999999999996, |
|
"eval_average_metrics": 57.699999999999996, |
|
"eval_loss": 0.27878034114837646, |
|
"eval_runtime": 3.38, |
|
"eval_samples_per_second": 295.859, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 57.699999999999996, |
|
"eval_average_metrics": 57.699999999999996, |
|
"eval_loss": 0.27947279810905457, |
|
"eval_runtime": 3.1427, |
|
"eval_samples_per_second": 318.196, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.302796528447444e-05, |
|
"loss": 0.3597, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 57.99999999999999, |
|
"eval_average_metrics": 57.99999999999999, |
|
"eval_loss": 0.2773045599460602, |
|
"eval_runtime": 3.1388, |
|
"eval_samples_per_second": 318.593, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 59.8, |
|
"eval_average_metrics": 59.8, |
|
"eval_loss": 0.2791491448879242, |
|
"eval_runtime": 3.8832, |
|
"eval_samples_per_second": 257.519, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.891996142719383e-05, |
|
"loss": 0.3555, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 59.4, |
|
"eval_average_metrics": 59.4, |
|
"eval_loss": 0.2792259454727173, |
|
"eval_runtime": 3.3994, |
|
"eval_samples_per_second": 294.169, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 59.3, |
|
"eval_average_metrics": 59.3, |
|
"eval_loss": 0.27831217646598816, |
|
"eval_runtime": 3.2184, |
|
"eval_samples_per_second": 310.718, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 59.199999999999996, |
|
"eval_average_metrics": 59.199999999999996, |
|
"eval_loss": 0.2772988975048065, |
|
"eval_runtime": 3.2951, |
|
"eval_samples_per_second": 303.485, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.481195756991321e-05, |
|
"loss": 0.3528, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 59.9, |
|
"eval_average_metrics": 59.9, |
|
"eval_loss": 0.2777373790740967, |
|
"eval_runtime": 3.3995, |
|
"eval_samples_per_second": 294.161, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 60.099999999999994, |
|
"eval_average_metrics": 60.099999999999994, |
|
"eval_loss": 0.2783910036087036, |
|
"eval_runtime": 3.8322, |
|
"eval_samples_per_second": 260.948, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0703953712632592e-05, |
|
"loss": 0.3503, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 59.699999999999996, |
|
"eval_average_metrics": 59.699999999999996, |
|
"eval_loss": 0.27744776010513306, |
|
"eval_runtime": 3.3726, |
|
"eval_samples_per_second": 296.511, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 59.9, |
|
"eval_average_metrics": 59.9, |
|
"eval_loss": 0.27743667364120483, |
|
"eval_runtime": 3.6434, |
|
"eval_samples_per_second": 274.47, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6222, |
|
"total_flos": 1.316605032958464e+16, |
|
"train_loss": 0.5804944922785328, |
|
"train_runtime": 1254.7674, |
|
"train_samples_per_second": 158.633, |
|
"train_steps_per_second": 4.959 |
|
} |
|
], |
|
"max_steps": 6222, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.316605032958464e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|