|
{ |
|
"best_metric": 72.47706422018348, |
|
"best_model_checkpoint": "outputs/adapter/superglue-boolq/checkpoint-3835", |
|
"epoch": 20.0, |
|
"global_step": 5900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.2848173677921295, |
|
"eval_runtime": 4.504, |
|
"eval_samples_per_second": 363.014, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00027457627118644066, |
|
"loss": 0.3238, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.27746617794036865, |
|
"eval_runtime": 4.5022, |
|
"eval_samples_per_second": 363.153, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.2767568826675415, |
|
"eval_runtime": 4.4934, |
|
"eval_samples_per_second": 363.871, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00024915254237288135, |
|
"loss": 0.2896, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 62.75229357798165, |
|
"eval_average_metrics": 62.75229357798165, |
|
"eval_loss": 0.2767893373966217, |
|
"eval_runtime": 4.7772, |
|
"eval_samples_per_second": 342.253, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 63.36391437308868, |
|
"eval_average_metrics": 63.36391437308868, |
|
"eval_loss": 0.26716169714927673, |
|
"eval_runtime": 4.7616, |
|
"eval_samples_per_second": 343.37, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.000223728813559322, |
|
"loss": 0.2823, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 66.11620795107034, |
|
"eval_average_metrics": 66.11620795107034, |
|
"eval_loss": 0.2606015205383301, |
|
"eval_runtime": 4.7573, |
|
"eval_samples_per_second": 343.683, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0001983050847457627, |
|
"loss": 0.2715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 69.0519877675841, |
|
"eval_average_metrics": 69.0519877675841, |
|
"eval_loss": 0.25209498405456543, |
|
"eval_runtime": 4.7545, |
|
"eval_samples_per_second": 343.882, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 69.2354740061162, |
|
"eval_average_metrics": 69.2354740061162, |
|
"eval_loss": 0.25652435421943665, |
|
"eval_runtime": 4.7648, |
|
"eval_samples_per_second": 343.14, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0001728813559322034, |
|
"loss": 0.236, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 71.0091743119266, |
|
"eval_average_metrics": 71.0091743119266, |
|
"eval_loss": 0.24900275468826294, |
|
"eval_runtime": 4.7724, |
|
"eval_samples_per_second": 342.596, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 71.80428134556574, |
|
"eval_average_metrics": 71.80428134556574, |
|
"eval_loss": 0.2635628283023834, |
|
"eval_runtime": 4.7656, |
|
"eval_samples_per_second": 343.086, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00014745762711864405, |
|
"loss": 0.2038, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 70.70336391437309, |
|
"eval_average_metrics": 70.70336391437309, |
|
"eval_loss": 0.26442670822143555, |
|
"eval_runtime": 4.7895, |
|
"eval_samples_per_second": 341.37, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00012203389830508474, |
|
"loss": 0.1743, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 71.49847094801223, |
|
"eval_average_metrics": 71.49847094801223, |
|
"eval_loss": 0.2672346830368042, |
|
"eval_runtime": 4.7696, |
|
"eval_samples_per_second": 342.793, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 72.47706422018348, |
|
"eval_average_metrics": 72.47706422018348, |
|
"eval_loss": 0.27757981419563293, |
|
"eval_runtime": 4.7935, |
|
"eval_samples_per_second": 341.085, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 9.661016949152541e-05, |
|
"loss": 0.1493, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 70.8868501529052, |
|
"eval_average_metrics": 70.8868501529052, |
|
"eval_loss": 0.29447805881500244, |
|
"eval_runtime": 4.7712, |
|
"eval_samples_per_second": 342.681, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 70.21406727828746, |
|
"eval_average_metrics": 70.21406727828746, |
|
"eval_loss": 0.2965507209300995, |
|
"eval_runtime": 4.7813, |
|
"eval_samples_per_second": 341.96, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 7.11864406779661e-05, |
|
"loss": 0.1336, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 72.29357798165138, |
|
"eval_average_metrics": 72.29357798165138, |
|
"eval_loss": 0.3127536177635193, |
|
"eval_runtime": 4.7677, |
|
"eval_samples_per_second": 342.93, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.576271186440678e-05, |
|
"loss": 0.1166, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 70.9480122324159, |
|
"eval_average_metrics": 70.9480122324159, |
|
"eval_loss": 0.3478758931159973, |
|
"eval_runtime": 4.7648, |
|
"eval_samples_per_second": 343.144, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 72.17125382262996, |
|
"eval_average_metrics": 72.17125382262996, |
|
"eval_loss": 0.33715757727622986, |
|
"eval_runtime": 4.7601, |
|
"eval_samples_per_second": 343.478, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 2.0338983050847455e-05, |
|
"loss": 0.1059, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 71.92660550458716, |
|
"eval_average_metrics": 71.92660550458716, |
|
"eval_loss": 0.34181272983551025, |
|
"eval_runtime": 4.7537, |
|
"eval_samples_per_second": 343.942, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 71.80428134556574, |
|
"eval_average_metrics": 71.80428134556574, |
|
"eval_loss": 0.34913983941078186, |
|
"eval_runtime": 4.7707, |
|
"eval_samples_per_second": 342.716, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5900, |
|
"total_flos": 5.78564930408256e+16, |
|
"train_loss": 0.2005054286374884, |
|
"train_runtime": 1452.5651, |
|
"train_samples_per_second": 129.798, |
|
"train_steps_per_second": 4.062 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.78564930408256e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|