|
{ |
|
"best_metric": 72.47706422018348, |
|
"best_model_checkpoint": "outputs/adapter/superglue-boolq/checkpoint-3835", |
|
"epoch": 20.0, |
|
"global_step": 5900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.2848173677921295, |
|
"eval_runtime": 4.5081, |
|
"eval_samples_per_second": 362.682, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00027457627118644066, |
|
"loss": 0.3238, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.27746617794036865, |
|
"eval_runtime": 4.4839, |
|
"eval_samples_per_second": 364.64, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 62.62996941896024, |
|
"eval_average_metrics": 62.62996941896024, |
|
"eval_loss": 0.2767568826675415, |
|
"eval_runtime": 4.4969, |
|
"eval_samples_per_second": 363.585, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00024915254237288135, |
|
"loss": 0.2896, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 62.75229357798165, |
|
"eval_average_metrics": 62.75229357798165, |
|
"eval_loss": 0.2767893373966217, |
|
"eval_runtime": 4.7449, |
|
"eval_samples_per_second": 344.579, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 63.36391437308868, |
|
"eval_average_metrics": 63.36391437308868, |
|
"eval_loss": 0.26716169714927673, |
|
"eval_runtime": 4.7487, |
|
"eval_samples_per_second": 344.302, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.000223728813559322, |
|
"loss": 0.2823, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 66.11620795107034, |
|
"eval_average_metrics": 66.11620795107034, |
|
"eval_loss": 0.2606015205383301, |
|
"eval_runtime": 4.7538, |
|
"eval_samples_per_second": 343.936, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0001983050847457627, |
|
"loss": 0.2715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 69.0519877675841, |
|
"eval_average_metrics": 69.0519877675841, |
|
"eval_loss": 0.25209498405456543, |
|
"eval_runtime": 4.7471, |
|
"eval_samples_per_second": 344.422, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 69.2354740061162, |
|
"eval_average_metrics": 69.2354740061162, |
|
"eval_loss": 0.25652435421943665, |
|
"eval_runtime": 4.7461, |
|
"eval_samples_per_second": 344.493, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0001728813559322034, |
|
"loss": 0.236, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 71.0091743119266, |
|
"eval_average_metrics": 71.0091743119266, |
|
"eval_loss": 0.24900275468826294, |
|
"eval_runtime": 4.7529, |
|
"eval_samples_per_second": 344.002, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 71.80428134556574, |
|
"eval_average_metrics": 71.80428134556574, |
|
"eval_loss": 0.2635628283023834, |
|
"eval_runtime": 4.7533, |
|
"eval_samples_per_second": 343.974, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00014745762711864405, |
|
"loss": 0.2038, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 70.70336391437309, |
|
"eval_average_metrics": 70.70336391437309, |
|
"eval_loss": 0.26442670822143555, |
|
"eval_runtime": 4.7661, |
|
"eval_samples_per_second": 343.05, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00012203389830508474, |
|
"loss": 0.1743, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 71.49847094801223, |
|
"eval_average_metrics": 71.49847094801223, |
|
"eval_loss": 0.2672346830368042, |
|
"eval_runtime": 4.761, |
|
"eval_samples_per_second": 343.414, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 72.47706422018348, |
|
"eval_average_metrics": 72.47706422018348, |
|
"eval_loss": 0.27757981419563293, |
|
"eval_runtime": 4.7532, |
|
"eval_samples_per_second": 343.982, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 9.661016949152541e-05, |
|
"loss": 0.1493, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 70.8868501529052, |
|
"eval_average_metrics": 70.8868501529052, |
|
"eval_loss": 0.29447805881500244, |
|
"eval_runtime": 4.7492, |
|
"eval_samples_per_second": 344.268, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 70.21406727828746, |
|
"eval_average_metrics": 70.21406727828746, |
|
"eval_loss": 0.2965507209300995, |
|
"eval_runtime": 4.7633, |
|
"eval_samples_per_second": 343.251, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 7.11864406779661e-05, |
|
"loss": 0.1336, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 72.29357798165138, |
|
"eval_average_metrics": 72.29357798165138, |
|
"eval_loss": 0.3127536177635193, |
|
"eval_runtime": 4.7675, |
|
"eval_samples_per_second": 342.95, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.576271186440678e-05, |
|
"loss": 0.1166, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 70.9480122324159, |
|
"eval_average_metrics": 70.9480122324159, |
|
"eval_loss": 0.3478758931159973, |
|
"eval_runtime": 4.7792, |
|
"eval_samples_per_second": 342.105, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 72.17125382262996, |
|
"eval_average_metrics": 72.17125382262996, |
|
"eval_loss": 0.33715757727622986, |
|
"eval_runtime": 4.7549, |
|
"eval_samples_per_second": 343.858, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 2.0338983050847455e-05, |
|
"loss": 0.1059, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 71.92660550458716, |
|
"eval_average_metrics": 71.92660550458716, |
|
"eval_loss": 0.34181272983551025, |
|
"eval_runtime": 4.753, |
|
"eval_samples_per_second": 343.993, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 71.80428134556574, |
|
"eval_average_metrics": 71.80428134556574, |
|
"eval_loss": 0.34913983941078186, |
|
"eval_runtime": 4.7538, |
|
"eval_samples_per_second": 343.937, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5900, |
|
"total_flos": 5.78564930408256e+16, |
|
"train_loss": 0.2005054286374884, |
|
"train_runtime": 1293.5612, |
|
"train_samples_per_second": 145.753, |
|
"train_steps_per_second": 4.561 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.78564930408256e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|