|
{
  "best_metric": 0.8354740061162079,
  "best_model_checkpoint": "checkpoints/boolq-roberta/checkpoint-15930",
  "epoch": 54.0,
  "global_step": 15930,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5525993883792049,
      "eval_loss": 0.6860924959182739,
      "eval_runtime": 9.8191,
      "eval_samples_per_second": 333.024,
      "eval_steps_per_second": 41.654,
      "step": 295
    },
    {
      "best_epoch": 0,
      "best_eval_accuracy": 0.5525993883792049,
      "epoch": 1.0,
      "step": 295
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.0068813559322033905,
      "loss": 0.768,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6259938837920489,
      "eval_loss": 0.6413273811340332,
      "eval_runtime": 9.8093,
      "eval_samples_per_second": 333.357,
      "eval_steps_per_second": 41.695,
      "step": 590
    },
    {
      "best_epoch": 1,
      "best_eval_accuracy": 0.6259938837920489,
      "epoch": 2.0,
      "step": 590
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7058103975535168,
      "eval_loss": 0.7142531871795654,
      "eval_runtime": 9.821,
      "eval_samples_per_second": 332.961,
      "eval_steps_per_second": 41.646,
      "step": 885
    },
    {
      "best_epoch": 2,
      "best_eval_accuracy": 0.7058103975535168,
      "epoch": 3.0,
      "step": 885
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.00676271186440678,
      "loss": 0.7341,
      "step": 1000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7798165137614679,
      "eval_loss": 0.4908980429172516,
      "eval_runtime": 9.8251,
      "eval_samples_per_second": 332.822,
      "eval_steps_per_second": 41.628,
      "step": 1180
    },
    {
      "best_epoch": 3,
      "best_eval_accuracy": 0.7798165137614679,
      "epoch": 4.0,
      "step": 1180
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7865443425076453,
      "eval_loss": 0.4921702444553375,
      "eval_runtime": 9.8323,
      "eval_samples_per_second": 332.579,
      "eval_steps_per_second": 41.598,
      "step": 1475
    },
    {
      "best_epoch": 4,
      "best_eval_accuracy": 0.7865443425076453,
      "epoch": 5.0,
      "step": 1475
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.00664406779661017,
      "loss": 0.6102,
      "step": 1500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.791743119266055,
      "eval_loss": 0.5190699696540833,
      "eval_runtime": 9.8121,
      "eval_samples_per_second": 333.263,
      "eval_steps_per_second": 41.683,
      "step": 1770
    },
    {
      "best_epoch": 5,
      "best_eval_accuracy": 0.791743119266055,
      "epoch": 6.0,
      "step": 1770
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.006525423728813559,
      "loss": 0.5638,
      "step": 2000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7602446483180428,
      "eval_loss": 0.5134357810020447,
      "eval_runtime": 9.8046,
      "eval_samples_per_second": 333.516,
      "eval_steps_per_second": 41.715,
      "step": 2065
    },
    {
      "best_epoch": 5,
      "best_eval_accuracy": 0.791743119266055,
      "epoch": 7.0,
      "step": 2065
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8048929663608563,
      "eval_loss": 0.4786987006664276,
      "eval_runtime": 9.8048,
      "eval_samples_per_second": 333.508,
      "eval_steps_per_second": 41.714,
      "step": 2360
    },
    {
      "best_epoch": 7,
      "best_eval_accuracy": 0.8048929663608563,
      "epoch": 8.0,
      "step": 2360
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.006406779661016949,
      "loss": 0.5123,
      "step": 2500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8036697247706422,
      "eval_loss": 0.4702308177947998,
      "eval_runtime": 9.8236,
      "eval_samples_per_second": 332.873,
      "eval_steps_per_second": 41.635,
      "step": 2655
    },
    {
      "best_epoch": 7,
      "best_eval_accuracy": 0.8048929663608563,
      "epoch": 9.0,
      "step": 2655
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.791743119266055,
      "eval_loss": 0.5443930625915527,
      "eval_runtime": 9.8027,
      "eval_samples_per_second": 333.58,
      "eval_steps_per_second": 41.723,
      "step": 2950
    },
    {
      "best_epoch": 7,
      "best_eval_accuracy": 0.8048929663608563,
      "epoch": 10.0,
      "step": 2950
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.006288135593220339,
      "loss": 0.5045,
      "step": 3000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7871559633027523,
      "eval_loss": 0.5409664511680603,
      "eval_runtime": 9.8211,
      "eval_samples_per_second": 332.955,
      "eval_steps_per_second": 41.645,
      "step": 3245
    },
    {
      "best_epoch": 7,
      "best_eval_accuracy": 0.8048929663608563,
      "epoch": 11.0,
      "step": 3245
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.006169491525423729,
      "loss": 0.4583,
      "step": 3500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8162079510703364,
      "eval_loss": 0.460715115070343,
      "eval_runtime": 9.8053,
      "eval_samples_per_second": 333.493,
      "eval_steps_per_second": 41.712,
      "step": 3540
    },
    {
      "best_epoch": 11,
      "best_eval_accuracy": 0.8162079510703364,
      "epoch": 12.0,
      "step": 3540
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8079510703363915,
      "eval_loss": 0.5161741375923157,
      "eval_runtime": 9.8021,
      "eval_samples_per_second": 333.603,
      "eval_steps_per_second": 41.726,
      "step": 3835
    },
    {
      "best_epoch": 11,
      "best_eval_accuracy": 0.8162079510703364,
      "epoch": 13.0,
      "step": 3835
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.006050847457627119,
      "loss": 0.4401,
      "step": 4000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.826605504587156,
      "eval_loss": 0.5039022564888,
      "eval_runtime": 9.7965,
      "eval_samples_per_second": 333.792,
      "eval_steps_per_second": 41.75,
      "step": 4130
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 14.0,
      "step": 4130
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.807645259938838,
      "eval_loss": 0.4858939051628113,
      "eval_runtime": 9.8364,
      "eval_samples_per_second": 332.437,
      "eval_steps_per_second": 41.58,
      "step": 4425
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 15.0,
      "step": 4425
    },
    {
      "epoch": 15.25,
      "learning_rate": 0.005932203389830509,
      "loss": 0.4317,
      "step": 4500
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8006116207951071,
      "eval_loss": 0.5990845561027527,
      "eval_runtime": 9.8097,
      "eval_samples_per_second": 333.344,
      "eval_steps_per_second": 41.694,
      "step": 4720
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 16.0,
      "step": 4720
    },
    {
      "epoch": 16.95,
      "learning_rate": 0.005813559322033898,
      "loss": 0.3821,
      "step": 5000
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.8214067278287461,
      "eval_loss": 0.6158734560012817,
      "eval_runtime": 9.8135,
      "eval_samples_per_second": 333.213,
      "eval_steps_per_second": 41.677,
      "step": 5015
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 17.0,
      "step": 5015
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.8024464831804281,
      "eval_loss": 0.6013011336326599,
      "eval_runtime": 9.8051,
      "eval_samples_per_second": 333.5,
      "eval_steps_per_second": 41.713,
      "step": 5310
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 18.0,
      "step": 5310
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.005694915254237288,
      "loss": 0.3534,
      "step": 5500
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.8226299694189603,
      "eval_loss": 0.5256037712097168,
      "eval_runtime": 9.8062,
      "eval_samples_per_second": 333.464,
      "eval_steps_per_second": 41.708,
      "step": 5605
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 19.0,
      "step": 5605
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8256880733944955,
      "eval_loss": 0.6150120496749878,
      "eval_runtime": 9.8251,
      "eval_samples_per_second": 332.821,
      "eval_steps_per_second": 41.628,
      "step": 5900
    },
    {
      "best_epoch": 13,
      "best_eval_accuracy": 0.826605504587156,
      "epoch": 20.0,
      "step": 5900
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.005576271186440678,
      "loss": 0.337,
      "step": 6000
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.828440366972477,
      "eval_loss": 0.5820947885513306,
      "eval_runtime": 9.8077,
      "eval_samples_per_second": 333.413,
      "eval_steps_per_second": 41.702,
      "step": 6195
    },
    {
      "best_epoch": 20,
      "best_eval_accuracy": 0.828440366972477,
      "epoch": 21.0,
      "step": 6195
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.82782874617737,
      "eval_loss": 0.5438685417175293,
      "eval_runtime": 9.8107,
      "eval_samples_per_second": 333.309,
      "eval_steps_per_second": 41.689,
      "step": 6490
    },
    {
      "best_epoch": 20,
      "best_eval_accuracy": 0.828440366972477,
      "epoch": 22.0,
      "step": 6490
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.005457627118644068,
      "loss": 0.3002,
      "step": 6500
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.8302752293577982,
      "eval_loss": 0.568505048751831,
      "eval_runtime": 9.8907,
      "eval_samples_per_second": 330.614,
      "eval_steps_per_second": 41.352,
      "step": 6785
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 23.0,
      "step": 6785
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.005338983050847458,
      "loss": 0.2875,
      "step": 7000
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.8125382262996942,
      "eval_loss": 0.6513988375663757,
      "eval_runtime": 9.8218,
      "eval_samples_per_second": 332.932,
      "eval_steps_per_second": 41.642,
      "step": 7080
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 24.0,
      "step": 7080
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.8223241590214068,
      "eval_loss": 0.6830200552940369,
      "eval_runtime": 9.8098,
      "eval_samples_per_second": 333.34,
      "eval_steps_per_second": 41.693,
      "step": 7375
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 25.0,
      "step": 7375
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.005220338983050848,
      "loss": 0.2724,
      "step": 7500
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.8207951070336391,
      "eval_loss": 0.6568824052810669,
      "eval_runtime": 9.8777,
      "eval_samples_per_second": 331.049,
      "eval_steps_per_second": 41.406,
      "step": 7670
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 26.0,
      "step": 7670
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.8275229357798165,
      "eval_loss": 0.6946196556091309,
      "eval_runtime": 9.7866,
      "eval_samples_per_second": 334.131,
      "eval_steps_per_second": 41.792,
      "step": 7965
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 27.0,
      "step": 7965
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.005101694915254237,
      "loss": 0.2599,
      "step": 8000
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8229357798165138,
      "eval_loss": 0.7936107516288757,
      "eval_runtime": 9.8279,
      "eval_samples_per_second": 332.727,
      "eval_steps_per_second": 41.616,
      "step": 8260
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 28.0,
      "step": 8260
    },
    {
      "epoch": 28.81,
      "learning_rate": 0.004983050847457627,
      "loss": 0.2504,
      "step": 8500
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.8287461773700305,
      "eval_loss": 0.6681709885597229,
      "eval_runtime": 9.8102,
      "eval_samples_per_second": 333.327,
      "eval_steps_per_second": 41.691,
      "step": 8555
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 29.0,
      "step": 8555
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.8226299694189603,
      "eval_loss": 0.7363224029541016,
      "eval_runtime": 9.8095,
      "eval_samples_per_second": 333.349,
      "eval_steps_per_second": 41.694,
      "step": 8850
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 30.0,
      "step": 8850
    },
    {
      "epoch": 30.51,
      "learning_rate": 0.004864406779661017,
      "loss": 0.2333,
      "step": 9000
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.8287461773700305,
      "eval_loss": 0.6868186593055725,
      "eval_runtime": 9.835,
      "eval_samples_per_second": 332.487,
      "eval_steps_per_second": 41.586,
      "step": 9145
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 31.0,
      "step": 9145
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.8299694189602447,
      "eval_loss": 0.7315894365310669,
      "eval_runtime": 9.8164,
      "eval_samples_per_second": 333.117,
      "eval_steps_per_second": 41.665,
      "step": 9440
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 32.0,
      "step": 9440
    },
    {
      "epoch": 32.2,
      "learning_rate": 0.0047457627118644066,
      "loss": 0.2116,
      "step": 9500
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.8235474006116208,
      "eval_loss": 0.7920485734939575,
      "eval_runtime": 9.8087,
      "eval_samples_per_second": 333.379,
      "eval_steps_per_second": 41.698,
      "step": 9735
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 33.0,
      "step": 9735
    },
    {
      "epoch": 33.9,
      "learning_rate": 0.004627118644067797,
      "loss": 0.2016,
      "step": 10000
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.8302752293577982,
      "eval_loss": 0.8122466802597046,
      "eval_runtime": 9.8089,
      "eval_samples_per_second": 333.371,
      "eval_steps_per_second": 41.697,
      "step": 10030
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 34.0,
      "step": 10030
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.8235474006116208,
      "eval_loss": 0.8033488988876343,
      "eval_runtime": 9.8046,
      "eval_samples_per_second": 333.516,
      "eval_steps_per_second": 41.715,
      "step": 10325
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 35.0,
      "step": 10325
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.004508474576271186,
      "loss": 0.2053,
      "step": 10500
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8275229357798165,
      "eval_loss": 0.8479099273681641,
      "eval_runtime": 9.8042,
      "eval_samples_per_second": 333.529,
      "eval_steps_per_second": 41.717,
      "step": 10620
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 36.0,
      "step": 10620
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.826605504587156,
      "eval_loss": 0.7674300074577332,
      "eval_runtime": 9.8042,
      "eval_samples_per_second": 333.532,
      "eval_steps_per_second": 41.717,
      "step": 10915
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 37.0,
      "step": 10915
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.004389830508474577,
      "loss": 0.1784,
      "step": 11000
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.8302752293577982,
      "eval_loss": 0.7334691882133484,
      "eval_runtime": 9.8101,
      "eval_samples_per_second": 333.331,
      "eval_steps_per_second": 41.692,
      "step": 11210
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 38.0,
      "step": 11210
    },
    {
      "epoch": 38.98,
      "learning_rate": 0.004271186440677966,
      "loss": 0.1742,
      "step": 11500
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.8244648318042813,
      "eval_loss": 0.8439134955406189,
      "eval_runtime": 9.8448,
      "eval_samples_per_second": 332.155,
      "eval_steps_per_second": 41.545,
      "step": 11505
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 39.0,
      "step": 11505
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.8269113149847095,
      "eval_loss": 0.7226125597953796,
      "eval_runtime": 9.7971,
      "eval_samples_per_second": 333.772,
      "eval_steps_per_second": 41.747,
      "step": 11800
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 40.0,
      "step": 11800
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.0041525423728813555,
      "loss": 0.1607,
      "step": 12000
    },
    {
      "epoch": 41.0,
      "eval_accuracy": 0.8262996941896025,
      "eval_loss": 0.783122181892395,
      "eval_runtime": 9.8034,
      "eval_samples_per_second": 333.559,
      "eval_steps_per_second": 41.72,
      "step": 12095
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 41.0,
      "step": 12095
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.8204892966360856,
      "eval_loss": 0.8774231672286987,
      "eval_runtime": 9.8122,
      "eval_samples_per_second": 333.26,
      "eval_steps_per_second": 41.683,
      "step": 12390
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 42.0,
      "step": 12390
    },
    {
      "epoch": 42.37,
      "learning_rate": 0.004033898305084746,
      "loss": 0.1482,
      "step": 12500
    },
    {
      "epoch": 43.0,
      "eval_accuracy": 0.8201834862385321,
      "eval_loss": 0.9140273928642273,
      "eval_runtime": 9.8202,
      "eval_samples_per_second": 332.988,
      "eval_steps_per_second": 41.649,
      "step": 12685
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 43.0,
      "step": 12685
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.8293577981651377,
      "eval_loss": 1.0441981554031372,
      "eval_runtime": 9.8317,
      "eval_samples_per_second": 332.596,
      "eval_steps_per_second": 41.6,
      "step": 12980
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 44.0,
      "step": 12980
    },
    {
      "epoch": 44.07,
      "learning_rate": 0.003915254237288135,
      "loss": 0.1353,
      "step": 13000
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.8204892966360856,
      "eval_loss": 0.9284624457359314,
      "eval_runtime": 9.8086,
      "eval_samples_per_second": 333.381,
      "eval_steps_per_second": 41.698,
      "step": 13275
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 45.0,
      "step": 13275
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.0037966101694915256,
      "loss": 0.1442,
      "step": 13500
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 0.8296636085626912,
      "eval_loss": 0.9967836141586304,
      "eval_runtime": 9.7985,
      "eval_samples_per_second": 333.726,
      "eval_steps_per_second": 41.741,
      "step": 13570
    },
    {
      "best_epoch": 22,
      "best_eval_accuracy": 0.8302752293577982,
      "epoch": 46.0,
      "step": 13570
    },
    {
      "epoch": 47.0,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 0.969611406326294,
      "eval_runtime": 9.8112,
      "eval_samples_per_second": 333.294,
      "eval_steps_per_second": 41.687,
      "step": 13865
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 47.0,
      "step": 13865
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.0036779661016949155,
      "loss": 0.1308,
      "step": 14000
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 0.9742096662521362,
      "eval_runtime": 9.8104,
      "eval_samples_per_second": 333.319,
      "eval_steps_per_second": 41.69,
      "step": 14160
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 48.0,
      "step": 14160
    },
    {
      "epoch": 49.0,
      "eval_accuracy": 0.8324159021406727,
      "eval_loss": 0.9350805282592773,
      "eval_runtime": 9.8421,
      "eval_samples_per_second": 332.247,
      "eval_steps_per_second": 41.556,
      "step": 14455
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 49.0,
      "step": 14455
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.003559322033898305,
      "loss": 0.12,
      "step": 14500
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.8232415902140673,
      "eval_loss": 1.0194205045700073,
      "eval_runtime": 9.8031,
      "eval_samples_per_second": 333.567,
      "eval_steps_per_second": 41.721,
      "step": 14750
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 50.0,
      "step": 14750
    },
    {
      "epoch": 50.85,
      "learning_rate": 0.0034406779661016952,
      "loss": 0.1151,
      "step": 15000
    },
    {
      "epoch": 51.0,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 1.0169928073883057,
      "eval_runtime": 9.8124,
      "eval_samples_per_second": 333.252,
      "eval_steps_per_second": 41.682,
      "step": 15045
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 51.0,
      "step": 15045
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.8324159021406727,
      "eval_loss": 0.9195823073387146,
      "eval_runtime": 9.8284,
      "eval_samples_per_second": 332.708,
      "eval_steps_per_second": 41.614,
      "step": 15340
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 52.0,
      "step": 15340
    },
    {
      "epoch": 52.54,
      "learning_rate": 0.003322033898305085,
      "loss": 0.1081,
      "step": 15500
    },
    {
      "epoch": 53.0,
      "eval_accuracy": 0.8290519877675842,
      "eval_loss": 0.9286013245582581,
      "eval_runtime": 9.8253,
      "eval_samples_per_second": 332.815,
      "eval_steps_per_second": 41.627,
      "step": 15635
    },
    {
      "best_epoch": 46,
      "best_eval_accuracy": 0.8333333333333334,
      "epoch": 53.0,
      "step": 15635
    },
    {
      "epoch": 54.0,
      "eval_accuracy": 0.8354740061162079,
      "eval_loss": 0.9297749400138855,
      "eval_runtime": 9.8216,
      "eval_samples_per_second": 332.938,
      "eval_steps_per_second": 41.643,
      "step": 15930
    },
    {
      "best_epoch": 53,
      "best_eval_accuracy": 0.8354740061162079,
      "epoch": 54.0,
      "step": 15930
    }
  ],
  "max_steps": 29500,
  "num_train_epochs": 100,
  "total_flos": 1.1860177898825626e+17,
  "trial_name": null,
  "trial_params": null
}
|
|