|
{ |
|
"best_metric": 0.2576049864292145, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_xlarge/epochs_3_bs_32_lr_5e-6/checkpoint-1200", |
|
"epoch": 3.0, |
|
"global_step": 4632, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.7799999713897705, |
|
"eval_loss": 0.4942445755004883, |
|
"eval_runtime": 10.4303, |
|
"eval_samples_per_second": 191.749, |
|
"eval_steps_per_second": 6.04, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.465673575129534e-06, |
|
"loss": 0.4488, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.8755000233650208, |
|
"eval_loss": 0.3036494255065918, |
|
"eval_runtime": 10.411, |
|
"eval_samples_per_second": 192.104, |
|
"eval_steps_per_second": 6.051, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8794999718666077, |
|
"eval_loss": 0.29540833830833435, |
|
"eval_runtime": 10.4508, |
|
"eval_samples_per_second": 191.372, |
|
"eval_steps_per_second": 6.028, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.925949913644214e-06, |
|
"loss": 0.2328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.9075000286102295, |
|
"eval_loss": 0.2576049864292145, |
|
"eval_runtime": 10.4132, |
|
"eval_samples_per_second": 192.063, |
|
"eval_steps_per_second": 6.05, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.386226252158895e-06, |
|
"loss": 0.2015, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.8995000123977661, |
|
"eval_loss": 0.2740257680416107, |
|
"eval_runtime": 10.4482, |
|
"eval_samples_per_second": 191.42, |
|
"eval_steps_per_second": 6.03, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.9020000100135803, |
|
"eval_loss": 0.2881057858467102, |
|
"eval_runtime": 10.4618, |
|
"eval_samples_per_second": 191.171, |
|
"eval_steps_per_second": 6.022, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8465025906735755e-06, |
|
"loss": 0.1539, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.906499981880188, |
|
"eval_loss": 0.2683846056461334, |
|
"eval_runtime": 10.4536, |
|
"eval_samples_per_second": 191.321, |
|
"eval_steps_per_second": 6.027, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.906000018119812, |
|
"eval_loss": 0.34197333455085754, |
|
"eval_runtime": 10.4733, |
|
"eval_samples_per_second": 190.961, |
|
"eval_steps_per_second": 6.015, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3078583765112265e-06, |
|
"loss": 0.1428, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.906499981880188, |
|
"eval_loss": 0.3059616684913635, |
|
"eval_runtime": 10.4777, |
|
"eval_samples_per_second": 190.882, |
|
"eval_steps_per_second": 6.013, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7681347150259068e-06, |
|
"loss": 0.1399, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.9020000100135803, |
|
"eval_loss": 0.2935360372066498, |
|
"eval_runtime": 10.5064, |
|
"eval_samples_per_second": 190.361, |
|
"eval_steps_per_second": 5.996, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.909500002861023, |
|
"eval_loss": 0.3417232036590576, |
|
"eval_runtime": 10.4876, |
|
"eval_samples_per_second": 190.702, |
|
"eval_steps_per_second": 6.007, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2284110535405874e-06, |
|
"loss": 0.1069, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.8999999761581421, |
|
"eval_loss": 0.4427025616168976, |
|
"eval_runtime": 10.5396, |
|
"eval_samples_per_second": 189.761, |
|
"eval_steps_per_second": 5.977, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.9039999842643738, |
|
"eval_loss": 0.40283679962158203, |
|
"eval_runtime": 10.5439, |
|
"eval_samples_per_second": 189.684, |
|
"eval_steps_per_second": 5.975, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.886873920552678e-07, |
|
"loss": 0.1009, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.9024999737739563, |
|
"eval_loss": 0.4096806049346924, |
|
"eval_runtime": 10.4683, |
|
"eval_samples_per_second": 191.052, |
|
"eval_steps_per_second": 6.018, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5004317789291884e-07, |
|
"loss": 0.0936, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.9070000052452087, |
|
"eval_loss": 0.4148932099342346, |
|
"eval_runtime": 10.465, |
|
"eval_samples_per_second": 191.113, |
|
"eval_steps_per_second": 6.02, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4632, |
|
"total_flos": 4794349372791840.0, |
|
"train_loss": 0.17779028992166998, |
|
"train_runtime": 4326.2693, |
|
"train_samples_per_second": 34.257, |
|
"train_steps_per_second": 1.071 |
|
} |
|
], |
|
"max_steps": 4632, |
|
"num_train_epochs": 3, |
|
"total_flos": 4794349372791840.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|