josecannete's picture
adding model finetuned on PAWS-X
7ba6ead
{
"best_metric": 0.2576049864292145,
"best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_xlarge/epochs_3_bs_32_lr_5e-6/checkpoint-1200",
"epoch": 3.0,
"global_step": 4632,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"eval_accuracy": 0.7799999713897705,
"eval_loss": 0.4942445755004883,
"eval_runtime": 10.4303,
"eval_samples_per_second": 191.749,
"eval_steps_per_second": 6.04,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 4.465673575129534e-06,
"loss": 0.4488,
"step": 500
},
{
"epoch": 0.39,
"eval_accuracy": 0.8755000233650208,
"eval_loss": 0.3036494255065918,
"eval_runtime": 10.411,
"eval_samples_per_second": 192.104,
"eval_steps_per_second": 6.051,
"step": 600
},
{
"epoch": 0.58,
"eval_accuracy": 0.8794999718666077,
"eval_loss": 0.29540833830833435,
"eval_runtime": 10.4508,
"eval_samples_per_second": 191.372,
"eval_steps_per_second": 6.028,
"step": 900
},
{
"epoch": 0.65,
"learning_rate": 3.925949913644214e-06,
"loss": 0.2328,
"step": 1000
},
{
"epoch": 0.78,
"eval_accuracy": 0.9075000286102295,
"eval_loss": 0.2576049864292145,
"eval_runtime": 10.4132,
"eval_samples_per_second": 192.063,
"eval_steps_per_second": 6.05,
"step": 1200
},
{
"epoch": 0.97,
"learning_rate": 3.386226252158895e-06,
"loss": 0.2015,
"step": 1500
},
{
"epoch": 0.97,
"eval_accuracy": 0.8995000123977661,
"eval_loss": 0.2740257680416107,
"eval_runtime": 10.4482,
"eval_samples_per_second": 191.42,
"eval_steps_per_second": 6.03,
"step": 1500
},
{
"epoch": 1.17,
"eval_accuracy": 0.9020000100135803,
"eval_loss": 0.2881057858467102,
"eval_runtime": 10.4618,
"eval_samples_per_second": 191.171,
"eval_steps_per_second": 6.022,
"step": 1800
},
{
"epoch": 1.3,
"learning_rate": 2.8465025906735755e-06,
"loss": 0.1539,
"step": 2000
},
{
"epoch": 1.36,
"eval_accuracy": 0.906499981880188,
"eval_loss": 0.2683846056461334,
"eval_runtime": 10.4536,
"eval_samples_per_second": 191.321,
"eval_steps_per_second": 6.027,
"step": 2100
},
{
"epoch": 1.55,
"eval_accuracy": 0.906000018119812,
"eval_loss": 0.34197333455085754,
"eval_runtime": 10.4733,
"eval_samples_per_second": 190.961,
"eval_steps_per_second": 6.015,
"step": 2400
},
{
"epoch": 1.62,
"learning_rate": 2.3078583765112265e-06,
"loss": 0.1428,
"step": 2500
},
{
"epoch": 1.75,
"eval_accuracy": 0.906499981880188,
"eval_loss": 0.3059616684913635,
"eval_runtime": 10.4777,
"eval_samples_per_second": 190.882,
"eval_steps_per_second": 6.013,
"step": 2700
},
{
"epoch": 1.94,
"learning_rate": 1.7681347150259068e-06,
"loss": 0.1399,
"step": 3000
},
{
"epoch": 1.94,
"eval_accuracy": 0.9020000100135803,
"eval_loss": 0.2935360372066498,
"eval_runtime": 10.5064,
"eval_samples_per_second": 190.361,
"eval_steps_per_second": 5.996,
"step": 3000
},
{
"epoch": 2.14,
"eval_accuracy": 0.909500002861023,
"eval_loss": 0.3417232036590576,
"eval_runtime": 10.4876,
"eval_samples_per_second": 190.702,
"eval_steps_per_second": 6.007,
"step": 3300
},
{
"epoch": 2.27,
"learning_rate": 1.2284110535405874e-06,
"loss": 0.1069,
"step": 3500
},
{
"epoch": 2.33,
"eval_accuracy": 0.8999999761581421,
"eval_loss": 0.4427025616168976,
"eval_runtime": 10.5396,
"eval_samples_per_second": 189.761,
"eval_steps_per_second": 5.977,
"step": 3600
},
{
"epoch": 2.53,
"eval_accuracy": 0.9039999842643738,
"eval_loss": 0.40283679962158203,
"eval_runtime": 10.5439,
"eval_samples_per_second": 189.684,
"eval_steps_per_second": 5.975,
"step": 3900
},
{
"epoch": 2.59,
"learning_rate": 6.886873920552678e-07,
"loss": 0.1009,
"step": 4000
},
{
"epoch": 2.72,
"eval_accuracy": 0.9024999737739563,
"eval_loss": 0.4096806049346924,
"eval_runtime": 10.4683,
"eval_samples_per_second": 191.052,
"eval_steps_per_second": 6.018,
"step": 4200
},
{
"epoch": 2.91,
"learning_rate": 1.5004317789291884e-07,
"loss": 0.0936,
"step": 4500
},
{
"epoch": 2.91,
"eval_accuracy": 0.9070000052452087,
"eval_loss": 0.4148932099342346,
"eval_runtime": 10.465,
"eval_samples_per_second": 191.113,
"eval_steps_per_second": 6.02,
"step": 4500
},
{
"epoch": 3.0,
"step": 4632,
"total_flos": 4794349372791840.0,
"train_loss": 0.17779028992166998,
"train_runtime": 4326.2693,
"train_samples_per_second": 34.257,
"train_steps_per_second": 1.071
}
],
"max_steps": 4632,
"num_train_epochs": 3,
"total_flos": 4794349372791840.0,
"trial_name": null,
"trial_params": null
}