RaushanTurganbay's picture
Upload folder using huggingface_hub
e4b5812
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.776119402985074,
"eval_steps": 200,
"global_step": 3600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 2.5e-06,
"loss": 0.4891,
"step": 200
},
{
"epoch": 0.27,
"eval_accuracy": 0.7833955223880597,
"eval_loss": 0.4603855013847351,
"eval_runtime": 253.4527,
"eval_samples_per_second": 63.444,
"eval_steps_per_second": 10.574,
"step": 200
},
{
"epoch": 0.53,
"learning_rate": 5e-06,
"loss": 0.4781,
"step": 400
},
{
"epoch": 0.53,
"eval_accuracy": 0.7914179104477612,
"eval_loss": 0.44968461990356445,
"eval_runtime": 253.7178,
"eval_samples_per_second": 63.378,
"eval_steps_per_second": 10.563,
"step": 400
},
{
"epoch": 0.8,
"learning_rate": 4.722222222222222e-06,
"loss": 0.4697,
"step": 600
},
{
"epoch": 0.8,
"eval_accuracy": 0.7952114427860697,
"eval_loss": 0.43988558650016785,
"eval_runtime": 253.4986,
"eval_samples_per_second": 63.432,
"eval_steps_per_second": 10.572,
"step": 600
},
{
"epoch": 1.06,
"learning_rate": 4.444444444444444e-06,
"loss": 0.459,
"step": 800
},
{
"epoch": 1.06,
"eval_accuracy": 0.8071517412935323,
"eval_loss": 0.4225695729255676,
"eval_runtime": 256.265,
"eval_samples_per_second": 62.748,
"eval_steps_per_second": 10.458,
"step": 800
},
{
"epoch": 1.33,
"learning_rate": 4.166666666666667e-06,
"loss": 0.4369,
"step": 1000
},
{
"epoch": 1.33,
"eval_accuracy": 0.8149875621890548,
"eval_loss": 0.4131614863872528,
"eval_runtime": 254.6359,
"eval_samples_per_second": 63.149,
"eval_steps_per_second": 10.525,
"step": 1000
},
{
"epoch": 1.59,
"learning_rate": 3.88888888888889e-06,
"loss": 0.4232,
"step": 1200
},
{
"epoch": 1.59,
"eval_accuracy": 0.818594527363184,
"eval_loss": 0.4021635055541992,
"eval_runtime": 256.0534,
"eval_samples_per_second": 62.799,
"eval_steps_per_second": 10.467,
"step": 1200
},
{
"epoch": 1.86,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.4234,
"step": 1400
},
{
"epoch": 1.86,
"eval_accuracy": 0.8263059701492538,
"eval_loss": 0.3889436423778534,
"eval_runtime": 253.8628,
"eval_samples_per_second": 63.341,
"eval_steps_per_second": 10.557,
"step": 1400
},
{
"epoch": 2.12,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.4016,
"step": 1600
},
{
"epoch": 2.12,
"eval_accuracy": 0.8316542288557214,
"eval_loss": 0.3784995973110199,
"eval_runtime": 253.3618,
"eval_samples_per_second": 63.467,
"eval_steps_per_second": 10.578,
"step": 1600
},
{
"epoch": 2.39,
"learning_rate": 3.055555555555556e-06,
"loss": 0.3853,
"step": 1800
},
{
"epoch": 2.39,
"eval_accuracy": 0.8365671641791045,
"eval_loss": 0.3704816401004791,
"eval_runtime": 253.6776,
"eval_samples_per_second": 63.388,
"eval_steps_per_second": 10.565,
"step": 1800
},
{
"epoch": 2.65,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.3777,
"step": 2000
},
{
"epoch": 2.65,
"eval_accuracy": 0.8468905472636816,
"eval_loss": 0.35585275292396545,
"eval_runtime": 279.3911,
"eval_samples_per_second": 57.554,
"eval_steps_per_second": 9.592,
"step": 2000
},
{
"epoch": 2.92,
"learning_rate": 2.5e-06,
"loss": 0.3681,
"step": 2200
},
{
"epoch": 2.92,
"eval_accuracy": 0.8519900497512438,
"eval_loss": 0.34731847047805786,
"eval_runtime": 257.1097,
"eval_samples_per_second": 62.541,
"eval_steps_per_second": 10.424,
"step": 2200
},
{
"epoch": 3.18,
"learning_rate": 2.222222222222222e-06,
"loss": 0.3542,
"step": 2400
},
{
"epoch": 3.18,
"eval_accuracy": 0.8529228855721394,
"eval_loss": 0.3429964780807495,
"eval_runtime": 261.8479,
"eval_samples_per_second": 61.41,
"eval_steps_per_second": 10.235,
"step": 2400
},
{
"epoch": 3.45,
"learning_rate": 1.944444444444445e-06,
"loss": 0.3445,
"step": 2600
},
{
"epoch": 3.45,
"eval_accuracy": 0.8588930348258706,
"eval_loss": 0.3309912085533142,
"eval_runtime": 261.3944,
"eval_samples_per_second": 61.516,
"eval_steps_per_second": 10.253,
"step": 2600
},
{
"epoch": 3.71,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.3357,
"step": 2800
},
{
"epoch": 3.71,
"eval_accuracy": 0.8645522388059701,
"eval_loss": 0.3250305950641632,
"eval_runtime": 254.0587,
"eval_samples_per_second": 63.292,
"eval_steps_per_second": 10.549,
"step": 2800
},
{
"epoch": 3.98,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.3265,
"step": 3000
},
{
"epoch": 3.98,
"eval_accuracy": 0.8647388059701493,
"eval_loss": 0.3216043710708618,
"eval_runtime": 256.6489,
"eval_samples_per_second": 62.654,
"eval_steps_per_second": 10.442,
"step": 3000
},
{
"epoch": 4.25,
"learning_rate": 1.111111111111111e-06,
"loss": 0.3126,
"step": 3200
},
{
"epoch": 4.25,
"eval_accuracy": 0.8699004975124378,
"eval_loss": 0.3159268796443939,
"eval_runtime": 258.1285,
"eval_samples_per_second": 62.295,
"eval_steps_per_second": 10.382,
"step": 3200
},
{
"epoch": 4.51,
"learning_rate": 8.333333333333333e-07,
"loss": 0.313,
"step": 3400
},
{
"epoch": 4.51,
"eval_accuracy": 0.8715174129353234,
"eval_loss": 0.3117404282093048,
"eval_runtime": 256.5391,
"eval_samples_per_second": 62.68,
"eval_steps_per_second": 10.447,
"step": 3400
},
{
"epoch": 4.78,
"learning_rate": 5.555555555555555e-07,
"loss": 0.3122,
"step": 3600
},
{
"epoch": 4.78,
"eval_accuracy": 0.8740049751243781,
"eval_loss": 0.3079596161842346,
"eval_runtime": 255.3334,
"eval_samples_per_second": 62.976,
"eval_steps_per_second": 10.496,
"step": 3600
}
],
"logging_steps": 200,
"max_steps": 4000,
"num_train_epochs": 6,
"save_steps": 200,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}