Md Mushfiqur Rahman
Upload with huggingface_hub
d8d5734
{
"best_metric": 0.8068910256410257,
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Tamil-TTB/checkpoint-1000",
"epoch": 269.2307692307692,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 7.69,
"learning_rate": 8e-05,
"loss": 1.509,
"step": 100
},
{
"epoch": 15.38,
"learning_rate": 7.946308724832215e-05,
"loss": 0.274,
"step": 200
},
{
"epoch": 23.08,
"learning_rate": 7.89261744966443e-05,
"loss": 0.0679,
"step": 300
},
{
"epoch": 30.77,
"learning_rate": 7.838926174496645e-05,
"loss": 0.0307,
"step": 400
},
{
"epoch": 38.46,
"learning_rate": 7.78523489932886e-05,
"loss": 0.025,
"step": 500
},
{
"epoch": 38.46,
"eval_accuracy": 0.7892628205128205,
"eval_loss": 1.4348915815353394,
"eval_runtime": 0.3441,
"eval_samples_per_second": 232.482,
"eval_steps_per_second": 29.06,
"step": 500
},
{
"epoch": 46.15,
"learning_rate": 7.731543624161075e-05,
"loss": 0.018,
"step": 600
},
{
"epoch": 53.85,
"learning_rate": 7.677852348993288e-05,
"loss": 0.0152,
"step": 700
},
{
"epoch": 61.54,
"learning_rate": 7.624161073825503e-05,
"loss": 0.0113,
"step": 800
},
{
"epoch": 69.23,
"learning_rate": 7.570469798657718e-05,
"loss": 0.0081,
"step": 900
},
{
"epoch": 76.92,
"learning_rate": 7.516778523489933e-05,
"loss": 0.0092,
"step": 1000
},
{
"epoch": 76.92,
"eval_accuracy": 0.8068910256410257,
"eval_loss": 1.4408690929412842,
"eval_runtime": 0.3248,
"eval_samples_per_second": 246.316,
"eval_steps_per_second": 30.79,
"step": 1000
},
{
"epoch": 84.62,
"learning_rate": 7.463087248322148e-05,
"loss": 0.0069,
"step": 1100
},
{
"epoch": 92.31,
"learning_rate": 7.409395973154362e-05,
"loss": 0.0067,
"step": 1200
},
{
"epoch": 100.0,
"learning_rate": 7.355704697986577e-05,
"loss": 0.0069,
"step": 1300
},
{
"epoch": 107.69,
"learning_rate": 7.302013422818792e-05,
"loss": 0.0053,
"step": 1400
},
{
"epoch": 115.38,
"learning_rate": 7.248322147651007e-05,
"loss": 0.0046,
"step": 1500
},
{
"epoch": 115.38,
"eval_accuracy": 0.780448717948718,
"eval_loss": 1.9734928607940674,
"eval_runtime": 0.3276,
"eval_samples_per_second": 244.204,
"eval_steps_per_second": 30.525,
"step": 1500
},
{
"epoch": 123.08,
"learning_rate": 7.194630872483222e-05,
"loss": 0.0049,
"step": 1600
},
{
"epoch": 130.77,
"learning_rate": 7.140939597315438e-05,
"loss": 0.0045,
"step": 1700
},
{
"epoch": 138.46,
"learning_rate": 7.087248322147653e-05,
"loss": 0.0052,
"step": 1800
},
{
"epoch": 146.15,
"learning_rate": 7.033557046979866e-05,
"loss": 0.004,
"step": 1900
},
{
"epoch": 153.85,
"learning_rate": 6.979865771812081e-05,
"loss": 0.0036,
"step": 2000
},
{
"epoch": 153.85,
"eval_accuracy": 0.7996794871794872,
"eval_loss": 1.8069559335708618,
"eval_runtime": 0.3276,
"eval_samples_per_second": 244.205,
"eval_steps_per_second": 30.526,
"step": 2000
},
{
"epoch": 161.54,
"learning_rate": 6.926174496644296e-05,
"loss": 0.0057,
"step": 2100
},
{
"epoch": 169.23,
"learning_rate": 6.87248322147651e-05,
"loss": 0.0055,
"step": 2200
},
{
"epoch": 176.92,
"learning_rate": 6.818791946308725e-05,
"loss": 0.0049,
"step": 2300
},
{
"epoch": 184.62,
"learning_rate": 6.76510067114094e-05,
"loss": 0.0021,
"step": 2400
},
{
"epoch": 192.31,
"learning_rate": 6.711409395973155e-05,
"loss": 0.0042,
"step": 2500
},
{
"epoch": 192.31,
"eval_accuracy": 0.7780448717948718,
"eval_loss": 1.728083610534668,
"eval_runtime": 0.3279,
"eval_samples_per_second": 243.965,
"eval_steps_per_second": 30.496,
"step": 2500
},
{
"epoch": 200.0,
"learning_rate": 6.65771812080537e-05,
"loss": 0.0044,
"step": 2600
},
{
"epoch": 207.69,
"learning_rate": 6.604026845637585e-05,
"loss": 0.0035,
"step": 2700
},
{
"epoch": 215.38,
"learning_rate": 6.5503355704698e-05,
"loss": 0.0032,
"step": 2800
},
{
"epoch": 223.08,
"learning_rate": 6.496644295302014e-05,
"loss": 0.0033,
"step": 2900
},
{
"epoch": 230.77,
"learning_rate": 6.442953020134228e-05,
"loss": 0.0027,
"step": 3000
},
{
"epoch": 230.77,
"eval_accuracy": 0.8004807692307693,
"eval_loss": 1.849981665611267,
"eval_runtime": 0.3277,
"eval_samples_per_second": 244.159,
"eval_steps_per_second": 30.52,
"step": 3000
},
{
"epoch": 238.46,
"learning_rate": 6.389261744966443e-05,
"loss": 0.0027,
"step": 3100
},
{
"epoch": 246.15,
"learning_rate": 6.335570469798657e-05,
"loss": 0.0017,
"step": 3200
},
{
"epoch": 253.85,
"learning_rate": 6.281879194630872e-05,
"loss": 0.0016,
"step": 3300
},
{
"epoch": 261.54,
"learning_rate": 6.228187919463087e-05,
"loss": 0.0016,
"step": 3400
},
{
"epoch": 269.23,
"learning_rate": 6.174496644295302e-05,
"loss": 0.0022,
"step": 3500
},
{
"epoch": 269.23,
"eval_accuracy": 0.8060897435897436,
"eval_loss": 1.7650340795516968,
"eval_runtime": 0.3308,
"eval_samples_per_second": 241.835,
"eval_steps_per_second": 30.229,
"step": 3500
},
{
"epoch": 269.23,
"step": 3500,
"total_flos": 1.768647275040768e+16,
"train_loss": 0.05915338551998139,
"train_runtime": 490.6715,
"train_samples_per_second": 978.251,
"train_steps_per_second": 30.57
}
],
"max_steps": 15000,
"num_train_epochs": 1154,
"total_flos": 1.768647275040768e+16,
"trial_name": null,
"trial_params": null
}