Md Mushfiqur Rahman
Upload with huggingface_hub
c7e4dd5
raw
history blame
14.4 kB
{
"best_metric": 0.8567830467257253,
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Vietnamese-VTB/checkpoint-5500",
"epoch": 181.8181818181818,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.27,
"learning_rate": 8e-05,
"loss": 1.7158,
"step": 100
},
{
"epoch": 4.55,
"learning_rate": 7.946308724832215e-05,
"loss": 0.6087,
"step": 200
},
{
"epoch": 6.82,
"learning_rate": 7.89261744966443e-05,
"loss": 0.3469,
"step": 300
},
{
"epoch": 9.09,
"learning_rate": 7.838926174496645e-05,
"loss": 0.2155,
"step": 400
},
{
"epoch": 11.36,
"learning_rate": 7.78523489932886e-05,
"loss": 0.1352,
"step": 500
},
{
"epoch": 11.36,
"eval_accuracy": 0.845839847142609,
"eval_loss": 0.7401469945907593,
"eval_runtime": 3.1919,
"eval_samples_per_second": 250.631,
"eval_steps_per_second": 31.329,
"step": 500
},
{
"epoch": 13.64,
"learning_rate": 7.731543624161075e-05,
"loss": 0.0882,
"step": 600
},
{
"epoch": 15.91,
"learning_rate": 7.677852348993288e-05,
"loss": 0.0645,
"step": 700
},
{
"epoch": 18.18,
"learning_rate": 7.624161073825503e-05,
"loss": 0.0481,
"step": 800
},
{
"epoch": 20.45,
"learning_rate": 7.570469798657718e-05,
"loss": 0.0386,
"step": 900
},
{
"epoch": 22.73,
"learning_rate": 7.516778523489933e-05,
"loss": 0.0322,
"step": 1000
},
{
"epoch": 22.73,
"eval_accuracy": 0.8473163105784263,
"eval_loss": 0.9984953999519348,
"eval_runtime": 3.1616,
"eval_samples_per_second": 253.035,
"eval_steps_per_second": 31.629,
"step": 1000
},
{
"epoch": 25.0,
"learning_rate": 7.463087248322148e-05,
"loss": 0.0243,
"step": 1100
},
{
"epoch": 27.27,
"learning_rate": 7.409395973154362e-05,
"loss": 0.0221,
"step": 1200
},
{
"epoch": 29.55,
"learning_rate": 7.355704697986577e-05,
"loss": 0.0192,
"step": 1300
},
{
"epoch": 31.82,
"learning_rate": 7.302013422818792e-05,
"loss": 0.0158,
"step": 1400
},
{
"epoch": 34.09,
"learning_rate": 7.248322147651007e-05,
"loss": 0.0182,
"step": 1500
},
{
"epoch": 34.09,
"eval_accuracy": 0.8484453708528747,
"eval_loss": 1.106465220451355,
"eval_runtime": 3.1709,
"eval_samples_per_second": 252.294,
"eval_steps_per_second": 31.537,
"step": 1500
},
{
"epoch": 36.36,
"learning_rate": 7.194630872483222e-05,
"loss": 0.0173,
"step": 1600
},
{
"epoch": 38.64,
"learning_rate": 7.140939597315438e-05,
"loss": 0.0136,
"step": 1700
},
{
"epoch": 40.91,
"learning_rate": 7.087248322147653e-05,
"loss": 0.0116,
"step": 1800
},
{
"epoch": 43.18,
"learning_rate": 7.033557046979866e-05,
"loss": 0.01,
"step": 1900
},
{
"epoch": 45.45,
"learning_rate": 6.979865771812081e-05,
"loss": 0.0112,
"step": 2000
},
{
"epoch": 45.45,
"eval_accuracy": 0.8446239360778183,
"eval_loss": 1.2097970247268677,
"eval_runtime": 3.165,
"eval_samples_per_second": 252.768,
"eval_steps_per_second": 31.596,
"step": 2000
},
{
"epoch": 47.73,
"learning_rate": 6.926174496644296e-05,
"loss": 0.009,
"step": 2100
},
{
"epoch": 50.0,
"learning_rate": 6.87248322147651e-05,
"loss": 0.0103,
"step": 2200
},
{
"epoch": 52.27,
"learning_rate": 6.818791946308725e-05,
"loss": 0.0096,
"step": 2300
},
{
"epoch": 54.55,
"learning_rate": 6.76510067114094e-05,
"loss": 0.008,
"step": 2400
},
{
"epoch": 56.82,
"learning_rate": 6.711409395973155e-05,
"loss": 0.008,
"step": 2500
},
{
"epoch": 56.82,
"eval_accuracy": 0.8536564182734063,
"eval_loss": 1.282847285270691,
"eval_runtime": 3.1784,
"eval_samples_per_second": 251.698,
"eval_steps_per_second": 31.462,
"step": 2500
},
{
"epoch": 59.09,
"learning_rate": 6.65771812080537e-05,
"loss": 0.0073,
"step": 2600
},
{
"epoch": 61.36,
"learning_rate": 6.604026845637585e-05,
"loss": 0.008,
"step": 2700
},
{
"epoch": 63.64,
"learning_rate": 6.5503355704698e-05,
"loss": 0.0076,
"step": 2800
},
{
"epoch": 65.91,
"learning_rate": 6.496644295302014e-05,
"loss": 0.0072,
"step": 2900
},
{
"epoch": 68.18,
"learning_rate": 6.442953020134228e-05,
"loss": 0.0064,
"step": 3000
},
{
"epoch": 68.18,
"eval_accuracy": 0.8514851485148515,
"eval_loss": 1.265517234802246,
"eval_runtime": 3.1864,
"eval_samples_per_second": 251.068,
"eval_steps_per_second": 31.384,
"step": 3000
},
{
"epoch": 70.45,
"learning_rate": 6.389261744966443e-05,
"loss": 0.0054,
"step": 3100
},
{
"epoch": 72.73,
"learning_rate": 6.335570469798657e-05,
"loss": 0.0047,
"step": 3200
},
{
"epoch": 75.0,
"learning_rate": 6.281879194630872e-05,
"loss": 0.0064,
"step": 3300
},
{
"epoch": 77.27,
"learning_rate": 6.228187919463087e-05,
"loss": 0.0057,
"step": 3400
},
{
"epoch": 79.55,
"learning_rate": 6.174496644295302e-05,
"loss": 0.0064,
"step": 3500
},
{
"epoch": 79.55,
"eval_accuracy": 0.8527879103699844,
"eval_loss": 1.328222393989563,
"eval_runtime": 3.1799,
"eval_samples_per_second": 251.578,
"eval_steps_per_second": 31.447,
"step": 3500
},
{
"epoch": 81.82,
"learning_rate": 6.120805369127517e-05,
"loss": 0.0055,
"step": 3600
},
{
"epoch": 84.09,
"learning_rate": 6.067114093959732e-05,
"loss": 0.0056,
"step": 3700
},
{
"epoch": 86.36,
"learning_rate": 6.013422818791947e-05,
"loss": 0.0049,
"step": 3800
},
{
"epoch": 88.64,
"learning_rate": 5.959731543624162e-05,
"loss": 0.0035,
"step": 3900
},
{
"epoch": 90.91,
"learning_rate": 5.906040268456377e-05,
"loss": 0.0045,
"step": 4000
},
{
"epoch": 90.91,
"eval_accuracy": 0.8500955358693764,
"eval_loss": 1.3510583639144897,
"eval_runtime": 3.1875,
"eval_samples_per_second": 250.981,
"eval_steps_per_second": 31.373,
"step": 4000
},
{
"epoch": 93.18,
"learning_rate": 5.8523489932885916e-05,
"loss": 0.0038,
"step": 4100
},
{
"epoch": 95.45,
"learning_rate": 5.798657718120806e-05,
"loss": 0.0033,
"step": 4200
},
{
"epoch": 97.73,
"learning_rate": 5.7449664429530206e-05,
"loss": 0.004,
"step": 4300
},
{
"epoch": 100.0,
"learning_rate": 5.6912751677852354e-05,
"loss": 0.0042,
"step": 4400
},
{
"epoch": 102.27,
"learning_rate": 5.63758389261745e-05,
"loss": 0.0047,
"step": 4500
},
{
"epoch": 102.27,
"eval_accuracy": 0.8551328817092235,
"eval_loss": 1.3701601028442383,
"eval_runtime": 3.1715,
"eval_samples_per_second": 252.247,
"eval_steps_per_second": 31.531,
"step": 4500
},
{
"epoch": 104.55,
"learning_rate": 5.583892617449665e-05,
"loss": 0.004,
"step": 4600
},
{
"epoch": 106.82,
"learning_rate": 5.53020134228188e-05,
"loss": 0.0031,
"step": 4700
},
{
"epoch": 109.09,
"learning_rate": 5.476510067114095e-05,
"loss": 0.0047,
"step": 4800
},
{
"epoch": 111.36,
"learning_rate": 5.4228187919463095e-05,
"loss": 0.0028,
"step": 4900
},
{
"epoch": 113.64,
"learning_rate": 5.369127516778524e-05,
"loss": 0.0039,
"step": 5000
},
{
"epoch": 113.64,
"eval_accuracy": 0.8559145388223033,
"eval_loss": 1.4116061925888062,
"eval_runtime": 3.1845,
"eval_samples_per_second": 251.218,
"eval_steps_per_second": 31.402,
"step": 5000
},
{
"epoch": 115.91,
"learning_rate": 5.315436241610739e-05,
"loss": 0.004,
"step": 5100
},
{
"epoch": 118.18,
"learning_rate": 5.261744966442954e-05,
"loss": 0.0034,
"step": 5200
},
{
"epoch": 120.45,
"learning_rate": 5.208053691275168e-05,
"loss": 0.0027,
"step": 5300
},
{
"epoch": 122.73,
"learning_rate": 5.154362416107383e-05,
"loss": 0.0023,
"step": 5400
},
{
"epoch": 125.0,
"learning_rate": 5.100671140939598e-05,
"loss": 0.0029,
"step": 5500
},
{
"epoch": 125.0,
"eval_accuracy": 0.8567830467257253,
"eval_loss": 1.434157371520996,
"eval_runtime": 3.1462,
"eval_samples_per_second": 254.277,
"eval_steps_per_second": 31.785,
"step": 5500
},
{
"epoch": 127.27,
"learning_rate": 5.0469798657718126e-05,
"loss": 0.0027,
"step": 5600
},
{
"epoch": 129.55,
"learning_rate": 4.9932885906040274e-05,
"loss": 0.0033,
"step": 5700
},
{
"epoch": 131.82,
"learning_rate": 4.939597315436242e-05,
"loss": 0.0029,
"step": 5800
},
{
"epoch": 134.09,
"learning_rate": 4.885906040268457e-05,
"loss": 0.0023,
"step": 5900
},
{
"epoch": 136.36,
"learning_rate": 4.832214765100672e-05,
"loss": 0.0033,
"step": 6000
},
{
"epoch": 136.36,
"eval_accuracy": 0.854003821434775,
"eval_loss": 1.4268622398376465,
"eval_runtime": 3.1765,
"eval_samples_per_second": 251.849,
"eval_steps_per_second": 31.481,
"step": 6000
},
{
"epoch": 138.64,
"learning_rate": 4.779060402684564e-05,
"loss": 0.0031,
"step": 6100
},
{
"epoch": 140.91,
"learning_rate": 4.725369127516779e-05,
"loss": 0.0031,
"step": 6200
},
{
"epoch": 143.18,
"learning_rate": 4.6716778523489936e-05,
"loss": 0.0031,
"step": 6300
},
{
"epoch": 145.45,
"learning_rate": 4.6179865771812084e-05,
"loss": 0.0024,
"step": 6400
},
{
"epoch": 147.73,
"learning_rate": 4.564295302013423e-05,
"loss": 0.0033,
"step": 6500
},
{
"epoch": 147.73,
"eval_accuracy": 0.853048462741011,
"eval_loss": 1.4698809385299683,
"eval_runtime": 3.1835,
"eval_samples_per_second": 251.299,
"eval_steps_per_second": 31.412,
"step": 6500
},
{
"epoch": 150.0,
"learning_rate": 4.510604026845638e-05,
"loss": 0.0023,
"step": 6600
},
{
"epoch": 152.27,
"learning_rate": 4.456912751677853e-05,
"loss": 0.0022,
"step": 6700
},
{
"epoch": 154.55,
"learning_rate": 4.403221476510068e-05,
"loss": 0.0026,
"step": 6800
},
{
"epoch": 156.82,
"learning_rate": 4.3495302013422825e-05,
"loss": 0.002,
"step": 6900
},
{
"epoch": 159.09,
"learning_rate": 4.295838926174497e-05,
"loss": 0.0019,
"step": 7000
},
{
"epoch": 159.09,
"eval_accuracy": 0.8554802848705924,
"eval_loss": 1.4916952848434448,
"eval_runtime": 3.1771,
"eval_samples_per_second": 251.799,
"eval_steps_per_second": 31.475,
"step": 7000
},
{
"epoch": 161.36,
"learning_rate": 4.242147651006712e-05,
"loss": 0.0032,
"step": 7100
},
{
"epoch": 163.64,
"learning_rate": 4.188456375838927e-05,
"loss": 0.002,
"step": 7200
},
{
"epoch": 165.91,
"learning_rate": 4.134765100671141e-05,
"loss": 0.0022,
"step": 7300
},
{
"epoch": 168.18,
"learning_rate": 4.081073825503356e-05,
"loss": 0.0016,
"step": 7400
},
{
"epoch": 170.45,
"learning_rate": 4.027382550335571e-05,
"loss": 0.0018,
"step": 7500
},
{
"epoch": 170.45,
"eval_accuracy": 0.854872329338197,
"eval_loss": 1.4624574184417725,
"eval_runtime": 3.1705,
"eval_samples_per_second": 252.327,
"eval_steps_per_second": 31.541,
"step": 7500
},
{
"epoch": 172.73,
"learning_rate": 3.9736912751677856e-05,
"loss": 0.0017,
"step": 7600
},
{
"epoch": 175.0,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.0014,
"step": 7700
},
{
"epoch": 177.27,
"learning_rate": 3.866308724832215e-05,
"loss": 0.0009,
"step": 7800
},
{
"epoch": 179.55,
"learning_rate": 3.81261744966443e-05,
"loss": 0.0014,
"step": 7900
},
{
"epoch": 181.82,
"learning_rate": 3.758926174496645e-05,
"loss": 0.001,
"step": 8000
},
{
"epoch": 181.82,
"eval_accuracy": 0.8567830467257253,
"eval_loss": 1.5747320652008057,
"eval_runtime": 3.1536,
"eval_samples_per_second": 253.674,
"eval_steps_per_second": 31.709,
"step": 8000
},
{
"epoch": 181.82,
"step": 8000,
"total_flos": 4.180403182626816e+16,
"train_loss": 0.04628073706757277,
"train_runtime": 1159.4207,
"train_samples_per_second": 414.0,
"train_steps_per_second": 12.937
}
],
"max_steps": 15000,
"num_train_epochs": 341,
"total_flos": 4.180403182626816e+16,
"trial_name": null,
"trial_params": null
}