|
{ |
|
"best_metric": 0.8567830467257253, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Vietnamese-VTB/checkpoint-5500", |
|
"epoch": 181.8181818181818, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8e-05, |
|
"loss": 1.7158, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 7.946308724832215e-05, |
|
"loss": 0.6087, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 7.89261744966443e-05, |
|
"loss": 0.3469, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.838926174496645e-05, |
|
"loss": 0.2155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 7.78523489932886e-05, |
|
"loss": 0.1352, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"eval_accuracy": 0.845839847142609, |
|
"eval_loss": 0.7401469945907593, |
|
"eval_runtime": 3.1919, |
|
"eval_samples_per_second": 250.631, |
|
"eval_steps_per_second": 31.329, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 7.731543624161075e-05, |
|
"loss": 0.0882, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 7.677852348993288e-05, |
|
"loss": 0.0645, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 7.624161073825503e-05, |
|
"loss": 0.0481, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 7.570469798657718e-05, |
|
"loss": 0.0386, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 7.516778523489933e-05, |
|
"loss": 0.0322, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"eval_accuracy": 0.8473163105784263, |
|
"eval_loss": 0.9984953999519348, |
|
"eval_runtime": 3.1616, |
|
"eval_samples_per_second": 253.035, |
|
"eval_steps_per_second": 31.629, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 7.463087248322148e-05, |
|
"loss": 0.0243, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 7.409395973154362e-05, |
|
"loss": 0.0221, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 7.355704697986577e-05, |
|
"loss": 0.0192, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 7.302013422818792e-05, |
|
"loss": 0.0158, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 7.248322147651007e-05, |
|
"loss": 0.0182, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_accuracy": 0.8484453708528747, |
|
"eval_loss": 1.106465220451355, |
|
"eval_runtime": 3.1709, |
|
"eval_samples_per_second": 252.294, |
|
"eval_steps_per_second": 31.537, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 7.194630872483222e-05, |
|
"loss": 0.0173, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 7.140939597315438e-05, |
|
"loss": 0.0136, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 7.087248322147653e-05, |
|
"loss": 0.0116, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 43.18, |
|
"learning_rate": 7.033557046979866e-05, |
|
"loss": 0.01, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 6.979865771812081e-05, |
|
"loss": 0.0112, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"eval_accuracy": 0.8446239360778183, |
|
"eval_loss": 1.2097970247268677, |
|
"eval_runtime": 3.165, |
|
"eval_samples_per_second": 252.768, |
|
"eval_steps_per_second": 31.596, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 6.926174496644296e-05, |
|
"loss": 0.009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 6.87248322147651e-05, |
|
"loss": 0.0103, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 52.27, |
|
"learning_rate": 6.818791946308725e-05, |
|
"loss": 0.0096, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 6.76510067114094e-05, |
|
"loss": 0.008, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"learning_rate": 6.711409395973155e-05, |
|
"loss": 0.008, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 56.82, |
|
"eval_accuracy": 0.8536564182734063, |
|
"eval_loss": 1.282847285270691, |
|
"eval_runtime": 3.1784, |
|
"eval_samples_per_second": 251.698, |
|
"eval_steps_per_second": 31.462, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 59.09, |
|
"learning_rate": 6.65771812080537e-05, |
|
"loss": 0.0073, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 61.36, |
|
"learning_rate": 6.604026845637585e-05, |
|
"loss": 0.008, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 6.5503355704698e-05, |
|
"loss": 0.0076, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 65.91, |
|
"learning_rate": 6.496644295302014e-05, |
|
"loss": 0.0072, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 6.442953020134228e-05, |
|
"loss": 0.0064, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"eval_accuracy": 0.8514851485148515, |
|
"eval_loss": 1.265517234802246, |
|
"eval_runtime": 3.1864, |
|
"eval_samples_per_second": 251.068, |
|
"eval_steps_per_second": 31.384, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 70.45, |
|
"learning_rate": 6.389261744966443e-05, |
|
"loss": 0.0054, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 6.335570469798657e-05, |
|
"loss": 0.0047, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 6.281879194630872e-05, |
|
"loss": 0.0064, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 77.27, |
|
"learning_rate": 6.228187919463087e-05, |
|
"loss": 0.0057, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 79.55, |
|
"learning_rate": 6.174496644295302e-05, |
|
"loss": 0.0064, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 79.55, |
|
"eval_accuracy": 0.8527879103699844, |
|
"eval_loss": 1.328222393989563, |
|
"eval_runtime": 3.1799, |
|
"eval_samples_per_second": 251.578, |
|
"eval_steps_per_second": 31.447, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 6.120805369127517e-05, |
|
"loss": 0.0055, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 84.09, |
|
"learning_rate": 6.067114093959732e-05, |
|
"loss": 0.0056, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 86.36, |
|
"learning_rate": 6.013422818791947e-05, |
|
"loss": 0.0049, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 5.959731543624162e-05, |
|
"loss": 0.0035, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 5.906040268456377e-05, |
|
"loss": 0.0045, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_accuracy": 0.8500955358693764, |
|
"eval_loss": 1.3510583639144897, |
|
"eval_runtime": 3.1875, |
|
"eval_samples_per_second": 250.981, |
|
"eval_steps_per_second": 31.373, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 93.18, |
|
"learning_rate": 5.8523489932885916e-05, |
|
"loss": 0.0038, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 5.798657718120806e-05, |
|
"loss": 0.0033, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 97.73, |
|
"learning_rate": 5.7449664429530206e-05, |
|
"loss": 0.004, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 5.6912751677852354e-05, |
|
"loss": 0.0042, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 102.27, |
|
"learning_rate": 5.63758389261745e-05, |
|
"loss": 0.0047, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 102.27, |
|
"eval_accuracy": 0.8551328817092235, |
|
"eval_loss": 1.3701601028442383, |
|
"eval_runtime": 3.1715, |
|
"eval_samples_per_second": 252.247, |
|
"eval_steps_per_second": 31.531, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 104.55, |
|
"learning_rate": 5.583892617449665e-05, |
|
"loss": 0.004, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 106.82, |
|
"learning_rate": 5.53020134228188e-05, |
|
"loss": 0.0031, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"learning_rate": 5.476510067114095e-05, |
|
"loss": 0.0047, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 111.36, |
|
"learning_rate": 5.4228187919463095e-05, |
|
"loss": 0.0028, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 113.64, |
|
"learning_rate": 5.369127516778524e-05, |
|
"loss": 0.0039, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 113.64, |
|
"eval_accuracy": 0.8559145388223033, |
|
"eval_loss": 1.4116061925888062, |
|
"eval_runtime": 3.1845, |
|
"eval_samples_per_second": 251.218, |
|
"eval_steps_per_second": 31.402, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 115.91, |
|
"learning_rate": 5.315436241610739e-05, |
|
"loss": 0.004, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"learning_rate": 5.261744966442954e-05, |
|
"loss": 0.0034, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 120.45, |
|
"learning_rate": 5.208053691275168e-05, |
|
"loss": 0.0027, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 122.73, |
|
"learning_rate": 5.154362416107383e-05, |
|
"loss": 0.0023, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 5.100671140939598e-05, |
|
"loss": 0.0029, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.8567830467257253, |
|
"eval_loss": 1.434157371520996, |
|
"eval_runtime": 3.1462, |
|
"eval_samples_per_second": 254.277, |
|
"eval_steps_per_second": 31.785, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"learning_rate": 5.0469798657718126e-05, |
|
"loss": 0.0027, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 129.55, |
|
"learning_rate": 4.9932885906040274e-05, |
|
"loss": 0.0033, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 131.82, |
|
"learning_rate": 4.939597315436242e-05, |
|
"loss": 0.0029, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 134.09, |
|
"learning_rate": 4.885906040268457e-05, |
|
"loss": 0.0023, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"learning_rate": 4.832214765100672e-05, |
|
"loss": 0.0033, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"eval_accuracy": 0.854003821434775, |
|
"eval_loss": 1.4268622398376465, |
|
"eval_runtime": 3.1765, |
|
"eval_samples_per_second": 251.849, |
|
"eval_steps_per_second": 31.481, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 138.64, |
|
"learning_rate": 4.779060402684564e-05, |
|
"loss": 0.0031, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 140.91, |
|
"learning_rate": 4.725369127516779e-05, |
|
"loss": 0.0031, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 143.18, |
|
"learning_rate": 4.6716778523489936e-05, |
|
"loss": 0.0031, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"learning_rate": 4.6179865771812084e-05, |
|
"loss": 0.0024, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 147.73, |
|
"learning_rate": 4.564295302013423e-05, |
|
"loss": 0.0033, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 147.73, |
|
"eval_accuracy": 0.853048462741011, |
|
"eval_loss": 1.4698809385299683, |
|
"eval_runtime": 3.1835, |
|
"eval_samples_per_second": 251.299, |
|
"eval_steps_per_second": 31.412, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 4.510604026845638e-05, |
|
"loss": 0.0023, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 152.27, |
|
"learning_rate": 4.456912751677853e-05, |
|
"loss": 0.0022, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 154.55, |
|
"learning_rate": 4.403221476510068e-05, |
|
"loss": 0.0026, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 156.82, |
|
"learning_rate": 4.3495302013422825e-05, |
|
"loss": 0.002, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 159.09, |
|
"learning_rate": 4.295838926174497e-05, |
|
"loss": 0.0019, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 159.09, |
|
"eval_accuracy": 0.8554802848705924, |
|
"eval_loss": 1.4916952848434448, |
|
"eval_runtime": 3.1771, |
|
"eval_samples_per_second": 251.799, |
|
"eval_steps_per_second": 31.475, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 161.36, |
|
"learning_rate": 4.242147651006712e-05, |
|
"loss": 0.0032, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 163.64, |
|
"learning_rate": 4.188456375838927e-05, |
|
"loss": 0.002, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 165.91, |
|
"learning_rate": 4.134765100671141e-05, |
|
"loss": 0.0022, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 168.18, |
|
"learning_rate": 4.081073825503356e-05, |
|
"loss": 0.0016, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 170.45, |
|
"learning_rate": 4.027382550335571e-05, |
|
"loss": 0.0018, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 170.45, |
|
"eval_accuracy": 0.854872329338197, |
|
"eval_loss": 1.4624574184417725, |
|
"eval_runtime": 3.1705, |
|
"eval_samples_per_second": 252.327, |
|
"eval_steps_per_second": 31.541, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 172.73, |
|
"learning_rate": 3.9736912751677856e-05, |
|
"loss": 0.0017, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.0014, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 177.27, |
|
"learning_rate": 3.866308724832215e-05, |
|
"loss": 0.0009, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 179.55, |
|
"learning_rate": 3.81261744966443e-05, |
|
"loss": 0.0014, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"learning_rate": 3.758926174496645e-05, |
|
"loss": 0.001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"eval_accuracy": 0.8567830467257253, |
|
"eval_loss": 1.5747320652008057, |
|
"eval_runtime": 3.1536, |
|
"eval_samples_per_second": 253.674, |
|
"eval_steps_per_second": 31.709, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"step": 8000, |
|
"total_flos": 4.180403182626816e+16, |
|
"train_loss": 0.04628073706757277, |
|
"train_runtime": 1159.4207, |
|
"train_samples_per_second": 414.0, |
|
"train_steps_per_second": 12.937 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 341, |
|
"total_flos": 4.180403182626816e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|