{ "best_metric": 0.8567830467257253, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Vietnamese-VTB/checkpoint-5500", "epoch": 181.8181818181818, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.27, "learning_rate": 8e-05, "loss": 1.7158, "step": 100 }, { "epoch": 4.55, "learning_rate": 7.946308724832215e-05, "loss": 0.6087, "step": 200 }, { "epoch": 6.82, "learning_rate": 7.89261744966443e-05, "loss": 0.3469, "step": 300 }, { "epoch": 9.09, "learning_rate": 7.838926174496645e-05, "loss": 0.2155, "step": 400 }, { "epoch": 11.36, "learning_rate": 7.78523489932886e-05, "loss": 0.1352, "step": 500 }, { "epoch": 11.36, "eval_accuracy": 0.845839847142609, "eval_loss": 0.7401469945907593, "eval_runtime": 3.1919, "eval_samples_per_second": 250.631, "eval_steps_per_second": 31.329, "step": 500 }, { "epoch": 13.64, "learning_rate": 7.731543624161075e-05, "loss": 0.0882, "step": 600 }, { "epoch": 15.91, "learning_rate": 7.677852348993288e-05, "loss": 0.0645, "step": 700 }, { "epoch": 18.18, "learning_rate": 7.624161073825503e-05, "loss": 0.0481, "step": 800 }, { "epoch": 20.45, "learning_rate": 7.570469798657718e-05, "loss": 0.0386, "step": 900 }, { "epoch": 22.73, "learning_rate": 7.516778523489933e-05, "loss": 0.0322, "step": 1000 }, { "epoch": 22.73, "eval_accuracy": 0.8473163105784263, "eval_loss": 0.9984953999519348, "eval_runtime": 3.1616, "eval_samples_per_second": 253.035, "eval_steps_per_second": 31.629, "step": 1000 }, { "epoch": 25.0, "learning_rate": 7.463087248322148e-05, "loss": 0.0243, "step": 1100 }, { "epoch": 27.27, "learning_rate": 7.409395973154362e-05, "loss": 0.0221, "step": 1200 }, { "epoch": 29.55, "learning_rate": 7.355704697986577e-05, "loss": 0.0192, "step": 1300 }, { "epoch": 31.82, "learning_rate": 7.302013422818792e-05, "loss": 0.0158, "step": 1400 }, { "epoch": 34.09, "learning_rate": 7.248322147651007e-05, "loss": 0.0182, "step": 1500 }, { "epoch": 34.09, "eval_accuracy": 0.8484453708528747, "eval_loss": 1.106465220451355, "eval_runtime": 3.1709, "eval_samples_per_second": 252.294, "eval_steps_per_second": 31.537, "step": 1500 }, { "epoch": 36.36, "learning_rate": 7.194630872483222e-05, "loss": 0.0173, "step": 1600 }, { "epoch": 38.64, "learning_rate": 7.140939597315438e-05, "loss": 0.0136, "step": 1700 }, { "epoch": 40.91, "learning_rate": 7.087248322147653e-05, "loss": 0.0116, "step": 1800 }, { "epoch": 43.18, "learning_rate": 7.033557046979866e-05, "loss": 0.01, "step": 1900 }, { "epoch": 45.45, "learning_rate": 6.979865771812081e-05, "loss": 0.0112, "step": 2000 }, { "epoch": 45.45, "eval_accuracy": 0.8446239360778183, "eval_loss": 1.2097970247268677, "eval_runtime": 3.165, "eval_samples_per_second": 252.768, "eval_steps_per_second": 31.596, "step": 2000 }, { "epoch": 47.73, "learning_rate": 6.926174496644296e-05, "loss": 0.009, "step": 2100 }, { "epoch": 50.0, "learning_rate": 6.87248322147651e-05, "loss": 0.0103, "step": 2200 }, { "epoch": 52.27, "learning_rate": 6.818791946308725e-05, "loss": 0.0096, "step": 2300 }, { "epoch": 54.55, "learning_rate": 6.76510067114094e-05, "loss": 0.008, "step": 2400 }, { "epoch": 56.82, "learning_rate": 6.711409395973155e-05, "loss": 0.008, "step": 2500 }, { "epoch": 56.82, "eval_accuracy": 0.8536564182734063, "eval_loss": 1.282847285270691, "eval_runtime": 3.1784, "eval_samples_per_second": 251.698, "eval_steps_per_second": 31.462, "step": 2500 }, { "epoch": 59.09, "learning_rate": 6.65771812080537e-05, "loss": 0.0073, "step": 2600 }, { "epoch": 61.36, "learning_rate": 6.604026845637585e-05, "loss": 0.008, "step": 2700 }, { "epoch": 63.64, "learning_rate": 6.5503355704698e-05, "loss": 0.0076, "step": 2800 }, { "epoch": 65.91, "learning_rate": 6.496644295302014e-05, "loss": 0.0072, "step": 2900 }, { "epoch": 68.18, "learning_rate": 6.442953020134228e-05, "loss": 0.0064, "step": 3000 }, { "epoch": 68.18, "eval_accuracy": 0.8514851485148515, "eval_loss": 1.265517234802246, "eval_runtime": 3.1864, "eval_samples_per_second": 251.068, "eval_steps_per_second": 31.384, "step": 3000 }, { "epoch": 70.45, "learning_rate": 6.389261744966443e-05, "loss": 0.0054, "step": 3100 }, { "epoch": 72.73, "learning_rate": 6.335570469798657e-05, "loss": 0.0047, "step": 3200 }, { "epoch": 75.0, "learning_rate": 6.281879194630872e-05, "loss": 0.0064, "step": 3300 }, { "epoch": 77.27, "learning_rate": 6.228187919463087e-05, "loss": 0.0057, "step": 3400 }, { "epoch": 79.55, "learning_rate": 6.174496644295302e-05, "loss": 0.0064, "step": 3500 }, { "epoch": 79.55, "eval_accuracy": 0.8527879103699844, "eval_loss": 1.328222393989563, "eval_runtime": 3.1799, "eval_samples_per_second": 251.578, "eval_steps_per_second": 31.447, "step": 3500 }, { "epoch": 81.82, "learning_rate": 6.120805369127517e-05, "loss": 0.0055, "step": 3600 }, { "epoch": 84.09, "learning_rate": 6.067114093959732e-05, "loss": 0.0056, "step": 3700 }, { "epoch": 86.36, "learning_rate": 6.013422818791947e-05, "loss": 0.0049, "step": 3800 }, { "epoch": 88.64, "learning_rate": 5.959731543624162e-05, "loss": 0.0035, "step": 3900 }, { "epoch": 90.91, "learning_rate": 5.906040268456377e-05, "loss": 0.0045, "step": 4000 }, { "epoch": 90.91, "eval_accuracy": 0.8500955358693764, "eval_loss": 1.3510583639144897, "eval_runtime": 3.1875, "eval_samples_per_second": 250.981, "eval_steps_per_second": 31.373, "step": 4000 }, { "epoch": 93.18, "learning_rate": 5.8523489932885916e-05, "loss": 0.0038, "step": 4100 }, { "epoch": 95.45, "learning_rate": 5.798657718120806e-05, "loss": 0.0033, "step": 4200 }, { "epoch": 97.73, "learning_rate": 5.7449664429530206e-05, "loss": 0.004, "step": 4300 }, { "epoch": 100.0, "learning_rate": 5.6912751677852354e-05, "loss": 0.0042, "step": 4400 }, { "epoch": 102.27, "learning_rate": 5.63758389261745e-05, "loss": 0.0047, "step": 4500 }, { "epoch": 102.27, "eval_accuracy": 0.8551328817092235, "eval_loss": 1.3701601028442383, "eval_runtime": 3.1715, "eval_samples_per_second": 252.247, "eval_steps_per_second": 31.531, "step": 4500 }, { "epoch": 104.55, "learning_rate": 5.583892617449665e-05, "loss": 0.004, "step": 4600 }, { "epoch": 106.82, "learning_rate": 5.53020134228188e-05, "loss": 0.0031, "step": 4700 }, { "epoch": 109.09, "learning_rate": 5.476510067114095e-05, "loss": 0.0047, "step": 4800 }, { "epoch": 111.36, "learning_rate": 5.4228187919463095e-05, "loss": 0.0028, "step": 4900 }, { "epoch": 113.64, "learning_rate": 5.369127516778524e-05, "loss": 0.0039, "step": 5000 }, { "epoch": 113.64, "eval_accuracy": 0.8559145388223033, "eval_loss": 1.4116061925888062, "eval_runtime": 3.1845, "eval_samples_per_second": 251.218, "eval_steps_per_second": 31.402, "step": 5000 }, { "epoch": 115.91, "learning_rate": 5.315436241610739e-05, "loss": 0.004, "step": 5100 }, { "epoch": 118.18, "learning_rate": 5.261744966442954e-05, "loss": 0.0034, "step": 5200 }, { "epoch": 120.45, "learning_rate": 5.208053691275168e-05, "loss": 0.0027, "step": 5300 }, { "epoch": 122.73, "learning_rate": 5.154362416107383e-05, "loss": 0.0023, "step": 5400 }, { "epoch": 125.0, "learning_rate": 5.100671140939598e-05, "loss": 0.0029, "step": 5500 }, { "epoch": 125.0, "eval_accuracy": 0.8567830467257253, "eval_loss": 1.434157371520996, "eval_runtime": 3.1462, "eval_samples_per_second": 254.277, "eval_steps_per_second": 31.785, "step": 5500 }, { "epoch": 127.27, "learning_rate": 5.0469798657718126e-05, "loss": 0.0027, "step": 5600 }, { "epoch": 129.55, "learning_rate": 4.9932885906040274e-05, "loss": 0.0033, "step": 5700 }, { "epoch": 131.82, "learning_rate": 4.939597315436242e-05, "loss": 0.0029, "step": 5800 }, { "epoch": 134.09, "learning_rate": 4.885906040268457e-05, "loss": 0.0023, "step": 5900 }, { "epoch": 136.36, "learning_rate": 4.832214765100672e-05, "loss": 0.0033, "step": 6000 }, { "epoch": 136.36, "eval_accuracy": 0.854003821434775, "eval_loss": 1.4268622398376465, "eval_runtime": 3.1765, "eval_samples_per_second": 251.849, "eval_steps_per_second": 31.481, "step": 6000 }, { "epoch": 138.64, "learning_rate": 4.779060402684564e-05, "loss": 0.0031, "step": 6100 }, { "epoch": 140.91, "learning_rate": 4.725369127516779e-05, "loss": 0.0031, "step": 6200 }, { "epoch": 143.18, "learning_rate": 4.6716778523489936e-05, "loss": 0.0031, "step": 6300 }, { "epoch": 145.45, "learning_rate": 4.6179865771812084e-05, "loss": 0.0024, "step": 6400 }, { "epoch": 147.73, "learning_rate": 4.564295302013423e-05, "loss": 0.0033, "step": 6500 }, { "epoch": 147.73, "eval_accuracy": 0.853048462741011, "eval_loss": 1.4698809385299683, "eval_runtime": 3.1835, "eval_samples_per_second": 251.299, "eval_steps_per_second": 31.412, "step": 6500 }, { "epoch": 150.0, "learning_rate": 4.510604026845638e-05, "loss": 0.0023, "step": 6600 }, { "epoch": 152.27, "learning_rate": 4.456912751677853e-05, "loss": 0.0022, "step": 6700 }, { "epoch": 154.55, "learning_rate": 4.403221476510068e-05, "loss": 0.0026, "step": 6800 }, { "epoch": 156.82, "learning_rate": 4.3495302013422825e-05, "loss": 0.002, "step": 6900 }, { "epoch": 159.09, "learning_rate": 4.295838926174497e-05, "loss": 0.0019, "step": 7000 }, { "epoch": 159.09, "eval_accuracy": 0.8554802848705924, "eval_loss": 1.4916952848434448, "eval_runtime": 3.1771, "eval_samples_per_second": 251.799, "eval_steps_per_second": 31.475, "step": 7000 }, { "epoch": 161.36, "learning_rate": 4.242147651006712e-05, "loss": 0.0032, "step": 7100 }, { "epoch": 163.64, "learning_rate": 4.188456375838927e-05, "loss": 0.002, "step": 7200 }, { "epoch": 165.91, "learning_rate": 4.134765100671141e-05, "loss": 0.0022, "step": 7300 }, { "epoch": 168.18, "learning_rate": 4.081073825503356e-05, "loss": 0.0016, "step": 7400 }, { "epoch": 170.45, "learning_rate": 4.027382550335571e-05, "loss": 0.0018, "step": 7500 }, { "epoch": 170.45, "eval_accuracy": 0.854872329338197, "eval_loss": 1.4624574184417725, "eval_runtime": 3.1705, "eval_samples_per_second": 252.327, "eval_steps_per_second": 31.541, "step": 7500 }, { "epoch": 172.73, "learning_rate": 3.9736912751677856e-05, "loss": 0.0017, "step": 7600 }, { "epoch": 175.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.0014, "step": 7700 }, { "epoch": 177.27, "learning_rate": 3.866308724832215e-05, "loss": 0.0009, "step": 7800 }, { "epoch": 179.55, "learning_rate": 3.81261744966443e-05, "loss": 0.0014, "step": 7900 }, { "epoch": 181.82, "learning_rate": 3.758926174496645e-05, "loss": 0.001, "step": 8000 }, { "epoch": 181.82, "eval_accuracy": 0.8567830467257253, "eval_loss": 1.5747320652008057, "eval_runtime": 3.1536, "eval_samples_per_second": 253.674, "eval_steps_per_second": 31.709, "step": 8000 }, { "epoch": 181.82, "step": 8000, "total_flos": 4.180403182626816e+16, "train_loss": 0.04628073706757277, "train_runtime": 1159.4207, "train_samples_per_second": 414.0, "train_steps_per_second": 12.937 } ], "max_steps": 15000, "num_train_epochs": 341, "total_flos": 4.180403182626816e+16, "trial_name": null, "trial_params": null }