{ "best_metric": 65.65919749869724, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Vietnamese-VTB/checkpoint-14000", "epoch": 340.90909090909093, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.27, "learning_rate": 7.840000000000001e-05, "loss": 3.3293, "step": 100 }, { "epoch": 4.55, "learning_rate": 7.947382550335571e-05, "loss": 0.8977, "step": 200 }, { "epoch": 6.82, "learning_rate": 7.893691275167786e-05, "loss": 0.4112, "step": 300 }, { "epoch": 9.09, "learning_rate": 7.840536912751678e-05, "loss": 0.2437, "step": 400 }, { "epoch": 11.36, "learning_rate": 7.786845637583893e-05, "loss": 0.1661, "step": 500 }, { "epoch": 11.36, "eval_las": 64.63435817265936, "eval_loss": 3.016636848449707, "eval_runtime": 5.2499, "eval_samples_per_second": 152.385, "eval_steps_per_second": 19.048, "eval_uas": 73.35417752301547, "step": 500 }, { "epoch": 13.64, "learning_rate": 7.733154362416108e-05, "loss": 0.1204, "step": 600 }, { "epoch": 15.91, "learning_rate": 7.679463087248322e-05, "loss": 0.1057, "step": 700 }, { "epoch": 18.18, "learning_rate": 7.625771812080537e-05, "loss": 0.0868, "step": 800 }, { "epoch": 20.45, "learning_rate": 7.572080536912752e-05, "loss": 0.0739, "step": 900 }, { "epoch": 22.73, "learning_rate": 7.518389261744967e-05, "loss": 0.0658, "step": 1000 }, { "epoch": 22.73, "eval_las": 64.39986103873545, "eval_loss": 3.5961220264434814, "eval_runtime": 5.2576, "eval_samples_per_second": 152.161, "eval_steps_per_second": 19.02, "eval_uas": 73.18047594233109, "step": 1000 }, { "epoch": 25.0, "learning_rate": 7.464697986577182e-05, "loss": 0.0632, "step": 1100 }, { "epoch": 27.27, "learning_rate": 7.411006711409397e-05, "loss": 0.0521, "step": 1200 }, { "epoch": 29.55, "learning_rate": 7.357315436241611e-05, "loss": 0.0514, "step": 1300 }, { "epoch": 31.82, "learning_rate": 7.303624161073826e-05, "loss": 0.052, "step": 1400 }, { "epoch": 34.09, "learning_rate": 7.249932885906041e-05, "loss": 0.0438, "step": 1500 }, { "epoch": 34.09, "eval_las": 64.4172311968039, "eval_loss": 3.8819875717163086, "eval_runtime": 5.2578, "eval_samples_per_second": 152.154, "eval_steps_per_second": 19.019, "eval_uas": 72.91992357130451, "step": 1500 }, { "epoch": 36.36, "learning_rate": 7.196241610738256e-05, "loss": 0.0431, "step": 1600 }, { "epoch": 38.64, "learning_rate": 7.142550335570471e-05, "loss": 0.0391, "step": 1700 }, { "epoch": 40.91, "learning_rate": 7.088859060402686e-05, "loss": 0.0365, "step": 1800 }, { "epoch": 43.18, "learning_rate": 7.0351677852349e-05, "loss": 0.0387, "step": 1900 }, { "epoch": 45.45, "learning_rate": 6.981476510067114e-05, "loss": 0.0326, "step": 2000 }, { "epoch": 45.45, "eval_las": 64.50408198714608, "eval_loss": 4.396294116973877, "eval_runtime": 5.2355, "eval_samples_per_second": 152.803, "eval_steps_per_second": 19.1, "eval_uas": 72.88518325516762, "step": 2000 }, { "epoch": 47.73, "learning_rate": 6.927785234899329e-05, "loss": 0.0318, "step": 2100 }, { "epoch": 50.0, "learning_rate": 6.874630872483222e-05, "loss": 0.0287, "step": 2200 }, { "epoch": 52.27, "learning_rate": 6.820939597315437e-05, "loss": 0.0283, "step": 2300 }, { "epoch": 54.55, "learning_rate": 6.767248322147652e-05, "loss": 0.0249, "step": 2400 }, { "epoch": 56.82, "learning_rate": 6.713557046979866e-05, "loss": 0.0291, "step": 2500 }, { "epoch": 56.82, "eval_las": 64.75594927913843, "eval_loss": 4.8261637687683105, "eval_runtime": 5.2311, "eval_samples_per_second": 152.931, "eval_steps_per_second": 19.116, "eval_uas": 73.19784610039952, "step": 2500 }, { "epoch": 59.09, "learning_rate": 6.659865771812081e-05, "loss": 0.0239, "step": 2600 }, { "epoch": 61.36, "learning_rate": 6.606174496644296e-05, "loss": 0.0216, "step": 2700 }, { "epoch": 63.64, "learning_rate": 6.55248322147651e-05, "loss": 0.0233, "step": 2800 }, { "epoch": 65.91, "learning_rate": 6.498791946308724e-05, "loss": 0.022, "step": 2900 }, { "epoch": 68.18, "learning_rate": 6.445100671140939e-05, "loss": 0.0234, "step": 3000 }, { "epoch": 68.18, "eval_las": 64.61698801459093, "eval_loss": 4.922646999359131, "eval_runtime": 5.2324, "eval_samples_per_second": 152.894, "eval_steps_per_second": 19.112, "eval_uas": 73.38023276011812, "step": 3000 }, { "epoch": 70.45, "learning_rate": 6.391409395973154e-05, "loss": 0.0252, "step": 3100 }, { "epoch": 72.73, "learning_rate": 6.337718120805369e-05, "loss": 0.0195, "step": 3200 }, { "epoch": 75.0, "learning_rate": 6.284026845637584e-05, "loss": 0.0196, "step": 3300 }, { "epoch": 77.27, "learning_rate": 6.230335570469799e-05, "loss": 0.0217, "step": 3400 }, { "epoch": 79.55, "learning_rate": 6.176644295302013e-05, "loss": 0.0156, "step": 3500 }, { "epoch": 79.55, "eval_las": 64.50408198714608, "eval_loss": 5.57904052734375, "eval_runtime": 5.2317, "eval_samples_per_second": 152.914, "eval_steps_per_second": 19.114, "eval_uas": 73.46708355046032, "step": 3500 }, { "epoch": 81.82, "learning_rate": 6.122953020134228e-05, "loss": 0.0181, "step": 3600 }, { "epoch": 84.09, "learning_rate": 6.069261744966444e-05, "loss": 0.0175, "step": 3700 }, { "epoch": 86.36, "learning_rate": 6.0155704697986585e-05, "loss": 0.0134, "step": 3800 }, { "epoch": 88.64, "learning_rate": 5.9618791946308734e-05, "loss": 0.0191, "step": 3900 }, { "epoch": 90.91, "learning_rate": 5.9081879194630875e-05, "loss": 0.0157, "step": 4000 }, { "epoch": 90.91, "eval_las": 64.5127670661803, "eval_loss": 5.583916664123535, "eval_runtime": 5.2413, "eval_samples_per_second": 152.634, "eval_steps_per_second": 19.079, "eval_uas": 73.11968038909154, "step": 4000 }, { "epoch": 93.18, "learning_rate": 5.854496644295302e-05, "loss": 0.012, "step": 4100 }, { "epoch": 95.45, "learning_rate": 5.800805369127517e-05, "loss": 0.0145, "step": 4200 }, { "epoch": 97.73, "learning_rate": 5.747114093959732e-05, "loss": 0.0156, "step": 4300 }, { "epoch": 100.0, "learning_rate": 5.693422818791947e-05, "loss": 0.0119, "step": 4400 }, { "epoch": 102.27, "learning_rate": 5.6397315436241616e-05, "loss": 0.0115, "step": 4500 }, { "epoch": 102.27, "eval_las": 64.7820045162411, "eval_loss": 6.002554893493652, "eval_runtime": 5.23, "eval_samples_per_second": 152.965, "eval_steps_per_second": 19.121, "eval_uas": 73.13705054715997, "step": 4500 }, { "epoch": 104.55, "learning_rate": 5.5860402684563764e-05, "loss": 0.0132, "step": 4600 }, { "epoch": 106.82, "learning_rate": 5.532348993288591e-05, "loss": 0.0136, "step": 4700 }, { "epoch": 109.09, "learning_rate": 5.478657718120806e-05, "loss": 0.0154, "step": 4800 }, { "epoch": 111.36, "learning_rate": 5.424966442953021e-05, "loss": 0.0106, "step": 4900 }, { "epoch": 113.64, "learning_rate": 5.371275167785236e-05, "loss": 0.0129, "step": 5000 }, { "epoch": 113.64, "eval_las": 64.47802675004343, "eval_loss": 5.8163933753967285, "eval_runtime": 5.2362, "eval_samples_per_second": 152.783, "eval_steps_per_second": 19.098, "eval_uas": 73.414973076255, "step": 5000 }, { "epoch": 115.91, "learning_rate": 5.31758389261745e-05, "loss": 0.0106, "step": 5100 }, { "epoch": 118.18, "learning_rate": 5.263892617449665e-05, "loss": 0.008, "step": 5200 }, { "epoch": 120.45, "learning_rate": 5.2102013422818795e-05, "loss": 0.009, "step": 5300 }, { "epoch": 122.73, "learning_rate": 5.1565100671140944e-05, "loss": 0.0111, "step": 5400 }, { "epoch": 125.0, "learning_rate": 5.102818791946309e-05, "loss": 0.0094, "step": 5500 }, { "epoch": 125.0, "eval_las": 64.67778356783047, "eval_loss": 5.964992523193359, "eval_runtime": 5.2463, "eval_samples_per_second": 152.489, "eval_steps_per_second": 19.061, "eval_uas": 73.24127149557062, "step": 5500 }, { "epoch": 127.27, "learning_rate": 5.049127516778524e-05, "loss": 0.0101, "step": 5600 }, { "epoch": 129.55, "learning_rate": 4.995436241610739e-05, "loss": 0.0086, "step": 5700 }, { "epoch": 131.82, "learning_rate": 4.941744966442954e-05, "loss": 0.0081, "step": 5800 }, { "epoch": 134.09, "learning_rate": 4.8880536912751685e-05, "loss": 0.0087, "step": 5900 }, { "epoch": 136.36, "learning_rate": 4.834362416107383e-05, "loss": 0.0067, "step": 6000 }, { "epoch": 136.36, "eval_las": 64.93833593885704, "eval_loss": 6.23219108581543, "eval_runtime": 5.2405, "eval_samples_per_second": 152.658, "eval_steps_per_second": 19.082, "eval_uas": 73.22390133750217, "step": 6000 }, { "epoch": 138.64, "learning_rate": 4.780671140939598e-05, "loss": 0.0083, "step": 6100 }, { "epoch": 140.91, "learning_rate": 4.726979865771813e-05, "loss": 0.0104, "step": 6200 }, { "epoch": 143.18, "learning_rate": 4.673288590604027e-05, "loss": 0.0056, "step": 6300 }, { "epoch": 145.45, "learning_rate": 4.619597315436242e-05, "loss": 0.0054, "step": 6400 }, { "epoch": 147.73, "learning_rate": 4.565906040268457e-05, "loss": 0.0079, "step": 6500 }, { "epoch": 147.73, "eval_las": 64.39986103873545, "eval_loss": 6.42156982421875, "eval_runtime": 5.2298, "eval_samples_per_second": 152.969, "eval_steps_per_second": 19.121, "eval_uas": 72.78096230675699, "step": 6500 }, { "epoch": 150.0, "learning_rate": 4.5122147651006716e-05, "loss": 0.0065, "step": 6600 }, { "epoch": 152.27, "learning_rate": 4.4585234899328864e-05, "loss": 0.007, "step": 6700 }, { "epoch": 154.55, "learning_rate": 4.404832214765101e-05, "loss": 0.0073, "step": 6800 }, { "epoch": 156.82, "learning_rate": 4.351140939597316e-05, "loss": 0.0044, "step": 6900 }, { "epoch": 159.09, "learning_rate": 4.297449664429531e-05, "loss": 0.0052, "step": 7000 }, { "epoch": 159.09, "eval_las": 64.4172311968039, "eval_loss": 6.679661750793457, "eval_runtime": 5.2549, "eval_samples_per_second": 152.237, "eval_steps_per_second": 19.03, "eval_uas": 72.98940420357826, "step": 7000 }, { "epoch": 161.36, "learning_rate": 4.243758389261746e-05, "loss": 0.0061, "step": 7100 }, { "epoch": 163.64, "learning_rate": 4.1900671140939605e-05, "loss": 0.0089, "step": 7200 }, { "epoch": 165.91, "learning_rate": 4.1363758389261754e-05, "loss": 0.0046, "step": 7300 }, { "epoch": 168.18, "learning_rate": 4.0826845637583895e-05, "loss": 0.0052, "step": 7400 }, { "epoch": 170.45, "learning_rate": 4.028993288590604e-05, "loss": 0.0047, "step": 7500 }, { "epoch": 170.45, "eval_las": 64.67778356783047, "eval_loss": 6.556234836578369, "eval_runtime": 5.2573, "eval_samples_per_second": 152.17, "eval_steps_per_second": 19.021, "eval_uas": 73.36286260204969, "step": 7500 }, { "epoch": 172.73, "learning_rate": 3.975302013422819e-05, "loss": 0.0047, "step": 7600 }, { "epoch": 175.0, "learning_rate": 3.921610738255034e-05, "loss": 0.0048, "step": 7700 }, { "epoch": 177.27, "learning_rate": 3.867919463087249e-05, "loss": 0.0047, "step": 7800 }, { "epoch": 179.55, "learning_rate": 3.8142281879194636e-05, "loss": 0.0047, "step": 7900 }, { "epoch": 181.82, "learning_rate": 3.7605369127516784e-05, "loss": 0.005, "step": 8000 }, { "epoch": 181.82, "eval_las": 64.84280006948063, "eval_loss": 6.436727523803711, "eval_runtime": 5.2395, "eval_samples_per_second": 152.686, "eval_steps_per_second": 19.086, "eval_uas": 73.31075212784435, "step": 8000 }, { "epoch": 184.09, "learning_rate": 3.706845637583893e-05, "loss": 0.0032, "step": 8100 }, { "epoch": 186.36, "learning_rate": 3.653154362416108e-05, "loss": 0.0044, "step": 8200 }, { "epoch": 188.64, "learning_rate": 3.599463087248322e-05, "loss": 0.0066, "step": 8300 }, { "epoch": 190.91, "learning_rate": 3.545771812080537e-05, "loss": 0.0024, "step": 8400 }, { "epoch": 193.18, "learning_rate": 3.492080536912752e-05, "loss": 0.004, "step": 8500 }, { "epoch": 193.18, "eval_las": 64.98176133402815, "eval_loss": 6.600097179412842, "eval_runtime": 5.2414, "eval_samples_per_second": 152.632, "eval_steps_per_second": 19.079, "eval_uas": 73.71026576341845, "step": 8500 }, { "epoch": 195.45, "learning_rate": 3.438389261744967e-05, "loss": 0.0021, "step": 8600 }, { "epoch": 197.73, "learning_rate": 3.3846979865771815e-05, "loss": 0.0035, "step": 8700 }, { "epoch": 200.0, "learning_rate": 3.3310067114093964e-05, "loss": 0.0033, "step": 8800 }, { "epoch": 202.27, "learning_rate": 3.277315436241611e-05, "loss": 0.0054, "step": 8900 }, { "epoch": 204.55, "learning_rate": 3.223624161073826e-05, "loss": 0.0033, "step": 9000 }, { "epoch": 204.55, "eval_las": 64.95570609692548, "eval_loss": 6.6512603759765625, "eval_runtime": 5.2201, "eval_samples_per_second": 153.255, "eval_steps_per_second": 19.157, "eval_uas": 73.4583984714261, "step": 9000 }, { "epoch": 206.82, "learning_rate": 3.169932885906041e-05, "loss": 0.0044, "step": 9100 }, { "epoch": 209.09, "learning_rate": 3.1162416107382557e-05, "loss": 0.002, "step": 9200 }, { "epoch": 211.36, "learning_rate": 3.0630872483221477e-05, "loss": 0.0018, "step": 9300 }, { "epoch": 213.64, "learning_rate": 3.0093959731543628e-05, "loss": 0.0022, "step": 9400 }, { "epoch": 215.91, "learning_rate": 2.9557046979865776e-05, "loss": 0.0023, "step": 9500 }, { "epoch": 215.91, "eval_las": 64.58224769845405, "eval_loss": 7.292083740234375, "eval_runtime": 5.2437, "eval_samples_per_second": 152.565, "eval_steps_per_second": 19.071, "eval_uas": 72.99808928261248, "step": 9500 }, { "epoch": 218.18, "learning_rate": 2.9020134228187925e-05, "loss": 0.0021, "step": 9600 }, { "epoch": 220.45, "learning_rate": 2.848322147651007e-05, "loss": 0.0025, "step": 9700 }, { "epoch": 222.73, "learning_rate": 2.7946308724832218e-05, "loss": 0.002, "step": 9800 }, { "epoch": 225.0, "learning_rate": 2.7409395973154366e-05, "loss": 0.0015, "step": 9900 }, { "epoch": 227.27, "learning_rate": 2.6872483221476514e-05, "loss": 0.0019, "step": 10000 }, { "epoch": 227.27, "eval_las": 65.11203751954143, "eval_loss": 7.293988227844238, "eval_runtime": 5.243, "eval_samples_per_second": 152.584, "eval_steps_per_second": 19.073, "eval_uas": 73.39760291818655, "step": 10000 }, { "epoch": 229.55, "learning_rate": 2.6335570469798663e-05, "loss": 0.0014, "step": 10100 }, { "epoch": 231.82, "learning_rate": 2.5798657718120804e-05, "loss": 0.0014, "step": 10200 }, { "epoch": 234.09, "learning_rate": 2.5261744966442952e-05, "loss": 0.0024, "step": 10300 }, { "epoch": 236.36, "learning_rate": 2.47248322147651e-05, "loss": 0.0013, "step": 10400 }, { "epoch": 238.64, "learning_rate": 2.4187919463087252e-05, "loss": 0.0015, "step": 10500 }, { "epoch": 238.64, "eval_las": 65.39864512767066, "eval_loss": 7.597438335418701, "eval_runtime": 5.2412, "eval_samples_per_second": 152.636, "eval_steps_per_second": 19.08, "eval_uas": 73.75369115858955, "step": 10500 }, { "epoch": 240.91, "learning_rate": 2.36510067114094e-05, "loss": 0.0015, "step": 10600 }, { "epoch": 243.18, "learning_rate": 2.311409395973155e-05, "loss": 0.0013, "step": 10700 }, { "epoch": 245.45, "learning_rate": 2.257718120805369e-05, "loss": 0.0015, "step": 10800 }, { "epoch": 247.73, "learning_rate": 2.204026845637584e-05, "loss": 0.0017, "step": 10900 }, { "epoch": 250.0, "learning_rate": 2.1503355704697987e-05, "loss": 0.0021, "step": 11000 }, { "epoch": 250.0, "eval_las": 64.95570609692548, "eval_loss": 7.345833778381348, "eval_runtime": 5.2465, "eval_samples_per_second": 152.483, "eval_steps_per_second": 19.06, "eval_uas": 73.4583984714261, "step": 11000 }, { "epoch": 252.27, "learning_rate": 2.0966442953020135e-05, "loss": 0.0009, "step": 11100 }, { "epoch": 254.55, "learning_rate": 2.0429530201342283e-05, "loss": 0.0009, "step": 11200 }, { "epoch": 256.82, "learning_rate": 1.989261744966443e-05, "loss": 0.0008, "step": 11300 }, { "epoch": 259.09, "learning_rate": 1.935570469798658e-05, "loss": 0.0011, "step": 11400 }, { "epoch": 261.36, "learning_rate": 1.8818791946308724e-05, "loss": 0.0024, "step": 11500 }, { "epoch": 261.36, "eval_las": 65.38996004863644, "eval_loss": 7.281766414642334, "eval_runtime": 5.2427, "eval_samples_per_second": 152.593, "eval_steps_per_second": 19.074, "eval_uas": 73.80580163279485, "step": 11500 }, { "epoch": 263.64, "learning_rate": 1.8281879194630873e-05, "loss": 0.0012, "step": 11600 }, { "epoch": 265.91, "learning_rate": 1.774496644295302e-05, "loss": 0.0018, "step": 11700 }, { "epoch": 268.18, "learning_rate": 1.720805369127517e-05, "loss": 0.0011, "step": 11800 }, { "epoch": 270.45, "learning_rate": 1.6671140939597317e-05, "loss": 0.0008, "step": 11900 }, { "epoch": 272.73, "learning_rate": 1.6134228187919466e-05, "loss": 0.0033, "step": 12000 }, { "epoch": 272.73, "eval_las": 65.47681083897864, "eval_loss": 7.280235767364502, "eval_runtime": 5.2413, "eval_samples_per_second": 152.633, "eval_steps_per_second": 19.079, "eval_uas": 73.65815528921314, "step": 12000 }, { "epoch": 275.0, "learning_rate": 1.559731543624161e-05, "loss": 0.0007, "step": 12100 }, { "epoch": 277.27, "learning_rate": 1.506040268456376e-05, "loss": 0.0003, "step": 12200 }, { "epoch": 279.55, "learning_rate": 1.4523489932885909e-05, "loss": 0.0008, "step": 12300 }, { "epoch": 281.82, "learning_rate": 1.3986577181208053e-05, "loss": 0.0009, "step": 12400 }, { "epoch": 284.09, "learning_rate": 1.3449664429530202e-05, "loss": 0.0005, "step": 12500 }, { "epoch": 284.09, "eval_las": 65.32916449539691, "eval_loss": 7.4806389808654785, "eval_runtime": 5.2418, "eval_samples_per_second": 152.62, "eval_steps_per_second": 19.078, "eval_uas": 73.71026576341845, "step": 12500 }, { "epoch": 286.36, "learning_rate": 1.2912751677852352e-05, "loss": 0.0005, "step": 12600 }, { "epoch": 288.64, "learning_rate": 1.2375838926174497e-05, "loss": 0.0004, "step": 12700 }, { "epoch": 290.91, "learning_rate": 1.1838926174496645e-05, "loss": 0.0005, "step": 12800 }, { "epoch": 293.18, "learning_rate": 1.1302013422818795e-05, "loss": 0.0006, "step": 12900 }, { "epoch": 295.45, "learning_rate": 1.076510067114094e-05, "loss": 0.0005, "step": 13000 }, { "epoch": 295.45, "eval_las": 65.25968386312316, "eval_loss": 7.441517353057861, "eval_runtime": 5.2481, "eval_samples_per_second": 152.435, "eval_steps_per_second": 19.054, "eval_uas": 73.53656418273407, "step": 13000 }, { "epoch": 297.73, "learning_rate": 1.0228187919463088e-05, "loss": 0.0005, "step": 13100 }, { "epoch": 300.0, "learning_rate": 9.691275167785236e-06, "loss": 0.0002, "step": 13200 }, { "epoch": 302.27, "learning_rate": 9.154362416107383e-06, "loss": 0.0002, "step": 13300 }, { "epoch": 304.55, "learning_rate": 8.617449664429531e-06, "loss": 0.0003, "step": 13400 }, { "epoch": 306.82, "learning_rate": 8.080536912751679e-06, "loss": 0.0003, "step": 13500 }, { "epoch": 306.82, "eval_las": 65.45075560187598, "eval_loss": 7.729467868804932, "eval_runtime": 5.2488, "eval_samples_per_second": 152.416, "eval_steps_per_second": 19.052, "eval_uas": 73.65815528921314, "step": 13500 }, { "epoch": 309.09, "learning_rate": 7.543624161073826e-06, "loss": 0.0002, "step": 13600 }, { "epoch": 311.36, "learning_rate": 7.006711409395974e-06, "loss": 0.0003, "step": 13700 }, { "epoch": 313.64, "learning_rate": 6.469798657718121e-06, "loss": 0.0003, "step": 13800 }, { "epoch": 315.91, "learning_rate": 5.932885906040269e-06, "loss": 0.0006, "step": 13900 }, { "epoch": 318.18, "learning_rate": 5.395973154362416e-06, "loss": 0.0002, "step": 14000 }, { "epoch": 318.18, "eval_las": 65.65919749869724, "eval_loss": 7.587600231170654, "eval_runtime": 5.2373, "eval_samples_per_second": 152.751, "eval_steps_per_second": 19.094, "eval_uas": 73.95344797637658, "step": 14000 }, { "epoch": 320.45, "learning_rate": 4.859060402684564e-06, "loss": 0.0002, "step": 14100 }, { "epoch": 322.73, "learning_rate": 4.322147651006712e-06, "loss": 0.0004, "step": 14200 }, { "epoch": 325.0, "learning_rate": 3.7852348993288595e-06, "loss": 0.0003, "step": 14300 }, { "epoch": 327.27, "learning_rate": 3.248322147651007e-06, "loss": 0.0002, "step": 14400 }, { "epoch": 329.55, "learning_rate": 2.7114093959731548e-06, "loss": 0.0004, "step": 14500 }, { "epoch": 329.55, "eval_las": 65.58971686642349, "eval_loss": 7.770998477935791, "eval_runtime": 5.2451, "eval_samples_per_second": 152.522, "eval_steps_per_second": 19.065, "eval_uas": 73.80580163279485, "step": 14500 }, { "epoch": 331.82, "learning_rate": 2.174496644295302e-06, "loss": 0.0002, "step": 14600 }, { "epoch": 334.09, "learning_rate": 1.6375838926174498e-06, "loss": 0.0002, "step": 14700 }, { "epoch": 336.36, "learning_rate": 1.1006711409395974e-06, "loss": 0.0002, "step": 14800 }, { "epoch": 338.64, "learning_rate": 5.63758389261745e-07, "loss": 0.0002, "step": 14900 }, { "epoch": 340.91, "learning_rate": 2.6845637583892618e-08, "loss": 0.0002, "step": 15000 }, { "epoch": 340.91, "eval_las": 65.53760639221818, "eval_loss": 7.7758870124816895, "eval_runtime": 5.264, "eval_samples_per_second": 151.977, "eval_steps_per_second": 18.997, "eval_uas": 73.83185686989752, "step": 15000 }, { "epoch": 340.91, "step": 15000, "total_flos": 7.969472217071616e+16, "train_loss": 0.04560325266600897, "train_runtime": 8075.9674, "train_samples_per_second": 59.436, "train_steps_per_second": 1.857 } ], "max_steps": 15000, "num_train_epochs": 341, "total_flos": 7.969472217071616e+16, "trial_name": null, "trial_params": null }