|
{ |
|
"best_metric": 74.99219127126105, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Hindi-HDTB/checkpoint-15000", |
|
"epoch": 36.05769230769231, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.52e-05, |
|
"loss": 4.0483, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.949530201342283e-05, |
|
"loss": 1.7391, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.895838926174497e-05, |
|
"loss": 1.4589, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.842147651006712e-05, |
|
"loss": 1.2986, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.788456375838927e-05, |
|
"loss": 1.1463, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_las": 68.06655876423319, |
|
"eval_loss": 1.2132718563079834, |
|
"eval_runtime": 11.8598, |
|
"eval_samples_per_second": 139.885, |
|
"eval_steps_per_second": 17.538, |
|
"eval_uas": 77.91691512621746, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.734765100671142e-05, |
|
"loss": 1.1162, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.681073825503357e-05, |
|
"loss": 1.0738, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.627382550335572e-05, |
|
"loss": 1.0485, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.573691275167786e-05, |
|
"loss": 0.9272, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.52e-05, |
|
"loss": 0.901, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_las": 71.78635318170203, |
|
"eval_loss": 1.053443193435669, |
|
"eval_runtime": 11.8478, |
|
"eval_samples_per_second": 140.026, |
|
"eval_steps_per_second": 17.556, |
|
"eval_uas": 80.90410881108555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.466308724832215e-05, |
|
"loss": 0.8841, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.41261744966443e-05, |
|
"loss": 0.8871, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.358926174496644e-05, |
|
"loss": 0.7905, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 7.305234899328859e-05, |
|
"loss": 0.7284, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 7.251543624161074e-05, |
|
"loss": 0.7392, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_las": 73.2231592696709, |
|
"eval_loss": 1.0388336181640625, |
|
"eval_runtime": 11.8527, |
|
"eval_samples_per_second": 139.968, |
|
"eval_steps_per_second": 17.549, |
|
"eval_uas": 81.9320214669052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.197852348993289e-05, |
|
"loss": 0.7587, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 7.144161073825504e-05, |
|
"loss": 0.6997, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 7.090469798657718e-05, |
|
"loss": 0.5892, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.036778523489933e-05, |
|
"loss": 0.6111, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 6.983087248322148e-05, |
|
"loss": 0.6103, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_las": 73.0783428457847, |
|
"eval_loss": 1.0303574800491333, |
|
"eval_runtime": 11.8592, |
|
"eval_samples_per_second": 139.892, |
|
"eval_steps_per_second": 17.539, |
|
"eval_uas": 81.92350285373541, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 6.929395973154363e-05, |
|
"loss": 0.6054, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 6.875704697986578e-05, |
|
"loss": 0.4648, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 6.822013422818793e-05, |
|
"loss": 0.4894, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 6.768322147651007e-05, |
|
"loss": 0.5087, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 6.714630872483222e-05, |
|
"loss": 0.5131, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_las": 73.69452253173183, |
|
"eval_loss": 1.1480714082717896, |
|
"eval_runtime": 11.8558, |
|
"eval_samples_per_second": 139.932, |
|
"eval_steps_per_second": 17.544, |
|
"eval_uas": 82.44881733253825, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 6.660939597315437e-05, |
|
"loss": 0.3686, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 6.607248322147652e-05, |
|
"loss": 0.381, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 6.553557046979867e-05, |
|
"loss": 0.408, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 6.499865771812081e-05, |
|
"loss": 0.4176, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 6.446174496644296e-05, |
|
"loss": 0.3048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_las": 73.29698725047562, |
|
"eval_loss": 1.3656591176986694, |
|
"eval_runtime": 11.8603, |
|
"eval_samples_per_second": 139.878, |
|
"eval_steps_per_second": 17.537, |
|
"eval_uas": 82.40338472896612, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 6.392483221476511e-05, |
|
"loss": 0.2996, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 6.338791946308726e-05, |
|
"loss": 0.3191, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 6.28510067114094e-05, |
|
"loss": 0.3222, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 6.231409395973154e-05, |
|
"loss": 0.2624, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 6.177718120805369e-05, |
|
"loss": 0.2485, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_las": 73.33106170315473, |
|
"eval_loss": 1.4516910314559937, |
|
"eval_runtime": 11.8613, |
|
"eval_samples_per_second": 139.867, |
|
"eval_steps_per_second": 17.536, |
|
"eval_uas": 82.43461964392196, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.124026845637584e-05, |
|
"loss": 0.247, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 6.070335570469799e-05, |
|
"loss": 0.269, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 6.0166442953020136e-05, |
|
"loss": 0.2213, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 5.962953020134229e-05, |
|
"loss": 0.1917, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.909261744966444e-05, |
|
"loss": 0.2013, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_las": 73.58945963597127, |
|
"eval_loss": 1.5729166269302368, |
|
"eval_runtime": 11.8551, |
|
"eval_samples_per_second": 139.94, |
|
"eval_steps_per_second": 17.545, |
|
"eval_uas": 82.28980322003578, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 5.855570469798659e-05, |
|
"loss": 0.2064, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.8018791946308735e-05, |
|
"loss": 0.1845, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 5.7481879194630884e-05, |
|
"loss": 0.1576, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 5.694496644295303e-05, |
|
"loss": 0.1647, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 5.6408053691275166e-05, |
|
"loss": 0.17, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_las": 73.72859698441094, |
|
"eval_loss": 1.6659384965896606, |
|
"eval_runtime": 11.8486, |
|
"eval_samples_per_second": 140.016, |
|
"eval_steps_per_second": 17.555, |
|
"eval_uas": 82.71289434080131, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 5.5871140939597315e-05, |
|
"loss": 0.1642, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 5.533422818791946e-05, |
|
"loss": 0.1259, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 5.479731543624161e-05, |
|
"loss": 0.1365, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 5.426040268456376e-05, |
|
"loss": 0.1398, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 5.372348993288591e-05, |
|
"loss": 0.1447, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"eval_las": 74.01539029446006, |
|
"eval_loss": 1.8783390522003174, |
|
"eval_runtime": 11.8594, |
|
"eval_samples_per_second": 139.889, |
|
"eval_steps_per_second": 17.539, |
|
"eval_uas": 82.87474799102706, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 5.3186577181208056e-05, |
|
"loss": 0.1091, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 5.2649664429530204e-05, |
|
"loss": 0.1146, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 5.211275167785235e-05, |
|
"loss": 0.1186, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 5.15758389261745e-05, |
|
"loss": 0.1171, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 5.1038926174496656e-05, |
|
"loss": 0.0908, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"eval_las": 73.81094357838543, |
|
"eval_loss": 2.0186927318573, |
|
"eval_runtime": 11.8461, |
|
"eval_samples_per_second": 140.046, |
|
"eval_steps_per_second": 17.558, |
|
"eval_uas": 82.64758497316636, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 5.050201342281879e-05, |
|
"loss": 0.0969, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 4.996510067114094e-05, |
|
"loss": 0.099, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 4.942818791946309e-05, |
|
"loss": 0.0998, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.8891275167785235e-05, |
|
"loss": 0.078, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 4.835436241610738e-05, |
|
"loss": 0.0827, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"eval_las": 74.11761365249737, |
|
"eval_loss": 2.0184013843536377, |
|
"eval_runtime": 11.8521, |
|
"eval_samples_per_second": 139.975, |
|
"eval_steps_per_second": 17.55, |
|
"eval_uas": 82.9343782832155, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 4.781744966442953e-05, |
|
"loss": 0.084, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 4.728053691275168e-05, |
|
"loss": 0.0891, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 4.674362416107383e-05, |
|
"loss": 0.076, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 4.6206711409395976e-05, |
|
"loss": 0.0747, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 4.5669798657718125e-05, |
|
"loss": 0.0738, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_las": 74.12045319022063, |
|
"eval_loss": 2.130760431289673, |
|
"eval_runtime": 11.8527, |
|
"eval_samples_per_second": 139.968, |
|
"eval_steps_per_second": 17.549, |
|
"eval_uas": 82.88042706647359, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 4.513288590604027e-05, |
|
"loss": 0.0725, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 4.459597315436242e-05, |
|
"loss": 0.0669, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 4.405906040268456e-05, |
|
"loss": 0.0632, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 4.352214765100671e-05, |
|
"loss": 0.0649, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 4.298523489932886e-05, |
|
"loss": 0.0667, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"eval_las": 74.06650197347872, |
|
"eval_loss": 2.201524019241333, |
|
"eval_runtime": 11.8493, |
|
"eval_samples_per_second": 140.008, |
|
"eval_steps_per_second": 17.554, |
|
"eval_uas": 82.75548740665019, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 4.244832214765101e-05, |
|
"loss": 0.0607, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 4.1911409395973156e-05, |
|
"loss": 0.0532, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 4.1374496644295304e-05, |
|
"loss": 0.0544, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 4.083758389261745e-05, |
|
"loss": 0.0558, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 4.03006711409396e-05, |
|
"loss": 0.0537, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_las": 74.13465087883692, |
|
"eval_loss": 2.413492202758789, |
|
"eval_runtime": 11.8547, |
|
"eval_samples_per_second": 139.944, |
|
"eval_steps_per_second": 17.546, |
|
"eval_uas": 82.8719084533038, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 3.976375838926175e-05, |
|
"loss": 0.0462, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 3.92268456375839e-05, |
|
"loss": 0.0484, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 3.8689932885906045e-05, |
|
"loss": 0.0477, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 3.815302013422819e-05, |
|
"loss": 0.0527, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 3.761610738255034e-05, |
|
"loss": 0.0394, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_las": 74.10625550160434, |
|
"eval_loss": 2.4247446060180664, |
|
"eval_runtime": 11.8548, |
|
"eval_samples_per_second": 139.943, |
|
"eval_steps_per_second": 17.546, |
|
"eval_uas": 82.85203168924099, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 3.707919463087249e-05, |
|
"loss": 0.0407, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 3.654228187919463e-05, |
|
"loss": 0.045, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 3.600536912751678e-05, |
|
"loss": 0.044, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 3.546845637583893e-05, |
|
"loss": 0.0336, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 3.4931543624161076e-05, |
|
"loss": 0.0358, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"eval_las": 74.19996024647187, |
|
"eval_loss": 2.439199209213257, |
|
"eval_runtime": 11.8496, |
|
"eval_samples_per_second": 140.005, |
|
"eval_steps_per_second": 17.553, |
|
"eval_uas": 83.01672487719, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 20.67, |
|
"learning_rate": 3.4394630872483224e-05, |
|
"loss": 0.0352, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 3.385771812080537e-05, |
|
"loss": 0.038, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 3.332080536912752e-05, |
|
"loss": 0.0314, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"learning_rate": 3.278389261744967e-05, |
|
"loss": 0.0323, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 3.224697986577182e-05, |
|
"loss": 0.0293, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"eval_las": 74.28230684044638, |
|
"eval_loss": 2.6793813705444336, |
|
"eval_runtime": 11.8495, |
|
"eval_samples_per_second": 140.006, |
|
"eval_steps_per_second": 17.553, |
|
"eval_uas": 82.96277366044808, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"learning_rate": 3.1710067114093965e-05, |
|
"loss": 0.0332, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 3.1173154362416114e-05, |
|
"loss": 0.0302, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 22.36, |
|
"learning_rate": 3.0636241610738255e-05, |
|
"loss": 0.0268, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 3.0099328859060403e-05, |
|
"loss": 0.0273, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"learning_rate": 2.956241610738255e-05, |
|
"loss": 0.0275, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"eval_las": 74.25391146321378, |
|
"eval_loss": 2.634089231491089, |
|
"eval_runtime": 11.8524, |
|
"eval_samples_per_second": 139.972, |
|
"eval_steps_per_second": 17.549, |
|
"eval_uas": 82.99400857540392, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 2.90255033557047e-05, |
|
"loss": 0.0277, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 2.8488590604026848e-05, |
|
"loss": 0.0245, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 2.7951677852348996e-05, |
|
"loss": 0.0228, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 2.741476510067114e-05, |
|
"loss": 0.0242, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 2.687785234899329e-05, |
|
"loss": 0.0235, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"eval_las": 74.29366499133941, |
|
"eval_loss": 2.7188923358917236, |
|
"eval_runtime": 11.8499, |
|
"eval_samples_per_second": 140.001, |
|
"eval_steps_per_second": 17.553, |
|
"eval_uas": 83.09339239571798, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 2.6340939597315438e-05, |
|
"loss": 0.0207, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 2.5804026845637586e-05, |
|
"loss": 0.0204, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"learning_rate": 2.5267114093959734e-05, |
|
"loss": 0.0239, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2.4730201342281882e-05, |
|
"loss": 0.0218, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 2.4193288590604027e-05, |
|
"loss": 0.0167, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"eval_las": 74.49243263196752, |
|
"eval_loss": 2.821323871612549, |
|
"eval_runtime": 11.852, |
|
"eval_samples_per_second": 139.976, |
|
"eval_steps_per_second": 17.55, |
|
"eval_uas": 83.17005991424597, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 2.3656375838926175e-05, |
|
"loss": 0.0189, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 2.3119463087248324e-05, |
|
"loss": 0.0189, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"learning_rate": 2.2582550335570472e-05, |
|
"loss": 0.0202, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 2.204563758389262e-05, |
|
"loss": 0.0161, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"learning_rate": 2.150872483221477e-05, |
|
"loss": 0.0157, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"eval_las": 74.57193968821876, |
|
"eval_loss": 2.849531650543213, |
|
"eval_runtime": 11.8513, |
|
"eval_samples_per_second": 139.985, |
|
"eval_steps_per_second": 17.551, |
|
"eval_uas": 83.22117159326461, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 26.68, |
|
"learning_rate": 2.0971812080536913e-05, |
|
"loss": 0.0166, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 2.043489932885906e-05, |
|
"loss": 0.0156, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"learning_rate": 1.989798657718121e-05, |
|
"loss": 0.0143, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 27.4, |
|
"learning_rate": 1.9361073825503358e-05, |
|
"loss": 0.0141, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"learning_rate": 1.8824161073825503e-05, |
|
"loss": 0.014, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"eval_las": 74.45267910384189, |
|
"eval_loss": 2.8970541954040527, |
|
"eval_runtime": 11.8508, |
|
"eval_samples_per_second": 139.991, |
|
"eval_steps_per_second": 17.552, |
|
"eval_uas": 83.05931794303886, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 27.88, |
|
"learning_rate": 1.828724832214765e-05, |
|
"loss": 0.0135, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 1.77503355704698e-05, |
|
"loss": 0.0124, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 1.7213422818791948e-05, |
|
"loss": 0.0112, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 28.61, |
|
"learning_rate": 1.6676510067114096e-05, |
|
"loss": 0.0115, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"learning_rate": 1.6139597315436244e-05, |
|
"loss": 0.0104, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 28.85, |
|
"eval_las": 74.57761876366527, |
|
"eval_loss": 2.9912426471710205, |
|
"eval_runtime": 11.851, |
|
"eval_samples_per_second": 139.988, |
|
"eval_steps_per_second": 17.551, |
|
"eval_uas": 83.14734361245989, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"learning_rate": 1.560268456375839e-05, |
|
"loss": 0.0106, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 1.5071140939597317e-05, |
|
"loss": 0.0101, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 1.4534228187919464e-05, |
|
"loss": 0.0096, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 29.81, |
|
"learning_rate": 1.3997315436241612e-05, |
|
"loss": 0.0093, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 30.05, |
|
"learning_rate": 1.346040268456376e-05, |
|
"loss": 0.0093, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 30.05, |
|
"eval_las": 74.54922338643269, |
|
"eval_loss": 3.005786418914795, |
|
"eval_runtime": 11.8568, |
|
"eval_samples_per_second": 139.92, |
|
"eval_steps_per_second": 17.543, |
|
"eval_uas": 83.15586222562966, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 30.29, |
|
"learning_rate": 1.2923489932885907e-05, |
|
"loss": 0.0084, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 1.2386577181208055e-05, |
|
"loss": 0.0088, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 1.1849664429530204e-05, |
|
"loss": 0.0075, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 1.131275167785235e-05, |
|
"loss": 0.0094, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 1.0775838926174498e-05, |
|
"loss": 0.0071, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"eval_las": 74.84453530965159, |
|
"eval_loss": 3.0800094604492188, |
|
"eval_runtime": 11.8601, |
|
"eval_samples_per_second": 139.881, |
|
"eval_steps_per_second": 17.538, |
|
"eval_uas": 83.40574154527643, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 31.49, |
|
"learning_rate": 1.0238926174496647e-05, |
|
"loss": 0.0065, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 31.73, |
|
"learning_rate": 9.702013422818793e-06, |
|
"loss": 0.0067, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 31.97, |
|
"learning_rate": 9.165100671140941e-06, |
|
"loss": 0.0069, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 8.628187919463088e-06, |
|
"loss": 0.0056, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 8.091275167785234e-06, |
|
"loss": 0.0052, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"eval_las": 74.84453530965159, |
|
"eval_loss": 3.167156219482422, |
|
"eval_runtime": 11.8563, |
|
"eval_samples_per_second": 139.926, |
|
"eval_steps_per_second": 17.543, |
|
"eval_uas": 83.41993923389272, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 32.69, |
|
"learning_rate": 7.5543624161073835e-06, |
|
"loss": 0.0057, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 32.93, |
|
"learning_rate": 7.017449664429531e-06, |
|
"loss": 0.0057, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 33.17, |
|
"learning_rate": 6.480536912751678e-06, |
|
"loss": 0.0054, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 33.41, |
|
"learning_rate": 5.943624161073826e-06, |
|
"loss": 0.0048, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"learning_rate": 5.406711409395974e-06, |
|
"loss": 0.0054, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 33.65, |
|
"eval_las": 74.90132606411676, |
|
"eval_loss": 3.178938627243042, |
|
"eval_runtime": 11.8514, |
|
"eval_samples_per_second": 139.983, |
|
"eval_steps_per_second": 17.551, |
|
"eval_uas": 83.49376721469744, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 33.89, |
|
"learning_rate": 4.869798657718121e-06, |
|
"loss": 0.0049, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 4.332885906040269e-06, |
|
"loss": 0.0039, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 34.38, |
|
"learning_rate": 3.795973154362416e-06, |
|
"loss": 0.005, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 34.62, |
|
"learning_rate": 3.259060402684564e-06, |
|
"loss": 0.004, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"learning_rate": 2.722147651006712e-06, |
|
"loss": 0.0042, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"eval_las": 74.9325609790726, |
|
"eval_loss": 3.2010536193847656, |
|
"eval_runtime": 11.8566, |
|
"eval_samples_per_second": 139.922, |
|
"eval_steps_per_second": 17.543, |
|
"eval_uas": 83.53636028054633, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 35.1, |
|
"learning_rate": 2.185234899328859e-06, |
|
"loss": 0.0038, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 35.34, |
|
"learning_rate": 1.648322147651007e-06, |
|
"loss": 0.0045, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"learning_rate": 1.1114093959731544e-06, |
|
"loss": 0.0036, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 35.82, |
|
"learning_rate": 5.74496644295302e-07, |
|
"loss": 0.0042, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"learning_rate": 3.758389261744967e-08, |
|
"loss": 0.0037, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"eval_las": 74.99219127126105, |
|
"eval_loss": 3.1982181072235107, |
|
"eval_runtime": 11.8515, |
|
"eval_samples_per_second": 139.982, |
|
"eval_steps_per_second": 17.55, |
|
"eval_uas": 83.58747195956498, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 36.06, |
|
"step": 15000, |
|
"total_flos": 8.010080992700006e+16, |
|
"train_loss": 0.2272949548403422, |
|
"train_runtime": 8371.5337, |
|
"train_samples_per_second": 57.337, |
|
"train_steps_per_second": 1.792 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 37, |
|
"total_flos": 8.010080992700006e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|