|
{ |
|
"best_metric": 1.1290708780288696, |
|
"best_model_checkpoint": "/home/ubuntu/ML/source_code/fine-tuned-legalroberta/checkpoint-17000", |
|
"epoch": 4.779308405960078, |
|
"global_step": 17000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9437728422828227e-05, |
|
"loss": 1.5399, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.5552507638931274, |
|
"eval_runtime": 2.1088, |
|
"eval_samples_per_second": 72.078, |
|
"eval_steps_per_second": 7.587, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8875456845656453e-05, |
|
"loss": 1.3513, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.3741689920425415, |
|
"eval_runtime": 2.1146, |
|
"eval_samples_per_second": 71.881, |
|
"eval_steps_per_second": 7.566, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.831318526848468e-05, |
|
"loss": 1.2809, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.3449822664260864, |
|
"eval_runtime": 2.1138, |
|
"eval_samples_per_second": 71.909, |
|
"eval_steps_per_second": 7.569, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7750913691312905e-05, |
|
"loss": 1.2484, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3560587167739868, |
|
"eval_runtime": 2.1078, |
|
"eval_samples_per_second": 72.113, |
|
"eval_steps_per_second": 7.591, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.718864211414113e-05, |
|
"loss": 1.2016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.3128280639648438, |
|
"eval_runtime": 2.1023, |
|
"eval_samples_per_second": 72.303, |
|
"eval_steps_per_second": 7.611, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.662637053696936e-05, |
|
"loss": 1.1724, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.3004732131958008, |
|
"eval_runtime": 2.1125, |
|
"eval_samples_per_second": 71.951, |
|
"eval_steps_per_second": 7.574, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.606409895979758e-05, |
|
"loss": 1.1664, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.2773096561431885, |
|
"eval_runtime": 2.119, |
|
"eval_samples_per_second": 71.732, |
|
"eval_steps_per_second": 7.551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.550182738262581e-05, |
|
"loss": 1.1399, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.2295717000961304, |
|
"eval_runtime": 2.119, |
|
"eval_samples_per_second": 71.733, |
|
"eval_steps_per_second": 7.551, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.4939555805454037e-05, |
|
"loss": 1.1136, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.2814128398895264, |
|
"eval_runtime": 2.1037, |
|
"eval_samples_per_second": 72.255, |
|
"eval_steps_per_second": 7.606, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4377284228282262e-05, |
|
"loss": 1.1026, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 1.2653945684432983, |
|
"eval_runtime": 2.1041, |
|
"eval_samples_per_second": 72.24, |
|
"eval_steps_per_second": 7.604, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.3815012651110486e-05, |
|
"loss": 1.0943, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 1.210158109664917, |
|
"eval_runtime": 2.1061, |
|
"eval_samples_per_second": 72.17, |
|
"eval_steps_per_second": 7.597, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3252741073938714e-05, |
|
"loss": 1.0768, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 1.1943004131317139, |
|
"eval_runtime": 2.1055, |
|
"eval_samples_per_second": 72.191, |
|
"eval_steps_per_second": 7.599, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.269046949676694e-05, |
|
"loss": 1.0741, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 1.2276432514190674, |
|
"eval_runtime": 2.1096, |
|
"eval_samples_per_second": 72.05, |
|
"eval_steps_per_second": 7.584, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.2128197919595167e-05, |
|
"loss": 1.0561, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.1935926675796509, |
|
"eval_runtime": 2.1073, |
|
"eval_samples_per_second": 72.129, |
|
"eval_steps_per_second": 7.593, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.1565926342423393e-05, |
|
"loss": 1.0384, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.2100528478622437, |
|
"eval_runtime": 2.1142, |
|
"eval_samples_per_second": 71.894, |
|
"eval_steps_per_second": 7.568, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.1003654765251617e-05, |
|
"loss": 1.037, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 1.1716400384902954, |
|
"eval_runtime": 2.1168, |
|
"eval_samples_per_second": 71.805, |
|
"eval_steps_per_second": 7.558, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0441383188079844e-05, |
|
"loss": 1.0295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.1552788019180298, |
|
"eval_runtime": 2.114, |
|
"eval_samples_per_second": 71.901, |
|
"eval_steps_per_second": 7.569, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.87911161090807e-06, |
|
"loss": 1.0255, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.211492896080017, |
|
"eval_runtime": 2.1048, |
|
"eval_samples_per_second": 72.217, |
|
"eval_steps_per_second": 7.602, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.316840033736295e-06, |
|
"loss": 1.0207, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.1855554580688477, |
|
"eval_runtime": 2.1112, |
|
"eval_samples_per_second": 71.998, |
|
"eval_steps_per_second": 7.579, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.754568456564521e-06, |
|
"loss": 1.0097, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 1.1690603494644165, |
|
"eval_runtime": 2.1041, |
|
"eval_samples_per_second": 72.238, |
|
"eval_steps_per_second": 7.604, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.192296879392747e-06, |
|
"loss": 0.9996, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 1.1681824922561646, |
|
"eval_runtime": 2.107, |
|
"eval_samples_per_second": 72.141, |
|
"eval_steps_per_second": 7.594, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.630025302220974e-06, |
|
"loss": 0.9912, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_loss": 1.200119972229004, |
|
"eval_runtime": 2.1358, |
|
"eval_samples_per_second": 71.168, |
|
"eval_steps_per_second": 7.491, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 7.067753725049199e-06, |
|
"loss": 0.9844, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 1.129595160484314, |
|
"eval_runtime": 2.111, |
|
"eval_samples_per_second": 72.004, |
|
"eval_steps_per_second": 7.579, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.505482147877426e-06, |
|
"loss": 0.9908, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 1.1818418502807617, |
|
"eval_runtime": 2.1165, |
|
"eval_samples_per_second": 71.817, |
|
"eval_steps_per_second": 7.56, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 5.943210570705651e-06, |
|
"loss": 0.9798, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 1.1727079153060913, |
|
"eval_runtime": 2.1056, |
|
"eval_samples_per_second": 72.188, |
|
"eval_steps_per_second": 7.599, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 5.380938993533877e-06, |
|
"loss": 0.9699, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 1.2000970840454102, |
|
"eval_runtime": 2.1003, |
|
"eval_samples_per_second": 72.372, |
|
"eval_steps_per_second": 7.618, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.818667416362104e-06, |
|
"loss": 0.9706, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 1.1695332527160645, |
|
"eval_runtime": 2.1004, |
|
"eval_samples_per_second": 72.366, |
|
"eval_steps_per_second": 7.617, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.2563958391903294e-06, |
|
"loss": 0.974, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 1.1403844356536865, |
|
"eval_runtime": 2.1018, |
|
"eval_samples_per_second": 72.318, |
|
"eval_steps_per_second": 7.612, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.694124262018555e-06, |
|
"loss": 0.9561, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 1.1319295167922974, |
|
"eval_runtime": 2.1093, |
|
"eval_samples_per_second": 72.062, |
|
"eval_steps_per_second": 7.586, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.1318526848467813e-06, |
|
"loss": 0.9598, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 1.1552999019622803, |
|
"eval_runtime": 2.116, |
|
"eval_samples_per_second": 71.835, |
|
"eval_steps_per_second": 7.562, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.569581107675007e-06, |
|
"loss": 0.9626, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 1.1351696252822876, |
|
"eval_runtime": 2.1126, |
|
"eval_samples_per_second": 71.95, |
|
"eval_steps_per_second": 7.574, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.007309530503233e-06, |
|
"loss": 0.9631, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 1.181087851524353, |
|
"eval_runtime": 2.1067, |
|
"eval_samples_per_second": 72.151, |
|
"eval_steps_per_second": 7.595, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 1.4450379533314593e-06, |
|
"loss": 0.9485, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_loss": 1.1709717512130737, |
|
"eval_runtime": 2.1077, |
|
"eval_samples_per_second": 72.118, |
|
"eval_steps_per_second": 7.591, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 8.827663761596852e-07, |
|
"loss": 0.946, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 1.1290708780288696, |
|
"eval_runtime": 2.1091, |
|
"eval_samples_per_second": 72.067, |
|
"eval_steps_per_second": 7.586, |
|
"step": 17000 |
|
} |
|
], |
|
"max_steps": 17785, |
|
"num_train_epochs": 5, |
|
"total_flos": 4.47488103892009e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|