{ "best_metric": 1.1290708780288696, "best_model_checkpoint": "/home/ubuntu/ML/source_code/fine-tuned-legalroberta/checkpoint-17000", "epoch": 4.779308405960078, "global_step": 17000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 1.9437728422828227e-05, "loss": 1.5399, "step": 500 }, { "epoch": 0.14, "eval_loss": 1.5552507638931274, "eval_runtime": 2.1088, "eval_samples_per_second": 72.078, "eval_steps_per_second": 7.587, "step": 500 }, { "epoch": 0.28, "learning_rate": 1.8875456845656453e-05, "loss": 1.3513, "step": 1000 }, { "epoch": 0.28, "eval_loss": 1.3741689920425415, "eval_runtime": 2.1146, "eval_samples_per_second": 71.881, "eval_steps_per_second": 7.566, "step": 1000 }, { "epoch": 0.42, "learning_rate": 1.831318526848468e-05, "loss": 1.2809, "step": 1500 }, { "epoch": 0.42, "eval_loss": 1.3449822664260864, "eval_runtime": 2.1138, "eval_samples_per_second": 71.909, "eval_steps_per_second": 7.569, "step": 1500 }, { "epoch": 0.56, "learning_rate": 1.7750913691312905e-05, "loss": 1.2484, "step": 2000 }, { "epoch": 0.56, "eval_loss": 1.3560587167739868, "eval_runtime": 2.1078, "eval_samples_per_second": 72.113, "eval_steps_per_second": 7.591, "step": 2000 }, { "epoch": 0.7, "learning_rate": 1.718864211414113e-05, "loss": 1.2016, "step": 2500 }, { "epoch": 0.7, "eval_loss": 1.3128280639648438, "eval_runtime": 2.1023, "eval_samples_per_second": 72.303, "eval_steps_per_second": 7.611, "step": 2500 }, { "epoch": 0.84, "learning_rate": 1.662637053696936e-05, "loss": 1.1724, "step": 3000 }, { "epoch": 0.84, "eval_loss": 1.3004732131958008, "eval_runtime": 2.1125, "eval_samples_per_second": 71.951, "eval_steps_per_second": 7.574, "step": 3000 }, { "epoch": 0.98, "learning_rate": 1.606409895979758e-05, "loss": 1.1664, "step": 3500 }, { "epoch": 0.98, "eval_loss": 1.2773096561431885, "eval_runtime": 2.119, "eval_samples_per_second": 71.732, "eval_steps_per_second": 7.551, "step": 3500 }, { "epoch": 1.12, "learning_rate": 1.550182738262581e-05, "loss": 1.1399, "step": 4000 }, { "epoch": 1.12, "eval_loss": 1.2295717000961304, "eval_runtime": 2.119, "eval_samples_per_second": 71.733, "eval_steps_per_second": 7.551, "step": 4000 }, { "epoch": 1.27, "learning_rate": 1.4939555805454037e-05, "loss": 1.1136, "step": 4500 }, { "epoch": 1.27, "eval_loss": 1.2814128398895264, "eval_runtime": 2.1037, "eval_samples_per_second": 72.255, "eval_steps_per_second": 7.606, "step": 4500 }, { "epoch": 1.41, "learning_rate": 1.4377284228282262e-05, "loss": 1.1026, "step": 5000 }, { "epoch": 1.41, "eval_loss": 1.2653945684432983, "eval_runtime": 2.1041, "eval_samples_per_second": 72.24, "eval_steps_per_second": 7.604, "step": 5000 }, { "epoch": 1.55, "learning_rate": 1.3815012651110486e-05, "loss": 1.0943, "step": 5500 }, { "epoch": 1.55, "eval_loss": 1.210158109664917, "eval_runtime": 2.1061, "eval_samples_per_second": 72.17, "eval_steps_per_second": 7.597, "step": 5500 }, { "epoch": 1.69, "learning_rate": 1.3252741073938714e-05, "loss": 1.0768, "step": 6000 }, { "epoch": 1.69, "eval_loss": 1.1943004131317139, "eval_runtime": 2.1055, "eval_samples_per_second": 72.191, "eval_steps_per_second": 7.599, "step": 6000 }, { "epoch": 1.83, "learning_rate": 1.269046949676694e-05, "loss": 1.0741, "step": 6500 }, { "epoch": 1.83, "eval_loss": 1.2276432514190674, "eval_runtime": 2.1096, "eval_samples_per_second": 72.05, "eval_steps_per_second": 7.584, "step": 6500 }, { "epoch": 1.97, "learning_rate": 1.2128197919595167e-05, "loss": 1.0561, "step": 7000 }, { "epoch": 1.97, "eval_loss": 1.1935926675796509, "eval_runtime": 2.1073, "eval_samples_per_second": 72.129, "eval_steps_per_second": 7.593, "step": 7000 }, { "epoch": 2.11, "learning_rate": 1.1565926342423393e-05, "loss": 1.0384, "step": 7500 }, { "epoch": 2.11, "eval_loss": 1.2100528478622437, "eval_runtime": 2.1142, "eval_samples_per_second": 71.894, "eval_steps_per_second": 7.568, "step": 7500 }, { "epoch": 2.25, "learning_rate": 1.1003654765251617e-05, "loss": 1.037, "step": 8000 }, { "epoch": 2.25, "eval_loss": 1.1716400384902954, "eval_runtime": 2.1168, "eval_samples_per_second": 71.805, "eval_steps_per_second": 7.558, "step": 8000 }, { "epoch": 2.39, "learning_rate": 1.0441383188079844e-05, "loss": 1.0295, "step": 8500 }, { "epoch": 2.39, "eval_loss": 1.1552788019180298, "eval_runtime": 2.114, "eval_samples_per_second": 71.901, "eval_steps_per_second": 7.569, "step": 8500 }, { "epoch": 2.53, "learning_rate": 9.87911161090807e-06, "loss": 1.0255, "step": 9000 }, { "epoch": 2.53, "eval_loss": 1.211492896080017, "eval_runtime": 2.1048, "eval_samples_per_second": 72.217, "eval_steps_per_second": 7.602, "step": 9000 }, { "epoch": 2.67, "learning_rate": 9.316840033736295e-06, "loss": 1.0207, "step": 9500 }, { "epoch": 2.67, "eval_loss": 1.1855554580688477, "eval_runtime": 2.1112, "eval_samples_per_second": 71.998, "eval_steps_per_second": 7.579, "step": 9500 }, { "epoch": 2.81, "learning_rate": 8.754568456564521e-06, "loss": 1.0097, "step": 10000 }, { "epoch": 2.81, "eval_loss": 1.1690603494644165, "eval_runtime": 2.1041, "eval_samples_per_second": 72.238, "eval_steps_per_second": 7.604, "step": 10000 }, { "epoch": 2.95, "learning_rate": 8.192296879392747e-06, "loss": 0.9996, "step": 10500 }, { "epoch": 2.95, "eval_loss": 1.1681824922561646, "eval_runtime": 2.107, "eval_samples_per_second": 72.141, "eval_steps_per_second": 7.594, "step": 10500 }, { "epoch": 3.09, "learning_rate": 7.630025302220974e-06, "loss": 0.9912, "step": 11000 }, { "epoch": 3.09, "eval_loss": 1.200119972229004, "eval_runtime": 2.1358, "eval_samples_per_second": 71.168, "eval_steps_per_second": 7.491, "step": 11000 }, { "epoch": 3.23, "learning_rate": 7.067753725049199e-06, "loss": 0.9844, "step": 11500 }, { "epoch": 3.23, "eval_loss": 1.129595160484314, "eval_runtime": 2.111, "eval_samples_per_second": 72.004, "eval_steps_per_second": 7.579, "step": 11500 }, { "epoch": 3.37, "learning_rate": 6.505482147877426e-06, "loss": 0.9908, "step": 12000 }, { "epoch": 3.37, "eval_loss": 1.1818418502807617, "eval_runtime": 2.1165, "eval_samples_per_second": 71.817, "eval_steps_per_second": 7.56, "step": 12000 }, { "epoch": 3.51, "learning_rate": 5.943210570705651e-06, "loss": 0.9798, "step": 12500 }, { "epoch": 3.51, "eval_loss": 1.1727079153060913, "eval_runtime": 2.1056, "eval_samples_per_second": 72.188, "eval_steps_per_second": 7.599, "step": 12500 }, { "epoch": 3.65, "learning_rate": 5.380938993533877e-06, "loss": 0.9699, "step": 13000 }, { "epoch": 3.65, "eval_loss": 1.2000970840454102, "eval_runtime": 2.1003, "eval_samples_per_second": 72.372, "eval_steps_per_second": 7.618, "step": 13000 }, { "epoch": 3.8, "learning_rate": 4.818667416362104e-06, "loss": 0.9706, "step": 13500 }, { "epoch": 3.8, "eval_loss": 1.1695332527160645, "eval_runtime": 2.1004, "eval_samples_per_second": 72.366, "eval_steps_per_second": 7.617, "step": 13500 }, { "epoch": 3.94, "learning_rate": 4.2563958391903294e-06, "loss": 0.974, "step": 14000 }, { "epoch": 3.94, "eval_loss": 1.1403844356536865, "eval_runtime": 2.1018, "eval_samples_per_second": 72.318, "eval_steps_per_second": 7.612, "step": 14000 }, { "epoch": 4.08, "learning_rate": 3.694124262018555e-06, "loss": 0.9561, "step": 14500 }, { "epoch": 4.08, "eval_loss": 1.1319295167922974, "eval_runtime": 2.1093, "eval_samples_per_second": 72.062, "eval_steps_per_second": 7.586, "step": 14500 }, { "epoch": 4.22, "learning_rate": 3.1318526848467813e-06, "loss": 0.9598, "step": 15000 }, { "epoch": 4.22, "eval_loss": 1.1552999019622803, "eval_runtime": 2.116, "eval_samples_per_second": 71.835, "eval_steps_per_second": 7.562, "step": 15000 }, { "epoch": 4.36, "learning_rate": 2.569581107675007e-06, "loss": 0.9626, "step": 15500 }, { "epoch": 4.36, "eval_loss": 1.1351696252822876, "eval_runtime": 2.1126, "eval_samples_per_second": 71.95, "eval_steps_per_second": 7.574, "step": 15500 }, { "epoch": 4.5, "learning_rate": 2.007309530503233e-06, "loss": 0.9631, "step": 16000 }, { "epoch": 4.5, "eval_loss": 1.181087851524353, "eval_runtime": 2.1067, "eval_samples_per_second": 72.151, "eval_steps_per_second": 7.595, "step": 16000 }, { "epoch": 4.64, "learning_rate": 1.4450379533314593e-06, "loss": 0.9485, "step": 16500 }, { "epoch": 4.64, "eval_loss": 1.1709717512130737, "eval_runtime": 2.1077, "eval_samples_per_second": 72.118, "eval_steps_per_second": 7.591, "step": 16500 }, { "epoch": 4.78, "learning_rate": 8.827663761596852e-07, "loss": 0.946, "step": 17000 }, { "epoch": 4.78, "eval_loss": 1.1290708780288696, "eval_runtime": 2.1091, "eval_samples_per_second": 72.067, "eval_steps_per_second": 7.586, "step": 17000 } ], "max_steps": 17785, "num_train_epochs": 5, "total_flos": 4.47488103892009e+16, "trial_name": null, "trial_params": null }