{ "best_metric": 0.5988770127296448, "best_model_checkpoint": "/content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-ai-tools/checkpoint-100", "epoch": 2.873563218390805, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005747126436781609, "grad_norm": 1.1171875, "learning_rate": 4.9942528735632185e-05, "loss": 0.3466, "step": 1 }, { "epoch": 0.05747126436781609, "grad_norm": 0.5625, "learning_rate": 4.9425287356321845e-05, "loss": 0.4403, "step": 10 }, { "epoch": 0.11494252873563218, "grad_norm": 0.494140625, "learning_rate": 4.885057471264368e-05, "loss": 0.3798, "step": 20 }, { "epoch": 0.1724137931034483, "grad_norm": 0.5625, "learning_rate": 4.827586206896552e-05, "loss": 0.3666, "step": 30 }, { "epoch": 0.22988505747126436, "grad_norm": 0.59765625, "learning_rate": 4.770114942528736e-05, "loss": 0.3313, "step": 40 }, { "epoch": 0.28735632183908044, "grad_norm": 0.5859375, "learning_rate": 4.7126436781609195e-05, "loss": 0.3662, "step": 50 }, { "epoch": 0.28735632183908044, "eval_loss": 0.6048163175582886, "eval_runtime": 15.3039, "eval_samples_per_second": 12.48, "eval_steps_per_second": 0.784, "step": 50 }, { "epoch": 0.3448275862068966, "grad_norm": 0.5625, "learning_rate": 4.655172413793104e-05, "loss": 0.3606, "step": 60 }, { "epoch": 0.40229885057471265, "grad_norm": 0.56640625, "learning_rate": 4.597701149425287e-05, "loss": 0.3232, "step": 70 }, { "epoch": 0.45977011494252873, "grad_norm": 0.53515625, "learning_rate": 4.5402298850574716e-05, "loss": 0.2805, "step": 80 }, { "epoch": 0.5172413793103449, "grad_norm": 0.62890625, "learning_rate": 4.482758620689655e-05, "loss": 0.3106, "step": 90 }, { "epoch": 0.5747126436781609, "grad_norm": 0.5859375, "learning_rate": 4.4252873563218394e-05, "loss": 0.357, "step": 100 }, { "epoch": 0.5747126436781609, "eval_loss": 0.5988770127296448, "eval_runtime": 15.2078, "eval_samples_per_second": 12.559, "eval_steps_per_second": 0.789, "step": 100 }, { "epoch": 0.632183908045977, "grad_norm": 0.625, "learning_rate": 4.367816091954024e-05, "loss": 0.3058, "step": 110 }, { "epoch": 0.6896551724137931, "grad_norm": 0.578125, "learning_rate": 4.3103448275862066e-05, "loss": 0.2477, "step": 120 }, { "epoch": 0.7471264367816092, "grad_norm": 0.8203125, "learning_rate": 4.252873563218391e-05, "loss": 0.2842, "step": 130 }, { "epoch": 0.8045977011494253, "grad_norm": 0.60546875, "learning_rate": 4.195402298850575e-05, "loss": 0.3173, "step": 140 }, { "epoch": 0.8620689655172413, "grad_norm": 0.64453125, "learning_rate": 4.1379310344827587e-05, "loss": 0.2785, "step": 150 }, { "epoch": 0.8620689655172413, "eval_loss": 0.60736083984375, "eval_runtime": 15.21, "eval_samples_per_second": 12.558, "eval_steps_per_second": 0.789, "step": 150 }, { "epoch": 0.9195402298850575, "grad_norm": 0.68359375, "learning_rate": 4.080459770114943e-05, "loss": 0.2478, "step": 160 }, { "epoch": 0.9770114942528736, "grad_norm": 0.66015625, "learning_rate": 4.0229885057471265e-05, "loss": 0.2232, "step": 170 }, { "epoch": 1.0344827586206897, "grad_norm": 0.8671875, "learning_rate": 3.965517241379311e-05, "loss": 0.2507, "step": 180 }, { "epoch": 1.0919540229885056, "grad_norm": 0.65625, "learning_rate": 3.908045977011495e-05, "loss": 0.2459, "step": 190 }, { "epoch": 1.1494252873563218, "grad_norm": 0.66796875, "learning_rate": 3.850574712643678e-05, "loss": 0.2052, "step": 200 }, { "epoch": 1.1494252873563218, "eval_loss": 0.630184531211853, "eval_runtime": 14.8723, "eval_samples_per_second": 12.843, "eval_steps_per_second": 0.807, "step": 200 }, { "epoch": 1.206896551724138, "grad_norm": 0.65234375, "learning_rate": 3.793103448275862e-05, "loss": 0.1732, "step": 210 }, { "epoch": 1.264367816091954, "grad_norm": 0.80078125, "learning_rate": 3.735632183908046e-05, "loss": 0.1942, "step": 220 }, { "epoch": 1.3218390804597702, "grad_norm": 0.69140625, "learning_rate": 3.67816091954023e-05, "loss": 0.2293, "step": 230 }, { "epoch": 1.3793103448275863, "grad_norm": 0.6640625, "learning_rate": 3.620689655172414e-05, "loss": 0.2122, "step": 240 }, { "epoch": 1.4367816091954024, "grad_norm": 0.671875, "learning_rate": 3.563218390804598e-05, "loss": 0.1795, "step": 250 }, { "epoch": 1.4367816091954024, "eval_loss": 0.6453364491462708, "eval_runtime": 15.2489, "eval_samples_per_second": 12.525, "eval_steps_per_second": 0.787, "step": 250 }, { "epoch": 1.4942528735632183, "grad_norm": 0.8359375, "learning_rate": 3.505747126436782e-05, "loss": 0.166, "step": 260 }, { "epoch": 1.5517241379310345, "grad_norm": 0.6328125, "learning_rate": 3.4482758620689657e-05, "loss": 0.227, "step": 270 }, { "epoch": 1.6091954022988506, "grad_norm": 0.75, "learning_rate": 3.390804597701149e-05, "loss": 0.2018, "step": 280 }, { "epoch": 1.6666666666666665, "grad_norm": 0.58984375, "learning_rate": 3.3333333333333335e-05, "loss": 0.1623, "step": 290 }, { "epoch": 1.7241379310344827, "grad_norm": 0.58984375, "learning_rate": 3.275862068965517e-05, "loss": 0.1336, "step": 300 }, { "epoch": 1.7241379310344827, "eval_loss": 0.6576523780822754, "eval_runtime": 15.2413, "eval_samples_per_second": 12.532, "eval_steps_per_second": 0.787, "step": 300 }, { "epoch": 1.7816091954022988, "grad_norm": 0.80078125, "learning_rate": 3.218390804597701e-05, "loss": 0.2007, "step": 310 }, { "epoch": 1.839080459770115, "grad_norm": 0.61328125, "learning_rate": 3.160919540229885e-05, "loss": 0.1948, "step": 320 }, { "epoch": 1.896551724137931, "grad_norm": 0.72265625, "learning_rate": 3.103448275862069e-05, "loss": 0.1634, "step": 330 }, { "epoch": 1.9540229885057472, "grad_norm": 0.578125, "learning_rate": 3.045977011494253e-05, "loss": 0.1416, "step": 340 }, { "epoch": 2.0114942528735633, "grad_norm": 0.63671875, "learning_rate": 2.988505747126437e-05, "loss": 0.151, "step": 350 }, { "epoch": 2.0114942528735633, "eval_loss": 0.6700878739356995, "eval_runtime": 14.8872, "eval_samples_per_second": 12.83, "eval_steps_per_second": 0.806, "step": 350 }, { "epoch": 2.0689655172413794, "grad_norm": 0.66015625, "learning_rate": 2.9310344827586206e-05, "loss": 0.1591, "step": 360 }, { "epoch": 2.1264367816091956, "grad_norm": 0.53125, "learning_rate": 2.8735632183908045e-05, "loss": 0.1404, "step": 370 }, { "epoch": 2.1839080459770113, "grad_norm": 0.58203125, "learning_rate": 2.8160919540229884e-05, "loss": 0.1173, "step": 380 }, { "epoch": 2.2413793103448274, "grad_norm": 0.6171875, "learning_rate": 2.7586206896551727e-05, "loss": 0.1067, "step": 390 }, { "epoch": 2.2988505747126435, "grad_norm": 0.703125, "learning_rate": 2.7011494252873566e-05, "loss": 0.1504, "step": 400 }, { "epoch": 2.2988505747126435, "eval_loss": 0.7032656073570251, "eval_runtime": 15.2246, "eval_samples_per_second": 12.545, "eval_steps_per_second": 0.788, "step": 400 }, { "epoch": 2.3563218390804597, "grad_norm": 0.6015625, "learning_rate": 2.6436781609195405e-05, "loss": 0.1334, "step": 410 }, { "epoch": 2.413793103448276, "grad_norm": 0.66796875, "learning_rate": 2.5862068965517244e-05, "loss": 0.1166, "step": 420 }, { "epoch": 2.471264367816092, "grad_norm": 0.63671875, "learning_rate": 2.5287356321839083e-05, "loss": 0.0998, "step": 430 }, { "epoch": 2.528735632183908, "grad_norm": 0.734375, "learning_rate": 2.4712643678160922e-05, "loss": 0.1299, "step": 440 }, { "epoch": 2.586206896551724, "grad_norm": 0.6328125, "learning_rate": 2.413793103448276e-05, "loss": 0.1443, "step": 450 }, { "epoch": 2.586206896551724, "eval_loss": 0.7000734210014343, "eval_runtime": 15.3232, "eval_samples_per_second": 12.465, "eval_steps_per_second": 0.783, "step": 450 }, { "epoch": 2.6436781609195403, "grad_norm": 0.6484375, "learning_rate": 2.3563218390804597e-05, "loss": 0.1129, "step": 460 }, { "epoch": 2.7011494252873565, "grad_norm": 0.6015625, "learning_rate": 2.2988505747126437e-05, "loss": 0.0966, "step": 470 }, { "epoch": 2.7586206896551726, "grad_norm": 0.69921875, "learning_rate": 2.2413793103448276e-05, "loss": 0.1044, "step": 480 }, { "epoch": 2.8160919540229887, "grad_norm": 0.6953125, "learning_rate": 2.183908045977012e-05, "loss": 0.1447, "step": 490 }, { "epoch": 2.873563218390805, "grad_norm": 0.64453125, "learning_rate": 2.1264367816091954e-05, "loss": 0.1156, "step": 500 }, { "epoch": 2.873563218390805, "eval_loss": 0.7145028710365295, "eval_runtime": 15.3052, "eval_samples_per_second": 12.479, "eval_steps_per_second": 0.784, "step": 500 } ], "logging_steps": 10, "max_steps": 870, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5051522405612032e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }