|
{ |
|
"best_metric": 0.5988770127296448, |
|
"best_model_checkpoint": "/content/drive/MyDrive/Projects/nitic-nlp-team/webnavix/checkpoints/webnavix/nitic-nlp-team/webnavix-llama-ai-tools/checkpoint-100", |
|
"epoch": 2.873563218390805, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005747126436781609, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 4.9942528735632185e-05, |
|
"loss": 0.3466, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05747126436781609, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.9425287356321845e-05, |
|
"loss": 0.4403, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11494252873563218, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 4.885057471264368e-05, |
|
"loss": 0.3798, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.827586206896552e-05, |
|
"loss": 0.3666, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22988505747126436, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 4.770114942528736e-05, |
|
"loss": 0.3313, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28735632183908044, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 4.7126436781609195e-05, |
|
"loss": 0.3662, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28735632183908044, |
|
"eval_loss": 0.6048163175582886, |
|
"eval_runtime": 15.3039, |
|
"eval_samples_per_second": 12.48, |
|
"eval_steps_per_second": 0.784, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 4.655172413793104e-05, |
|
"loss": 0.3606, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40229885057471265, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.597701149425287e-05, |
|
"loss": 0.3232, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 4.5402298850574716e-05, |
|
"loss": 0.2805, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 4.482758620689655e-05, |
|
"loss": 0.3106, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 4.4252873563218394e-05, |
|
"loss": 0.357, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"eval_loss": 0.5988770127296448, |
|
"eval_runtime": 15.2078, |
|
"eval_samples_per_second": 12.559, |
|
"eval_steps_per_second": 0.789, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.632183908045977, |
|
"grad_norm": 0.625, |
|
"learning_rate": 4.367816091954024e-05, |
|
"loss": 0.3058, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 4.3103448275862066e-05, |
|
"loss": 0.2477, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7471264367816092, |
|
"grad_norm": 0.8203125, |
|
"learning_rate": 4.252873563218391e-05, |
|
"loss": 0.2842, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8045977011494253, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 4.195402298850575e-05, |
|
"loss": 0.3173, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.2785, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"eval_loss": 0.60736083984375, |
|
"eval_runtime": 15.21, |
|
"eval_samples_per_second": 12.558, |
|
"eval_steps_per_second": 0.789, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 4.080459770114943e-05, |
|
"loss": 0.2478, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9770114942528736, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 4.0229885057471265e-05, |
|
"loss": 0.2232, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 3.965517241379311e-05, |
|
"loss": 0.2507, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0919540229885056, |
|
"grad_norm": 0.65625, |
|
"learning_rate": 3.908045977011495e-05, |
|
"loss": 0.2459, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1494252873563218, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 3.850574712643678e-05, |
|
"loss": 0.2052, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1494252873563218, |
|
"eval_loss": 0.630184531211853, |
|
"eval_runtime": 14.8723, |
|
"eval_samples_per_second": 12.843, |
|
"eval_steps_per_second": 0.807, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.206896551724138, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 3.793103448275862e-05, |
|
"loss": 0.1732, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.264367816091954, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 3.735632183908046e-05, |
|
"loss": 0.1942, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3218390804597702, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 3.67816091954023e-05, |
|
"loss": 0.2293, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 3.620689655172414e-05, |
|
"loss": 0.2122, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4367816091954024, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 3.563218390804598e-05, |
|
"loss": 0.1795, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4367816091954024, |
|
"eval_loss": 0.6453364491462708, |
|
"eval_runtime": 15.2489, |
|
"eval_samples_per_second": 12.525, |
|
"eval_steps_per_second": 0.787, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4942528735632183, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 3.505747126436782e-05, |
|
"loss": 0.166, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 0.227, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6091954022988506, |
|
"grad_norm": 0.75, |
|
"learning_rate": 3.390804597701149e-05, |
|
"loss": 0.2018, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1623, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.1336, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"eval_loss": 0.6576523780822754, |
|
"eval_runtime": 15.2413, |
|
"eval_samples_per_second": 12.532, |
|
"eval_steps_per_second": 0.787, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7816091954022988, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 3.218390804597701e-05, |
|
"loss": 0.2007, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.839080459770115, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 3.160919540229885e-05, |
|
"loss": 0.1948, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.896551724137931, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 3.103448275862069e-05, |
|
"loss": 0.1634, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9540229885057472, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 3.045977011494253e-05, |
|
"loss": 0.1416, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0114942528735633, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 2.988505747126437e-05, |
|
"loss": 0.151, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0114942528735633, |
|
"eval_loss": 0.6700878739356995, |
|
"eval_runtime": 14.8872, |
|
"eval_samples_per_second": 12.83, |
|
"eval_steps_per_second": 0.806, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 2.9310344827586206e-05, |
|
"loss": 0.1591, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1264367816091956, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 2.8735632183908045e-05, |
|
"loss": 0.1404, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.1839080459770113, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 2.8160919540229884e-05, |
|
"loss": 0.1173, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 2.7586206896551727e-05, |
|
"loss": 0.1067, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.2988505747126435, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 2.7011494252873566e-05, |
|
"loss": 0.1504, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2988505747126435, |
|
"eval_loss": 0.7032656073570251, |
|
"eval_runtime": 15.2246, |
|
"eval_samples_per_second": 12.545, |
|
"eval_steps_per_second": 0.788, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.3563218390804597, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 2.6436781609195405e-05, |
|
"loss": 0.1334, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 2.5862068965517244e-05, |
|
"loss": 0.1166, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.471264367816092, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 2.5287356321839083e-05, |
|
"loss": 0.0998, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.528735632183908, |
|
"grad_norm": 0.734375, |
|
"learning_rate": 2.4712643678160922e-05, |
|
"loss": 0.1299, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 2.413793103448276e-05, |
|
"loss": 0.1443, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"eval_loss": 0.7000734210014343, |
|
"eval_runtime": 15.3232, |
|
"eval_samples_per_second": 12.465, |
|
"eval_steps_per_second": 0.783, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.6436781609195403, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 2.3563218390804597e-05, |
|
"loss": 0.1129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.7011494252873565, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 2.2988505747126437e-05, |
|
"loss": 0.0966, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 2.2413793103448276e-05, |
|
"loss": 0.1044, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.8160919540229887, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 2.183908045977012e-05, |
|
"loss": 0.1447, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.873563218390805, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 2.1264367816091954e-05, |
|
"loss": 0.1156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.873563218390805, |
|
"eval_loss": 0.7145028710365295, |
|
"eval_runtime": 15.3052, |
|
"eval_samples_per_second": 12.479, |
|
"eval_steps_per_second": 0.784, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 870, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.5051522405612032e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|