|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9705882352941178e-05, |
|
"loss": 1.1551, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9411764705882355e-05, |
|
"loss": 1.1371, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.911764705882353e-05, |
|
"loss": 1.0677, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8823529411764708e-05, |
|
"loss": 1.0166, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8529411764705884e-05, |
|
"loss": 0.9691, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.823529411764706e-05, |
|
"loss": 0.8892, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7941176470588237e-05, |
|
"loss": 0.8899, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.7734, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.735294117647059e-05, |
|
"loss": 0.6878, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7058823529411767e-05, |
|
"loss": 0.6863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.6764705882352943e-05, |
|
"loss": 0.5821, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.647058823529412e-05, |
|
"loss": 0.5557, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.6176470588235296e-05, |
|
"loss": 0.6057, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5882352941176473e-05, |
|
"loss": 0.5279, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.558823529411765e-05, |
|
"loss": 0.5189, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.5294117647058822e-05, |
|
"loss": 0.5309, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.5113, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.4705882352941179e-05, |
|
"loss": 0.4926, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.4411764705882353e-05, |
|
"loss": 0.4805, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4117647058823532e-05, |
|
"loss": 0.4077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.3823529411764706e-05, |
|
"loss": 0.4727, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3529411764705885e-05, |
|
"loss": 0.5212, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.3975, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2941176470588238e-05, |
|
"loss": 0.4245, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.2647058823529412e-05, |
|
"loss": 0.4479, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.235294117647059e-05, |
|
"loss": 0.4941, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.2058823529411765e-05, |
|
"loss": 0.4887, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.4574, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1470588235294118e-05, |
|
"loss": 0.4053, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.1176470588235295e-05, |
|
"loss": 0.3969, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.0882352941176471e-05, |
|
"loss": 0.3647, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0588235294117648e-05, |
|
"loss": 0.4327, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0294117647058823e-05, |
|
"loss": 0.3734, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4301, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 9.705882352941177e-06, |
|
"loss": 0.3485, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 0.3999, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.11764705882353e-06, |
|
"loss": 0.2864, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.3797, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.529411764705883e-06, |
|
"loss": 0.3573, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 0.3832, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.941176470588236e-06, |
|
"loss": 0.3876, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.647058823529411e-06, |
|
"loss": 0.3669, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.352941176470589e-06, |
|
"loss": 0.3032, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.058823529411766e-06, |
|
"loss": 0.3374, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.764705882352942e-06, |
|
"loss": 0.3674, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 6.470588235294119e-06, |
|
"loss": 0.4074, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 6.176470588235295e-06, |
|
"loss": 0.3051, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.3102, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.588235294117647e-06, |
|
"loss": 0.2872, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 0.3004, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3081, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 0.331, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.2992, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 4.11764705882353e-06, |
|
"loss": 0.3127, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.8235294117647055e-06, |
|
"loss": 0.267, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 0.3539, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.2352941176470594e-06, |
|
"loss": 0.2743, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.3714, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.647058823529412e-06, |
|
"loss": 0.2308, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 0.2479, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.058823529411765e-06, |
|
"loss": 0.3552, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"loss": 0.2959, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 0.2703, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 0.3112, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 8.823529411764707e-07, |
|
"loss": 0.3164, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 5.882352941176471e-07, |
|
"loss": 0.3118, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.9411764705882356e-07, |
|
"loss": 0.3163, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.2601, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 680, |
|
"total_flos": 1424430265224960.0, |
|
"train_loss": 0.4640252302674686, |
|
"train_runtime": 266.3348, |
|
"train_samples_per_second": 81.307, |
|
"train_steps_per_second": 2.553 |
|
} |
|
], |
|
"max_steps": 680, |
|
"num_train_epochs": 5, |
|
"total_flos": 1424430265224960.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|