|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.769062385990514, |
|
"global_step": 35000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.9428571428571435e-05, |
|
"loss": 0.7626, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.885714285714286e-05, |
|
"loss": 0.3395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.828571428571429e-05, |
|
"loss": 0.1906, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.771428571428572e-05, |
|
"loss": 0.1287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.714285714285715e-05, |
|
"loss": 0.1061, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.6571428571428576e-05, |
|
"loss": 0.0865, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0646, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.542857142857143e-05, |
|
"loss": 0.0562, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.485714285714286e-05, |
|
"loss": 0.0685, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.4285714285714284e-05, |
|
"loss": 0.0481, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3714285714285716e-05, |
|
"loss": 0.0445, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.314285714285715e-05, |
|
"loss": 0.0345, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.257142857142857e-05, |
|
"loss": 0.0353, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.0436, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.142857142857143e-05, |
|
"loss": 0.0337, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.085714285714286e-05, |
|
"loss": 0.0325, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.0285714285714288e-05, |
|
"loss": 0.0325, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.9714285714285717e-05, |
|
"loss": 0.0303, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.9142857142857146e-05, |
|
"loss": 0.0243, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.8571428571428574e-05, |
|
"loss": 0.0269, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.0252, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.742857142857143e-05, |
|
"loss": 0.0285, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.6857142857142857e-05, |
|
"loss": 0.0265, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.628571428571429e-05, |
|
"loss": 0.0197, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.5714285714285718e-05, |
|
"loss": 0.0253, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.5142857142857143e-05, |
|
"loss": 0.025, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.4571428571428575e-05, |
|
"loss": 0.0254, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0235, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.3428571428571433e-05, |
|
"loss": 0.025, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 2.2857142857142858e-05, |
|
"loss": 0.0191, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.2285714285714287e-05, |
|
"loss": 0.0225, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 2.1714285714285715e-05, |
|
"loss": 0.0195, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.1142857142857144e-05, |
|
"loss": 0.017, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 2.057142857142857e-05, |
|
"loss": 0.0196, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0174, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.942857142857143e-05, |
|
"loss": 0.0205, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.885714285714286e-05, |
|
"loss": 0.0223, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 1.8285714285714288e-05, |
|
"loss": 0.0181, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.7714285714285717e-05, |
|
"loss": 0.026, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.0191, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.6571428571428574e-05, |
|
"loss": 0.0194, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.021, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 1.542857142857143e-05, |
|
"loss": 0.0166, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 1.4857142857142858e-05, |
|
"loss": 0.017, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.0185, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 1.3714285714285716e-05, |
|
"loss": 0.0179, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1.3142857142857145e-05, |
|
"loss": 0.0179, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 1.2571428571428572e-05, |
|
"loss": 0.0175, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0146, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.0223, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 1.0857142857142858e-05, |
|
"loss": 0.0173, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 1.0285714285714285e-05, |
|
"loss": 0.0214, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.0185, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.021, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.0191, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0145, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.0187, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.0184, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.0171, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.0167, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.0193, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.0151, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0148, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.0202, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.0155, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.017, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.022, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.0176, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.0158, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.0, |
|
"loss": 0.0116, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"step": 35000, |
|
"train_runtime": 11218.2271, |
|
"train_samples_per_second": 3.12 |
|
} |
|
], |
|
"max_steps": 35000, |
|
"num_train_epochs": 13, |
|
"total_flos": 72522034090868736, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|