|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9991488731821041, |
|
"global_step": 27000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0019629944861784404, |
|
"loss": 0.0147, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0019259889723568812, |
|
"loss": 0.0187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0018889834585353218, |
|
"loss": 0.0201, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0018519779447137624, |
|
"loss": 0.0176, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.001814972430892203, |
|
"loss": 0.0177, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0017779669170706436, |
|
"loss": 0.0175, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0017409614032490842, |
|
"loss": 0.0177, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0017039558894275247, |
|
"loss": 0.0185, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0016669503756059653, |
|
"loss": 0.0179, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.001629944861784406, |
|
"loss": 0.0188, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0015929393479628465, |
|
"loss": 0.0181, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0015559338341412869, |
|
"loss": 0.0192, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0015189283203197277, |
|
"loss": 0.0172, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0014819228064981683, |
|
"loss": 0.0153, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014449172926766088, |
|
"loss": 0.0154, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0014079117788550494, |
|
"loss": 0.0163, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00137090626503349, |
|
"loss": 0.0161, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0013339007512119306, |
|
"loss": 0.0149, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0012968952373903712, |
|
"loss": 0.016, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0012598897235688118, |
|
"loss": 0.015, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0012228842097472524, |
|
"loss": 0.0145, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.001185878695925693, |
|
"loss": 0.0145, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0011488731821041335, |
|
"loss": 0.0143, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0011118676682825741, |
|
"loss": 0.0136, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0010748621544610147, |
|
"loss": 0.0148, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0010378566406394553, |
|
"loss": 0.0141, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.001000851126817896, |
|
"loss": 0.0132, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0009638456129963365, |
|
"loss": 0.0128, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0009268400991747771, |
|
"loss": 0.0128, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008898345853532175, |
|
"loss": 0.0128, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0008528290715316582, |
|
"loss": 0.0123, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0008158235577100988, |
|
"loss": 0.0119, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007788180438885394, |
|
"loss": 0.0124, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00074181253006698, |
|
"loss": 0.0116, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007048070162454206, |
|
"loss": 0.0118, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0006678015024238612, |
|
"loss": 0.0114, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0006307959886023018, |
|
"loss": 0.0114, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0005937904747807424, |
|
"loss": 0.0111, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0005567849609591829, |
|
"loss": 0.0105, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0005197794471376235, |
|
"loss": 0.0107, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004827739333160641, |
|
"loss": 0.0102, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004457684194945047, |
|
"loss": 0.0102, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004087629056729453, |
|
"loss": 0.0099, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0003717573918513859, |
|
"loss": 0.0101, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0003347518780298264, |
|
"loss": 0.0095, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00029774636420826705, |
|
"loss": 0.0097, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00026074085038670764, |
|
"loss": 0.0094, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00022373533656514823, |
|
"loss": 0.0094, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001867298227435888, |
|
"loss": 0.0095, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00014972430892202937, |
|
"loss": 0.0089, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00011271879510046997, |
|
"loss": 0.0087, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.571328127891056e-05, |
|
"loss": 0.009, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.870776745735115e-05, |
|
"loss": 0.0088, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.702253635791733e-06, |
|
"loss": 0.0088, |
|
"step": 27000 |
|
} |
|
], |
|
"max_steps": 27023, |
|
"num_train_epochs": 1, |
|
"total_flos": 8.7701487353856e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|