{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.470829068577277,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26,
      "learning_rate": 1.6971721131573792e-08,
      "loss": 0.3252,
      "step": 500
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.6884418039333188e-08,
      "loss": 0.3274,
      "step": 1000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.679711494709258e-08,
      "loss": 0.3279,
      "step": 1500
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.6709811854851976e-08,
      "loss": 0.3214,
      "step": 2000
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.6622508762611372e-08,
      "loss": 0.3215,
      "step": 2500
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.6535205670370764e-08,
      "loss": 0.3244,
      "step": 3000
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.6447902578130157e-08,
      "loss": 0.3225,
      "step": 3500
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.6360599485889553e-08,
      "loss": 0.3185,
      "step": 4000
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.6273296393648945e-08,
      "loss": 0.3157,
      "step": 4500
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.618599330140834e-08,
      "loss": 0.3096,
      "step": 5000
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.6098690209167733e-08,
      "loss": 0.3208,
      "step": 5500
    },
    {
      "epoch": 3.07,
      "learning_rate": 1.601138711692713e-08,
      "loss": 0.3148,
      "step": 6000
    },
    {
      "epoch": 3.33,
      "learning_rate": 1.592408402468652e-08,
      "loss": 0.315,
      "step": 6500
    },
    {
      "epoch": 3.58,
      "learning_rate": 1.5836780932445917e-08,
      "loss": 0.3163,
      "step": 7000
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.574947784020531e-08,
      "loss": 0.3136,
      "step": 7500
    },
    {
      "epoch": 4.09,
      "learning_rate": 1.5662174747964706e-08,
      "loss": 0.3173,
      "step": 8000
    },
    {
      "epoch": 4.35,
      "learning_rate": 1.5574871655724098e-08,
      "loss": 0.3168,
      "step": 8500
    },
    {
      "epoch": 4.61,
      "learning_rate": 1.548756856348349e-08,
      "loss": 0.32,
      "step": 9000
    },
    {
      "epoch": 4.86,
      "learning_rate": 1.5400265471242887e-08,
      "loss": 0.3233,
      "step": 9500
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.5312962379002282e-08,
      "loss": 0.3216,
      "step": 10000
    },
    {
      "epoch": 5.37,
      "learning_rate": 1.5225659286761675e-08,
      "loss": 0.3202,
      "step": 10500
    },
    {
      "epoch": 5.63,
      "learning_rate": 1.513835619452107e-08,
      "loss": 0.309,
      "step": 11000
    },
    {
      "epoch": 5.89,
      "learning_rate": 1.5051053102280463e-08,
      "loss": 0.3077,
      "step": 11500
    },
    {
      "epoch": 6.14,
      "learning_rate": 1.4963750010039856e-08,
      "loss": 0.3099,
      "step": 12000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.4876446917799251e-08,
      "loss": 0.3123,
      "step": 12500
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.4789143825558644e-08,
      "loss": 0.3145,
      "step": 13000
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.4701840733318038e-08,
      "loss": 0.3198,
      "step": 13500
    },
    {
      "epoch": 7.16,
      "learning_rate": 1.4614537641077434e-08,
      "loss": 0.3143,
      "step": 14000
    },
    {
      "epoch": 7.42,
      "learning_rate": 1.4527234548836828e-08,
      "loss": 0.3101,
      "step": 14500
    },
    {
      "epoch": 7.68,
      "learning_rate": 1.4439931456596222e-08,
      "loss": 0.322,
      "step": 15000
    },
    {
      "epoch": 7.93,
      "learning_rate": 1.4352628364355615e-08,
      "loss": 0.3175,
      "step": 15500
    },
    {
      "epoch": 8.19,
      "learning_rate": 1.4265325272115009e-08,
      "loss": 0.3137,
      "step": 16000
    },
    {
      "epoch": 8.44,
      "learning_rate": 1.4178022179874403e-08,
      "loss": 0.315,
      "step": 16500
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.4090719087633797e-08,
      "loss": 0.3162,
      "step": 17000
    },
    {
      "epoch": 8.96,
      "learning_rate": 1.4003415995393191e-08,
      "loss": 0.3107,
      "step": 17500
    },
    {
      "epoch": 9.21,
      "learning_rate": 1.3916112903152587e-08,
      "loss": 0.3148,
      "step": 18000
    },
    {
      "epoch": 9.47,
      "learning_rate": 1.382880981091198e-08,
      "loss": 0.319,
      "step": 18500
    },
    {
      "epoch": 9.72,
      "learning_rate": 1.3741506718671374e-08,
      "loss": 0.3016,
      "step": 19000
    },
    {
      "epoch": 9.98,
      "learning_rate": 1.3654203626430768e-08,
      "loss": 0.3154,
      "step": 19500
    },
    {
      "epoch": 10.24,
      "learning_rate": 1.3566900534190162e-08,
      "loss": 0.3138,
      "step": 20000
    },
    {
      "epoch": 10.49,
      "learning_rate": 1.3479597441949556e-08,
      "loss": 0.3166,
      "step": 20500
    },
    {
      "epoch": 10.75,
      "learning_rate": 1.3392294349708949e-08,
      "loss": 0.3055,
      "step": 21000
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.3304991257468344e-08,
      "loss": 0.3067,
      "step": 21500
    },
    {
      "epoch": 11.26,
      "learning_rate": 1.3217688165227739e-08,
      "loss": 0.3149,
      "step": 22000
    },
    {
      "epoch": 11.51,
      "learning_rate": 1.3130385072987133e-08,
      "loss": 0.3189,
      "step": 22500
    },
    {
      "epoch": 11.77,
      "learning_rate": 1.3043081980746527e-08,
      "loss": 0.3149,
      "step": 23000
    },
    {
      "epoch": 12.03,
      "learning_rate": 1.2955778888505921e-08,
      "loss": 0.3099,
      "step": 23500
    },
    {
      "epoch": 12.28,
      "learning_rate": 1.2868475796265314e-08,
      "loss": 0.3162,
      "step": 24000
    },
    {
      "epoch": 12.54,
      "learning_rate": 1.2781172704024708e-08,
      "loss": 0.3082,
      "step": 24500
    },
    {
      "epoch": 12.79,
      "learning_rate": 1.2693869611784102e-08,
      "loss": 0.3031,
      "step": 25000
    },
    {
      "epoch": 13.05,
      "learning_rate": 1.2606566519543498e-08,
      "loss": 0.3073,
      "step": 25500
    },
    {
      "epoch": 13.31,
      "learning_rate": 1.2519263427302892e-08,
      "loss": 0.3074,
      "step": 26000
    },
    {
      "epoch": 13.56,
      "learning_rate": 1.2431960335062286e-08,
      "loss": 0.3133,
      "step": 26500
    },
    {
      "epoch": 13.82,
      "learning_rate": 1.2344657242821678e-08,
      "loss": 0.3053,
      "step": 27000
    },
    {
      "epoch": 14.07,
      "learning_rate": 1.2257354150581073e-08,
      "loss": 0.3099,
      "step": 27500
    },
    {
      "epoch": 14.33,
      "learning_rate": 1.2170051058340467e-08,
      "loss": 0.3006,
      "step": 28000
    },
    {
      "epoch": 14.59,
      "learning_rate": 1.2082747966099861e-08,
      "loss": 0.3003,
      "step": 28500
    },
    {
      "epoch": 14.84,
      "learning_rate": 1.1995444873859255e-08,
      "loss": 0.3029,
      "step": 29000
    },
    {
      "epoch": 15.1,
      "learning_rate": 1.1908141781618651e-08,
      "loss": 0.3042,
      "step": 29500
    },
    {
      "epoch": 15.35,
      "learning_rate": 1.1820838689378043e-08,
      "loss": 0.3057,
      "step": 30000
    },
    {
      "epoch": 15.61,
      "learning_rate": 1.1733535597137438e-08,
      "loss": 0.3055,
      "step": 30500
    },
    {
      "epoch": 15.86,
      "learning_rate": 1.1646232504896832e-08,
      "loss": 0.3041,
      "step": 31000
    },
    {
      "epoch": 16.12,
      "learning_rate": 1.1558929412656226e-08,
      "loss": 0.2991,
      "step": 31500
    },
    {
      "epoch": 16.38,
      "learning_rate": 1.147162632041562e-08,
      "loss": 0.2968,
      "step": 32000
    },
    {
      "epoch": 16.63,
      "learning_rate": 1.1384323228175012e-08,
      "loss": 0.2946,
      "step": 32500
    },
    {
      "epoch": 16.89,
      "learning_rate": 1.1297020135934407e-08,
      "loss": 0.3071,
      "step": 33000
    },
    {
      "epoch": 17.14,
      "learning_rate": 1.1209717043693802e-08,
      "loss": 0.2997,
      "step": 33500
    },
    {
      "epoch": 17.4,
      "learning_rate": 1.1122413951453197e-08,
      "loss": 0.3017,
      "step": 34000
    },
    {
      "epoch": 17.66,
      "learning_rate": 1.103511085921259e-08,
      "loss": 0.2969,
      "step": 34500
    },
    {
      "epoch": 17.91,
      "learning_rate": 1.0947807766971985e-08,
      "loss": 0.3043,
      "step": 35000
    },
    {
      "epoch": 18.17,
      "learning_rate": 1.0860504674731377e-08,
      "loss": 0.298,
      "step": 35500
    },
    {
      "epoch": 18.42,
      "learning_rate": 1.0773201582490772e-08,
      "loss": 0.2941,
      "step": 36000
    },
    {
      "epoch": 18.68,
      "learning_rate": 1.0685898490250166e-08,
      "loss": 0.2964,
      "step": 36500
    },
    {
      "epoch": 18.94,
      "learning_rate": 1.059859539800956e-08,
      "loss": 0.3014,
      "step": 37000
    },
    {
      "epoch": 19.19,
      "learning_rate": 1.0511292305768956e-08,
      "loss": 0.2944,
      "step": 37500
    },
    {
      "epoch": 19.45,
      "learning_rate": 1.042398921352835e-08,
      "loss": 0.3016,
      "step": 38000
    },
    {
      "epoch": 19.7,
      "learning_rate": 1.0336686121287742e-08,
      "loss": 0.2961,
      "step": 38500
    },
    {
      "epoch": 19.96,
      "learning_rate": 1.0249383029047136e-08,
      "loss": 0.2951,
      "step": 39000
    },
    {
      "epoch": 20.21,
      "learning_rate": 1.016207993680653e-08,
      "loss": 0.2965,
      "step": 39500
    },
    {
      "epoch": 20.47,
      "learning_rate": 1.0074776844565925e-08,
      "loss": 0.302,
      "step": 40000
    }
  ],
  "max_steps": 97700,
  "num_train_epochs": 50,
  "total_flos": 47561366638052736,
  "trial_name": null,
  "trial_params": null
}