{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.470829068577277, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 1.6971721131573792e-08, "loss": 0.3252, "step": 500 }, { "epoch": 0.51, "learning_rate": 1.6884418039333188e-08, "loss": 0.3274, "step": 1000 }, { "epoch": 0.77, "learning_rate": 1.679711494709258e-08, "loss": 0.3279, "step": 1500 }, { "epoch": 1.02, "learning_rate": 1.6709811854851976e-08, "loss": 0.3214, "step": 2000 }, { "epoch": 1.28, "learning_rate": 1.6622508762611372e-08, "loss": 0.3215, "step": 2500 }, { "epoch": 1.54, "learning_rate": 1.6535205670370764e-08, "loss": 0.3244, "step": 3000 }, { "epoch": 1.79, "learning_rate": 1.6447902578130157e-08, "loss": 0.3225, "step": 3500 }, { "epoch": 2.05, "learning_rate": 1.6360599485889553e-08, "loss": 0.3185, "step": 4000 }, { "epoch": 2.3, "learning_rate": 1.6273296393648945e-08, "loss": 0.3157, "step": 4500 }, { "epoch": 2.56, "learning_rate": 1.618599330140834e-08, "loss": 0.3096, "step": 5000 }, { "epoch": 2.81, "learning_rate": 1.6098690209167733e-08, "loss": 0.3208, "step": 5500 }, { "epoch": 3.07, "learning_rate": 1.601138711692713e-08, "loss": 0.3148, "step": 6000 }, { "epoch": 3.33, "learning_rate": 1.592408402468652e-08, "loss": 0.315, "step": 6500 }, { "epoch": 3.58, "learning_rate": 1.5836780932445917e-08, "loss": 0.3163, "step": 7000 }, { "epoch": 3.84, "learning_rate": 1.574947784020531e-08, "loss": 0.3136, "step": 7500 }, { "epoch": 4.09, "learning_rate": 1.5662174747964706e-08, "loss": 0.3173, "step": 8000 }, { "epoch": 4.35, "learning_rate": 1.5574871655724098e-08, "loss": 0.3168, "step": 8500 }, { "epoch": 4.61, "learning_rate": 1.548756856348349e-08, "loss": 0.32, "step": 9000 }, { "epoch": 4.86, "learning_rate": 1.5400265471242887e-08, "loss": 0.3233, "step": 9500 }, { "epoch": 5.12, "learning_rate": 1.5312962379002282e-08, "loss": 0.3216, "step": 10000 }, { "epoch": 5.37, "learning_rate": 1.5225659286761675e-08, "loss": 0.3202, "step": 10500 }, { "epoch": 5.63, "learning_rate": 1.513835619452107e-08, "loss": 0.309, "step": 11000 }, { "epoch": 5.89, "learning_rate": 1.5051053102280463e-08, "loss": 0.3077, "step": 11500 }, { "epoch": 6.14, "learning_rate": 1.4963750010039856e-08, "loss": 0.3099, "step": 12000 }, { "epoch": 6.4, "learning_rate": 1.4876446917799251e-08, "loss": 0.3123, "step": 12500 }, { "epoch": 6.65, "learning_rate": 1.4789143825558644e-08, "loss": 0.3145, "step": 13000 }, { "epoch": 6.91, "learning_rate": 1.4701840733318038e-08, "loss": 0.3198, "step": 13500 }, { "epoch": 7.16, "learning_rate": 1.4614537641077434e-08, "loss": 0.3143, "step": 14000 }, { "epoch": 7.42, "learning_rate": 1.4527234548836828e-08, "loss": 0.3101, "step": 14500 }, { "epoch": 7.68, "learning_rate": 1.4439931456596222e-08, "loss": 0.322, "step": 15000 }, { "epoch": 7.93, "learning_rate": 1.4352628364355615e-08, "loss": 0.3175, "step": 15500 }, { "epoch": 8.19, "learning_rate": 1.4265325272115009e-08, "loss": 0.3137, "step": 16000 }, { "epoch": 8.44, "learning_rate": 1.4178022179874403e-08, "loss": 0.315, "step": 16500 }, { "epoch": 8.7, "learning_rate": 1.4090719087633797e-08, "loss": 0.3162, "step": 17000 }, { "epoch": 8.96, "learning_rate": 1.4003415995393191e-08, "loss": 0.3107, "step": 17500 }, { "epoch": 9.21, "learning_rate": 1.3916112903152587e-08, "loss": 0.3148, "step": 18000 }, { "epoch": 9.47, "learning_rate": 1.382880981091198e-08, "loss": 0.319, "step": 18500 }, { "epoch": 9.72, "learning_rate": 1.3741506718671374e-08, "loss": 0.3016, "step": 19000 }, { "epoch": 9.98, "learning_rate": 1.3654203626430768e-08, "loss": 0.3154, "step": 19500 }, { "epoch": 10.24, "learning_rate": 1.3566900534190162e-08, "loss": 0.3138, "step": 20000 }, { "epoch": 10.49, "learning_rate": 1.3479597441949556e-08, "loss": 0.3166, "step": 20500 }, { "epoch": 10.75, "learning_rate": 1.3392294349708949e-08, "loss": 0.3055, "step": 21000 }, { "epoch": 11.0, "learning_rate": 1.3304991257468344e-08, "loss": 0.3067, "step": 21500 }, { "epoch": 11.26, "learning_rate": 1.3217688165227739e-08, "loss": 0.3149, "step": 22000 }, { "epoch": 11.51, "learning_rate": 1.3130385072987133e-08, "loss": 0.3189, "step": 22500 }, { "epoch": 11.77, "learning_rate": 1.3043081980746527e-08, "loss": 0.3149, "step": 23000 }, { "epoch": 12.03, "learning_rate": 1.2955778888505921e-08, "loss": 0.3099, "step": 23500 }, { "epoch": 12.28, "learning_rate": 1.2868475796265314e-08, "loss": 0.3162, "step": 24000 }, { "epoch": 12.54, "learning_rate": 1.2781172704024708e-08, "loss": 0.3082, "step": 24500 }, { "epoch": 12.79, "learning_rate": 1.2693869611784102e-08, "loss": 0.3031, "step": 25000 }, { "epoch": 13.05, "learning_rate": 1.2606566519543498e-08, "loss": 0.3073, "step": 25500 }, { "epoch": 13.31, "learning_rate": 1.2519263427302892e-08, "loss": 0.3074, "step": 26000 }, { "epoch": 13.56, "learning_rate": 1.2431960335062286e-08, "loss": 0.3133, "step": 26500 }, { "epoch": 13.82, "learning_rate": 1.2344657242821678e-08, "loss": 0.3053, "step": 27000 }, { "epoch": 14.07, "learning_rate": 1.2257354150581073e-08, "loss": 0.3099, "step": 27500 }, { "epoch": 14.33, "learning_rate": 1.2170051058340467e-08, "loss": 0.3006, "step": 28000 }, { "epoch": 14.59, "learning_rate": 1.2082747966099861e-08, "loss": 0.3003, "step": 28500 }, { "epoch": 14.84, "learning_rate": 1.1995444873859255e-08, "loss": 0.3029, "step": 29000 }, { "epoch": 15.1, "learning_rate": 1.1908141781618651e-08, "loss": 0.3042, "step": 29500 }, { "epoch": 15.35, "learning_rate": 1.1820838689378043e-08, "loss": 0.3057, "step": 30000 }, { "epoch": 15.61, "learning_rate": 1.1733535597137438e-08, "loss": 0.3055, "step": 30500 }, { "epoch": 15.86, "learning_rate": 1.1646232504896832e-08, "loss": 0.3041, "step": 31000 }, { "epoch": 16.12, "learning_rate": 1.1558929412656226e-08, "loss": 0.2991, "step": 31500 }, { "epoch": 16.38, "learning_rate": 1.147162632041562e-08, "loss": 0.2968, "step": 32000 }, { "epoch": 16.63, "learning_rate": 1.1384323228175012e-08, "loss": 0.2946, "step": 32500 }, { "epoch": 16.89, "learning_rate": 1.1297020135934407e-08, "loss": 0.3071, "step": 33000 }, { "epoch": 17.14, "learning_rate": 1.1209717043693802e-08, "loss": 0.2997, "step": 33500 }, { "epoch": 17.4, "learning_rate": 1.1122413951453197e-08, "loss": 0.3017, "step": 34000 }, { "epoch": 17.66, "learning_rate": 1.103511085921259e-08, "loss": 0.2969, "step": 34500 }, { "epoch": 17.91, "learning_rate": 1.0947807766971985e-08, "loss": 0.3043, "step": 35000 }, { "epoch": 18.17, "learning_rate": 1.0860504674731377e-08, "loss": 0.298, "step": 35500 }, { "epoch": 18.42, "learning_rate": 1.0773201582490772e-08, "loss": 0.2941, "step": 36000 }, { "epoch": 18.68, "learning_rate": 1.0685898490250166e-08, "loss": 0.2964, "step": 36500 }, { "epoch": 18.94, "learning_rate": 1.059859539800956e-08, "loss": 0.3014, "step": 37000 }, { "epoch": 19.19, "learning_rate": 1.0511292305768956e-08, "loss": 0.2944, "step": 37500 }, { "epoch": 19.45, "learning_rate": 1.042398921352835e-08, "loss": 0.3016, "step": 38000 }, { "epoch": 19.7, "learning_rate": 1.0336686121287742e-08, "loss": 0.2961, "step": 38500 }, { "epoch": 19.96, "learning_rate": 1.0249383029047136e-08, "loss": 0.2951, "step": 39000 }, { "epoch": 20.21, "learning_rate": 1.016207993680653e-08, "loss": 0.2965, "step": 39500 }, { "epoch": 20.47, "learning_rate": 1.0074776844565925e-08, "loss": 0.302, "step": 40000 } ], "max_steps": 97700, "num_train_epochs": 50, "total_flos": 47561366638052736, "trial_name": null, "trial_params": null }