{ "best_metric": 6729.03466796875, "best_model_checkpoint": "./coco_outputs/checkpoint-640", "epoch": 5.0, "global_step": 640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 1.96875e-07, "loss": 6346.4363, "step": 10 }, { "epoch": 0.16, "learning_rate": 1.9375e-07, "loss": 6401.3863, "step": 20 }, { "epoch": 0.23, "learning_rate": 1.90625e-07, "loss": 6297.2812, "step": 30 }, { "epoch": 0.31, "learning_rate": 1.875e-07, "loss": 6283.0867, "step": 40 }, { "epoch": 0.39, "learning_rate": 1.8437499999999998e-07, "loss": 6141.998, "step": 50 }, { "epoch": 0.47, "learning_rate": 1.8124999999999999e-07, "loss": 6092.3895, "step": 60 }, { "epoch": 0.55, "learning_rate": 1.78125e-07, "loss": 6228.5742, "step": 70 }, { "epoch": 0.62, "learning_rate": 1.75e-07, "loss": 6152.1148, "step": 80 }, { "epoch": 0.7, "learning_rate": 1.71875e-07, "loss": 6431.5375, "step": 90 }, { "epoch": 0.78, "learning_rate": 1.6875e-07, "loss": 6201.1875, "step": 100 }, { "epoch": 0.86, "learning_rate": 1.65625e-07, "loss": 6414.7582, "step": 110 }, { "epoch": 0.94, "learning_rate": 1.6249999999999998e-07, "loss": 6463.6664, "step": 120 }, { "epoch": 1.0, "eval_loss": 6729.2021484375, "eval_runtime": 17.4768, "eval_samples_per_second": 14.648, "eval_steps_per_second": 1.831, "step": 128 }, { "epoch": 1.02, "learning_rate": 1.5937499999999998e-07, "loss": 6203.2496, "step": 130 }, { "epoch": 1.09, "learning_rate": 1.5624999999999999e-07, "loss": 6195.1863, "step": 140 }, { "epoch": 1.17, "learning_rate": 1.53125e-07, "loss": 6365.1023, "step": 150 }, { "epoch": 1.25, "learning_rate": 1.5e-07, "loss": 6115.4832, "step": 160 }, { "epoch": 1.33, "learning_rate": 1.46875e-07, "loss": 6507.116, "step": 170 }, { "epoch": 1.41, "learning_rate": 1.4375e-07, "loss": 6235.0008, "step": 180 }, { "epoch": 1.48, "learning_rate": 1.40625e-07, "loss": 6424.7523, "step": 190 }, { "epoch": 1.56, "learning_rate": 1.375e-07, "loss": 6032.757, "step": 200 }, { "epoch": 1.64, "learning_rate": 1.3437499999999998e-07, "loss": 5972.9164, "step": 210 }, { "epoch": 1.72, "learning_rate": 1.3125e-07, "loss": 5884.6973, "step": 220 }, { "epoch": 1.8, "learning_rate": 1.28125e-07, "loss": 6157.3324, "step": 230 }, { "epoch": 1.88, "learning_rate": 1.25e-07, "loss": 6338.2691, "step": 240 }, { "epoch": 1.95, "learning_rate": 1.21875e-07, "loss": 6242.4801, "step": 250 }, { "epoch": 2.0, "eval_loss": 6729.1201171875, "eval_runtime": 17.3704, "eval_samples_per_second": 14.738, "eval_steps_per_second": 1.842, "step": 256 }, { "epoch": 2.03, "learning_rate": 1.1875e-07, "loss": 6287.9883, "step": 260 }, { "epoch": 2.11, "learning_rate": 1.1562499999999999e-07, "loss": 6261.4707, "step": 270 }, { "epoch": 2.19, "learning_rate": 1.125e-07, "loss": 6337.7199, "step": 280 }, { "epoch": 2.27, "learning_rate": 1.09375e-07, "loss": 6193.377, "step": 290 }, { "epoch": 2.34, "learning_rate": 1.0624999999999999e-07, "loss": 5926.5789, "step": 300 }, { "epoch": 2.42, "learning_rate": 1.0312499999999999e-07, "loss": 6122.5051, "step": 310 }, { "epoch": 2.5, "learning_rate": 1e-07, "loss": 6157.1656, "step": 320 }, { "epoch": 2.58, "learning_rate": 9.6875e-08, "loss": 6162.8938, "step": 330 }, { "epoch": 2.66, "learning_rate": 9.375e-08, "loss": 6154.5023, "step": 340 }, { "epoch": 2.73, "learning_rate": 9.062499999999999e-08, "loss": 6107.1387, "step": 350 }, { "epoch": 2.81, "learning_rate": 8.75e-08, "loss": 6092.2199, "step": 360 }, { "epoch": 2.89, "learning_rate": 8.4375e-08, "loss": 6291.2375, "step": 370 }, { "epoch": 2.97, "learning_rate": 8.124999999999999e-08, "loss": 6344.1949, "step": 380 }, { "epoch": 3.0, "eval_loss": 6729.05859375, "eval_runtime": 17.488, "eval_samples_per_second": 14.639, "eval_steps_per_second": 1.83, "step": 384 }, { "epoch": 3.05, "learning_rate": 7.812499999999999e-08, "loss": 6475.0023, "step": 390 }, { "epoch": 3.12, "learning_rate": 7.5e-08, "loss": 6335.7672, "step": 400 }, { "epoch": 3.2, "learning_rate": 7.1875e-08, "loss": 6222.3957, "step": 410 }, { "epoch": 3.28, "learning_rate": 6.875e-08, "loss": 6346.1539, "step": 420 }, { "epoch": 3.36, "learning_rate": 6.5625e-08, "loss": 6196.7348, "step": 430 }, { "epoch": 3.44, "learning_rate": 6.25e-08, "loss": 6440.0867, "step": 440 }, { "epoch": 3.52, "learning_rate": 5.9375e-08, "loss": 6186.3539, "step": 450 }, { "epoch": 3.59, "learning_rate": 5.625e-08, "loss": 6364.8961, "step": 460 }, { "epoch": 3.67, "learning_rate": 5.3124999999999994e-08, "loss": 6580.9484, "step": 470 }, { "epoch": 3.75, "learning_rate": 5e-08, "loss": 6384.6832, "step": 480 }, { "epoch": 3.83, "learning_rate": 4.6875e-08, "loss": 6195.5973, "step": 490 }, { "epoch": 3.91, "learning_rate": 4.375e-08, "loss": 6409.9172, "step": 500 }, { "epoch": 3.98, "learning_rate": 4.0624999999999995e-08, "loss": 6528.6664, "step": 510 }, { "epoch": 4.0, "eval_loss": 6729.0419921875, "eval_runtime": 17.1328, "eval_samples_per_second": 14.942, "eval_steps_per_second": 1.868, "step": 512 }, { "epoch": 4.06, "learning_rate": 3.75e-08, "loss": 6024.7227, "step": 520 }, { "epoch": 4.14, "learning_rate": 3.4375e-08, "loss": 5942.5199, "step": 530 }, { "epoch": 4.22, "learning_rate": 3.125e-08, "loss": 6240.5281, "step": 540 }, { "epoch": 4.3, "learning_rate": 2.8125e-08, "loss": 6593.8445, "step": 550 }, { "epoch": 4.38, "learning_rate": 2.5e-08, "loss": 6542.1891, "step": 560 }, { "epoch": 4.45, "learning_rate": 2.1875e-08, "loss": 6388.3813, "step": 570 }, { "epoch": 4.53, "learning_rate": 1.875e-08, "loss": 6358.7457, "step": 580 }, { "epoch": 4.61, "learning_rate": 1.5625e-08, "loss": 5996.3961, "step": 590 }, { "epoch": 4.69, "learning_rate": 1.25e-08, "loss": 6177.6547, "step": 600 }, { "epoch": 4.77, "learning_rate": 9.375e-09, "loss": 6032.8859, "step": 610 }, { "epoch": 4.84, "learning_rate": 6.25e-09, "loss": 6523.9477, "step": 620 }, { "epoch": 4.92, "learning_rate": 3.125e-09, "loss": 6293.6355, "step": 630 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 5746.1234, "step": 640 }, { "epoch": 5.0, "eval_loss": 6729.03466796875, "eval_runtime": 17.3893, "eval_samples_per_second": 14.722, "eval_steps_per_second": 1.84, "step": 640 }, { "epoch": 5.0, "step": 640, "total_flos": 2.4480067682304e+18, "train_loss": 6251.508282470703, "train_runtime": 838.0786, "train_samples_per_second": 6.109, "train_steps_per_second": 0.764 } ], "max_steps": 640, "num_train_epochs": 5, "total_flos": 2.4480067682304e+18, "trial_name": null, "trial_params": null }