{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.6346, "step": 1 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 3.602, "step": 2 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 3.5741, "step": 3 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.4904, "step": 4 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 3.4215, "step": 5 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 3.4568, "step": 6 }, { "epoch": 0.0, "learning_rate": 7.000000000000001e-05, "loss": 3.3978, "step": 7 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 3.4991, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.999999999999999e-05, "loss": 3.5852, "step": 9 }, { "epoch": 0.0, "learning_rate": 0.0001, "loss": 3.3132, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00011, "loss": 3.2737, "step": 11 }, { "epoch": 0.0, "learning_rate": 0.00012, "loss": 3.1399, "step": 12 }, { "epoch": 0.0, "learning_rate": 0.00013000000000000002, "loss": 3.1828, "step": 13 }, { "epoch": 0.0, "learning_rate": 0.00014000000000000001, "loss": 3.0638, "step": 14 }, { "epoch": 0.0, "learning_rate": 0.00015, "loss": 3.185, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00016, "loss": 3.0669, "step": 16 }, { "epoch": 0.0, "learning_rate": 0.00017, "loss": 2.9026, "step": 17 }, { "epoch": 0.0, "learning_rate": 0.00017999999999999998, "loss": 2.9591, "step": 18 }, { "epoch": 0.0, "learning_rate": 0.00019, "loss": 2.8483, "step": 19 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.7707, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00021, "loss": 2.8564, "step": 21 }, { "epoch": 0.0, "learning_rate": 0.00022, "loss": 2.6997, "step": 22 }, { "epoch": 0.0, "learning_rate": 0.00023, "loss": 2.8052, "step": 23 }, { "epoch": 0.0, "learning_rate": 0.00024, "loss": 2.5826, "step": 24 }, { "epoch": 0.0, "learning_rate": 0.00025, "loss": 2.6266, "step": 25 }, { "epoch": 0.0, "learning_rate": 0.00026000000000000003, "loss": 2.6477, "step": 26 }, { "epoch": 0.0, "learning_rate": 0.00027, "loss": 2.6133, "step": 27 }, { "epoch": 0.0, "learning_rate": 0.00028000000000000003, "loss": 2.5395, "step": 28 }, { "epoch": 0.0, "learning_rate": 0.00029, "loss": 2.664, "step": 29 }, { "epoch": 0.0, "learning_rate": 0.0003, "loss": 2.5568, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.00031, "loss": 2.4346, "step": 31 }, { "epoch": 0.0, "learning_rate": 0.00032, "loss": 2.5765, "step": 32 }, { "epoch": 0.0, "learning_rate": 0.00033, "loss": 2.4884, "step": 33 }, { "epoch": 0.0, "learning_rate": 0.00034, "loss": 2.4257, "step": 34 }, { "epoch": 0.0, "learning_rate": 0.00035, "loss": 2.4281, "step": 35 }, { "epoch": 0.0, "learning_rate": 0.00035999999999999997, "loss": 2.4858, "step": 36 }, { "epoch": 0.0, "learning_rate": 0.00037, "loss": 2.383, "step": 37 }, { "epoch": 0.0, "learning_rate": 0.00038, "loss": 2.4865, "step": 38 }, { "epoch": 0.0, "learning_rate": 0.00039000000000000005, "loss": 2.3564, "step": 39 }, { "epoch": 0.0, "learning_rate": 0.0004, "loss": 2.4276, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.00041, "loss": 2.4225, "step": 41 }, { "epoch": 0.0, "learning_rate": 0.00042, "loss": 2.2718, "step": 42 }, { "epoch": 0.0, "learning_rate": 0.00043, "loss": 2.3566, "step": 43 }, { "epoch": 0.0, "learning_rate": 0.00044, "loss": 2.3911, "step": 44 }, { "epoch": 0.0, "learning_rate": 0.00045000000000000004, "loss": 2.3999, "step": 45 }, { "epoch": 0.0, "learning_rate": 0.00046, "loss": 2.2776, "step": 46 }, { "epoch": 0.0, "learning_rate": 0.00047, "loss": 2.3891, "step": 47 }, { "epoch": 0.0, "learning_rate": 0.00048, "loss": 2.3132, "step": 48 }, { "epoch": 0.0, "learning_rate": 0.00049, "loss": 2.3353, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 2.3691, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00051, "loss": 2.3271, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.0005200000000000001, "loss": 2.2881, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.0005300000000000001, "loss": 2.2731, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.00054, "loss": 2.3186, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.00055, "loss": 2.3216, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.0005600000000000001, "loss": 2.3204, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.00057, "loss": 2.1832, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.00058, "loss": 2.4125, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.00059, "loss": 2.2649, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.0006, "loss": 2.1589, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00061, "loss": 2.1431, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.00062, "loss": 2.3738, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.00063, "loss": 2.2525, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.00064, "loss": 2.2778, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.0006500000000000001, "loss": 2.2739, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.00066, "loss": 2.2878, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.00067, "loss": 2.196, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.00068, "loss": 2.2384, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.00069, "loss": 2.1638, "step": 69 }, { "epoch": 0.01, "learning_rate": 0.0007, "loss": 2.1578, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00071, "loss": 2.1781, "step": 71 }, { "epoch": 0.01, "learning_rate": 0.0007199999999999999, "loss": 2.2486, "step": 72 }, { "epoch": 0.01, "learning_rate": 0.00073, "loss": 2.2475, "step": 73 }, { "epoch": 0.01, "learning_rate": 0.00074, "loss": 2.0713, "step": 74 }, { "epoch": 0.01, "learning_rate": 0.00075, "loss": 2.1261, "step": 75 }, { "epoch": 0.01, "learning_rate": 0.00076, "loss": 2.156, "step": 76 }, { "epoch": 0.01, "learning_rate": 0.0007700000000000001, "loss": 2.2273, "step": 77 }, { "epoch": 0.01, "learning_rate": 0.0007800000000000001, "loss": 2.2245, "step": 78 }, { "epoch": 0.01, "learning_rate": 0.00079, "loss": 2.1527, "step": 79 }, { "epoch": 0.01, "learning_rate": 0.0008, "loss": 2.0454, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0008100000000000001, "loss": 2.2643, "step": 81 }, { "epoch": 0.01, "learning_rate": 0.00082, "loss": 2.2447, "step": 82 }, { "epoch": 0.01, "learning_rate": 0.00083, "loss": 2.1656, "step": 83 }, { "epoch": 0.01, "learning_rate": 0.00084, "loss": 2.099, "step": 84 }, { "epoch": 0.01, "learning_rate": 0.00085, "loss": 2.2078, "step": 85 }, { "epoch": 0.01, "learning_rate": 0.00086, "loss": 2.1537, "step": 86 }, { "epoch": 0.01, "learning_rate": 0.00087, "loss": 2.1013, "step": 87 }, { "epoch": 0.01, "learning_rate": 0.00088, "loss": 2.0553, "step": 88 }, { "epoch": 0.01, "learning_rate": 0.0008900000000000001, "loss": 2.1702, "step": 89 }, { "epoch": 0.01, "learning_rate": 0.0009000000000000001, "loss": 2.0685, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.00091, "loss": 2.149, "step": 91 }, { "epoch": 0.01, "learning_rate": 0.00092, "loss": 2.1102, "step": 92 }, { "epoch": 0.01, "learning_rate": 0.00093, "loss": 2.2527, "step": 93 }, { "epoch": 0.01, "learning_rate": 0.00094, "loss": 2.0719, "step": 94 }, { "epoch": 0.01, "learning_rate": 0.00095, "loss": 2.1293, "step": 95 }, { "epoch": 0.01, "learning_rate": 0.00096, "loss": 2.1333, "step": 96 }, { "epoch": 0.01, "learning_rate": 0.0009699999999999999, "loss": 2.1497, "step": 97 }, { "epoch": 0.01, "learning_rate": 0.00098, "loss": 2.0996, "step": 98 }, { "epoch": 0.01, "learning_rate": 0.00099, "loss": 2.1576, "step": 99 }, { "epoch": 0.01, "learning_rate": 0.001, "loss": 2.1483, "step": 100 } ], "max_steps": 10000, "num_train_epochs": 9223372036854775807, "total_flos": 9.635560229376e+16, "trial_name": null, "trial_params": null }