{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 0.0482994566311129, | |
"global_step": 120, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 3.9383, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 4.297, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 4.4497, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 4.1237, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 3.9559, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 3.437, | |
"step": 6 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.5411, | |
"step": 7 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.796, | |
"step": 8 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.3798, | |
"step": 9 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.3777, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 3.0632, | |
"step": 11 | |
}, | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.0002, | |
"loss": 2.553, | |
"step": 12 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.6332, | |
"step": 13 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.2341, | |
"step": 14 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.4234, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.1505, | |
"step": 16 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.6074, | |
"step": 17 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.1098, | |
"step": 18 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.4064, | |
"step": 19 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.0261, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.3355, | |
"step": 21 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.2424, | |
"step": 22 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.9745, | |
"step": 23 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.1781, | |
"step": 24 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.9489, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.0495, | |
"step": 26 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.13, | |
"step": 27 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.7854, | |
"step": 28 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.2759, | |
"step": 29 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.8816, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.9239, | |
"step": 31 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.963, | |
"step": 32 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.9837, | |
"step": 33 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.2882, | |
"step": 34 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.501, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 2.2086, | |
"step": 36 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.0002, | |
"loss": 1.8153, | |
"step": 37 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.906, | |
"step": 38 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.2252, | |
"step": 39 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.1101, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.9331, | |
"step": 41 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.2006, | |
"step": 42 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.0551, | |
"step": 43 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.8635, | |
"step": 44 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.7884, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.8641, | |
"step": 46 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.9707, | |
"step": 47 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.638, | |
"step": 48 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.6951, | |
"step": 49 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 1.6499, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 4.0906, | |
"step": 51 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 3.8166, | |
"step": 52 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 3.2648, | |
"step": 53 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.8758, | |
"step": 54 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.4187, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.366, | |
"step": 56 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.9111, | |
"step": 57 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.1172, | |
"step": 58 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.2097, | |
"step": 59 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.214, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.2931, | |
"step": 61 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.0002, | |
"loss": 2.3941, | |
"step": 62 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.0872, | |
"step": 63 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.9758, | |
"step": 64 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.1443, | |
"step": 65 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.8564, | |
"step": 66 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.2959, | |
"step": 67 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.111, | |
"step": 68 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.3046, | |
"step": 69 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.7973, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.2716, | |
"step": 71 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.0394, | |
"step": 72 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.3104, | |
"step": 73 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.2478, | |
"step": 74 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.9765, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.3162, | |
"step": 76 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.1498, | |
"step": 77 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.9034, | |
"step": 78 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.1262, | |
"step": 79 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.1205, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.7249, | |
"step": 81 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.0516, | |
"step": 82 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.6536, | |
"step": 83 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 2.1003, | |
"step": 84 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.9858, | |
"step": 85 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.0002, | |
"loss": 1.9845, | |
"step": 86 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.1915, | |
"step": 87 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.9285, | |
"step": 88 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.7922, | |
"step": 89 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.0, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.0684, | |
"step": 91 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.5623, | |
"step": 92 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.6741, | |
"step": 93 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.9877, | |
"step": 94 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.8461, | |
"step": 95 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.7781, | |
"step": 96 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.6342, | |
"step": 97 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.5765, | |
"step": 98 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.7775, | |
"step": 99 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.7932, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 3.0298, | |
"step": 101 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.9655, | |
"step": 102 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.8451, | |
"step": 103 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.5459, | |
"step": 104 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.3641, | |
"step": 105 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.1216, | |
"step": 106 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.1987, | |
"step": 107 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.9505, | |
"step": 108 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 1.6535, | |
"step": 109 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.2628, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.0002, | |
"loss": 2.0268, | |
"step": 111 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.0586, | |
"step": 112 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.1307, | |
"step": 113 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.9611, | |
"step": 114 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.3489, | |
"step": 115 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.73, | |
"step": 116 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.1855, | |
"step": 117 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.138, | |
"step": 118 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 2.0705, | |
"step": 119 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.0002, | |
"loss": 1.8792, | |
"step": 120 | |
} | |
], | |
"max_steps": 120, | |
"num_train_epochs": 1, | |
"total_flos": 479793488348160.0, | |
"trial_name": null, | |
"trial_params": null | |
} | |