{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9989949748743718,
  "eval_steps": 125,
  "global_step": 497,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 2.5221, "step": 1 },
    { "epoch": 0.0, "eval_loss": 2.548204183578491, "eval_runtime": 51.7546, "eval_samples_per_second": 24.288, "eval_steps_per_second": 1.024, "step": 1 },
    { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 2.5234, "step": 2 },
    { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 2.55, "step": 3 },
    { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 2.5209, "step": 4 },
    { "epoch": 0.01, "learning_rate": 2e-05, "loss": 2.5177, "step": 5 },
    { "epoch": 0.01, "learning_rate": 2.4e-05, "loss": 2.4932, "step": 6 },
    { "epoch": 0.01, "learning_rate": 2.8000000000000003e-05, "loss": 2.495, "step": 7 },
    { "epoch": 0.02, "learning_rate": 3.2000000000000005e-05, "loss": 2.4618, "step": 8 },
    { "epoch": 0.02, "learning_rate": 3.6e-05, "loss": 2.4603, "step": 9 },
    { "epoch": 0.02, "learning_rate": 4e-05, "loss": 2.4364, "step": 10 },
    { "epoch": 0.02, "learning_rate": 4.4000000000000006e-05, "loss": 2.4445, "step": 11 },
    { "epoch": 0.02, "learning_rate": 4.8e-05, "loss": 2.4126, "step": 12 },
    { "epoch": 0.03, "learning_rate": 5.2000000000000004e-05, "loss": 2.4573, "step": 13 },
    { "epoch": 0.03, "learning_rate": 5.6000000000000006e-05, "loss": 2.4604, "step": 14 },
    { "epoch": 0.03, "learning_rate": 6e-05, "loss": 2.4551, "step": 15 },
    { "epoch": 0.03, "learning_rate": 6.400000000000001e-05, "loss": 2.4235, "step": 16 },
    { "epoch": 0.03, "learning_rate": 6.800000000000001e-05, "loss": 2.442, "step": 17 },
    { "epoch": 0.04, "learning_rate": 7.2e-05, "loss": 2.4189, "step": 18 },
    { "epoch": 0.04, "learning_rate": 7.6e-05, "loss": 2.4027, "step": 19 },
    { "epoch": 0.04, "learning_rate": 8e-05, "loss": 2.4114, "step": 20 },
    { "epoch": 0.04, "learning_rate": 8.4e-05, "loss": 2.4136, "step": 21 },
    { "epoch": 0.04, "learning_rate": 8.800000000000001e-05, "loss": 2.4136, "step": 22 },
    { "epoch": 0.05, "learning_rate": 9.200000000000001e-05, "loss": 2.4229, "step": 23 },
    { "epoch": 0.05, "learning_rate": 9.6e-05, "loss": 2.4245, "step": 24 },
    { "epoch": 0.05, "learning_rate": 0.0001, "loss": 2.3903, "step": 25 },
    { "epoch": 0.05, "learning_rate": 0.00010400000000000001, "loss": 2.4154, "step": 26 },
    { "epoch": 0.05, "learning_rate": 0.00010800000000000001, "loss": 2.46, "step": 27 },
    { "epoch": 0.06, "learning_rate": 0.00011200000000000001, "loss": 2.4382, "step": 28 },
    { "epoch": 0.06, "learning_rate": 0.000116, "loss": 2.3889, "step": 29 },
    { "epoch": 0.06, "learning_rate": 0.00012, "loss": 2.4225, "step": 30 },
    { "epoch": 0.06, "learning_rate": 0.000124, "loss": 2.4394, "step": 31 },
    { "epoch": 0.06, "learning_rate": 0.00012800000000000002, "loss": 2.4058, "step": 32 },
    { "epoch": 0.07, "learning_rate": 0.000132, "loss": 2.448, "step": 33 },
    { "epoch": 0.07, "learning_rate": 0.00013600000000000003, "loss": 2.4017, "step": 34 },
    { "epoch": 0.07, "learning_rate": 0.00014, "loss": 2.4179, "step": 35 },
    { "epoch": 0.07, "learning_rate": 0.000144, "loss": 2.4142, "step": 36 },
    { "epoch": 0.07, "learning_rate": 0.000148, "loss": 2.419, "step": 37 },
    { "epoch": 0.08, "learning_rate": 0.000152, "loss": 2.3895, "step": 38 },
    { "epoch": 0.08, "learning_rate": 0.00015600000000000002, "loss": 2.3415, "step": 39 },
    { "epoch": 0.08, "learning_rate": 0.00016, "loss": 2.4038, "step": 40 },
    { "epoch": 0.08, "learning_rate": 0.000164, "loss": 2.415, "step": 41 },
    { "epoch": 0.08, "learning_rate": 0.000168, "loss": 2.3996, "step": 42 },
    { "epoch": 0.09, "learning_rate": 0.000172, "loss": 2.4102, "step": 43 },
    { "epoch": 0.09, "learning_rate": 0.00017600000000000002, "loss": 2.4241, "step": 44 },
    { "epoch": 0.09, "learning_rate": 0.00018, "loss": 2.4232, "step": 45 },
    { "epoch": 0.09, "learning_rate": 0.00018400000000000003, "loss": 2.4156, "step": 46 },
    { "epoch": 0.09, "learning_rate": 0.000188, "loss": 2.4106, "step": 47 },
    { "epoch": 0.1, "learning_rate": 0.000192, "loss": 2.4124, "step": 48 },
    { "epoch": 0.1, "learning_rate": 0.000196, "loss": 2.4515, "step": 49 },
    { "epoch": 0.1, "learning_rate": 0.0002, "loss": 2.3911, "step": 50 },
    { "epoch": 0.1, "learning_rate": 0.00019999753025044538, "loss": 2.437, "step": 51 },
    { "epoch": 0.1, "learning_rate": 0.00019999012112377473, "loss": 2.4317, "step": 52 },
    { "epoch": 0.11, "learning_rate": 0.0001999777729859618, "loss": 2.4115, "step": 53 },
    { "epoch": 0.11, "learning_rate": 0.00019996048644694282, "loss": 2.482, "step": 54 },
    { "epoch": 0.11, "learning_rate": 0.00019993826236058612, "loss": 2.4374, "step": 55 },
    { "epoch": 0.11, "learning_rate": 0.00019991110182465032, "loss": 2.4609, "step": 56 },
    { "epoch": 0.11, "learning_rate": 0.0001998790061807298, "loss": 2.4661, "step": 57 },
    { "epoch": 0.12, "learning_rate": 0.0001998419770141887, "loss": 2.4656, "step": 58 },
    { "epoch": 0.12, "learning_rate": 0.00019980001615408228, "loss": 2.4659, "step": 59 },
    { "epoch": 0.12, "learning_rate": 0.00019975312567306687, "loss": 2.4142, "step": 60 },
    { "epoch": 0.12, "learning_rate": 0.00019970130788729738, "loss": 2.4815, "step": 61 },
    { "epoch": 0.12, "learning_rate": 0.00019964456535631286, "loss": 2.4746, "step": 62 },
    { "epoch": 0.13, "learning_rate": 0.00019958290088291011, "loss": 2.4358, "step": 63 },
    { "epoch": 0.13, "learning_rate": 0.00019951631751300531, "loss": 2.4215, "step": 64 },
    { "epoch": 0.13, "learning_rate": 0.00019944481853548335, "loss": 2.4716, "step": 65 },
    { "epoch": 0.13, "learning_rate": 0.00019936840748203561, "loss": 2.3693, "step": 66 },
    { "epoch": 0.13, "learning_rate": 0.00019928708812698545, "loss": 2.4516, "step": 67 },
    { "epoch": 0.14, "learning_rate": 0.0001992008644871016, "loss": 2.4226, "step": 68 },
    { "epoch": 0.14, "learning_rate": 0.00019910974082140004, "loss": 2.4769, "step": 69 },
    { "epoch": 0.14, "learning_rate": 0.0001990137216309334, "loss": 2.4157, "step": 70 },
    { "epoch": 0.14, "learning_rate": 0.00019891281165856873, "loss": 2.5004, "step": 71 },
    { "epoch": 0.14, "learning_rate": 0.00019880701588875327, "loss": 2.4319, "step": 72 },
    { "epoch": 0.15, "learning_rate": 0.00019869633954726807, "loss": 2.4563, "step": 73 },
    { "epoch": 0.15, "learning_rate": 0.00019858078810097002, "loss": 2.4156, "step": 74 },
    { "epoch": 0.15, "learning_rate": 0.00019846036725752186, "loss": 2.409, "step": 75 },
    { "epoch": 0.15, "learning_rate": 0.00019833508296511002, "loss": 2.4533, "step": 76 },
    { "epoch": 0.15, "learning_rate": 0.00019820494141215104, "loss": 2.4374, "step": 77 },
    { "epoch": 0.16, "learning_rate": 0.00019806994902698573, "loss": 2.3888, "step": 78 },
    { "epoch": 0.16, "learning_rate": 0.00019793011247756174, "loss": 2.4201, "step": 79 },
    { "epoch": 0.16, "learning_rate": 0.00019778543867110426, "loss": 2.4034, "step": 80 },
    { "epoch": 0.16, "learning_rate": 0.00019763593475377462, "loss": 2.442, "step": 81 },
    { "epoch": 0.16, "learning_rate": 0.00019748160811031747, "loss": 2.3813, "step": 82 },
    { "epoch": 0.17, "learning_rate": 0.00019732246636369605, "loss": 2.3356, "step": 83 },
    { "epoch": 0.17, "learning_rate": 0.00019715851737471546, "loss": 2.3447, "step": 84 },
    { "epoch": 0.17, "learning_rate": 0.00019698976924163456, "loss": 2.3692, "step": 85 },
    { "epoch": 0.17, "learning_rate": 0.00019681623029976588, "loss": 2.3599, "step": 86 },
    { "epoch": 0.17, "learning_rate": 0.00019663790912106393, "loss": 2.4325, "step": 87 },
    { "epoch": 0.18, "learning_rate": 0.00019645481451370172, "loss": 2.4405, "step": 88 },
    { "epoch": 0.18, "learning_rate": 0.00019626695552163578, "loss": 2.397, "step": 89 },
    { "epoch": 0.18, "learning_rate": 0.0001960743414241593, "loss": 2.3989, "step": 90 },
    { "epoch": 0.18, "learning_rate": 0.00019587698173544396, "loss": 2.3333, "step": 91 },
    { "epoch": 0.18, "learning_rate": 0.00019567488620406983, "loss": 2.3499, "step": 92 },
    { "epoch": 0.19, "learning_rate": 0.0001954680648125438, "loss": 2.3654, "step": 93 },
    { "epoch": 0.19, "learning_rate": 0.00019525652777680676, "loss": 2.3604, "step": 94 },
    { "epoch": 0.19, "learning_rate": 0.00019504028554572864, "loss": 2.368, "step": 95 },
    { "epoch": 0.19, "learning_rate": 0.00019481934880059257, "loss": 2.3225, "step": 96 },
    { "epoch": 0.19, "learning_rate": 0.00019459372845456705, "loss": 2.3822, "step": 97 },
    { "epoch": 0.2, "learning_rate": 0.00019436343565216711, "loss": 2.3697, "step": 98 },
    { "epoch": 0.2, "learning_rate": 0.00019412848176870363, "loss": 2.349, "step": 99 },
    { "epoch": 0.2, "learning_rate": 0.00019388887840972164, "loss": 2.3015, "step": 100 },
    { "epoch": 0.2, "learning_rate": 0.00019364463741042694, "loss": 2.4336, "step": 101 },
    { "epoch": 0.21, "learning_rate": 0.00019339577083510144, "loss": 2.2816, "step": 102 },
    { "epoch": 0.21, "learning_rate": 0.00019314229097650742, "loss": 2.3033, "step": 103 },
    { "epoch": 0.21, "learning_rate": 0.00019288421035528028, "loss": 2.372, "step": 104 },
    { "epoch": 0.21, "learning_rate": 0.00019262154171931, "loss": 2.2949, "step": 105 },
    { "epoch": 0.21, "learning_rate": 0.0001923542980431115, "loss": 2.3972, "step": 106 },
    { "epoch": 0.22, "learning_rate": 0.0001920824925271838, "loss": 2.3624, "step": 107 },
    { "epoch": 0.22, "learning_rate": 0.00019180613859735791, "loss": 2.4214, "step": 108 },
    { "epoch": 0.22, "learning_rate": 0.0001915252499041338, "loss": 2.2428, "step": 109 },
    { "epoch": 0.22, "learning_rate": 0.00019123984032200586, "loss": 2.3559, "step": 110 },
    { "epoch": 0.22, "learning_rate": 0.00019094992394877794, "loss": 2.3033, "step": 111 },
    { "epoch": 0.23, "learning_rate": 0.0001906555151048667, "loss": 2.3734, "step": 112 },
    { "epoch": 0.23, "learning_rate": 0.00019035662833259432, "loss": 2.2408, "step": 113 },
    { "epoch": 0.23, "learning_rate": 0.0001900532783954703, "loss": 2.3528, "step": 114 },
    { "epoch": 0.23, "learning_rate": 0.0001897454802774621, "loss": 2.2373, "step": 115 },
    { "epoch": 0.23, "learning_rate": 0.00018943324918225494, "loss": 2.3138, "step": 116 },
    { "epoch": 0.24, "learning_rate": 0.00018911660053250103, "loss": 2.2331, "step": 117 },
    { "epoch": 0.24, "learning_rate": 0.00018879554996905766, "loss": 2.2188, "step": 118 },
    { "epoch": 0.24, "learning_rate": 0.00018847011335021449, "loss": 2.2649, "step": 119 },
    { "epoch": 0.24, "learning_rate": 0.0001881403067509104, "loss": 2.2905, "step": 120 },
    { "epoch": 0.24, "learning_rate": 0.00018780614646193942, "loss": 2.2859, "step": 121 },
    { "epoch": 0.25, "learning_rate": 0.0001874676489891461, "loss": 2.1971, "step": 122 },
    { "epoch": 0.25, "learning_rate": 0.00018712483105261005, "loss": 2.2541, "step": 123 },
    { "epoch": 0.25, "learning_rate": 0.00018677770958582023, "loss": 2.2179, "step": 124 },
    { "epoch": 0.25, "learning_rate": 0.00018642630173483832, "loss": 2.2198, "step": 125 },
    { "epoch": 0.25, "eval_loss": 2.3039979934692383, "eval_runtime": 51.9209, "eval_samples_per_second": 24.21, "eval_steps_per_second": 1.021, "step": 125 },
    { "epoch": 0.25, "learning_rate": 0.00018607062485745212, "loss": 2.332, "step": 126 },
    { "epoch": 0.26, "learning_rate": 0.0001857106965223177, "loss": 2.2744, "step": 127 },
    { "epoch": 0.26, "learning_rate": 0.00018534653450809197, "loss": 2.2837, "step": 128 },
    { "epoch": 0.26, "learning_rate": 0.0001849781568025545, "loss": 2.249, "step": 129 },
    { "epoch": 0.26, "learning_rate": 0.00018460558160171865, "loss": 2.3065, "step": 130 },
    { "epoch": 0.26, "learning_rate": 0.0001842288273089332, "loss": 2.2788, "step": 131 },
    { "epoch": 0.27, "learning_rate": 0.0001838479125339731, "loss": 2.2946, "step": 132 },
    { "epoch": 0.27, "learning_rate": 0.00018346285609212025, "loss": 2.2101, "step": 133 },
    { "epoch": 0.27, "learning_rate": 0.0001830736770032341, "loss": 2.2492, "step": 134 },
    { "epoch": 0.27, "learning_rate": 0.0001826803944908124, "loss": 2.1931, "step": 135 },
    { "epoch": 0.27, "learning_rate": 0.00018228302798104127, "loss": 2.196, "step": 136 },
    { "epoch": 0.28, "learning_rate": 0.00018188159710183594, "loss": 2.2698, "step": 137 },
    { "epoch": 0.28, "learning_rate": 0.0001814761216818711, "loss": 2.3152, "step": 138 },
    { "epoch": 0.28, "learning_rate": 0.00018106662174960153, "loss": 2.2096, "step": 139 },
    { "epoch": 0.28, "learning_rate": 0.00018065311753227273, "loss": 2.2467, "step": 140 },
    { "epoch": 0.28, "learning_rate": 0.0001802356294549218, "loss": 2.2236, "step": 141 },
    { "epoch": 0.29, "learning_rate": 0.00017981417813936864, "loss": 2.2537, "step": 142 },
    { "epoch": 0.29, "learning_rate": 0.0001793887844031972, "loss": 2.2716, "step": 143 },
    { "epoch": 0.29, "learning_rate": 0.00017895946925872733, "loss": 2.2136, "step": 144 },
    { "epoch": 0.29, "learning_rate": 0.00017852625391197674, "loss": 2.2374, "step": 145 },
    { "epoch": 0.29, "learning_rate": 0.00017808915976161362, "loss": 2.2258, "step": 146 },
    { "epoch": 0.3, "learning_rate": 0.00017764820839789964, "loss": 2.1188, "step": 147 },
    { "epoch": 0.3, "learning_rate": 0.00017720342160162348, "loss": 2.1268, "step": 148 },
    { "epoch": 0.3, "learning_rate": 0.000176754821343025, "loss": 2.2547, "step": 149 },
    { "epoch": 0.3, "learning_rate": 0.00017630242978070997, "loss": 2.314, "step": 150 },
    { "epoch": 0.3, "learning_rate": 0.00017584626926055554, "loss": 2.3001, "step": 151 },
    { "epoch": 0.31, "learning_rate": 0.0001753863623146066, "loss": 2.1514, "step": 152 },
    { "epoch": 0.31, "learning_rate": 0.00017492273165996261, "loss": 2.2405, "step": 153 },
    { "epoch": 0.31, "learning_rate": 0.0001744554001976556, "loss": 2.2064, "step": 154 },
    { "epoch": 0.31, "learning_rate": 0.00017398439101151905, "loss": 2.1977, "step": 155 },
    { "epoch": 0.31, "learning_rate": 0.0001735097273670475, "loss": 2.2881, "step": 156 },
    { "epoch": 0.32, "learning_rate": 0.00017303143271024744, "loss": 2.1925, "step": 157 },
    { "epoch": 0.32, "learning_rate": 0.00017254953066647913, "loss": 2.1888, "step": 158 },
    { "epoch": 0.32, "learning_rate": 0.0001720640450392898, "loss": 2.2044, "step": 159 },
    { "epoch": 0.32, "learning_rate": 0.00017157499980923767, "loss": 2.145, "step": 160 },
    { "epoch": 0.32, "learning_rate": 0.0001710824191327075, "loss": 2.1138, "step": 161 },
    { "epoch": 0.33, "learning_rate": 0.0001705863273407174, "loss": 2.2124, "step": 162 },
    { "epoch": 0.33, "learning_rate": 0.00017008674893771706, "loss": 2.0964, "step": 163 },
    { "epoch": 0.33, "learning_rate": 0.00016958370860037717, "loss": 2.208, "step": 164 },
    { "epoch": 0.33, "learning_rate": 0.00016907723117637083, "loss": 2.1633, "step": 165 },
    { "epoch": 0.33, "learning_rate": 0.00016856734168314583, "loss": 2.1419, "step": 166 },
    { "epoch": 0.34, "learning_rate": 0.0001680540653066891, "loss": 2.1253, "step": 167 },
    { "epoch": 0.34, "learning_rate": 0.00016753742740028277, "loss": 2.1568, "step": 168 },
    { "epoch": 0.34, "learning_rate": 0.00016701745348325156, "loss": 2.1131, "step": 169 },
    { "epoch": 0.34, "learning_rate": 0.0001664941692397025, "loss": 2.1684, "step": 170 },
    { "epoch": 0.34, "learning_rate": 0.00016596760051725609, "loss": 2.1378, "step": 171 },
    { "epoch": 0.35, "learning_rate": 0.00016543777332576976, "loss": 2.0971, "step": 172 },
    { "epoch": 0.35, "learning_rate": 0.00016490471383605288, "loss": 2.1363, "step": 173 },
    { "epoch": 0.35, "learning_rate": 0.00016436844837857417, "loss": 2.0525, "step": 174 },
    { "epoch": 0.35, "learning_rate": 0.00016382900344216115, "loss": 2.1323, "step": 175 },
    { "epoch": 0.35, "learning_rate": 0.0001632864056726917, "loss": 2.1257, "step": 176 },
    { "epoch": 0.36, "learning_rate": 0.00016274068187177771, "loss": 2.0854, "step": 177 },
    { "epoch": 0.36, "learning_rate": 0.00016219185899544154, "loss": 2.2714, "step": 178 },
    { "epoch": 0.36, "learning_rate": 0.00016163996415278424, "loss": 2.0961, "step": 179 },
    { "epoch": 0.36, "learning_rate": 0.00016108502460464666, "loss": 2.1213, "step": 180 },
    { "epoch": 0.36, "learning_rate": 0.00016052706776226286, "loss": 2.121, "step": 181 },
    { "epoch": 0.37, "learning_rate": 0.00015996612118590603, "loss": 2.1398, "step": 182 },
    { "epoch": 0.37, "learning_rate": 0.00015940221258352742, "loss": 2.0577, "step": 183 },
    { "epoch": 0.37, "learning_rate": 0.00015883536980938734, "loss": 2.0841, "step": 184 },
    { "epoch": 0.37, "learning_rate": 0.00015826562086267956, "loss": 2.0541, "step": 185 },
    { "epoch": 0.37, "learning_rate": 0.00015769299388614826, "loss": 2.0365, "step": 186 },
    { "epoch": 0.38, "learning_rate": 0.00015711751716469786, "loss": 2.1263, "step": 187 },
    { "epoch": 0.38, "learning_rate": 0.00015653921912399589, "loss": 2.0934, "step": 188 },
    { "epoch": 0.38, "learning_rate": 0.0001559581283290689, "loss": 2.0882, "step": 189 },
    { "epoch": 0.38, "learning_rate": 0.00015537427348289153, "loss": 1.9549, "step": 190 },
    { "epoch": 0.38, "learning_rate": 0.0001547876834249687, "loss": 2.1164, "step": 191 },
    { "epoch": 0.39, "learning_rate": 0.0001541983871299111, "loss": 2.0739, "step": 192 },
    { "epoch": 0.39, "learning_rate": 0.000153606413706004, "loss": 2.0541, "step": 193 },
    { "epoch": 0.39, "learning_rate": 0.00015301179239376938, "loss": 1.9866, "step": 194 },
    { "epoch": 0.39, "learning_rate": 0.0001524145525645216, "loss": 2.0699, "step": 195 },
    { "epoch": 0.39, "learning_rate": 0.00015181472371891686, "loss": 2.0908, "step": 196 },
    { "epoch": 0.4, "learning_rate": 0.0001512123354854955, "loss": 1.968, "step": 197 },
    { "epoch": 0.4, "learning_rate": 0.00015060741761921902, "loss": 2.0859, "step": 198 },
    { "epoch": 0.4, "learning_rate": 0.00015000000000000001, "loss": 1.9517, "step": 199 },
    { "epoch": 0.4, "learning_rate": 0.00014939011263122634, "loss": 2.0148, "step": 200 },
    { "epoch": 0.4, "learning_rate": 0.00014877778563827923, "loss": 2.0794, "step": 201 },
    { "epoch": 0.41, "learning_rate": 0.000148163049267045, "loss": 1.9808, "step": 202 },
    { "epoch": 0.41, "learning_rate": 0.00014754593388242117, "loss": 2.0028, "step": 203 },
    { "epoch": 0.41, "learning_rate": 0.0001469264699668168, "loss": 2.14, "step": 204 },
    { "epoch": 0.41, "learning_rate": 0.00014630468811864633, "loss": 2.0264, "step": 205 },
    { "epoch": 0.41, "learning_rate": 0.00014568061905081875, "loss": 1.9986, "step": 206 },
    { "epoch": 0.42, "learning_rate": 0.00014505429358922, "loss": 2.0302, "step": 207 },
    { "epoch": 0.42, "learning_rate": 0.00014442574267119074, "loss": 1.894, "step": 208 },
    { "epoch": 0.42, "learning_rate": 0.00014379499734399798, "loss": 2.0404, "step": 209 },
    { "epoch": 0.42, "learning_rate": 0.00014316208876330146, "loss": 1.8718, "step": 210 },
    { "epoch": 0.42, "learning_rate": 0.00014252704819161496, "loss": 1.9787, "step": 211 },
    { "epoch": 0.43, "learning_rate": 0.00014188990699676184, "loss": 2.0504, "step": 212 },
    { "epoch": 0.43, "learning_rate": 0.00014125069665032574, "loss": 1.9288, "step": 213 },
    { "epoch": 0.43, "learning_rate": 0.00014060944872609606, "loss": 2.0359, "step": 214 },
    { "epoch": 0.43, "learning_rate": 0.00013996619489850822, "loss": 2.0282, "step": 215 },
    { "epoch": 0.43, "learning_rate": 0.0001393209669410794, "loss": 1.9534, "step": 216 },
    { "epoch": 0.44, "learning_rate": 0.0001386737967248388, "loss": 1.9479, "step": 217 },
    { "epoch": 0.44, "learning_rate": 0.00013802471621675338, "loss": 1.9512, "step": 218 },
    { "epoch": 0.44, "learning_rate": 0.00013737375747814915, "loss": 1.9571, "step": 219 },
    { "epoch": 0.44, "learning_rate": 0.0001367209526631272, "loss": 1.9223, "step": 220 },
    { "epoch": 0.44, "learning_rate": 0.00013606633401697557, "loss": 1.9252, "step": 221 },
    { "epoch": 0.45, "learning_rate": 0.0001354099338745764, "loss": 1.9298, "step": 222 },
    { "epoch": 0.45, "learning_rate": 0.0001347517846588089, "loss": 1.9366, "step": 223 },
    { "epoch": 0.45, "learning_rate": 0.0001340919188789477, "loss": 1.8472, "step": 224 },
    { "epoch": 0.45, "learning_rate": 0.00013343036912905718, "loss": 1.9318, "step": 225 },
    { "epoch": 0.45, "learning_rate": 0.00013276716808638126, "loss": 2.0005, "step": 226 },
    { "epoch": 0.46, "learning_rate": 0.00013210234850972964, "loss": 1.9785, "step": 227 },
    { "epoch": 0.46, "learning_rate": 0.00013143594323785927, "loss": 1.8198, "step": 228 },
    { "epoch": 0.46, "learning_rate": 0.00013076798518785274, "loss": 1.8197, "step": 229 },
    { "epoch": 0.46, "learning_rate": 0.0001300985073534919, "loss": 1.8909, "step": 230 },
    { "epoch": 0.46, "learning_rate": 0.0001294275428036284, "loss": 1.937, "step": 231 },
    { "epoch": 0.47, "learning_rate": 0.00012875512468055024, "loss": 1.8939, "step": 232 },
    { "epoch": 0.47, "learning_rate": 0.00012808128619834461, "loss": 1.8948, "step": 233 },
    { "epoch": 0.47, "learning_rate": 0.00012740606064125736, "loss": 1.9238, "step": 234 },
    { "epoch": 0.47, "learning_rate": 0.00012672948136204887, "loss": 1.891, "step": 235 },
    { "epoch": 0.47, "learning_rate": 0.00012605158178034654, "loss": 1.8728, "step": 236 },
    { "epoch": 0.48, "learning_rate": 0.00012537239538099425, "loss": 1.9132, "step": 237 },
    { "epoch": 0.48, "learning_rate": 0.0001246919557123981, "loss": 1.8633, "step": 238 },
    { "epoch": 0.48, "learning_rate": 0.00012401029638486953, "loss": 1.8216, "step": 239 },
    { "epoch": 0.48, "learning_rate": 0.00012332745106896482, "loss": 1.8434, "step": 240 },
    { "epoch": 0.48, "learning_rate": 0.00012264345349382238, "loss": 1.8116, "step": 241 },
    { "epoch": 0.49, "learning_rate": 0.0001219583374454963, "loss": 1.8152, "step": 242 },
    { "epoch": 0.49, "learning_rate": 0.00012127213676528768, "loss": 1.8103, "step": 243 },
    { "epoch": 0.49, "learning_rate": 0.00012058488534807303, "loss": 1.8576, "step": 244 },
    { "epoch": 0.49, "learning_rate": 0.00011989661714062999, "loss": 1.8415, "step": 245 },
    { "epoch": 0.49, "learning_rate": 0.00011920736613996046, "loss": 1.8708, "step": 246 },
    { "epoch": 0.5, "learning_rate": 0.00011851716639161159, "loss": 1.8049, "step": 247 },
    { "epoch": 0.5, "learning_rate": 0.0001178260519879937, "loss": 1.8555, "step": 248 },
    { "epoch": 0.5, "learning_rate": 0.00011713405706669667, "loss": 1.8647, "step": 249 },
    { "epoch": 0.5, "learning_rate": 0.00011644121580880345, "loss": 1.8488, "step": 250 },
    { "epoch": 0.5, "eval_loss": 1.8219653367996216, "eval_runtime": 51.9603, "eval_samples_per_second": 24.192, "eval_steps_per_second": 1.02, "step": 250 },
    { "epoch": 0.5, "learning_rate": 0.0001157475624372018, "loss": 1.8457, "step": 251 },
    { "epoch": 0.51, "learning_rate": 0.00011505313121489383, "loss": 1.7723, "step": 252 },
    { "epoch": 0.51, "learning_rate": 0.00011435795644330359, "loss": 1.7381, "step": 253 },
    { "epoch": 0.51, "learning_rate": 0.0001136620724605827, "loss": 1.9117, "step": 254 },
    { "epoch": 0.51, "learning_rate": 0.00011296551363991432, "loss": 1.8667, "step": 255 },
    { "epoch": 0.51, "learning_rate": 0.00011226831438781518, "loss": 1.7878, "step": 256 },
    { "epoch": 0.52, "learning_rate": 0.00011157050914243614, "loss": 1.7116, "step": 257 },
    { "epoch": 0.52, "learning_rate": 0.00011087213237186108, "loss": 1.8073, "step": 258 },
    { "epoch": 0.52, "learning_rate": 0.00011017321857240432, "loss": 1.7985, "step": 259 },
    { "epoch": 0.52, "learning_rate": 0.00010947380226690684, "loss": 1.8121, "step": 260 },
    { "epoch": 0.52, "learning_rate": 0.00010877391800303074, "loss": 1.6811, "step": 261 },
    { "epoch": 0.53, "learning_rate": 0.00010807360035155305, "loss": 1.7594, "step": 262 },
    { "epoch": 0.53, "learning_rate": 0.00010737288390465792, "loss": 1.7029, "step": 263 },
    { "epoch": 0.53, "learning_rate": 0.00010667180327422797, "loss": 1.7555, "step": 264 },
    { "epoch": 0.53, "learning_rate": 0.00010597039309013472, "loss": 1.8195, "step": 265 },
    { "epoch": 0.53, "learning_rate": 0.00010526868799852796, "loss": 1.7216, "step": 266 },
    { "epoch": 0.54, "learning_rate": 0.00010456672266012446, "loss": 1.7184, "step": 267 },
    { "epoch": 0.54, "learning_rate": 0.00010386453174849584, "loss": 1.7327, "step": 268 },
    { "epoch": 0.54, "learning_rate": 0.0001031621499483559, "loss": 1.7251, "step": 269 },
    { "epoch": 0.54, "learning_rate": 0.00010245961195384743, "loss": 1.7582, "step": 270 },
    { "epoch": 0.54, "learning_rate": 0.00010175695246682841, "loss": 1.7147, "step": 271 },
    { "epoch": 0.55, "learning_rate": 0.00010105420619515798, "loss": 1.7322, "step": 272 },
    { "epoch": 0.55, "learning_rate": 0.00010035140785098187, "loss": 1.7607, "step": 273 },
    { "epoch": 0.55, "learning_rate": 9.964859214901814e-05, "loss": 1.7381, "step": 274 },
    { "epoch": 0.55, "learning_rate": 9.894579380484204e-05, "loss": 1.7026, "step": 275 },
    { "epoch": 0.55, "learning_rate": 9.82430475331716e-05, "loss": 1.6587, "step": 276 },
    { "epoch": 0.56, "learning_rate": 9.754038804615257e-05, "loss": 1.7142, "step": 277 },
    { "epoch": 0.56, "learning_rate": 9.683785005164411e-05, "loss": 1.738, "step": 278 },
    { "epoch": 0.56, "learning_rate": 9.613546825150421e-05, "loss": 1.672, "step": 279 },
    { "epoch": 0.56, "learning_rate": 9.543327733987557e-05, "loss": 1.7901, "step": 280 },
    { "epoch": 0.56, "learning_rate": 9.473131200147205e-05, "loss": 1.7879, "step": 281 },
    { "epoch": 0.57, "learning_rate": 9.402960690986532e-05, "loss": 1.6247, "step": 282 },
    { "epoch": 0.57, "learning_rate": 9.332819672577206e-05, "loss": 1.7031, "step": 283 },
    { "epoch": 0.57, "learning_rate": 9.26271160953421e-05, "loss": 1.72, "step": 284 },
    { "epoch": 0.57, "learning_rate": 9.192639964844695e-05, "loss": 1.7805, "step": 285 },
    { "epoch": 0.57, "learning_rate": 9.122608199696928e-05, "loss": 1.7854, "step": 286 },
    { "epoch": 0.58, "learning_rate": 9.052619773309317e-05, "loss": 1.6424, "step": 287 },
    { "epoch": 0.58, "learning_rate": 8.982678142759566e-05, "loss": 1.6995, "step": 288 },
    { "epoch": 0.58, "learning_rate": 8.912786762813893e-05, "loss": 1.6633, "step": 289 },
    { "epoch": 0.58, "learning_rate": 8.84294908575639e-05, "loss": 1.6116, "step": 290 },
    { "epoch": 0.58, "learning_rate": 8.773168561218483e-05, "loss": 1.7391, "step": 291 },
    { "epoch": 0.59, "learning_rate": 8.70344863600857e-05, "loss": 1.5991, "step": 292 },
    { "epoch": 0.59, "learning_rate": 8.633792753941733e-05, "loss": 1.5397, "step": 293 },
    { "epoch": 0.59, "learning_rate": 8.564204355669643e-05, "loss": 1.6073, "step": 294 },
    { "epoch": 0.59, "learning_rate": 8.494686878510623e-05, "loss": 1.7106, "step": 295 },
    { "epoch": 0.59, "learning_rate": 8.425243756279824e-05, "loss": 1.6159, "step": 296 },
    { "epoch": 0.6, "learning_rate": 8.355878419119657e-05, "loss": 1.7041, "step": 297 },
    { "epoch": 0.6, "learning_rate": 8.286594293330332e-05, "loss": 1.6621, "step": 298 },
    { "epoch": 0.6, "learning_rate": 8.217394801200631e-05, "loss": 1.6209, "step": 299 },
    { "epoch": 0.6, "learning_rate": 8.148283360838844e-05, "loss": 1.6256, "step": 300 },
    { "epoch": 0.61, "learning_rate": 8.079263386003952e-05, "loss": 1.6747, "step": 301 },
    { "epoch": 0.61, "learning_rate": 8.010338285937006e-05, "loss": 1.7257, "step": 302 },
    { "epoch": 0.61, "learning_rate": 7.941511465192697e-05, "loss": 1.605, "step": 303 },
    { "epoch": 0.61, "learning_rate": 7.872786323471232e-05, "loss": 1.5881, "step": 304 },
    { "epoch": 0.61, "learning_rate": 7.804166255450373e-05, "loss": 1.6928, "step": 305 },
    { "epoch": 0.62, "learning_rate": 7.735654650617763e-05, "loss": 1.6106, "step": 306 },
    { "epoch": 0.62, "learning_rate": 7.667254893103519e-05, "loss": 1.6714, "step": 307 },
    { "epoch": 0.62, "learning_rate": 7.598970361513051e-05, "loss": 1.5388, "step": 308 },
    { "epoch": 0.62, "learning_rate": 7.53080442876019e-05, "loss": 1.546, "step": 309 },
    { "epoch": 0.62, "learning_rate": 7.462760461900576e-05, "loss": 1.5758, "step": 310 },
    { "epoch": 0.63, "learning_rate": 7.394841821965345e-05, "loss": 1.4756, "step": 311 },
    { "epoch": 0.63, "learning_rate": 7.327051863795118e-05, "loss": 1.5601, "step": 312 },
    { "epoch": 0.63, "learning_rate": 7.259393935874265e-05, "loss": 1.5874, "step": 313 },
    { "epoch": 0.63, "learning_rate": 7.191871380165538e-05, "loss": 1.6352, "step": 314 },
    { "epoch": 0.63, "learning_rate": 7.12448753194498e-05, "loss": 1.6191, "step": 315 },
    { "epoch": 0.64, "learning_rate": 7.057245719637164e-05, "loss": 1.5642, "step": 316 },
    { "epoch": 0.64, "learning_rate": 6.990149264650814e-05, "loss": 1.6193, "step": 317 },
    { "epoch": 0.64, "learning_rate": 6.923201481214732e-05, "loss": 1.5273, "step": 318 },
    { "epoch": 0.64, "learning_rate": 6.856405676214073e-05, "loss": 1.552, "step": 319 },
    { "epoch": 0.64, "learning_rate": 6.789765149027039e-05, "loss": 1.4993, "step": 320 },
    { "epoch": 0.65, "learning_rate": 6.723283191361873e-05, "loss": 1.5243, "step": 321 },
    { "epoch": 0.65, "learning_rate": 6.656963087094284e-05, "loss": 1.6382, "step": 322 },
    { "epoch": 0.65, "learning_rate": 6.590808112105232e-05, "loss": 1.6384, "step": 323 },
    { "epoch": 0.65, "learning_rate": 6.524821534119114e-05, "loss": 1.5141, "step": 324 },
    { "epoch": 0.65, "learning_rate": 6.459006612542365e-05, "loss": 1.526, "step": 325 },
    { "epoch": 0.66, "learning_rate": 6.393366598302446e-05, "loss": 1.5187, "step": 326 },
    { "epoch": 0.66, "learning_rate": 6.32790473368728e-05, "loss": 1.5672, "step": 327 },
    { "epoch": 0.66, "learning_rate": 6.262624252185087e-05, "loss": 1.5509, "step": 328 },
    { "epoch": 0.66, "learning_rate": 6.197528378324665e-05, "loss": 1.6236, "step": 329 },
    { "epoch": 0.66, "learning_rate": 6.132620327516126e-05, "loss": 1.5135, "step": 330 },
    { "epoch": 0.67, "learning_rate": 6.06790330589206e-05, "loss": 1.5453, "step": 331 },
    { "epoch": 0.67, "learning_rate": 6.0033805101491794e-05, "loss": 1.4761, "step": 332 },
    { "epoch": 0.67, "learning_rate": 5.939055127390396e-05, "loss": 1.5515, "step": 333 },
    { "epoch": 0.67, "learning_rate": 5.8749303349674254e-05, "loss": 1.5779, "step": 334 },
    { "epoch": 0.67, "learning_rate": 5.811009300323818e-05, "loss": 1.4634, "step": 335 },
    { "epoch": 0.68, "learning_rate": 5.747295180838503e-05, "loss": 1.4856, "step": 336 },
    { "epoch": 0.68, "learning_rate": 5.6837911236698536e-05, "loss": 1.4736, "step": 337 },
    { "epoch": 0.68, "learning_rate": 5.620500265600206e-05, "loss": 1.4468, "step": 338 },
    { "epoch": 0.68, "learning_rate": 5.5574257328809276e-05, "loss": 1.448, "step": 339 },
    { "epoch": 0.68, "learning_rate": 5.494570641077999e-05, "loss": 1.5192, "step": 340 },
    { "epoch": 0.69, "learning_rate": 5.431938094918132e-05, "loss": 1.4709, "step": 341 },
    { "epoch": 0.69, "learning_rate": 5.369531188135368e-05, "loss": 1.4481, "step": 342 },
    { "epoch": 0.69, "learning_rate": 5.307353003318325e-05, "loss": 1.5018, "step": 343 },
    { "epoch": 0.69, "learning_rate": 5.2454066117578815e-05, "loss": 1.5606, "step": 344 },
    { "epoch": 0.69, "learning_rate": 5.183695073295507e-05, "loss": 1.5407, "step": 345 },
    { "epoch": 0.7, "learning_rate": 5.122221436172079e-05, "loss": 1.4878, "step": 346 },
    { "epoch": 0.7, "learning_rate": 5.060988736877366e-05, "loss": 1.4904, "step": 347 },
    { "epoch": 0.7, "learning_rate": 5.000000000000002e-05, "loss": 1.3881, "step": 348 },
    { "epoch": 0.7, "learning_rate": 4.939258238078098e-05, "loss": 1.4511, "step": 349 },
    { "epoch": 0.7, "learning_rate": 4.8787664514504504e-05, "loss": 1.4791, "step": 350 },
    { "epoch": 0.71, "learning_rate": 4.818527628108317e-05, "loss": 1.323, "step": 351 },
    { "epoch": 0.71, "learning_rate": 4.758544743547839e-05, "loss": 1.4073, "step": 352 },
    { "epoch": 0.71, "learning_rate": 4.698820760623064e-05, "loss": 1.3715, "step": 353 },
    { "epoch": 0.71, "learning_rate": 4.639358629399602e-05, "loss": 1.4781, "step": 354 },
    { "epoch": 0.71, "learning_rate": 4.580161287008892e-05, "loss": 1.5291, "step": 355 },
    { "epoch": 0.72, "learning_rate": 4.521231657503132e-05, "loss": 1.4415, "step": 356 },
    { "epoch": 0.72, "learning_rate": 4.462572651710847e-05, "loss": 1.3697, "step": 357 },
    { "epoch": 0.72, "learning_rate": 4.4041871670931135e-05, "loss": 1.5058, "step": 358 },
    { "epoch": 0.72, "learning_rate": 4.346078087600412e-05, "loss": 1.3509, "step": 359 },
    { "epoch": 0.72, "learning_rate": 4.288248283530214e-05, "loss": 1.5078, "step": 360 },
    { "epoch": 0.73, "learning_rate": 4.230700611385174e-05, "loss": 1.4253, "step": 361 },
    { "epoch": 0.73, "learning_rate": 4.173437913732048e-05, "loss": 1.3528, "step": 362 },
    { "epoch": 0.73, "learning_rate": 4.116463019061269e-05, "loss": 1.4971, "step": 363 },
    { "epoch": 0.73, "learning_rate": 4.059778741647261e-05, "loss": 1.4212, "step": 364 },
    { "epoch": 0.73, "learning_rate": 4.003387881409397e-05, "loss": 1.4902, "step": 365 },
    { "epoch": 0.74, "learning_rate": 3.947293223773715e-05, "loss": 1.4026, "step": 366 },
    { "epoch": 0.74, "learning_rate": 3.8914975395353334e-05, "loss": 1.4036, "step": 367 },
    { "epoch": 0.74, "learning_rate": 3.836003584721577e-05, "loss": 1.3422, "step": 368 },
    { "epoch": 0.74, "learning_rate": 3.780814100455848e-05, "loss": 1.4615, "step": 369 },
    { "epoch": 0.74, "learning_rate": 3.7259318128222276e-05, "loss": 1.4738, "step": 370 },
    { "epoch": 0.75, "learning_rate": 3.671359432730834e-05, "loss": 1.2812, "step": 371 },
    { "epoch": 0.75, "learning_rate": 3.617099655783884e-05, "loss": 1.3518, "step": 372 },
    { "epoch": 0.75, "learning_rate": 3.563155162142584e-05, "loss": 1.3794, "step": 373 },
    { "epoch": 0.75, "learning_rate": 3.509528616394716e-05, "loss": 1.344, "step": 374 },
    { "epoch": 0.75, "learning_rate": 3.456222667423028e-05, "loss": 1.383, "step": 375 },
    { "epoch": 0.75, "eval_loss": 1.3915969133377075, "eval_runtime": 52.0043, "eval_samples_per_second": 24.171, "eval_steps_per_second": 1.019, "step": 375 },
    { "epoch": 0.76, "learning_rate": 3.403239948274392e-05, "loss": 1.3758, "step": 376 },
    { "epoch": 0.76, "learning_rate": 3.350583076029754e-05, "loss": 1.3697, "step": 377 },
    { "epoch": 0.76, "learning_rate": 3.298254651674848e-05, "loss": 1.3343, "step": 378 },
    { "epoch": 0.76, "learning_rate": 3.246257259971727e-05, "loss": 1.4031, "step": 379 },
    { "epoch": 0.76, "learning_rate": 3.1945934693310896e-05, "loss": 1.4939, "step": 380 },
    { "epoch": 0.77, "learning_rate": 3.143265831685419e-05, "loss": 1.2987, "step": 381 },
    { "epoch": 0.77, "learning_rate": 3.092276882362918e-05, "loss": 1.3531, "step": 382 },
    { "epoch": 0.77, "learning_rate": 3.041629139962283e-05, "loss": 1.4016, "step": 383 },
    { "epoch": 0.77, "learning_rate": 2.9913251062282986e-05, "loss": 1.4369, "step": 384 },
    { "epoch": 0.77, "learning_rate": 2.9413672659282622e-05, "loss": 1.3164, "step": 385 },
    { "epoch": 0.78, "learning_rate": 2.8917580867292526e-05, "loss": 1.4983, "step": 386 },
    { "epoch": 0.78, "learning_rate": 2.8425000190762353e-05, "loss": 1.2823, "step": 387 },
    { "epoch": 0.78, "learning_rate": 2.793595496071021e-05, "loss": 1.3993, "step": 388 },
    { "epoch": 0.78, "learning_rate": 2.7450469333520855e-05, "loss": 1.3771, "step": 389 },
    { "epoch": 0.78, "learning_rate": 2.6968567289752578e-05, "loss": 1.3688, "step": 390 },
    { "epoch": 0.79, "learning_rate": 2.6490272632952505e-05, "loss": 1.3941, "step": 391 },
    { "epoch": 0.79, "learning_rate": 2.6015608988480955e-05, "loss": 1.4538, "step": 392 },
    { "epoch": 0.79, "learning_rate": 2.5544599802344394e-05, "loss": 1.3141, "step": 393 },
    { "epoch": 0.79, "learning_rate": 2.5077268340037454e-05, "loss": 1.4652, "step": 394 },
    { "epoch": 0.79, "learning_rate": 2.4613637685393432e-05, "loss": 1.2551, "step": 395 },
    { "epoch": 0.8, "learning_rate": 2.415373073944449e-05, "loss": 1.416, "step": 396 },
    { "epoch": 0.8, "learning_rate": 2.3697570219290077e-05, "loss": 1.351, "step": 397 },
    { "epoch": 0.8, "learning_rate": 2.324517865697501e-05, "loss": 1.3533, "step": 398 },
    { "epoch": 0.8, "learning_rate": 2.2796578398376523e-05, "loss": 1.2773, "step": 399 },
    { "epoch": 0.8, "learning_rate": 2.235179160210037e-05, "loss": 1.3286, "step": 400 },
    { "epoch": 0.81, "learning_rate": 2.1910840238386398e-05, "loss": 1.3392, "step": 401 },
    { "epoch": 0.81, "learning_rate": 2.147374608802326e-05, "loss": 1.2921, "step": 402 },
    { "epoch": 0.81, "learning_rate": 2.104053074127268e-05, "loss": 1.3551, "step": 403 },
    { "epoch": 0.81, "learning_rate": 2.06112155968028e-05, "loss": 1.2801, "step": 404 },
    { "epoch": 0.81, "learning_rate": 2.0185821860631394e-05, "loss": 1.3758, "step": 405 },
    { "epoch": 0.82, "learning_rate": 1.9764370545078215e-05, "loss": 1.3692, "step": 406 },
    { "epoch": 0.82, "learning_rate": 1.9346882467727325e-05, "loss": 1.2987, "step": 407 },
    { "epoch": 0.82, "learning_rate": 1.893337825039849e-05, "loss": 1.2623, "step": 408 },
    { "epoch": 0.82, "learning_rate": 1.852387831812893e-05, "loss": 1.4118, "step": 409 },
    { "epoch": 0.82, "learning_rate": 1.811840289816409e-05, "loss": 1.3289, "step": 410 },
    { "epoch": 0.83, "learning_rate": 1.7716972018958766e-05, "loss": 1.37, "step": 411 },
    { "epoch": 0.83, "learning_rate": 1.7319605509187608e-05, "loss": 1.3297, "step": 412 },
    { "epoch": 0.83, "learning_rate": 1.6926322996765897e-05, "loss": 1.3106, "step": 413 },
    { "epoch": 0.83, "learning_rate": 1.653714390787979e-05, "loss": 1.2543, "step": 414 },
    { "epoch": 0.83, "learning_rate": 1.61520874660269e-05, "loss": 1.1748, "step": 415 },
    { "epoch": 0.84, "learning_rate": 1.5771172691066794e-05, "loss": 1.3091, "step": 416 },
    { "epoch": 0.84, "learning_rate": 1.5394418398281352e-05, "loss": 1.3359, "step": 417 },
    { "epoch": 0.84, "learning_rate": 1.5021843197445528e-05, "loss": 1.361, "step": 418 },
    { "epoch": 0.84, "learning_rate": 1.4653465491908003e-05, "loss": 1.3599, "step": 419 },
    { "epoch": 0.84, "learning_rate": 1.4289303477682347e-05, "loss": 1.3696, "step": 420 },
    { "epoch": 0.85, "learning_rate": 1.3929375142547917e-05, "loss": 1.1577, "step": 421 },
    { "epoch": 0.85, "learning_rate": 1.3573698265161683e-05, "loss": 1.3131, "step": 422 },
    { "epoch": 0.85, "learning_rate": 1.3222290414179794e-05, "loss": 1.2763, "step": 423 },
    { "epoch": 0.85, "learning_rate": 1.2875168947389981e-05, "loss": 1.2822, "step": 424 },
    { "epoch": 0.85, "learning_rate": 1.2532351010853916e-05, "loss": 1.3403, "step": 425 },
    { "epoch": 0.86, "learning_rate": 1.2193853538060595e-05, "loss": 1.2745, "step": 426 },
    { "epoch": 0.86, "learning_rate": 1.1859693249089642e-05, "loss": 1.3022, "step": 427 },
    { "epoch": 0.86, "learning_rate": 1.152988664978556e-05, "loss": 1.3088, "step": 428 },
    { "epoch": 0.86, "learning_rate": 1.1204450030942348e-05, "loss": 1.3025, "step": 429 },
    { "epoch": 0.86, "learning_rate": 1.0883399467498956e-05, "loss": 1.3092, "step": 430 },
    { "epoch": 0.87, "learning_rate": 1.0566750817745074e-05, "loss": 1.3048, "step": 431 },
    { "epoch": 0.87, "learning_rate": 1.0254519722537947e-05, "loss": 1.2341, "step": 432 },
    { "epoch": 0.87, "learning_rate": 9.946721604529718e-06, "loss": 1.3788, "step": 433 },
    { "epoch": 0.87, "learning_rate": 9.643371667405698e-06, "loss": 1.4034, "step": 434 },
    { "epoch": 0.87, "learning_rate": 9.344484895133342e-06, "loss": 1.3039, "step": 435 },
    { "epoch": 0.88, "learning_rate": 9.050076051222067e-06, "loss": 1.3125, "step": 436 },
    { "epoch": 0.88, "learning_rate": 8.760159677994172e-06, "loss": 1.3098, "step": 437 },
    { "epoch": 0.88, "learning_rate": 8.474750095866236e-06, "loss": 1.2353, "step": 438 },
    { "epoch": 0.88, "learning_rate": 8.193861402642088e-06, "loss": 1.3627, "step": 439 },
    { "epoch": 0.88, "learning_rate": 7.91750747281621e-06, "loss": 1.4489, "step": 440 },
    { "epoch": 0.89, "learning_rate": 7.645701956888507e-06, "loss": 1.3553, "step": 441 },
    { "epoch": 0.89, "learning_rate": 7.378458280689993e-06, "loss": 1.3337, "step": 442 },
    { "epoch": 0.89, "learning_rate": 7.115789644719728e-06, "loss": 1.2856, "step": 443 },
    { "epoch": 0.89, "learning_rate": 6.857709023492587e-06, "loss": 1.3764, "step": 444 },
    { "epoch": 0.89, "learning_rate": 6.60422916489859e-06, "loss": 1.2939, "step": 445 },
    { "epoch": 0.9, "learning_rate": 6.355362589573077e-06, "loss": 1.3477, "step": 446 },
    { "epoch": 0.9, "learning_rate": 6.111121590278346e-06, "loss": 1.3298, "step": 447 },
    { "epoch": 0.9, "learning_rate": 5.8715182312963575e-06, "loss": 1.3331, "step": 448 },
    { "epoch": 0.9, "learning_rate": 5.636564347832907e-06, "loss": 1.2503, "step": 449 },
    { "epoch": 0.9, "learning_rate": 5.4062715454329726e-06, "loss": 1.2786, "step": 450 },
    { "epoch": 0.91, "learning_rate": 5.180651199407449e-06, "loss": 1.286, "step": 451 },
    { "epoch": 0.91, "learning_rate": 4.959714454271369e-06, "loss": 1.2775, "step": 452 },
    { "epoch": 0.91, "learning_rate": 4.7434722231932685e-06, "loss": 1.2779, "step": 453 },
    { "epoch": 0.91, "learning_rate": 4.531935187456216e-06, "loss": 1.315, "step": 454 },
    { "epoch": 0.91, "learning_rate": 4.325113795930203e-06, "loss": 1.3792, "step": 455 },
    { "epoch": 0.92, "learning_rate": 4.1230182645560555e-06, "loss": 1.4314, "step": 456 },
    { "epoch": 0.92, "learning_rate": 3.925658575840696e-06, "loss": 1.2617, "step": 457 },
    { "epoch": 0.92, "learning_rate": 3.7330444783642338e-06, "loss": 1.3592, "step": 458 },
    { "epoch": 0.92, "learning_rate": 3.5451854862982746e-06, "loss": 1.3299, "step": 459 },
    { "epoch": 0.92, "learning_rate": 3.3620908789360863e-06, "loss": 1.3729, "step": 460 },
    { "epoch": 0.93, "learning_rate": 3.1837697002341293e-06, "loss": 1.2571, "step": 461 },
    { "epoch": 0.93, "learning_rate": 3.010230758365462e-06, "loss": 1.2179, "step": 462 },
    { "epoch": 0.93, "learning_rate": 2.841482625284564e-06, "loss": 1.2873, "step": 463 },
    { "epoch": 0.93, "learning_rate": 2.677533636303964e-06, "loss": 1.3169, "step": 464 },
    { "epoch": 0.93, "learning_rate": 2.518391889682525e-06, "loss": 1.2881, "step": 465 },
    { "epoch": 0.94, "learning_rate": 2.3640652462253886e-06, "loss": 1.2479, "step": 466 },
    { "epoch": 0.94, "learning_rate": 2.2145613288957478e-06, "loss": 1.2183, "step": 467 },
    { "epoch": 0.94, "learning_rate": 2.069887522438252e-06, "loss": 1.3322, "step": 468 },
    { "epoch": 0.94, "learning_rate": 1.9300509730142855e-06, "loss": 1.2666, "step": 469 },
    { "epoch": 0.94, "learning_rate": 1.7950585878489856e-06, "loss": 1.2443, "step": 470 },
    { "epoch": 0.95, "learning_rate": 1.6649170348899789e-06, "loss": 1.2989, "step": 471 },
    { "epoch": 0.95, "learning_rate": 1.5396327424781366e-06, "loss": 1.2325, "step": 472 },
    { "epoch": 0.95, "learning_rate": 1.4192118990299707e-06, "loss": 1.3192, "step": 473 },
    { "epoch": 0.95, "learning_rate": 1.3036604527319474e-06, "loss": 1.2725, "step": 474 },
    { "epoch": 0.95, "learning_rate": 1.1929841112467533e-06, "loss": 1.3761, "step": 475 },
    { "epoch": 0.96, "learning_rate": 1.0871883414312777e-06, "loss": 1.3084, "step": 476 },
    { "epoch": 0.96, "learning_rate": 9.862783690666178e-07, "loss": 1.2498, "step": 477 },
    { "epoch": 0.96, "learning_rate": 8.902591785999725e-07, "loss": 1.3384, "step": 478 },
    { "epoch": 0.96, "learning_rate": 7.991355128984079e-07, "loss": 1.2835, "step": 479 },
    { "epoch": 0.96, "learning_rate": 7.129118730145656e-07, "loss": 1.412, "step": 480 },
    { "epoch": 0.97, "learning_rate": 6.315925179643744e-07, "loss": 1.2562, "step": 481 },
    { "epoch": 0.97, "learning_rate": 5.55181464516652e-07, "loss": 1.3095, "step": 482 },
    { "epoch": 0.97, "learning_rate": 4.836824869946965e-07, "loss": 1.252, "step": 483 },
    { "epoch": 0.97, "learning_rate": 4.170991170898808e-07, "loss": 1.3766, "step": 484 },
    { "epoch": 0.97, "learning_rate": 3.554346436871581e-07, "loss": 1.2876, "step": 485 },
    { "epoch": 0.98, "learning_rate": 2.986921127026476e-07, "loss": 1.3465, "step": 486 },
    { "epoch": 0.98, "learning_rate": 2.468743269331442e-07, "loss": 1.3842, "step": 487 },
    { "epoch": 0.98, "learning_rate": 1.9998384591773944e-07, "loss": 1.3558, "step": 488 },
    { "epoch": 0.98, "learning_rate": 1.5802298581132358e-07, "loss": 1.2995, "step": 489 },
    { "epoch": 0.98, "learning_rate": 1.209938192701876e-07, "loss": 1.27, "step": 490 },
    { "epoch": 0.99, "learning_rate": 8.889817534969425e-08, "loss": 1.2425, "step": 491 },
    { "epoch": 0.99, "learning_rate": 6.173763941389465e-08, "loss": 1.2134, "step": 492 },
    { "epoch": 0.99, "learning_rate": 3.9513553057202165e-08, "loss": 1.2438, "step": 493 },
    { "epoch": 0.99, "learning_rate": 2.222701403818972e-08, "loss": 1.329, "step": 494 },
    { "epoch": 0.99, "learning_rate": 9.878876225277722e-09, "loss": 1.4423, "step": 495 },
    { "epoch": 1.0, "learning_rate": 2.469749554634415e-09, "loss": 1.2607, "step": 496 },
    { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.3635, "step": 497 }
  ],
  "logging_steps": 1,
  "max_steps": 497,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 3.751894162656461e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}