|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993355481727575, |
|
"eval_steps": 500, |
|
"global_step": 1316, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007593735168485999, |
|
"grad_norm": 4.0, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 1.9271, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0015187470336971997, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.6784, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0022781205505457997, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 1.7025, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0030374940673943995, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 1.8366, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0037968675842429997, |
|
"grad_norm": 3.859375, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.8837, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0045562411010915994, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 1.7767, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0053156146179402, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 1.6786, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006074988134788799, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 1.873, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006834361651637399, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 1.8584, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.007593735168485999, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7817, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0083531086853346, |
|
"grad_norm": 3.25, |
|
"learning_rate": 2.2e-06, |
|
"loss": 1.7579, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009112482202183199, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 1.9496, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.009871855719031798, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 2.6e-06, |
|
"loss": 1.7234, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0106312292358804, |
|
"grad_norm": 4.375, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 2.003, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.011390602752728999, |
|
"grad_norm": 4.125, |
|
"learning_rate": 3e-06, |
|
"loss": 1.8606, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012149976269577598, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 1.8039, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.012909349786426199, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 1.936, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.013668723303274798, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 1.7465, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.014428096820123398, |
|
"grad_norm": 3.5625, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 1.659, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.015187470336971999, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.7962, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015946843853820596, |
|
"grad_norm": 3.375, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 1.6802, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0167062173706692, |
|
"grad_norm": 3.875, |
|
"learning_rate": 4.4e-06, |
|
"loss": 1.8444, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0174655908875178, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 1.7685, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.018224964404366398, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 1.6768, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.018984337921214997, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 5e-06, |
|
"loss": 1.8115, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.019743711438063596, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 5.2e-06, |
|
"loss": 1.7844, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.020503084954912196, |
|
"grad_norm": 3.75, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 1.8313, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0212624584717608, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 1.8675, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.022021831988609398, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 5.8e-06, |
|
"loss": 1.7895, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.022781205505457997, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 6e-06, |
|
"loss": 1.8702, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.023540579022306597, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 1.6666, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.024299952539155196, |
|
"grad_norm": 4.125, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 1.9699, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.025059326056003795, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 1.6828, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.025818699572852398, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 1.8098, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.026578073089700997, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 7e-06, |
|
"loss": 1.6943, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.027337446606549597, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 1.6835, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.028096820123398196, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 7.4e-06, |
|
"loss": 1.7776, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.028856193640246795, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 1.8554, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.029615567157095395, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 2.0373, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.030374940673943997, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.7848, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.031134314190792597, |
|
"grad_norm": 3.71875, |
|
"learning_rate": 8.2e-06, |
|
"loss": 1.8114, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03189368770764119, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 1.9938, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0326530612244898, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 8.6e-06, |
|
"loss": 1.6792, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0334124347413384, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 8.8e-06, |
|
"loss": 1.8027, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.034171808258187, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 9e-06, |
|
"loss": 1.8655, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0349311817750356, |
|
"grad_norm": 3.75, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 1.8402, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.035690555291884196, |
|
"grad_norm": 3.75, |
|
"learning_rate": 9.4e-06, |
|
"loss": 1.8507, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.036449928808732796, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 1.7811, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.037209302325581395, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 1.873, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.037968675842429994, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.7869, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.038728049359278593, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 1.02e-05, |
|
"loss": 1.847, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03948742287612719, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1.04e-05, |
|
"loss": 1.9245, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04024679639297579, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 1.0600000000000002e-05, |
|
"loss": 2.0196, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04100616990982439, |
|
"grad_norm": 3.609375, |
|
"learning_rate": 1.0800000000000002e-05, |
|
"loss": 1.7158, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.041765543426673, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 1.7212, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0425249169435216, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 1.9365, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.043284290460370196, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 1.14e-05, |
|
"loss": 1.8307, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.044043663977218796, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 1.16e-05, |
|
"loss": 1.6974, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.044803037494067395, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 1.18e-05, |
|
"loss": 1.6723, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.045562411010915994, |
|
"grad_norm": 3.25, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.6466, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.046321784527764594, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 1.22e-05, |
|
"loss": 1.7542, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04708115804461319, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.2400000000000002e-05, |
|
"loss": 1.8449, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04784053156146179, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 1.6788, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04859990507831039, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 1.8164, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.04935927859515899, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 1.7674, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05011865211200759, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 1.3200000000000002e-05, |
|
"loss": 1.711, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0508780256288562, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 1.3400000000000002e-05, |
|
"loss": 1.6482, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.051637399145704796, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 1.6734, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.052396772662553395, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.38e-05, |
|
"loss": 1.793, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.053156146179401995, |
|
"grad_norm": 3.65625, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.8647, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.053915519696250594, |
|
"grad_norm": 3.53125, |
|
"learning_rate": 1.4200000000000001e-05, |
|
"loss": 1.8249, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05467489321309919, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 1.88, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05543426672994779, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 1.46e-05, |
|
"loss": 1.9618, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05619364024679639, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 1.48e-05, |
|
"loss": 1.8017, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05695301376364499, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.6894, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05771238728049359, |
|
"grad_norm": 3.375, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 1.7385, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05847176079734219, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 1.54e-05, |
|
"loss": 1.631, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05923113431419079, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 1.5600000000000003e-05, |
|
"loss": 1.7345, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.059990507831039395, |
|
"grad_norm": 3.25, |
|
"learning_rate": 1.58e-05, |
|
"loss": 1.6198, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.060749881347887995, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.7695, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.061509254864736594, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 1.62e-05, |
|
"loss": 1.7828, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.062268628381585193, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 1.64e-05, |
|
"loss": 1.8845, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06302800189843379, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 1.66e-05, |
|
"loss": 1.698, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06378737541528239, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 1.6712, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.064546748932131, |
|
"grad_norm": 3.375, |
|
"learning_rate": 1.7e-05, |
|
"loss": 1.7673, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0653061224489796, |
|
"grad_norm": 3.609375, |
|
"learning_rate": 1.72e-05, |
|
"loss": 1.8005, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0660654959658282, |
|
"grad_norm": 3.5, |
|
"learning_rate": 1.7400000000000003e-05, |
|
"loss": 1.7674, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0668248694826768, |
|
"grad_norm": 3.484375, |
|
"learning_rate": 1.76e-05, |
|
"loss": 1.7665, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0675842429995254, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 1.7800000000000002e-05, |
|
"loss": 1.7815, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.068343616516374, |
|
"grad_norm": 3.25, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.6855, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0691029900332226, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 1.6461, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0698623635500712, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 1.6388, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07062173706691979, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 1.86e-05, |
|
"loss": 1.7505, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07138111058376839, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 1.88e-05, |
|
"loss": 1.6048, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07214048410061699, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 1.9e-05, |
|
"loss": 1.6765, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07289985761746559, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.634, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07365923113431419, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 1.94e-05, |
|
"loss": 1.6586, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07441860465116279, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 1.6871, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07517797816801139, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 1.98e-05, |
|
"loss": 1.7815, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07593735168485999, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 2e-05, |
|
"loss": 1.8349, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07669672520170859, |
|
"grad_norm": 3.625, |
|
"learning_rate": 1.9999966626453647e-05, |
|
"loss": 1.8303, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07745609871855719, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 1.9999866506037346e-05, |
|
"loss": 1.5889, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07821547223540579, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 1.9999699639419373e-05, |
|
"loss": 1.5841, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07897484575225439, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 1.999946602771351e-05, |
|
"loss": 1.6492, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07973421926910298, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 1.999916567247905e-05, |
|
"loss": 1.6682, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08049359278595158, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 1.9998798575720776e-05, |
|
"loss": 1.522, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08125296630280018, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 1.9998364739888954e-05, |
|
"loss": 1.6903, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.08201233981964878, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 1.9997864167879313e-05, |
|
"loss": 1.5823, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0827717133364974, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 1.9997296863033018e-05, |
|
"loss": 1.6105, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.083531086853346, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 1.9996662829136676e-05, |
|
"loss": 1.5877, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0842904603701946, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 1.999596207042227e-05, |
|
"loss": 1.7453, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0850498338870432, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 1.999519459156716e-05, |
|
"loss": 1.7015, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0858092074038918, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 1.999436039769405e-05, |
|
"loss": 1.6773, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08656858092074039, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 1.9993459494370938e-05, |
|
"loss": 1.6287, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08732795443758899, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 1.9992491887611095e-05, |
|
"loss": 1.7393, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08808732795443759, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 1.999145758387301e-05, |
|
"loss": 1.9157, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08884670147128619, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1.9990356590060363e-05, |
|
"loss": 1.6195, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08960607498813479, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 1.998918891352197e-05, |
|
"loss": 1.6428, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09036544850498339, |
|
"grad_norm": 3.1875, |
|
"learning_rate": 1.9987954562051724e-05, |
|
"loss": 1.6772, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.09112482202183199, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 1.998665354388857e-05, |
|
"loss": 1.5625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09188419553868059, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 1.9985285867716423e-05, |
|
"loss": 1.6915, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.09264356905552919, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 1.9983851542664125e-05, |
|
"loss": 1.6413, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09340294257237779, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 1.998235057830538e-05, |
|
"loss": 1.5844, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.09416231608922639, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 1.9980782984658682e-05, |
|
"loss": 1.561, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09492168960607499, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 1.997914877218727e-05, |
|
"loss": 1.6305, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09568106312292358, |
|
"grad_norm": 2.25, |
|
"learning_rate": 1.9977447951799035e-05, |
|
"loss": 1.4409, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.09644043663977218, |
|
"grad_norm": 2.484375, |
|
"learning_rate": 1.9975680534846457e-05, |
|
"loss": 1.5723, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.09719981015662078, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 1.9973846533126533e-05, |
|
"loss": 1.7338, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09795918367346938, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 1.997194595888069e-05, |
|
"loss": 1.6383, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09871855719031798, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 1.996997882479471e-05, |
|
"loss": 1.5887, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09947793070716658, |
|
"grad_norm": 2.4375, |
|
"learning_rate": 1.9967945143998636e-05, |
|
"loss": 1.5525, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.10023730422401518, |
|
"grad_norm": 2.359375, |
|
"learning_rate": 1.99658449300667e-05, |
|
"loss": 1.4995, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1009966777408638, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 1.996367819701722e-05, |
|
"loss": 1.5085, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.1017560512577124, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 1.996144495931251e-05, |
|
"loss": 1.5708, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.10251542477456099, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 1.995914523185878e-05, |
|
"loss": 1.623, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.10327479829140959, |
|
"grad_norm": 2.1875, |
|
"learning_rate": 1.9956779030006038e-05, |
|
"loss": 1.5378, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.10403417180825819, |
|
"grad_norm": 2.5, |
|
"learning_rate": 1.9954346369548002e-05, |
|
"loss": 1.5672, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.10479354532510679, |
|
"grad_norm": 2.078125, |
|
"learning_rate": 1.995184726672197e-05, |
|
"loss": 1.5316, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.10555291884195539, |
|
"grad_norm": 2.25, |
|
"learning_rate": 1.994928173820873e-05, |
|
"loss": 1.5776, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.10631229235880399, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 1.994664980113243e-05, |
|
"loss": 1.6079, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10707166587565259, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 1.9943951473060488e-05, |
|
"loss": 1.5903, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.10783103939250119, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 1.9941186772003463e-05, |
|
"loss": 1.6456, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.10859041290934979, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 1.9938355716414933e-05, |
|
"loss": 1.5053, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.10934978642619839, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 1.9935458325191365e-05, |
|
"loss": 1.5925, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.11010915994304699, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 1.9932494617672007e-05, |
|
"loss": 1.6033, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11086853345989559, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 1.992946461363874e-05, |
|
"loss": 1.553, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11162790697674418, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 1.9926368333315964e-05, |
|
"loss": 1.5962, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.11238728049359278, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 1.992320579737045e-05, |
|
"loss": 1.6159, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.11314665401044138, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 1.991997702691121e-05, |
|
"loss": 1.4709, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.11390602752728998, |
|
"grad_norm": 2.375, |
|
"learning_rate": 1.9916682043489337e-05, |
|
"loss": 1.6076, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11466540104413858, |
|
"grad_norm": 1.984375, |
|
"learning_rate": 1.9913320869097897e-05, |
|
"loss": 1.4864, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.11542477456098718, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 1.9909893526171745e-05, |
|
"loss": 1.4559, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.11618414807783578, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 1.990640003758741e-05, |
|
"loss": 1.5585, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.11694352159468438, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 1.9902840426662897e-05, |
|
"loss": 1.4656, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11770289511153298, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 1.9899214717157588e-05, |
|
"loss": 1.5357, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.11846226862838158, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 1.9895522933272028e-05, |
|
"loss": 1.5101, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11922164214523019, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 1.989176509964781e-05, |
|
"loss": 1.5287, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.11998101566207879, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 1.988794124136738e-05, |
|
"loss": 1.6104, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.12074038917892739, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 1.9884051383953876e-05, |
|
"loss": 1.5313, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.12149976269577599, |
|
"grad_norm": 1.703125, |
|
"learning_rate": 1.9880095553370967e-05, |
|
"loss": 1.4602, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12225913621262459, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 1.9876073776022676e-05, |
|
"loss": 1.4071, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.12301850972947319, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 1.987198607875319e-05, |
|
"loss": 1.4707, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.12377788324632179, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 1.9867832488846702e-05, |
|
"loss": 1.4729, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.12453725676317039, |
|
"grad_norm": 1.6328125, |
|
"learning_rate": 1.9863613034027224e-05, |
|
"loss": 1.4967, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.12529663028001897, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.9859327742458387e-05, |
|
"loss": 1.4463, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.12605600379686757, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 1.985497664274326e-05, |
|
"loss": 1.4763, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.12681537731371617, |
|
"grad_norm": 1.6953125, |
|
"learning_rate": 1.9850559763924176e-05, |
|
"loss": 1.5175, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.12757475083056477, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 1.9846077135482513e-05, |
|
"loss": 1.4363, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1283341243474134, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.9841528787338513e-05, |
|
"loss": 1.3922, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.129093497864262, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 1.983691474985108e-05, |
|
"loss": 1.4937, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1298528713811106, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.983223505381757e-05, |
|
"loss": 1.4381, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1306122448979592, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 1.9827489730473597e-05, |
|
"loss": 1.5019, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1313716184148078, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 1.982267881149281e-05, |
|
"loss": 1.3798, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1321309919316564, |
|
"grad_norm": 1.6796875, |
|
"learning_rate": 1.9817802328986696e-05, |
|
"loss": 1.5623, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.132890365448505, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 1.9812860315504362e-05, |
|
"loss": 1.4497, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1336497389653536, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 1.9807852804032306e-05, |
|
"loss": 1.4442, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1344091124822022, |
|
"grad_norm": 1.734375, |
|
"learning_rate": 1.9802779827994214e-05, |
|
"loss": 1.5552, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1351684859990508, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.9797641421250725e-05, |
|
"loss": 1.4411, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.1359278595158994, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.9792437618099215e-05, |
|
"loss": 1.4569, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.136687233032748, |
|
"grad_norm": 1.40625, |
|
"learning_rate": 1.9787168453273546e-05, |
|
"loss": 1.4257, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1374466065495966, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 1.9781833961943874e-05, |
|
"loss": 1.417, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1382059800664452, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.9776434179716365e-05, |
|
"loss": 1.4831, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1389653535832938, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.977096914263301e-05, |
|
"loss": 1.3927, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1397247271001424, |
|
"grad_norm": 1.3671875, |
|
"learning_rate": 1.9765438887171327e-05, |
|
"loss": 1.431, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.140484100616991, |
|
"grad_norm": 1.4609375, |
|
"learning_rate": 1.975984345024418e-05, |
|
"loss": 1.4798, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14124347413383959, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.975418286919947e-05, |
|
"loss": 1.4939, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.14200284765068819, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 1.9748457181819937e-05, |
|
"loss": 1.4784, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.14276222116753678, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 1.9742666426322877e-05, |
|
"loss": 1.3947, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.14352159468438538, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.97368106413599e-05, |
|
"loss": 1.3783, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.14428096820123398, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.9730889866016668e-05, |
|
"loss": 1.3301, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14504034171808258, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.9724904139812636e-05, |
|
"loss": 1.4403, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.14579971523493118, |
|
"grad_norm": 1.1484375, |
|
"learning_rate": 1.9718853502700783e-05, |
|
"loss": 1.4301, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.14655908875177978, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.9712737995067357e-05, |
|
"loss": 1.3473, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.14731846226862838, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.970655765773159e-05, |
|
"loss": 1.3557, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.14807783578547698, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 1.9700312531945444e-05, |
|
"loss": 1.3979, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.14883720930232558, |
|
"grad_norm": 1.375, |
|
"learning_rate": 1.9694002659393306e-05, |
|
"loss": 1.5305, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.14959658281917418, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 1.9687628082191748e-05, |
|
"loss": 1.5078, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.15035595633602278, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 1.9681188842889222e-05, |
|
"loss": 1.4817, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.15111532985287138, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.9674684984465774e-05, |
|
"loss": 1.3599, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.15187470336971998, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.966811655033277e-05, |
|
"loss": 1.384, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15263407688656858, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.9661483584332592e-05, |
|
"loss": 1.514, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.15339345040341718, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 1.9654786130738372e-05, |
|
"loss": 1.3908, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.15415282392026577, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.9648024234253654e-05, |
|
"loss": 1.336, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.15491219743711437, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.9641197940012136e-05, |
|
"loss": 1.3723, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.15567157095396297, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.963430729357735e-05, |
|
"loss": 1.3784, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.15643094447081157, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.9627352340942355e-05, |
|
"loss": 1.3541, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.15719031798766017, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 1.9620333128529436e-05, |
|
"loss": 1.3969, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.15794969150450877, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 1.96132497031898e-05, |
|
"loss": 1.4611, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.15870906502135737, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.9606102112203243e-05, |
|
"loss": 1.3631, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.15946843853820597, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.9598890403277867e-05, |
|
"loss": 1.3605, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16022781205505457, |
|
"grad_norm": 1.078125, |
|
"learning_rate": 1.9591614624549724e-05, |
|
"loss": 1.4721, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.16098718557190317, |
|
"grad_norm": 1.0234375, |
|
"learning_rate": 1.958427482458253e-05, |
|
"loss": 1.429, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.16174655908875177, |
|
"grad_norm": 0.94921875, |
|
"learning_rate": 1.9576871052367307e-05, |
|
"loss": 1.3866, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.16250593260560037, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 1.956940335732209e-05, |
|
"loss": 1.4103, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 1.956187178929157e-05, |
|
"loss": 1.3547, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.16402467963929757, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 1.9554276398546767e-05, |
|
"loss": 1.4262, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1647840531561462, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.9546617235784716e-05, |
|
"loss": 1.3589, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1655434266729948, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.95388943521281e-05, |
|
"loss": 1.3694, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.1663028001898434, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.953110779912492e-05, |
|
"loss": 1.3515, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.167062173706692, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.9523257628748148e-05, |
|
"loss": 1.419, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1678215472235406, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 1.9515343893395394e-05, |
|
"loss": 1.3665, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.1685809207403892, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 1.9507366645888544e-05, |
|
"loss": 1.3448, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1693402942572378, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.9499325939473403e-05, |
|
"loss": 1.3186, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.1700996677740864, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.9491221827819348e-05, |
|
"loss": 1.2722, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.170859041290935, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 1.948305436501897e-05, |
|
"loss": 1.4339, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1716184148077836, |
|
"grad_norm": 0.796875, |
|
"learning_rate": 1.9474823605587705e-05, |
|
"loss": 1.3838, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.1723777883246322, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 1.9466529604463484e-05, |
|
"loss": 1.4411, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.17313716184148079, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.9458172417006347e-05, |
|
"loss": 1.3107, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.17389653535832938, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 1.9449752098998097e-05, |
|
"loss": 1.4422, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.17465590887517798, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.9441268706641907e-05, |
|
"loss": 1.3728, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17541528239202658, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 1.9432722296561954e-05, |
|
"loss": 1.4489, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.17617465590887518, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 1.942411292580304e-05, |
|
"loss": 1.3594, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.17693402942572378, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 1.941544065183021e-05, |
|
"loss": 1.3176, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.17769340294257238, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 1.9406705532528373e-05, |
|
"loss": 1.3331, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.17845277645942098, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 1.9397907626201915e-05, |
|
"loss": 1.3217, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.17921214997626958, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.9389046991574298e-05, |
|
"loss": 1.3825, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.17997152349311818, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 1.938012368778768e-05, |
|
"loss": 1.3604, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.18073089700996678, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.9371137774402528e-05, |
|
"loss": 1.3345, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.18149027052681538, |
|
"grad_norm": 0.87890625, |
|
"learning_rate": 1.9362089311397194e-05, |
|
"loss": 1.417, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.18224964404366398, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 1.935297835916754e-05, |
|
"loss": 1.2646, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18300901756051258, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.9343804978526525e-05, |
|
"loss": 1.3089, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.18376839107736118, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 1.9334569230703794e-05, |
|
"loss": 1.2812, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.18452776459420978, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.9325271177345284e-05, |
|
"loss": 1.3355, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.18528713811105837, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 1.9315910880512792e-05, |
|
"loss": 1.3089, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.18604651162790697, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.9306488402683582e-05, |
|
"loss": 1.3573, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.18680588514475557, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 1.929700380674995e-05, |
|
"loss": 1.2955, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.18756525866160417, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 1.9287457156018824e-05, |
|
"loss": 1.2819, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.18832463217845277, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 1.927784851421132e-05, |
|
"loss": 1.2677, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.18908400569530137, |
|
"grad_norm": 0.75, |
|
"learning_rate": 1.926817794546232e-05, |
|
"loss": 1.3524, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.18984337921214997, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 1.9258445514320064e-05, |
|
"loss": 1.4673, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19060275272899857, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 1.9248651285745708e-05, |
|
"loss": 1.3484, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.19136212624584717, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 1.9238795325112867e-05, |
|
"loss": 1.3565, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.19212149976269577, |
|
"grad_norm": 0.625, |
|
"learning_rate": 1.9228877698207227e-05, |
|
"loss": 1.3004, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.19288087327954437, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 1.921889847122605e-05, |
|
"loss": 1.3457, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.19364024679639297, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 1.9208857710777785e-05, |
|
"loss": 1.314, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.19439962031324157, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.9198755483881585e-05, |
|
"loss": 1.4202, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.19515899383009017, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 1.9188591857966875e-05, |
|
"loss": 1.3255, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.19591836734693877, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 1.917836690087291e-05, |
|
"loss": 1.4397, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.19667774086378736, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.91680806808483e-05, |
|
"loss": 1.3296, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.19743711438063596, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.9157733266550577e-05, |
|
"loss": 1.2916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19819648789748456, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.914732472704572e-05, |
|
"loss": 1.3308, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.19895586141433316, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 1.9136855131807705e-05, |
|
"loss": 1.3426, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.19971523493118176, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 1.9126324550718036e-05, |
|
"loss": 1.2745, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.20047460844803036, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 1.911573305406528e-05, |
|
"loss": 1.3073, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.201233981964879, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.9105080712544603e-05, |
|
"loss": 1.2674, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2019933554817276, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 1.909436759725728e-05, |
|
"loss": 1.3087, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2027527289985762, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 1.908359377971025e-05, |
|
"loss": 1.284, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2035121025154248, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.9072759331815602e-05, |
|
"loss": 1.2451, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.20427147603227339, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 1.9061864325890132e-05, |
|
"loss": 1.2624, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.20503084954912199, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.9050908834654834e-05, |
|
"loss": 1.2392, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.20579022306597058, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 1.9039892931234434e-05, |
|
"loss": 1.2405, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.20654959658281918, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.902881668915688e-05, |
|
"loss": 1.2509, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.20730897009966778, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.9017680182352866e-05, |
|
"loss": 1.3047, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.20806834361651638, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 1.9006483485155338e-05, |
|
"loss": 1.3492, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.20882771713336498, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 1.8995226672298993e-05, |
|
"loss": 1.2451, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.20958709065021358, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 1.898390981891979e-05, |
|
"loss": 1.3577, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.21034646416706218, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 1.897253300055443e-05, |
|
"loss": 1.3152, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.21110583768391078, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 1.896109629313987e-05, |
|
"loss": 1.3153, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.21186521120075938, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 1.8949599773012808e-05, |
|
"loss": 1.3153, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.21262458471760798, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.8938043516909173e-05, |
|
"loss": 1.2932, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.21338395823445658, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.892642760196361e-05, |
|
"loss": 1.2294, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.21414333175130518, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 1.891475210570898e-05, |
|
"loss": 1.3246, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.21490270526815378, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.890301710607582e-05, |
|
"loss": 1.2312, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.21566207878500238, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 1.8891222681391853e-05, |
|
"loss": 1.2243, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.21642145230185098, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.8879368910381423e-05, |
|
"loss": 1.2593, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.21718082581869957, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 1.8867455872165006e-05, |
|
"loss": 1.3375, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.21794019933554817, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.8855483646258677e-05, |
|
"loss": 1.2492, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.21869957285239677, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 1.8843452312573557e-05, |
|
"loss": 1.3306, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.21945894636924537, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 1.8831361951415298e-05, |
|
"loss": 1.2743, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.22021831988609397, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.881921264348355e-05, |
|
"loss": 1.3699, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22097769340294257, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 1.880700446987141e-05, |
|
"loss": 1.3548, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.22173706691979117, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 1.879473751206489e-05, |
|
"loss": 1.3974, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.22249644043663977, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.8782411851942365e-05, |
|
"loss": 1.29, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.22325581395348837, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 1.877002757177403e-05, |
|
"loss": 1.2906, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.22401518747033697, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.8757584754221363e-05, |
|
"loss": 1.2135, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.22477456098718557, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.8745083482336547e-05, |
|
"loss": 1.3045, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.22553393450403417, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 1.8732523839561934e-05, |
|
"loss": 1.2641, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.22629330802088277, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 1.8719905909729493e-05, |
|
"loss": 1.2492, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.22705268153773137, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.8707229777060242e-05, |
|
"loss": 1.2867, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.22781205505457996, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 1.869449552616367e-05, |
|
"loss": 1.2946, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22857142857142856, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 1.8681703242037208e-05, |
|
"loss": 1.3014, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.22933080208827716, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.8668853010065633e-05, |
|
"loss": 1.2937, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.23009017560512576, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.86559449160205e-05, |
|
"loss": 1.2866, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.23084954912197436, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.8642979046059595e-05, |
|
"loss": 1.2542, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.23160892263882296, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.8629955486726324e-05, |
|
"loss": 1.2718, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.23236829615567156, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.861687432494916e-05, |
|
"loss": 1.2645, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.23312766967252016, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 1.8603735648041054e-05, |
|
"loss": 1.2895, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.23388704318936876, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.8590539543698852e-05, |
|
"loss": 1.322, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.23464641670621736, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 1.8577286100002723e-05, |
|
"loss": 1.2584, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.23540579022306596, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.856397540541554e-05, |
|
"loss": 1.2814, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23616516373991456, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.855060754878233e-05, |
|
"loss": 1.2865, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.23692453725676316, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.853718261932964e-05, |
|
"loss": 1.2597, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.23768391077361178, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.852370070666498e-05, |
|
"loss": 1.2556, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.23844328429046038, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 1.8510161900776186e-05, |
|
"loss": 1.304, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.23920265780730898, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.8496566292030864e-05, |
|
"loss": 1.2148, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.23996203132415758, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.8482913971175737e-05, |
|
"loss": 1.2887, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.24072140484100618, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 1.846920502933609e-05, |
|
"loss": 1.3276, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.24148077835785478, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.8455439558015117e-05, |
|
"loss": 1.2681, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.24224015187470338, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 1.8441617649093334e-05, |
|
"loss": 1.2898, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.24299952539155198, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 1.8427739394827976e-05, |
|
"loss": 1.2785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.24375889890840058, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.8413804887852343e-05, |
|
"loss": 1.1799, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.24451827242524918, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.839981422117523e-05, |
|
"loss": 1.1951, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.24527764594209778, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 1.8385767488180255e-05, |
|
"loss": 1.3233, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.24603701945894638, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.8371664782625287e-05, |
|
"loss": 1.2204, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.24679639297579498, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 1.8357506198641784e-05, |
|
"loss": 1.2763, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.24755576649264358, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 1.8343291830734176e-05, |
|
"loss": 1.3397, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.24831514000949217, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.8329021773779242e-05, |
|
"loss": 1.3029, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.24907451352634077, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 1.8314696123025456e-05, |
|
"loss": 1.2977, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.24983388704318937, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.8300314974092372e-05, |
|
"loss": 1.2915, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.25059326056003794, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 1.8285878422969982e-05, |
|
"loss": 1.2278, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25135263407688657, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.827138656601807e-05, |
|
"loss": 1.2337, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.25211200759373514, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.825683949996556e-05, |
|
"loss": 1.2978, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.25287138111058377, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 1.8242237321909895e-05, |
|
"loss": 1.2512, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.25363075462743234, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 1.8227580129316368e-05, |
|
"loss": 1.2702, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.25439012814428097, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 1.821286802001747e-05, |
|
"loss": 1.2253, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.25514950166112954, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 1.819810109221227e-05, |
|
"loss": 1.2708, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.25590887517797817, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.81832794444657e-05, |
|
"loss": 1.2157, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2566682486948268, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 1.8168403175707958e-05, |
|
"loss": 1.3529, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.25742762221167537, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.815347238523381e-05, |
|
"loss": 1.1796, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.258186995728524, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.813848717270195e-05, |
|
"loss": 1.2568, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.25894636924537257, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.812344763813431e-05, |
|
"loss": 1.2732, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2597057427622212, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.8108353881915403e-05, |
|
"loss": 1.2737, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.26046511627906976, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.8093206004791673e-05, |
|
"loss": 1.2281, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2612244897959184, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 1.8078004107870797e-05, |
|
"loss": 1.3148, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.26198386331276696, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.806274829262101e-05, |
|
"loss": 1.2584, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2627432368296156, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.8047438660870447e-05, |
|
"loss": 1.2665, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.26350261034646416, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.803207531480645e-05, |
|
"loss": 1.2892, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2642619838633128, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.8016658356974885e-05, |
|
"loss": 1.2782, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.26502135738016136, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.800118789027947e-05, |
|
"loss": 1.2857, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.26578073089701, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 1.798566401798106e-05, |
|
"loss": 1.2529, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.26654010441385856, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.7970086843697e-05, |
|
"loss": 1.2445, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2672994779307072, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.7954456471400393e-05, |
|
"loss": 1.2143, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.26805885144755576, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.793877300541944e-05, |
|
"loss": 1.2444, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2688182249644044, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 1.7923036550436706e-05, |
|
"loss": 1.2674, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.26957759848125296, |
|
"grad_norm": 0.5, |
|
"learning_rate": 1.7907247211488456e-05, |
|
"loss": 1.2926, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2703369719981016, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.789140509396394e-05, |
|
"loss": 1.2125, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.27109634551495015, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 1.7875510303604678e-05, |
|
"loss": 1.1936, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2718557190317988, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.7859562946503787e-05, |
|
"loss": 1.2251, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.27261509254864735, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.784356312910523e-05, |
|
"loss": 1.2829, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.273374466065496, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.7827510958203147e-05, |
|
"loss": 1.2277, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.27413383958234455, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 1.78114065409411e-05, |
|
"loss": 1.2715, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2748932130991932, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 1.7795249984811397e-05, |
|
"loss": 1.2467, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.27565258661604175, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 1.7779041397654355e-05, |
|
"loss": 1.2529, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2764119601328904, |
|
"grad_norm": 0.5, |
|
"learning_rate": 1.7762780887657576e-05, |
|
"loss": 1.2749, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.27717133364973895, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.7746468563355243e-05, |
|
"loss": 1.1978, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2779307071665876, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.773010453362737e-05, |
|
"loss": 1.244, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.27869008068343615, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 1.7713688907699107e-05, |
|
"loss": 1.3013, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2794494542002848, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 1.769722179513998e-05, |
|
"loss": 1.2608, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.28020882771713335, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.7680703305863177e-05, |
|
"loss": 1.1853, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.280968201233982, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.7664133550124815e-05, |
|
"loss": 1.2565, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.28172757475083055, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.7647512638523193e-05, |
|
"loss": 1.1891, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.28248694826767917, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 1.7630840681998068e-05, |
|
"loss": 1.231, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.28324632178452774, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.7614117791829897e-05, |
|
"loss": 1.2935, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.28400569530137637, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.759734407963911e-05, |
|
"loss": 1.2953, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.28476506881822494, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 1.7580519657385368e-05, |
|
"loss": 1.2782, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.28552444233507357, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 1.7563644637366786e-05, |
|
"loss": 1.333, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.28628381585192214, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.754671913221923e-05, |
|
"loss": 1.2813, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.28704318936877077, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 1.752974325491551e-05, |
|
"loss": 1.2581, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.28780256288561934, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.7512717118764687e-05, |
|
"loss": 1.2302, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.28856193640246797, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.7495640837411265e-05, |
|
"loss": 1.2359, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28932130991931654, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.747851452483445e-05, |
|
"loss": 1.2548, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.29008068343616517, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 1.7461338295347404e-05, |
|
"loss": 1.2752, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.29084005695301374, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.7444112263596474e-05, |
|
"loss": 1.2092, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.29159943046986236, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.74268365445604e-05, |
|
"loss": 1.3045, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.292358803986711, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.7409511253549592e-05, |
|
"loss": 1.2586, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.29311817750355956, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.7392136506205332e-05, |
|
"loss": 1.1966, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.2938775510204082, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.7374712418498997e-05, |
|
"loss": 1.1853, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.29463692453725676, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.735723910673132e-05, |
|
"loss": 1.2408, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2953962980541054, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.7339716687531564e-05, |
|
"loss": 1.163, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.29615567157095396, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.7322145277856793e-05, |
|
"loss": 1.2941, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2969150450878026, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 1.7304524994991056e-05, |
|
"loss": 1.2504, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.29767441860465116, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.7286855956544616e-05, |
|
"loss": 1.2842, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2984337921214998, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.726913828045317e-05, |
|
"loss": 1.2403, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.29919316563834836, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 1.725137208497705e-05, |
|
"loss": 1.254, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.299952539155197, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 1.7233557488700453e-05, |
|
"loss": 1.2395, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.30071191267204556, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 1.7215694610530624e-05, |
|
"loss": 1.2705, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3014712861888942, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 1.7197783569697084e-05, |
|
"loss": 1.2212, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.30223065970574275, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.7179824485750824e-05, |
|
"loss": 1.2975, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3029900332225914, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 1.7161817478563504e-05, |
|
"loss": 1.2402, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.30374940673943995, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.7143762668326667e-05, |
|
"loss": 1.2287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3045087802562886, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.712566017555092e-05, |
|
"loss": 1.2097, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.30526815377313715, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.7107510121065138e-05, |
|
"loss": 1.3114, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3060275272899858, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.7089312626015663e-05, |
|
"loss": 1.2468, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.30678690080683435, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.7071067811865477e-05, |
|
"loss": 1.1837, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.307546274323683, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.7052775800393415e-05, |
|
"loss": 1.238, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.30830564784053155, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.703443671369333e-05, |
|
"loss": 1.217, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3090650213573802, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.7016050674173304e-05, |
|
"loss": 1.2202, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.30982439487422875, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 1.69976178045548e-05, |
|
"loss": 1.2238, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3105837683910774, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 1.6979138227871858e-05, |
|
"loss": 1.2318, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.31134314190792595, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.696061206747029e-05, |
|
"loss": 1.2208, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3121025154247746, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.6942039447006823e-05, |
|
"loss": 1.2223, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.31286188894162315, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.6923420490448298e-05, |
|
"loss": 1.1626, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3136212624584718, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.6904755322070846e-05, |
|
"loss": 1.2768, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.31438063597532034, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.688604406645903e-05, |
|
"loss": 1.2694, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.31514000949216897, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.686728684850505e-05, |
|
"loss": 1.1856, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.31589938300901754, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.6848483793407874e-05, |
|
"loss": 1.2184, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.31665875652586617, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.6829635026672432e-05, |
|
"loss": 1.1899, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.31741813004271474, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.6810740674108763e-05, |
|
"loss": 1.2078, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.31817750355956337, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 1.6791800861831176e-05, |
|
"loss": 1.2226, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.31893687707641194, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 1.6772815716257414e-05, |
|
"loss": 1.2044, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.31969625059326057, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.6753785364107796e-05, |
|
"loss": 1.2699, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.32045562411010914, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.6734709932404404e-05, |
|
"loss": 1.1732, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.32121499762695777, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 1.6715589548470187e-05, |
|
"loss": 1.2655, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.32197437114380634, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.6696424339928153e-05, |
|
"loss": 1.2044, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.32273374466065496, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.6677214434700495e-05, |
|
"loss": 1.2083, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.32349311817750354, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 1.665795996100775e-05, |
|
"loss": 1.2273, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.32425249169435216, |
|
"grad_norm": 0.5, |
|
"learning_rate": 1.663866104736793e-05, |
|
"loss": 1.2407, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.32501186521120073, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.6619317822595666e-05, |
|
"loss": 1.2166, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.32577123872804936, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 1.6599930415801374e-05, |
|
"loss": 1.238, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.658049895639034e-05, |
|
"loss": 1.1813, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.32728998576174656, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.6561023574061925e-05, |
|
"loss": 1.2264, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.32804935927859513, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.6541504398808633e-05, |
|
"loss": 1.2364, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.32880873279544376, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.6521941560915284e-05, |
|
"loss": 1.2339, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3295681063122924, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.6502335190958135e-05, |
|
"loss": 1.2952, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.33032747982914096, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.648268541980401e-05, |
|
"loss": 1.195, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3310868533459896, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 1.646299237860941e-05, |
|
"loss": 1.2866, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.33184622686283816, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.6443256198819665e-05, |
|
"loss": 1.2219, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3326056003796868, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.6423477012168038e-05, |
|
"loss": 1.2458, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.33336497389653535, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.640365495067485e-05, |
|
"loss": 1.21, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.334124347413384, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 1.638379014664659e-05, |
|
"loss": 1.2286, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33488372093023255, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.636388273267506e-05, |
|
"loss": 1.1945, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3356430944470812, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 1.6343932841636455e-05, |
|
"loss": 1.3204, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.33640246796392975, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.63239406066905e-05, |
|
"loss": 1.2361, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3371618414807784, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.6303906161279554e-05, |
|
"loss": 1.1951, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.33792121499762695, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 1.6283829639127705e-05, |
|
"loss": 1.2686, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3386805885144756, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 1.6263711174239914e-05, |
|
"loss": 1.264, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.33943996203132415, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 1.6243550900901076e-05, |
|
"loss": 1.2668, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3401993355481728, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.6223348953675163e-05, |
|
"loss": 1.1683, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.34095870906502135, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.6203105467404284e-05, |
|
"loss": 1.2147, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.34171808258187, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.6182820577207842e-05, |
|
"loss": 1.2178, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.34247745609871855, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.6162494418481574e-05, |
|
"loss": 1.2321, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3432368296155672, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 1.6142127126896682e-05, |
|
"loss": 1.2495, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.34399620313241575, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.612171883839891e-05, |
|
"loss": 1.1807, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3447555766492644, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.6101269689207656e-05, |
|
"loss": 1.1941, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.34551495016611294, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.6080779815815043e-05, |
|
"loss": 1.2159, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.34627432368296157, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.6060249354985023e-05, |
|
"loss": 1.222, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.34703369719981014, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.603967844375245e-05, |
|
"loss": 1.2526, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.34779307071665877, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.6019067219422178e-05, |
|
"loss": 1.1691, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.34855244423350734, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.5998415819568135e-05, |
|
"loss": 1.1933, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.34931181775035597, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.597772438203241e-05, |
|
"loss": 1.2525, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.35007119126720454, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.5956993044924334e-05, |
|
"loss": 1.2022, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.35083056478405317, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 1.593622194661956e-05, |
|
"loss": 1.2853, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.35158993830090174, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.5915411225759122e-05, |
|
"loss": 1.3113, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.35234931181775037, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 1.5894561021248535e-05, |
|
"loss": 1.246, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.35310868533459894, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.5873671472256854e-05, |
|
"loss": 1.1929, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.35386805885144756, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 1.5852742718215743e-05, |
|
"loss": 1.2469, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.35462743236829614, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.5831774898818558e-05, |
|
"loss": 1.1592, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.35538680588514476, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.5810768154019386e-05, |
|
"loss": 1.2145, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.35614617940199333, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.5789722624032143e-05, |
|
"loss": 1.1859, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.35690555291884196, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 1.576863844932963e-05, |
|
"loss": 1.2184, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.35766492643569053, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 1.5747515770642582e-05, |
|
"loss": 1.2126, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.35842429995253916, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 1.5726354728958736e-05, |
|
"loss": 1.2569, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.35918367346938773, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.570515546552189e-05, |
|
"loss": 1.2173, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.35994304698623636, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.568391812183097e-05, |
|
"loss": 1.1995, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.36070242050308493, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.566264283963907e-05, |
|
"loss": 1.238, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.36146179401993356, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.5641329760952514e-05, |
|
"loss": 1.2179, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.36222116753678213, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.5619979028029898e-05, |
|
"loss": 1.2148, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.36298054105363076, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.5598590783381165e-05, |
|
"loss": 1.201, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.36373991457047933, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.5577165169766627e-05, |
|
"loss": 1.2383, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.36449928808732796, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 1.5555702330196024e-05, |
|
"loss": 1.2399, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3652586616041765, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.5534202407927574e-05, |
|
"loss": 1.2565, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.36601803512102515, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.5512665546467008e-05, |
|
"loss": 1.2256, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3667774086378738, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.549109188956661e-05, |
|
"loss": 1.1796, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.36753678215472235, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 1.5469481581224274e-05, |
|
"loss": 1.2004, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.368296155671571, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.5447834765682515e-05, |
|
"loss": 1.1787, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.36905552918841955, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.5426151587427548e-05, |
|
"loss": 1.1656, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3698149027052682, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.540443219118827e-05, |
|
"loss": 1.1887, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.37057427622211675, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.5382676721935344e-05, |
|
"loss": 1.2309, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3713336497389654, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.5360885324880205e-05, |
|
"loss": 1.1869, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.37209302325581395, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 1.5339058145474086e-05, |
|
"loss": 1.2477, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3728523967726626, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.5317195329407067e-05, |
|
"loss": 1.2257, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.37361177028951115, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.529529702260709e-05, |
|
"loss": 1.2565, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3743711438063598, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.5273363371238983e-05, |
|
"loss": 1.1869, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.37513051732320835, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.5251394521703496e-05, |
|
"loss": 1.2229, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.375889890840057, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.5229390620636309e-05, |
|
"loss": 1.2105, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.37664926435690554, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 1.5207351814907068e-05, |
|
"loss": 1.2271, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.37740863787375417, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.5185278251618391e-05, |
|
"loss": 1.1995, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.37816801139060274, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.51631700781049e-05, |
|
"loss": 1.1512, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.37892738490745137, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.5141027441932217e-05, |
|
"loss": 1.2129, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.37968675842429994, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.5118850490896012e-05, |
|
"loss": 1.2336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38044613194114857, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.5096639373020976e-05, |
|
"loss": 1.1947, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.38120550545799714, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.5074394236559871e-05, |
|
"loss": 1.2024, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.38196487897484577, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.5052115229992512e-05, |
|
"loss": 1.2024, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.38272425249169434, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.5029802502024788e-05, |
|
"loss": 1.2601, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.38348362600854297, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.5007456201587676e-05, |
|
"loss": 1.2082, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.38424299952539154, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.4985076477836232e-05, |
|
"loss": 1.1751, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.38500237304224016, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.4962663480148606e-05, |
|
"loss": 1.1682, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.38576174655908874, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.4940217358125042e-05, |
|
"loss": 1.222, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.38652112007593736, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.4917738261586878e-05, |
|
"loss": 1.1834, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.38728049359278593, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.489522634057555e-05, |
|
"loss": 1.1874, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.38803986710963456, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.4872681745351582e-05, |
|
"loss": 1.2168, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.38879924062648313, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.4850104626393598e-05, |
|
"loss": 1.2838, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.38955861414333176, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.4827495134397298e-05, |
|
"loss": 1.1814, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.39031798766018033, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.4804853420274471e-05, |
|
"loss": 1.2424, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.39107736117702896, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 1.4782179635151978e-05, |
|
"loss": 1.2785, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.39183673469387753, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.4759473930370738e-05, |
|
"loss": 1.2162, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.39259610821072616, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.473673645748473e-05, |
|
"loss": 1.2142, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.39335548172757473, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 1.4713967368259981e-05, |
|
"loss": 1.2056, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.39411485524442336, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 1.469116681467353e-05, |
|
"loss": 1.2555, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.39487422876127193, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.4668334948912455e-05, |
|
"loss": 1.1837, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.39563360227812056, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.4645471923372818e-05, |
|
"loss": 1.192, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3963929757949691, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.4622577890658668e-05, |
|
"loss": 1.2303, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.39715234931181775, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.4599653003581016e-05, |
|
"loss": 1.2871, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.3979117228286663, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 1.4576697415156818e-05, |
|
"loss": 1.2274, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.39867109634551495, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.4553711278607953e-05, |
|
"loss": 1.2148, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3994304698623635, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.4530694747360203e-05, |
|
"loss": 1.123, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.40018984337921215, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.4507647975042221e-05, |
|
"loss": 1.1685, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4009492168960607, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.4484571115484508e-05, |
|
"loss": 1.2304, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.40170859041290935, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.44614643227184e-05, |
|
"loss": 1.1826, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.402467963929758, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.4438327750975009e-05, |
|
"loss": 1.2434, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.40322733744660655, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.4415161554684239e-05, |
|
"loss": 1.177, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4039867109634552, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.4391965888473705e-05, |
|
"loss": 1.1952, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.40474608448030375, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 1.436874090716774e-05, |
|
"loss": 1.2767, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4055054579971524, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.434548676578634e-05, |
|
"loss": 1.2334, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.40626483151400095, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.432220361954414e-05, |
|
"loss": 1.1755, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4070242050308496, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.429889162384937e-05, |
|
"loss": 1.1615, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.40778357854769814, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.4275550934302822e-05, |
|
"loss": 1.2221, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.40854295206454677, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.4252181706696817e-05, |
|
"loss": 1.2065, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.40930232558139534, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.4228784097014156e-05, |
|
"loss": 1.2361, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.41006169909824397, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.4205358261427076e-05, |
|
"loss": 1.1413, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.41082107261509254, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 1.4181904356296225e-05, |
|
"loss": 1.1597, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.41158044613194117, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.4158422538169596e-05, |
|
"loss": 1.1972, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.41233981964878974, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.4134912963781501e-05, |
|
"loss": 1.1908, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.41309919316563837, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.4111375790051511e-05, |
|
"loss": 1.2195, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.41385856668248694, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 1.4087811174083422e-05, |
|
"loss": 1.2675, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.41461794019933557, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.4064219273164192e-05, |
|
"loss": 1.2397, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.41537731371618414, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.40406002447629e-05, |
|
"loss": 1.1723, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.41613668723303276, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.4016954246529697e-05, |
|
"loss": 1.1875, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.41689606074988134, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 1.3993281436294743e-05, |
|
"loss": 1.1678, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.41765543426672996, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 1.3969581972067166e-05, |
|
"loss": 1.2402, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.41841480778357854, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.3945856012034003e-05, |
|
"loss": 1.2136, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.41917418130042716, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.392210371455913e-05, |
|
"loss": 1.1965, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.41993355481727573, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.3898325238182235e-05, |
|
"loss": 1.1927, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.42069292833412436, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.3874520741617734e-05, |
|
"loss": 1.2102, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.42145230185097293, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.3850690383753718e-05, |
|
"loss": 1.2486, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.42221167536782156, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.3826834323650899e-05, |
|
"loss": 1.1525, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.42297104888467013, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.3802952720541543e-05, |
|
"loss": 1.2107, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.42373042240151876, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.377904573382841e-05, |
|
"loss": 1.22, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.42448979591836733, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 1.3755113523083679e-05, |
|
"loss": 1.1559, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.42524916943521596, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.3731156248047903e-05, |
|
"loss": 1.2233, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42600854295206453, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 1.3707174068628927e-05, |
|
"loss": 1.1299, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.42676791646891316, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.3683167144900833e-05, |
|
"loss": 1.182, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4275272899857617, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.3659135637102845e-05, |
|
"loss": 1.2002, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.42828666350261035, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.3635079705638298e-05, |
|
"loss": 1.2027, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4290460370194589, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.3610999511073544e-05, |
|
"loss": 1.1353, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.42980541053630755, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.3586895214136875e-05, |
|
"loss": 1.1544, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4305647840531561, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.3562766975717468e-05, |
|
"loss": 1.1621, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.43132415757000475, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.3538614956864297e-05, |
|
"loss": 1.1351, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4320835310868533, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.3514439318785067e-05, |
|
"loss": 1.2011, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.43284290460370195, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.3490240222845139e-05, |
|
"loss": 1.1835, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4336022781205505, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.3466017830566433e-05, |
|
"loss": 1.1919, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.43436165163739915, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.3441772303626387e-05, |
|
"loss": 1.1314, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4351210251542477, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.3417503803856835e-05, |
|
"loss": 1.1481, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.43588039867109635, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.3393212493242964e-05, |
|
"loss": 1.2217, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4366397721879449, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.3368898533922202e-05, |
|
"loss": 1.1553, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.43739914570479355, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.3344562088183166e-05, |
|
"loss": 1.2189, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4381585192216421, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.3320203318464552e-05, |
|
"loss": 1.1301, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.43891789273849074, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.3295822387354071e-05, |
|
"loss": 1.2088, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.43967726625533937, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.3271419457587344e-05, |
|
"loss": 1.1475, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.44043663977218794, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.3246994692046837e-05, |
|
"loss": 1.16, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.44119601328903657, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.3222548253760756e-05, |
|
"loss": 1.1764, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.44195538680588514, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.319808030590197e-05, |
|
"loss": 1.206, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.44271476032273377, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.3173591011786917e-05, |
|
"loss": 1.1696, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.44347413383958234, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.3149080534874519e-05, |
|
"loss": 1.1935, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.44423350735643097, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.3124549038765078e-05, |
|
"loss": 1.1915, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.44499288087327954, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.3099996687199203e-05, |
|
"loss": 1.159, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.44575225439012817, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.3075423644056699e-05, |
|
"loss": 1.2283, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.44651162790697674, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.305083007335549e-05, |
|
"loss": 1.1949, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.44727100142382537, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.3026216139250505e-05, |
|
"loss": 1.1641, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.44803037494067394, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.3001582006032601e-05, |
|
"loss": 1.2071, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.44878974845752256, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.2976927838127453e-05, |
|
"loss": 1.16, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.44954912197437114, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.2952253800094467e-05, |
|
"loss": 1.2239, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.45030849549121976, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.2927560056625672e-05, |
|
"loss": 1.1955, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.45106786900806833, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.2902846772544625e-05, |
|
"loss": 1.1833, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.45182724252491696, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.2878114112805315e-05, |
|
"loss": 1.212, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.45258661604176553, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.2853362242491054e-05, |
|
"loss": 1.1979, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.45334598955861416, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.2828591326813382e-05, |
|
"loss": 1.1222, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.45410536307546273, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.2803801531110956e-05, |
|
"loss": 1.1922, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.45486473659231136, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.2778993020848457e-05, |
|
"loss": 1.1596, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.45562411010915993, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.2754165961615482e-05, |
|
"loss": 1.2171, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45638348362600856, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.2729320519125426e-05, |
|
"loss": 1.1937, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.45714285714285713, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.2704456859214397e-05, |
|
"loss": 1.1604, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.45790223065970576, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.2679575147840102e-05, |
|
"loss": 1.1724, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4586616041765543, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.2654675551080724e-05, |
|
"loss": 1.1699, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.45942097769340295, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.2629758235133838e-05, |
|
"loss": 1.1697, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4601803512102515, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.2604823366315273e-05, |
|
"loss": 1.1973, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.46093972472710015, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.2579871111058042e-05, |
|
"loss": 1.2494, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4616990982439487, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.2554901635911188e-05, |
|
"loss": 1.1515, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.46245847176079735, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.2529915107538698e-05, |
|
"loss": 1.1638, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4632178452776459, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.2504911692718387e-05, |
|
"loss": 1.2225, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.46397721879449455, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.2479891558340777e-05, |
|
"loss": 1.1996, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4647365923113431, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 1.2454854871407993e-05, |
|
"loss": 1.2728, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.46549596582819175, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.242980179903264e-05, |
|
"loss": 1.1579, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4662553393450403, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.2404732508436693e-05, |
|
"loss": 1.2026, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.46701471286188895, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.2379647166950381e-05, |
|
"loss": 1.1719, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4677740863787375, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.2354545942011058e-05, |
|
"loss": 1.1853, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.46853345989558615, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.2329429001162114e-05, |
|
"loss": 1.1524, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4692928334124347, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 1.2304296512051814e-05, |
|
"loss": 1.2056, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.47005220692928334, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.2279148642432229e-05, |
|
"loss": 1.187, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4708115804461319, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.2253985560158064e-05, |
|
"loss": 1.1578, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.47157095396298054, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.2228807433185588e-05, |
|
"loss": 1.1355, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4723303274798291, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.2203614429571475e-05, |
|
"loss": 1.1617, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.47308970099667774, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 1.2178406717471702e-05, |
|
"loss": 1.1254, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4738490745135263, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.2153184465140413e-05, |
|
"loss": 1.1904, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.47460844803037494, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.2127947840928816e-05, |
|
"loss": 1.158, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.47536782154722357, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.2102697013284035e-05, |
|
"loss": 1.1188, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.47612719506407214, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 1.207743215074801e-05, |
|
"loss": 1.1458, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.47688656858092077, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.2052153421956343e-05, |
|
"loss": 1.1472, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.47764594209776934, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.2026860995637211e-05, |
|
"loss": 1.2092, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.47840531561461797, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.2001555040610197e-05, |
|
"loss": 1.1966, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.47916468913146654, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.1976235725785202e-05, |
|
"loss": 1.094, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.47992406264831516, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 1.1950903220161286e-05, |
|
"loss": 1.1493, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.48068343616516374, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 1.1925557692825558e-05, |
|
"loss": 1.2334, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.48144280968201236, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.1900199312952047e-05, |
|
"loss": 1.1418, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.48220218319886093, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.1874828249800565e-05, |
|
"loss": 1.144, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.48296155671570956, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.1849444672715587e-05, |
|
"loss": 1.1465, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.48372093023255813, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 1.1824048751125101e-05, |
|
"loss": 1.2054, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.48448030374940676, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.1798640654539511e-05, |
|
"loss": 1.1376, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.48523967726625533, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.1773220552550463e-05, |
|
"loss": 1.1574, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.48599905078310396, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 1.1747788614829758e-05, |
|
"loss": 1.2302, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.48675842429995253, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 1.1722345011128183e-05, |
|
"loss": 1.1259, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.48751779781680116, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.1696889911274394e-05, |
|
"loss": 1.1542, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.48827717133364973, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.1671423485173783e-05, |
|
"loss": 1.23, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.48903654485049836, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 1.164594590280734e-05, |
|
"loss": 1.2568, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 1.162045733423052e-05, |
|
"loss": 1.1619, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.49055529188419555, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 1.159495794957211e-05, |
|
"loss": 1.2003, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4913146654010441, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.1569447919033086e-05, |
|
"loss": 1.2507, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.49207403891789275, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.1543927412885489e-05, |
|
"loss": 1.1381, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4928334124347413, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.1518396601471273e-05, |
|
"loss": 1.1715, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.49359278595158995, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.149285565520119e-05, |
|
"loss": 1.1947, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4943521594684385, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.1467304744553618e-05, |
|
"loss": 1.1499, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.49511153298528715, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.1441744040073469e-05, |
|
"loss": 1.1873, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4958709065021357, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.1416173712371008e-05, |
|
"loss": 1.1398, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.49663028001898435, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.1390593932120742e-05, |
|
"loss": 1.2044, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.4973896535358329, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.1365004870060266e-05, |
|
"loss": 1.1856, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.49814902705268155, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.1339406696989128e-05, |
|
"loss": 1.1601, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.4989084005695301, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.1313799583767693e-05, |
|
"loss": 1.2261, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.49966777408637875, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.1288183701315996e-05, |
|
"loss": 1.1504, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5004271476032274, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.1262559220612602e-05, |
|
"loss": 1.1967, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5011865211200759, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.123692631269348e-05, |
|
"loss": 1.1724, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5019458946369245, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.1211285148650826e-05, |
|
"loss": 1.158, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5027052681537731, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.1185635899631963e-05, |
|
"loss": 1.1994, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5034646416706218, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.1159978736838169e-05, |
|
"loss": 1.1844, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5042240151874703, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.1134313831523547e-05, |
|
"loss": 1.151, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5049833887043189, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.1108641354993876e-05, |
|
"loss": 1.1455, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5057427622211675, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.1082961478605476e-05, |
|
"loss": 1.1656, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5065021357380162, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 1.1057274373764056e-05, |
|
"loss": 1.141, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5072615092548647, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.103158021192357e-05, |
|
"loss": 1.136, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5080208827717133, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.1005879164585083e-05, |
|
"loss": 1.1902, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5087802562885619, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.098017140329561e-05, |
|
"loss": 1.1535, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5095396298054106, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 1.0954457099646981e-05, |
|
"loss": 1.1909, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5102990033222591, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.0928736425274702e-05, |
|
"loss": 1.1445, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5110583768391077, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.0903009551856795e-05, |
|
"loss": 1.1776, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5118177503559563, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.0877276651112662e-05, |
|
"loss": 1.1799, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.512577123872805, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 1.0851537894801935e-05, |
|
"loss": 1.2681, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5133364973896536, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 1.0825793454723325e-05, |
|
"loss": 1.1858, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5140958709065021, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.0800043502713486e-05, |
|
"loss": 1.2268, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5148552444233507, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.0774288210645862e-05, |
|
"loss": 1.1628, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5156146179401994, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.0748527750429545e-05, |
|
"loss": 1.2508, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.516373991457048, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 1.0722762294008107e-05, |
|
"loss": 1.1958, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5171333649738965, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.069699201335849e-05, |
|
"loss": 1.13, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5178927384907451, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.0671217080489816e-05, |
|
"loss": 1.2132, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5186521120075938, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.0645437667442273e-05, |
|
"loss": 1.2433, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5194114855244424, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 1.0619653946285948e-05, |
|
"loss": 1.1013, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5201708590412909, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.0593866089119683e-05, |
|
"loss": 1.171, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5209302325581395, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.0568074268069928e-05, |
|
"loss": 1.1771, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5216896060749882, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 1.0542278655289588e-05, |
|
"loss": 1.1808, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5224489795918368, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.0516479422956882e-05, |
|
"loss": 1.1398, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5232083531086853, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.0490676743274181e-05, |
|
"loss": 1.1954, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5239677266255339, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 1.0464870788466875e-05, |
|
"loss": 1.1792, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5247271001423826, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.0439061730782207e-05, |
|
"loss": 1.1585, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5254864736592312, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.0413249742488132e-05, |
|
"loss": 1.1658, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5262458471760797, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.0387434995872174e-05, |
|
"loss": 1.1443, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5270052206929283, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.0361617663240253e-05, |
|
"loss": 1.176, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.527764594209777, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.0335797916915568e-05, |
|
"loss": 1.2121, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5285239677266256, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 1.0309975929237408e-05, |
|
"loss": 1.209, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5292833412434741, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.0284151872560042e-05, |
|
"loss": 1.1629, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5300427147603227, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 1.0258325919251537e-05, |
|
"loss": 1.1606, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5308020882771713, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.0232498241692625e-05, |
|
"loss": 1.1405, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.53156146179402, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 1.0206669012275546e-05, |
|
"loss": 1.1829, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5323208353108685, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.018083840340289e-05, |
|
"loss": 1.1182, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5330802088277171, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.0155006587486468e-05, |
|
"loss": 1.2416, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5338395823445657, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.0129173736946143e-05, |
|
"loss": 1.1733, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5345989558614144, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 1.0103340024208674e-05, |
|
"loss": 1.1117, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5353583293782629, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.007750562170659e-05, |
|
"loss": 1.2096, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5361177028951115, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.0051670701877011e-05, |
|
"loss": 1.1615, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5368770764119601, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.0025835437160523e-05, |
|
"loss": 1.181, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5376364499288088, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2599, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5383958234456573, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 9.97416456283948e-06, |
|
"loss": 1.1557, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5391551969625059, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.948329298122989e-06, |
|
"loss": 1.1486, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5399145704793545, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 9.922494378293414e-06, |
|
"loss": 1.146, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5406739439962032, |
|
"grad_norm": 0.375, |
|
"learning_rate": 9.89665997579133e-06, |
|
"loss": 1.1826, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5414333175130517, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 9.870826263053859e-06, |
|
"loss": 1.1607, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5421926910299003, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 9.844993412513533e-06, |
|
"loss": 1.1287, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5429520645467489, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 9.819161596597112e-06, |
|
"loss": 1.3019, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5437114380635976, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 9.79333098772446e-06, |
|
"loss": 1.1456, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5444708115804461, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.767501758307376e-06, |
|
"loss": 1.1532, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5452301850972947, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 9.741674080748465e-06, |
|
"loss": 1.1244, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5459895586141433, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.715848127439958e-06, |
|
"loss": 1.1617, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.546748932130992, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 9.690024070762597e-06, |
|
"loss": 1.2031, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5475083056478405, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 9.664202083084437e-06, |
|
"loss": 1.1701, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5482676791646891, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 9.638382336759749e-06, |
|
"loss": 1.1756, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5490270526815377, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 9.612565004127828e-06, |
|
"loss": 1.192, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5497864261983864, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 9.586750257511868e-06, |
|
"loss": 1.1673, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.550545799715235, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 9.560938269217798e-06, |
|
"loss": 1.1835, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5513051732320835, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 9.53512921153313e-06, |
|
"loss": 1.2177, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5520645467489321, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 9.50932325672582e-06, |
|
"loss": 1.1675, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5528239202657808, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 9.483520577043121e-06, |
|
"loss": 1.104, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5535832937826294, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 9.457721344710412e-06, |
|
"loss": 1.126, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5543426672994779, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 9.431925731930079e-06, |
|
"loss": 1.1852, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5551020408163265, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 9.406133910880319e-06, |
|
"loss": 1.1576, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5558614143331752, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 9.380346053714055e-06, |
|
"loss": 1.0863, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5566207878500238, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 9.354562332557728e-06, |
|
"loss": 1.1338, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5573801613668723, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 9.328782919510186e-06, |
|
"loss": 1.2238, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5581395348837209, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 9.303007986641515e-06, |
|
"loss": 1.1432, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5588989084005695, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 9.277237705991895e-06, |
|
"loss": 1.15, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5596582819174182, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 9.251472249570458e-06, |
|
"loss": 1.1075, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5604176554342667, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 9.225711789354138e-06, |
|
"loss": 1.1256, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5611770289511153, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 9.199956497286517e-06, |
|
"loss": 1.1923, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.561936402467964, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 9.174206545276678e-06, |
|
"loss": 1.1069, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5626957759848126, |
|
"grad_norm": 0.375, |
|
"learning_rate": 9.148462105198068e-06, |
|
"loss": 1.2118, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5634551495016611, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 9.12272334888734e-06, |
|
"loss": 1.2203, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5642145230185097, |
|
"grad_norm": 0.375, |
|
"learning_rate": 9.096990448143203e-06, |
|
"loss": 1.1714, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5649738965353583, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 9.0712635747253e-06, |
|
"loss": 1.1562, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.565733270052207, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 9.045542900353022e-06, |
|
"loss": 1.138, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5664926435690555, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 9.019828596704394e-06, |
|
"loss": 1.2036, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5672520170859041, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 8.99412083541492e-06, |
|
"loss": 1.2011, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5680113906027527, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 8.968419788076431e-06, |
|
"loss": 1.2146, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5687707641196014, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 8.942725626235949e-06, |
|
"loss": 1.1499, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5695301376364499, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 8.917038521394526e-06, |
|
"loss": 1.1884, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5702895111532985, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 8.891358645006126e-06, |
|
"loss": 1.1455, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5710488846701471, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 8.865686168476458e-06, |
|
"loss": 1.1044, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5718082581869958, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 8.840021263161831e-06, |
|
"loss": 1.1989, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5725676317038443, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 8.81436410036804e-06, |
|
"loss": 1.1432, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5733270052206929, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 8.788714851349177e-06, |
|
"loss": 1.1265, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5740863787375415, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.763073687306523e-06, |
|
"loss": 1.1427, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5748457522543902, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 8.737440779387398e-06, |
|
"loss": 1.1363, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5756051257712387, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.711816298684011e-06, |
|
"loss": 1.1628, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5763644992880873, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 8.686200416232314e-06, |
|
"loss": 1.2075, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5771238728049359, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 8.660593303010876e-06, |
|
"loss": 1.1384, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5778832463217846, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 8.634995129939737e-06, |
|
"loss": 1.1354, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5786426198386331, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 8.609406067879258e-06, |
|
"loss": 1.1626, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5794019933554817, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 8.583826287628996e-06, |
|
"loss": 1.2072, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5801613668723303, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 8.558255959926533e-06, |
|
"loss": 1.1492, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.580920740389179, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 8.532695255446384e-06, |
|
"loss": 1.1948, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5816801139060275, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 8.507144344798814e-06, |
|
"loss": 1.1786, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5824394874228761, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 8.481603398528727e-06, |
|
"loss": 1.172, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5831988609397247, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 8.456072587114516e-06, |
|
"loss": 1.1431, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5839582344565734, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 8.430552080966918e-06, |
|
"loss": 1.2079, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.584717607973422, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 8.405042050427891e-06, |
|
"loss": 1.1885, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5854769814902705, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 8.37954266576948e-06, |
|
"loss": 1.1858, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5862363550071191, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 8.35405409719266e-06, |
|
"loss": 1.2242, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5869957285239678, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 8.328576514826222e-06, |
|
"loss": 1.1984, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5877551020408164, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 8.30311008872561e-06, |
|
"loss": 1.1178, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5885144755576649, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 8.277654988871819e-06, |
|
"loss": 1.1126, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5892738490745135, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 8.252211385170242e-06, |
|
"loss": 1.1394, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5900332225913621, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 8.226779447449538e-06, |
|
"loss": 1.1999, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5907925961082108, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 8.201359345460496e-06, |
|
"loss": 1.1602, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5915519696250593, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 8.175951248874902e-06, |
|
"loss": 1.1864, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5923113431419079, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 8.150555327284417e-06, |
|
"loss": 1.1053, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5930707166587565, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 8.125171750199436e-06, |
|
"loss": 1.1004, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5938300901756052, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 8.099800687047958e-06, |
|
"loss": 1.1189, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5945894636924537, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 8.074442307174445e-06, |
|
"loss": 1.2653, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5953488372093023, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 8.04909677983872e-06, |
|
"loss": 1.1253, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5961082107261509, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 8.023764274214802e-06, |
|
"loss": 1.1351, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5968675842429996, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 7.998444959389803e-06, |
|
"loss": 1.145, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5976269577598481, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 7.973139004362794e-06, |
|
"loss": 1.1679, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5983863312766967, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 7.947846578043658e-06, |
|
"loss": 1.1475, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5991457047935453, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 7.922567849251995e-06, |
|
"loss": 1.1941, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.599905078310394, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 7.897302986715967e-06, |
|
"loss": 1.1754, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6006644518272425, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 7.872052159071186e-06, |
|
"loss": 1.1762, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6014238253440911, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 7.846815534859592e-06, |
|
"loss": 1.1361, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6021831988609397, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 7.821593282528301e-06, |
|
"loss": 1.2727, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6029425723777884, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 7.796385570428527e-06, |
|
"loss": 1.1568, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6037019458946369, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 7.771192566814412e-06, |
|
"loss": 1.2494, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6044613194114855, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 7.746014439841941e-06, |
|
"loss": 1.223, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6052206929283341, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 7.720851357567778e-06, |
|
"loss": 1.1366, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6059800664451828, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 7.69570348794819e-06, |
|
"loss": 1.1451, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6067394399620313, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 7.670570998837889e-06, |
|
"loss": 1.1189, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6074988134788799, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 7.645454057988942e-06, |
|
"loss": 1.1005, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6082581869957285, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 7.6203528330496245e-06, |
|
"loss": 1.1741, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6090175605125772, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 7.595267491563311e-06, |
|
"loss": 1.1124, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6097769340294257, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 7.570198200967363e-06, |
|
"loss": 1.1459, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6105363075462743, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 7.545145128592009e-06, |
|
"loss": 1.1668, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6112956810631229, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 7.520108441659223e-06, |
|
"loss": 1.1384, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6120550545799716, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.495088307281619e-06, |
|
"loss": 1.1462, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6128144280968201, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 7.470084892461305e-06, |
|
"loss": 1.1645, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6135738016136687, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 7.445098364088815e-06, |
|
"loss": 1.1709, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6143331751305173, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 7.420128888941958e-06, |
|
"loss": 1.1914, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.615092548647366, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 7.395176633684726e-06, |
|
"loss": 1.2529, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6158519221642145, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 7.370241764866169e-06, |
|
"loss": 1.1245, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6166112956810631, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 7.34532444891928e-06, |
|
"loss": 1.1952, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6173706691979117, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 7.3204248521599e-06, |
|
"loss": 1.1247, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6181300427147604, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 7.295543140785604e-06, |
|
"loss": 1.1417, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.6188894162316089, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 7.27067948087458e-06, |
|
"loss": 1.1264, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6196487897484575, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 7.245834038384523e-06, |
|
"loss": 1.176, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6204081632653061, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.221006979151546e-06, |
|
"loss": 1.1171, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6211675367821547, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 7.196198468889047e-06, |
|
"loss": 1.1906, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6219269102990034, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 7.171408673186619e-06, |
|
"loss": 1.1394, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6226862838158519, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 7.14663775750895e-06, |
|
"loss": 1.1334, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6234456573327005, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 7.1218858871946885e-06, |
|
"loss": 1.149, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6242050308495491, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 7.097153227455379e-06, |
|
"loss": 1.1593, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6249644043663978, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 7.072439943374331e-06, |
|
"loss": 1.1399, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6257237778832463, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 7.0477461999055365e-06, |
|
"loss": 1.2022, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6264831514000949, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 7.023072161872551e-06, |
|
"loss": 1.1374, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6272425249169435, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 6.998417993967403e-06, |
|
"loss": 1.1267, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6280018984337922, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 6.973783860749499e-06, |
|
"loss": 1.179, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6287612719506407, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 6.949169926644513e-06, |
|
"loss": 1.1685, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6295206454674893, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 6.9245763559432996e-06, |
|
"loss": 1.2012, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6302800189843379, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 6.9000033128008e-06, |
|
"loss": 1.187, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6310393925011866, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 6.875450961234924e-06, |
|
"loss": 1.1949, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6317987660180351, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 6.8509194651254825e-06, |
|
"loss": 1.1995, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6325581395348837, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 6.826408988213083e-06, |
|
"loss": 1.1705, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6333175130517323, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 6.801919694098034e-06, |
|
"loss": 1.1469, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.634076886568581, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 6.777451746239249e-06, |
|
"loss": 1.1363, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6348362600854295, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.7530053079531664e-06, |
|
"loss": 1.1968, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6355956336022781, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 6.7285805424126585e-06, |
|
"loss": 1.2189, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6363550071191267, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 6.70417761264593e-06, |
|
"loss": 1.1232, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6371143806359754, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.679796681535451e-06, |
|
"loss": 1.1898, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6378737541528239, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.655437911816838e-06, |
|
"loss": 1.1666, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6386331276696725, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.631101466077801e-06, |
|
"loss": 1.146, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6393925011865211, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 6.60678750675704e-06, |
|
"loss": 1.1723, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6401518747033698, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 6.582496196143167e-06, |
|
"loss": 1.1488, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6409112482202183, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 6.558227696373617e-06, |
|
"loss": 1.1899, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6416706217370669, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 6.533982169433568e-06, |
|
"loss": 1.1478, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6424299952539155, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 6.509759777154864e-06, |
|
"loss": 1.1353, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6431893687707642, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 6.485560681214933e-06, |
|
"loss": 1.1481, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6439487422876127, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 6.461385043135704e-06, |
|
"loss": 1.1222, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6447081158044613, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 6.437233024282538e-06, |
|
"loss": 1.1029, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6454674893213099, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 6.413104785863128e-06, |
|
"loss": 1.192, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6462268628381586, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 6.389000488926459e-06, |
|
"loss": 1.2227, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6469862363550071, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 6.364920294361701e-06, |
|
"loss": 1.0898, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6477456098718557, |
|
"grad_norm": 0.375, |
|
"learning_rate": 6.3408643628971585e-06, |
|
"loss": 1.1882, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6485049833887043, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.316832855099173e-06, |
|
"loss": 1.1572, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.649264356905553, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.292825931371075e-06, |
|
"loss": 1.1056, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6500237304224015, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 6.2688437519521e-06, |
|
"loss": 1.1232, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6507831039392501, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 6.244886476916325e-06, |
|
"loss": 1.1479, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6515424774560987, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 6.220954266171597e-06, |
|
"loss": 1.1355, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6523018509729474, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 6.197047279458459e-06, |
|
"loss": 1.185, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 6.173165676349103e-06, |
|
"loss": 1.141, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6538205980066445, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 6.149309616246285e-06, |
|
"loss": 1.129, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6545799715234931, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 6.125479258382268e-06, |
|
"loss": 1.1517, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6553393450403417, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 6.101674761817769e-06, |
|
"loss": 1.0984, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6560987185571903, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 6.077896285440874e-06, |
|
"loss": 1.175, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6568580920740389, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 6.054143987966001e-06, |
|
"loss": 1.1625, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6576174655908875, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 6.030418027932835e-06, |
|
"loss": 1.2025, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6583768391077361, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 6.006718563705258e-06, |
|
"loss": 1.1843, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6591362126245848, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 5.983045753470308e-06, |
|
"loss": 1.1775, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6598955861414333, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 5.959399755237103e-06, |
|
"loss": 1.1727, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6606549596582819, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 5.935780726835811e-06, |
|
"loss": 1.1502, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6614143331751305, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 5.91218882591658e-06, |
|
"loss": 1.1346, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6621737066919792, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 5.888624209948495e-06, |
|
"loss": 1.1899, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6629330802088277, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 5.865087036218504e-06, |
|
"loss": 1.1826, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6636924537256763, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 5.841577461830408e-06, |
|
"loss": 1.1627, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6644518272425249, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 5.818095643703779e-06, |
|
"loss": 1.1732, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6652112007593736, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 5.794641738572925e-06, |
|
"loss": 1.1294, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6659705742762221, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 5.771215902985848e-06, |
|
"loss": 1.1594, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6667299477930707, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 5.747818293303185e-06, |
|
"loss": 1.1273, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6674893213099193, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 5.724449065697182e-06, |
|
"loss": 1.1463, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.668248694826768, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 5.701108376150635e-06, |
|
"loss": 1.1557, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6690080683436165, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 5.677796380455862e-06, |
|
"loss": 1.1537, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6697674418604651, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.654513234213663e-06, |
|
"loss": 1.1203, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6705268153773137, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 5.631259092832265e-06, |
|
"loss": 1.1744, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6712861888941624, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 5.608034111526298e-06, |
|
"loss": 1.1531, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6720455624110109, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 5.584838445315764e-06, |
|
"loss": 1.1989, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6728049359278595, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 5.561672249024988e-06, |
|
"loss": 1.2282, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6735643094447081, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 5.538535677281608e-06, |
|
"loss": 1.186, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6743236829615568, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 5.515428884515495e-06, |
|
"loss": 1.1552, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6750830564784053, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 5.492352024957781e-06, |
|
"loss": 1.1389, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6758424299952539, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 5.4693052526397965e-06, |
|
"loss": 1.133, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6766018035121025, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 5.446288721392048e-06, |
|
"loss": 1.2011, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6773611770289512, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.423302584843186e-06, |
|
"loss": 1.1344, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6781205505457997, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 5.400346996418988e-06, |
|
"loss": 1.161, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6788799240626483, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 5.377422109341332e-06, |
|
"loss": 1.1067, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6796392975794969, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 5.354528076627185e-06, |
|
"loss": 1.1321, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6803986710963456, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 5.331665051087549e-06, |
|
"loss": 1.1952, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6811580446131941, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 5.308833185326472e-06, |
|
"loss": 1.1063, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6819174181300427, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 5.286032631740023e-06, |
|
"loss": 1.19, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6826767916468913, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 5.263263542515273e-06, |
|
"loss": 1.1727, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.68343616516374, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 5.240526069629265e-06, |
|
"loss": 1.172, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6841955386805885, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 5.217820364848027e-06, |
|
"loss": 1.1787, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6849549121974371, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 5.19514657972553e-06, |
|
"loss": 1.2442, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6857142857142857, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 5.172504865602701e-06, |
|
"loss": 1.1876, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6864736592311343, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 5.149895373606405e-06, |
|
"loss": 1.2092, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6872330327479829, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 5.127318254648418e-06, |
|
"loss": 1.1086, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6879924062648315, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 5.104773659424453e-06, |
|
"loss": 1.1276, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6887517797816801, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 5.082261738413124e-06, |
|
"loss": 1.2118, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6895111532985287, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 5.059782641874962e-06, |
|
"loss": 1.1634, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6902705268153773, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 5.037336519851397e-06, |
|
"loss": 1.1525, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6910299003322259, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 5.014923522163773e-06, |
|
"loss": 1.1586, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6917892738490745, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 4.992543798412327e-06, |
|
"loss": 1.185, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6925486473659231, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 4.970197497975216e-06, |
|
"loss": 1.1233, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6933080208827717, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.947884770007491e-06, |
|
"loss": 1.1646, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6940673943996203, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 4.92560576344013e-06, |
|
"loss": 1.1766, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6948267679164689, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 4.903360626979026e-06, |
|
"loss": 1.1797, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6955861414333175, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 4.881149509103993e-06, |
|
"loss": 1.1327, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6963455149501662, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 4.858972558067784e-06, |
|
"loss": 1.1353, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6971048884670147, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 4.836829921895103e-06, |
|
"loss": 1.1603, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6978642619838633, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.814721748381608e-06, |
|
"loss": 1.1768, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6986236355007119, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 4.7926481850929376e-06, |
|
"loss": 1.1515, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6993830090175606, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.770609379363694e-06, |
|
"loss": 1.2258, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7001423825344091, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.748605478296508e-06, |
|
"loss": 1.1553, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.7009017560512577, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.726636628761018e-06, |
|
"loss": 1.1856, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7016611295681063, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 4.704702977392914e-06, |
|
"loss": 1.172, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.702420503084955, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 4.682804670592937e-06, |
|
"loss": 1.145, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7031798766018035, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.660941854525917e-06, |
|
"loss": 1.1645, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7039392501186521, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 4.639114675119797e-06, |
|
"loss": 1.1369, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7046986236355007, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 4.617323278064657e-06, |
|
"loss": 1.1206, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7054579971523494, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.595567808811735e-06, |
|
"loss": 1.1056, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.7062173706691979, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 4.573848412572458e-06, |
|
"loss": 1.1796, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7069767441860465, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.552165234317486e-06, |
|
"loss": 1.1623, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7077361177028951, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.530518418775734e-06, |
|
"loss": 1.1729, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7084954912197438, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 4.508908110433393e-06, |
|
"loss": 1.1316, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7092548647365923, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.487334453532998e-06, |
|
"loss": 1.198, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.7100142382534409, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.465797592072428e-06, |
|
"loss": 1.2132, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7107736117702895, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.444297669803981e-06, |
|
"loss": 1.1731, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7115329852871382, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 4.422834830233378e-06, |
|
"loss": 1.119, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7122923588039867, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 4.4014092166188375e-06, |
|
"loss": 1.1435, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.7130517323208353, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 4.3800209719701055e-06, |
|
"loss": 1.1884, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7138111058376839, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.35867023904749e-06, |
|
"loss": 1.1715, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7145704793545326, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 4.337357160360931e-06, |
|
"loss": 1.1819, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7153298528713811, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.3160818781690286e-06, |
|
"loss": 1.165, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.7160892263882297, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 4.294844534478107e-06, |
|
"loss": 1.0917, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7168485999050783, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 4.2736452710412645e-06, |
|
"loss": 1.1302, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.717607973421927, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 4.25248422935742e-06, |
|
"loss": 1.1528, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7183673469387755, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.2313615506703685e-06, |
|
"loss": 1.1557, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.7191267204556241, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.210277375967855e-06, |
|
"loss": 1.2004, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7198860939724727, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.189231845980618e-06, |
|
"loss": 1.1886, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7206454674893213, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 4.168225101181449e-06, |
|
"loss": 1.1163, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7214048410061699, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 4.147257281784257e-06, |
|
"loss": 1.1078, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7221642145230185, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 4.1263285277431465e-06, |
|
"loss": 1.1385, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7229235880398671, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 4.105438978751465e-06, |
|
"loss": 1.1829, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7236829615567157, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 4.084588774240884e-06, |
|
"loss": 1.1458, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7244423350735643, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 4.063778053380446e-06, |
|
"loss": 1.1388, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.7252017085904129, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 4.043006955075667e-06, |
|
"loss": 1.1234, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7259610821072615, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.0222756179675915e-06, |
|
"loss": 1.171, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7267204556241101, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 4.001584180431869e-06, |
|
"loss": 1.1435, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7274798291409587, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.980932780577827e-06, |
|
"loss": 1.1021, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7282392026578073, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 3.960321556247552e-06, |
|
"loss": 1.1885, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.7289985761746559, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 3.939750645014977e-06, |
|
"loss": 1.1244, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7297579496915045, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 3.919220184184959e-06, |
|
"loss": 1.1245, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.730517323208353, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 3.898730310792346e-06, |
|
"loss": 1.1353, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7312766967252017, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 3.878281161601094e-06, |
|
"loss": 1.1653, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7320360702420503, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 3.857872873103322e-06, |
|
"loss": 1.1238, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7327954437588989, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 3.837505581518429e-06, |
|
"loss": 1.1952, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7335548172757476, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 3.8171794227921585e-06, |
|
"loss": 1.2425, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7343141907925961, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 3.7968945325957175e-06, |
|
"loss": 1.099, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7350735643094447, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 3.776651046324843e-06, |
|
"loss": 1.151, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7358329378262933, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 3.7564490990989276e-06, |
|
"loss": 1.1206, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.736592311343142, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 3.7362888257600894e-06, |
|
"loss": 1.1203, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7373516848599905, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 3.716170360872294e-06, |
|
"loss": 1.19, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7381110583768391, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 3.69609383872045e-06, |
|
"loss": 1.1872, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7388704318936877, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 3.676059393309499e-06, |
|
"loss": 1.1264, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7396298054105364, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 3.6560671583635467e-06, |
|
"loss": 1.1832, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7403891789273849, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 3.636117267324941e-06, |
|
"loss": 1.1855, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7411485524442335, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 3.6162098533534095e-06, |
|
"loss": 1.2236, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7419079259610821, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 3.5963450493251552e-06, |
|
"loss": 1.1248, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7426672994779308, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 3.576522987831965e-06, |
|
"loss": 1.0895, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7434266729947793, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 3.5567438011803356e-06, |
|
"loss": 1.1789, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 3.5370076213905904e-06, |
|
"loss": 1.1332, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7449454200284765, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 3.5173145801959942e-06, |
|
"loss": 1.1575, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7457047935453252, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 3.4976648090418685e-06, |
|
"loss": 1.1542, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7464641670621737, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 3.4780584390847193e-06, |
|
"loss": 1.2163, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7472235405790223, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 3.4584956011913693e-06, |
|
"loss": 1.1658, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7479829140958709, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 3.4389764259380754e-06, |
|
"loss": 1.1344, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7487422876127195, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 3.4195010436096622e-06, |
|
"loss": 1.1608, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7495016611295681, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 3.400069584198633e-06, |
|
"loss": 1.2214, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7502610346464167, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 3.380682177404335e-06, |
|
"loss": 1.1724, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7510204081632653, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 3.361338952632074e-06, |
|
"loss": 1.1665, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.751779781680114, |
|
"grad_norm": 0.375, |
|
"learning_rate": 3.3420400389922535e-06, |
|
"loss": 1.2119, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7525391551969625, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 3.32278556529951e-06, |
|
"loss": 1.1508, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7532985287138111, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 3.3035756600718515e-06, |
|
"loss": 1.1584, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7540579022306597, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 3.284410451529816e-06, |
|
"loss": 1.1329, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7548172757475083, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 3.2652900675956e-06, |
|
"loss": 1.1675, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7555766492643569, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 3.2462146358922033e-06, |
|
"loss": 1.1203, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7563360227812055, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 3.2271842837425917e-06, |
|
"loss": 1.1085, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7570953962980541, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 3.208199138168826e-06, |
|
"loss": 1.1281, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7578547698149027, |
|
"grad_norm": 0.375, |
|
"learning_rate": 3.1892593258912407e-06, |
|
"loss": 1.1927, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7586141433317513, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 3.1703649733275697e-06, |
|
"loss": 1.1877, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7593735168485999, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 3.151516206592128e-06, |
|
"loss": 1.1486, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7601328903654485, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 3.132713151494955e-06, |
|
"loss": 1.1856, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.7608922638822971, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 3.113955933540973e-06, |
|
"loss": 1.1627, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7616516373991457, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 3.0952446779291577e-06, |
|
"loss": 1.1441, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7624110109159943, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 3.0765795095517026e-06, |
|
"loss": 1.1066, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7631703844328429, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 3.0579605529931832e-06, |
|
"loss": 1.1927, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7639297579496915, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 3.0393879325297136e-06, |
|
"loss": 1.1468, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.76468913146654, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 3.020861772128145e-06, |
|
"loss": 1.1106, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7654485049833887, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 3.0023821954452036e-06, |
|
"loss": 1.1217, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7662078785002373, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 2.983949325826696e-06, |
|
"loss": 1.156, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.7669672520170859, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 2.9655632863066696e-06, |
|
"loss": 1.1315, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7677266255339346, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 2.9472241996065897e-06, |
|
"loss": 1.1651, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.7684859990507831, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 1.1209, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7692453725676317, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 2.910687373984339e-06, |
|
"loss": 1.1137, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7700047460844803, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 2.8924898789348645e-06, |
|
"loss": 1.1695, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.770764119601329, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 2.874339824449085e-06, |
|
"loss": 1.1603, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7715234931181775, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.856237331673336e-06, |
|
"loss": 1.1263, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.7722828666350261, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 2.838182521436498e-06, |
|
"loss": 1.1512, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7730422401518747, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 2.8201755142491814e-06, |
|
"loss": 1.2103, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7738016136687234, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 2.8022164303029186e-06, |
|
"loss": 1.1234, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7745609871855719, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.7843053894693805e-06, |
|
"loss": 1.1291, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7753203607024205, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 2.76644251129955e-06, |
|
"loss": 1.1616, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.7760797342192691, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 2.74862791502295e-06, |
|
"loss": 1.1467, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7768391077361178, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.7308617195468336e-06, |
|
"loss": 1.1435, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.7775984812529663, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.713144043455388e-06, |
|
"loss": 1.1323, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7783578547698149, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 2.695475005008946e-06, |
|
"loss": 1.1765, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7791172282866635, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 2.6778547221432063e-06, |
|
"loss": 1.1441, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7798766018035121, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 2.660283312468438e-06, |
|
"loss": 1.1428, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.7806359753203607, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 2.642760893268684e-06, |
|
"loss": 1.1243, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7813953488372093, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 2.625287581501006e-06, |
|
"loss": 1.1824, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.7821547223540579, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 2.6078634937946724e-06, |
|
"loss": 1.1663, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7829140958709065, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 2.5904887464504115e-06, |
|
"loss": 1.1911, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.7836734693877551, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 2.573163455439601e-06, |
|
"loss": 1.1811, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.7844328429046037, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 2.5558877364035286e-06, |
|
"loss": 1.2266, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.7851922164214523, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 2.538661704652595e-06, |
|
"loss": 1.1456, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7859515899383009, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 2.521485475165555e-06, |
|
"loss": 1.177, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7867109634551495, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 2.504359162588741e-06, |
|
"loss": 1.18, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7874703369719981, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 2.4872828812353146e-06, |
|
"loss": 1.1414, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7882297104888467, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 2.470256745084488e-06, |
|
"loss": 1.1995, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.7889890840056953, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 2.4532808677807772e-06, |
|
"loss": 1.1283, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7897484575225439, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 2.4363553626332157e-06, |
|
"loss": 1.1844, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7905078310393925, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 2.419480342614635e-06, |
|
"loss": 1.1947, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.7912672045562411, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 2.402655920360889e-06, |
|
"loss": 1.1751, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.7920265780730897, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 2.385882208170106e-06, |
|
"loss": 1.1976, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.7927859515899383, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 2.369159318001937e-06, |
|
"loss": 1.1705, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.7935453251067869, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 2.3524873614768085e-06, |
|
"loss": 1.1149, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7943046986236355, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 2.335866449875185e-06, |
|
"loss": 1.1556, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.7950640721404841, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 2.3192966941368247e-06, |
|
"loss": 1.1266, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.7958234456573327, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 2.3027782048600247e-06, |
|
"loss": 1.0954, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.7965828191741813, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 2.2863110923008958e-06, |
|
"loss": 1.1715, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.7973421926910299, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 2.26989546637263e-06, |
|
"loss": 1.2394, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7981015662078785, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 2.2535314366447625e-06, |
|
"loss": 1.1812, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.798860939724727, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 2.237219112342426e-06, |
|
"loss": 1.146, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.7996203132415757, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 2.2209586023456495e-06, |
|
"loss": 1.1245, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8003796867584243, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 2.2047500151886047e-06, |
|
"loss": 1.1608, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.8011390602752729, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 2.1885934590589008e-06, |
|
"loss": 1.1919, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8018984337921214, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.172489041796856e-06, |
|
"loss": 1.1411, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8026578073089701, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 2.156436870894767e-06, |
|
"loss": 1.1685, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8034171808258187, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 2.140437053496214e-06, |
|
"loss": 1.1709, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8041765543426673, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.124489696395321e-06, |
|
"loss": 1.1552, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.804935927859516, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 2.1085949060360654e-06, |
|
"loss": 1.1587, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8056953013763645, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 2.092752788511546e-06, |
|
"loss": 1.1752, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8064546748932131, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 2.0769634495632986e-06, |
|
"loss": 1.1594, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8072140484100617, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 2.061226994580563e-06, |
|
"loss": 1.1164, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8079734219269104, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 2.045543528599607e-06, |
|
"loss": 1.0982, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8087327954437589, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 2.0299131563030016e-06, |
|
"loss": 1.1587, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8094921689606075, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 2.0143359820189403e-06, |
|
"loss": 1.1613, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8102515424774561, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.998812109720535e-06, |
|
"loss": 1.1486, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8110109159943047, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.983341643025117e-06, |
|
"loss": 1.1652, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8117702895111533, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.967924685193552e-06, |
|
"loss": 1.1593, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8125296630280019, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.952561339129554e-06, |
|
"loss": 1.1904, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8132890365448505, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.93725170737899e-06, |
|
"loss": 1.151, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8140484100616991, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 1.921995892129208e-06, |
|
"loss": 1.1097, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8148077835785477, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.906793995208328e-06, |
|
"loss": 1.1875, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8155671570953963, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 1.8916461180845968e-06, |
|
"loss": 1.2437, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.8765523618656923e-06, |
|
"loss": 1.1949, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8170859041290935, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.861512827298051e-06, |
|
"loss": 1.1321, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8178452776459421, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 1.8465276147661905e-06, |
|
"loss": 1.1811, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8186046511627907, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.8315968242920446e-06, |
|
"loss": 1.2074, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8193640246796393, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.8167205555343027e-06, |
|
"loss": 1.1378, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8201233981964879, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 1.8018989077877368e-06, |
|
"loss": 1.1401, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8208827717133365, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.7871319799825316e-06, |
|
"loss": 1.1455, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8216421452301851, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.7724198706836372e-06, |
|
"loss": 1.1678, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8224015187470337, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 1.757762678090107e-06, |
|
"loss": 1.1541, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8231608922638823, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.743160500034443e-06, |
|
"loss": 1.1924, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.8239202657807309, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.7286134339819337e-06, |
|
"loss": 1.1414, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8246796392975795, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.7141215770300202e-06, |
|
"loss": 1.1341, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8254390128144281, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.6996850259076303e-06, |
|
"loss": 1.1874, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8261983863312767, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.6853038769745466e-06, |
|
"loss": 1.1982, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8269577598481253, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.670978226220762e-06, |
|
"loss": 1.2065, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8277171333649739, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.6567081692658238e-06, |
|
"loss": 1.148, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8284765068818225, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 1.642493801358218e-06, |
|
"loss": 1.1179, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8292358803986711, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.6283352173747148e-06, |
|
"loss": 1.1411, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8299952539155196, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.6142325118197488e-06, |
|
"loss": 1.1431, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8307546274323683, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.6001857788247755e-06, |
|
"loss": 1.1494, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8315140009492169, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.5861951121476571e-06, |
|
"loss": 1.1864, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8322733744660655, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 1.5722606051720268e-06, |
|
"loss": 1.1363, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.833032747982914, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.5583823509066665e-06, |
|
"loss": 1.1366, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8337921214997627, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.5445604419848858e-06, |
|
"loss": 1.1422, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8345514950166113, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 1.5307949706639114e-06, |
|
"loss": 1.1861, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8353108685334599, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 1.5170860288242638e-06, |
|
"loss": 1.1732, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8360702420503084, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.503433707969142e-06, |
|
"loss": 1.1638, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8368296155671571, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.489838099223816e-06, |
|
"loss": 1.1235, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8375889890840057, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.476299293335024e-06, |
|
"loss": 1.1356, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8383483626008543, |
|
"grad_norm": 0.27734375, |
|
"learning_rate": 1.4628173806703594e-06, |
|
"loss": 1.1142, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8391077361177028, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.4493924512176748e-06, |
|
"loss": 1.1373, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8398671096345515, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 1.436024594584461e-06, |
|
"loss": 1.2117, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8406264831514001, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.4227138999972801e-06, |
|
"loss": 1.077, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8413858566682487, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.409460456301147e-06, |
|
"loss": 1.1294, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8421452301850973, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.3962643519589502e-06, |
|
"loss": 1.1354, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8429046037019459, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 1.3831256750508449e-06, |
|
"loss": 1.1973, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8436639772187945, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.3700445132736795e-06, |
|
"loss": 1.1396, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8444233507356431, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.3570209539404067e-06, |
|
"loss": 1.1354, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8451827242524917, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.3440550839795008e-06, |
|
"loss": 1.1847, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8459420977693403, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 1.3311469899343698e-06, |
|
"loss": 1.1425, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8467014712861889, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.3182967579627948e-06, |
|
"loss": 1.1266, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8474608448030375, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.305504473836331e-06, |
|
"loss": 1.1409, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8482202183198861, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 1.2927702229397633e-06, |
|
"loss": 1.1686, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8489795918367347, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.2800940902705072e-06, |
|
"loss": 1.1655, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8497389653535833, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.2674761604380692e-06, |
|
"loss": 1.1476, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8504983388704319, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.2549165176634582e-06, |
|
"loss": 1.2241, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8512577123872805, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.2424152457786408e-06, |
|
"loss": 1.1283, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8520170859041291, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.2299724282259685e-06, |
|
"loss": 1.1519, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8527764594209777, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 1.2175881480576347e-06, |
|
"loss": 1.1268, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8535358329378263, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 1.2052624879351105e-06, |
|
"loss": 1.0941, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8542952064546749, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 1.1929955301285889e-06, |
|
"loss": 1.1533, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8550545799715235, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.1807873565164507e-06, |
|
"loss": 1.1927, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8558139534883721, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 1.1686380485847027e-06, |
|
"loss": 1.1902, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8565733270052207, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 1.1565476874264448e-06, |
|
"loss": 1.1152, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8573327005220693, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 1.144516353741324e-06, |
|
"loss": 1.1328, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8580920740389179, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 1.1325441278349935e-06, |
|
"loss": 1.1626, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8588514475557665, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.120631089618579e-06, |
|
"loss": 1.1927, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.8596108210726151, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.1087773186081474e-06, |
|
"loss": 1.2139, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8603701945894637, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.0969828939241779e-06, |
|
"loss": 1.1491, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8611295681063122, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 1.0852478942910228e-06, |
|
"loss": 1.156, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8618889416231609, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.0735723980363921e-06, |
|
"loss": 1.1736, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8626483151400095, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.0619564830908303e-06, |
|
"loss": 1.1818, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8634076886568581, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 1.0504002269871927e-06, |
|
"loss": 1.1886, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8641670621737066, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.0389037068601325e-06, |
|
"loss": 1.2172, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.8649264356905553, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.027466999445572e-06, |
|
"loss": 1.1286, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8656858092074039, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.0160901810802114e-06, |
|
"loss": 1.1688, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8664451827242525, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 1.0047733277010064e-06, |
|
"loss": 1.2127, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.867204556241101, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 9.935165148446658e-07, |
|
"loss": 1.1628, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8679639297579497, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 9.823198176471381e-07, |
|
"loss": 1.1454, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8687233032747983, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 9.711833108431234e-07, |
|
"loss": 1.1546, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8694826767916469, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 9.601070687655667e-07, |
|
"loss": 1.1958, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8702420503084954, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 9.490911653451651e-07, |
|
"loss": 1.1511, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.8710014238253441, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 9.381356741098702e-07, |
|
"loss": 1.148, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8717607973421927, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 9.272406681844015e-07, |
|
"loss": 1.1383, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.8725201708590413, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 9.164062202897539e-07, |
|
"loss": 1.137, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8732795443758898, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 9.05632402742721e-07, |
|
"loss": 1.1381, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8740389178927385, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 8.949192874553991e-07, |
|
"loss": 1.1854, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.8747982914095871, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 8.842669459347186e-07, |
|
"loss": 1.199, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8755576649264357, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 8.736754492819655e-07, |
|
"loss": 1.1787, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8763170384432842, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 8.631448681922994e-07, |
|
"loss": 1.1742, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8770764119601329, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 8.526752729542831e-07, |
|
"loss": 1.1326, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8778357854769815, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 8.42266733449425e-07, |
|
"loss": 1.1984, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.8785951589938301, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 8.319193191517016e-07, |
|
"loss": 1.1403, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.8793545325106787, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 8.216330991270916e-07, |
|
"loss": 1.1532, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.8801139060275273, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 8.114081420331266e-07, |
|
"loss": 1.1398, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.8808732795443759, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 8.012445161184179e-07, |
|
"loss": 1.1201, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8816326530612245, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 7.911422892222165e-07, |
|
"loss": 1.1367, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8823920265780731, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 7.81101528773951e-07, |
|
"loss": 1.1888, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.8831514000949217, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 7.711223017927783e-07, |
|
"loss": 1.1283, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8839107736117703, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 7.612046748871327e-07, |
|
"loss": 1.114, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.8846701471286189, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 7.513487142542941e-07, |
|
"loss": 1.1995, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8854295206454675, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 7.415544856799362e-07, |
|
"loss": 1.1137, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.886188894162316, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 7.318220545376842e-07, |
|
"loss": 1.1919, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.8869482676791647, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 7.221514857886857e-07, |
|
"loss": 1.1217, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8877076411960133, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 7.125428439811765e-07, |
|
"loss": 1.1266, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.8884670147128619, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 7.029961932500506e-07, |
|
"loss": 1.159, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8892263882297105, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.935115973164208e-07, |
|
"loss": 1.1782, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8899857617465591, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 6.840891194872112e-07, |
|
"loss": 1.109, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.8907451352634077, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 6.7472882265472e-07, |
|
"loss": 1.2068, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8915045087802563, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 6.65430769296207e-07, |
|
"loss": 1.1619, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8922638822971048, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 6.56195021473478e-07, |
|
"loss": 1.1534, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8930232558139535, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 6.470216408324626e-07, |
|
"loss": 1.1999, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.8937826293308021, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 6.379106886028086e-07, |
|
"loss": 1.1417, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8945420028476507, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 6.288622255974741e-07, |
|
"loss": 1.1552, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8953013763644992, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 6.198763122123208e-07, |
|
"loss": 1.1639, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8960607498813479, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 6.109530084257043e-07, |
|
"loss": 1.1234, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8968201233981965, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 6.020923737980877e-07, |
|
"loss": 1.1633, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.8975794969150451, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 5.932944674716279e-07, |
|
"loss": 1.1606, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8983388704318936, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 5.845593481697931e-07, |
|
"loss": 1.1113, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8990982439487423, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 5.758870741969635e-07, |
|
"loss": 1.1429, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8998576174655909, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 5.672777034380483e-07, |
|
"loss": 1.1521, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9006169909824395, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 5.587312933580946e-07, |
|
"loss": 1.1341, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.901376364499288, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 5.502479010019046e-07, |
|
"loss": 1.143, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9021357380161367, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 5.418275829936537e-07, |
|
"loss": 1.1586, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9028951115329853, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 5.334703955365183e-07, |
|
"loss": 1.1349, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.9036544850498339, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 5.251763944122956e-07, |
|
"loss": 1.2187, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9044138585666824, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 5.169456349810342e-07, |
|
"loss": 1.2073, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9051732320835311, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 5.087781721806539e-07, |
|
"loss": 1.162, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9059326056003797, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 5.00674060526598e-07, |
|
"loss": 1.1938, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9066919791172283, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.926333541114558e-07, |
|
"loss": 1.1564, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9074513526340768, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 4.846561066046063e-07, |
|
"loss": 1.2107, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9082107261509255, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 4.7674237125185597e-07, |
|
"loss": 1.2019, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9089700996677741, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 4.6889220087508514e-07, |
|
"loss": 1.1731, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9097294731846227, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.611056478719023e-07, |
|
"loss": 1.1591, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9104888467014712, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 4.5338276421528435e-07, |
|
"loss": 1.1698, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9112482202183199, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.45723601453234e-07, |
|
"loss": 1.179, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9120075937351685, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 4.3812821070843394e-07, |
|
"loss": 1.1383, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9127669672520171, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.305966426779118e-07, |
|
"loss": 1.118, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9135263407688657, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 4.2312894763269385e-07, |
|
"loss": 1.1147, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 4.1572517541747294e-07, |
|
"loss": 1.2228, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9150450878025629, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 4.0838537545027755e-07, |
|
"loss": 1.144, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9158044613194115, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 4.0110959672213676e-07, |
|
"loss": 1.1403, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9165638348362601, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 3.9389788779675806e-07, |
|
"loss": 1.1552, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9173232083531087, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 3.867502968102055e-07, |
|
"loss": 1.1785, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9180825818699573, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 3.7966687147056533e-07, |
|
"loss": 1.1487, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9188419553868059, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 3.7264765905764776e-07, |
|
"loss": 1.1304, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9196013289036545, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 3.656927064226512e-07, |
|
"loss": 1.1109, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.920360702420503, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 3.588020599878639e-07, |
|
"loss": 1.148, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9211200759373517, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 3.519757657463474e-07, |
|
"loss": 1.1745, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9218794494542003, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 3.4521386926163134e-07, |
|
"loss": 1.1452, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9226388229710489, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 3.3851641566740813e-07, |
|
"loss": 1.1598, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9233981964878974, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 3.3188344966723516e-07, |
|
"loss": 1.1889, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9241575700047461, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 3.2531501553422884e-07, |
|
"loss": 1.1822, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9249169435215947, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 3.1881115711077994e-07, |
|
"loss": 1.1675, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9256763170384433, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 3.123719178082529e-07, |
|
"loss": 1.1539, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9264356905552918, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 3.059973406066963e-07, |
|
"loss": 1.1554, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9271950640721405, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 2.996874680545603e-07, |
|
"loss": 1.1506, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9279544375889891, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 2.9344234226840964e-07, |
|
"loss": 1.167, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9287138111058377, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 2.872620049326436e-07, |
|
"loss": 1.1533, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9294731846226862, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 2.811464972992195e-07, |
|
"loss": 1.1686, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 2.7509586018736764e-07, |
|
"loss": 1.1638, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9309919316563835, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 2.6911013398333464e-07, |
|
"loss": 1.1969, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9317513051732321, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 2.6318935864010133e-07, |
|
"loss": 1.1527, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9325106786900806, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 2.573335736771254e-07, |
|
"loss": 1.1725, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9332700522069293, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 2.51542818180065e-07, |
|
"loss": 1.0826, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9340294257237779, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.458171308005308e-07, |
|
"loss": 1.1372, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9347887992406265, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 2.4015654975582225e-07, |
|
"loss": 1.1359, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.935548172757475, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 2.3456111282867178e-07, |
|
"loss": 1.1214, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9363075462743237, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 2.2903085736699414e-07, |
|
"loss": 1.0865, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9370669197911723, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 2.2356582028363548e-07, |
|
"loss": 1.1849, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9378262933080209, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 2.1816603805613012e-07, |
|
"loss": 1.137, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9385856668248694, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 2.1283154672645522e-07, |
|
"loss": 1.1179, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9393450403417181, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 2.0756238190078991e-07, |
|
"loss": 1.1576, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9401044138585667, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 2.0235857874927655e-07, |
|
"loss": 1.1685, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9408637873754153, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.9722017200578757e-07, |
|
"loss": 1.167, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9416231608922638, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.921471959676957e-07, |
|
"loss": 1.0967, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9423825344091125, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 1.8713968449564079e-07, |
|
"loss": 1.185, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9431419079259611, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.8219767101330442e-07, |
|
"loss": 1.1248, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9439012814428097, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 1.7732118850719237e-07, |
|
"loss": 1.1056, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9446606549596582, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.7251026952640583e-07, |
|
"loss": 1.1053, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9454200284765069, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 1.6776494618243156e-07, |
|
"loss": 1.1511, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9461794019933555, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 1.6308525014892217e-07, |
|
"loss": 1.1568, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9469387755102041, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.5847121266148847e-07, |
|
"loss": 1.1354, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9476981490270526, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.539228645174895e-07, |
|
"loss": 1.2015, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9484575225439013, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 1.4944023607582737e-07, |
|
"loss": 1.1045, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9492168960607499, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.4502335725674165e-07, |
|
"loss": 1.1576, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9499762695775985, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 1.406722575416164e-07, |
|
"loss": 1.1525, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9507356430944471, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.3638696597277678e-07, |
|
"loss": 1.1828, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.9514950166112957, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.3216751115329718e-07, |
|
"loss": 1.1428, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.9522543901281443, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 1.2801392124681233e-07, |
|
"loss": 1.1528, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9530137636449929, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.2392622397732756e-07, |
|
"loss": 1.1491, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9537731371618415, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.1990444662903445e-07, |
|
"loss": 1.2012, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.95453251067869, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 1.159486160461265e-07, |
|
"loss": 1.1128, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9552918841955387, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 1.1205875863262272e-07, |
|
"loss": 1.1725, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9560512577123873, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 1.0823490035218986e-07, |
|
"loss": 1.1942, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.9568106312292359, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.0447706672797264e-07, |
|
"loss": 1.1906, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9575700047460844, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.0078528284241606e-07, |
|
"loss": 1.1831, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9583293782629331, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 9.715957333710447e-08, |
|
"loss": 1.1504, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.9590887517797817, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 9.359996241259384e-08, |
|
"loss": 1.1406, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9598481252966303, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 9.010647382825421e-08, |
|
"loss": 1.1464, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9606074988134788, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 8.667913090210534e-08, |
|
"loss": 1.1418, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9613668723303275, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 8.331795651066455e-08, |
|
"loss": 1.1785, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9621262458471761, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 8.002297308879359e-08, |
|
"loss": 1.1703, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.9628856193640247, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 7.679420262954984e-08, |
|
"loss": 1.1569, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.9636449928808732, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 7.363166668403643e-08, |
|
"loss": 1.1488, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9644043663977219, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 7.053538636126123e-08, |
|
"loss": 1.1948, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9651637399145705, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 6.750538232799586e-08, |
|
"loss": 1.1496, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.9659231134314191, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 6.454167480863694e-08, |
|
"loss": 1.1463, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.9666824869482676, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 6.164428358506947e-08, |
|
"loss": 1.1507, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.9674418604651163, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 5.881322799653699e-08, |
|
"loss": 1.1549, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9682012339819649, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 5.6048526939512794e-08, |
|
"loss": 1.1406, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9689606074988135, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.3350198867574424e-08, |
|
"loss": 1.1267, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.969719981015662, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 5.0718261791274924e-08, |
|
"loss": 1.147, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9704793545325107, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 4.815273327803183e-08, |
|
"loss": 1.1504, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.9712387280493593, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 4.5653630451998335e-08, |
|
"loss": 1.1471, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9719981015662079, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 4.32209699939623e-08, |
|
"loss": 1.1204, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9727574750830564, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 4.085476814122413e-08, |
|
"loss": 1.1692, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9735168485999051, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 3.8555040687493494e-08, |
|
"loss": 1.1089, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.9742762221167537, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 3.632180298278165e-08, |
|
"loss": 1.0833, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9750355956336023, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 3.4155069933301535e-08, |
|
"loss": 1.1362, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9757949691504508, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 3.2054856001366706e-08, |
|
"loss": 1.2, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.9765543426672995, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 3.0021175205294794e-08, |
|
"loss": 1.2642, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.9773137161841481, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 2.805404111931198e-08, |
|
"loss": 1.1712, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.9780730897009967, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 2.6153466873468646e-08, |
|
"loss": 1.1773, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9788324632178452, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 2.4319465153543886e-08, |
|
"loss": 1.1556, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 2.255204820096668e-08, |
|
"loss": 1.1467, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9803512102515425, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 2.0851227812731523e-08, |
|
"loss": 1.1793, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.9811105837683911, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 1.9217015341318478e-08, |
|
"loss": 1.1366, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9818699572852396, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 1.764942169462325e-08, |
|
"loss": 1.1893, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.9826293308020883, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 1.6148457335876112e-08, |
|
"loss": 1.1308, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9833887043189369, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 1.4714132283577543e-08, |
|
"loss": 1.2597, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9841480778357855, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 1.3346456111430484e-08, |
|
"loss": 1.1048, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.984907451352634, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.2045437948275952e-08, |
|
"loss": 1.1165, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9856668248694826, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.0811086478031973e-08, |
|
"loss": 1.1419, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9864261983863313, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 9.643409939636972e-09, |
|
"loss": 1.1656, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9871855719031799, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 8.542416126989805e-09, |
|
"loss": 1.1344, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9879449454200285, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 7.508112388905363e-09, |
|
"loss": 1.1509, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.988704318936877, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 6.540505629061278e-09, |
|
"loss": 1.1836, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9894636924537257, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 5.639602305950176e-09, |
|
"loss": 1.1659, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9902230659705743, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 4.80540843283972e-09, |
|
"loss": 1.1539, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9909824394874229, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 4.037929577732636e-09, |
|
"loss": 1.1051, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9917418130042714, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 3.3371708633267443e-09, |
|
"loss": 1.153, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9925011865211201, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 2.7031369669816566e-09, |
|
"loss": 1.1997, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9932605600379687, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 2.1358321206899067e-09, |
|
"loss": 1.1305, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9940199335548173, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.6352601110469768e-09, |
|
"loss": 1.0931, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9947793070716658, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 1.20142427922465e-09, |
|
"loss": 1.1754, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9955386805885145, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 8.343275209521384e-10, |
|
"loss": 1.2122, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9962980541053631, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 5.339722864927677e-10, |
|
"loss": 1.1428, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9970574276222117, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 3.003605806306542e-10, |
|
"loss": 1.1282, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9978168011390602, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 1.3349396265516235e-10, |
|
"loss": 1.1608, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9985761746559089, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 3.3373546353132614e-11, |
|
"loss": 1.1562, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9993355481727575, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 0.0, |
|
"loss": 1.1401, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9993355481727575, |
|
"eval_loss": 1.151589274406433, |
|
"eval_runtime": 640.5297, |
|
"eval_samples_per_second": 92.364, |
|
"eval_steps_per_second": 7.698, |
|
"step": 1316 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1316, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.953779885289767e+18, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|