{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9993355481727575,
"eval_steps": 500,
"global_step": 1316,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007593735168485999,
"grad_norm": 4.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 1.9271,
"step": 1
},
{
"epoch": 0.0015187470336971997,
"grad_norm": 3.359375,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.6784,
"step": 2
},
{
"epoch": 0.0022781205505457997,
"grad_norm": 3.1875,
"learning_rate": 6.000000000000001e-07,
"loss": 1.7025,
"step": 3
},
{
"epoch": 0.0030374940673943995,
"grad_norm": 3.640625,
"learning_rate": 8.000000000000001e-07,
"loss": 1.8366,
"step": 4
},
{
"epoch": 0.0037968675842429997,
"grad_norm": 3.859375,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.8837,
"step": 5
},
{
"epoch": 0.0045562411010915994,
"grad_norm": 3.546875,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.7767,
"step": 6
},
{
"epoch": 0.0053156146179402,
"grad_norm": 3.21875,
"learning_rate": 1.4000000000000001e-06,
"loss": 1.6786,
"step": 7
},
{
"epoch": 0.006074988134788799,
"grad_norm": 3.96875,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.873,
"step": 8
},
{
"epoch": 0.006834361651637399,
"grad_norm": 3.703125,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.8584,
"step": 9
},
{
"epoch": 0.007593735168485999,
"grad_norm": 3.40625,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.7817,
"step": 10
},
{
"epoch": 0.0083531086853346,
"grad_norm": 3.25,
"learning_rate": 2.2e-06,
"loss": 1.7579,
"step": 11
},
{
"epoch": 0.009112482202183199,
"grad_norm": 3.953125,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.9496,
"step": 12
},
{
"epoch": 0.009871855719031798,
"grad_norm": 3.515625,
"learning_rate": 2.6e-06,
"loss": 1.7234,
"step": 13
},
{
"epoch": 0.0106312292358804,
"grad_norm": 4.375,
"learning_rate": 2.8000000000000003e-06,
"loss": 2.003,
"step": 14
},
{
"epoch": 0.011390602752728999,
"grad_norm": 4.125,
"learning_rate": 3e-06,
"loss": 1.8606,
"step": 15
},
{
"epoch": 0.012149976269577598,
"grad_norm": 3.71875,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.8039,
"step": 16
},
{
"epoch": 0.012909349786426199,
"grad_norm": 3.96875,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.936,
"step": 17
},
{
"epoch": 0.013668723303274798,
"grad_norm": 3.453125,
"learning_rate": 3.6000000000000003e-06,
"loss": 1.7465,
"step": 18
},
{
"epoch": 0.014428096820123398,
"grad_norm": 3.5625,
"learning_rate": 3.8000000000000005e-06,
"loss": 1.659,
"step": 19
},
{
"epoch": 0.015187470336971999,
"grad_norm": 3.59375,
"learning_rate": 4.000000000000001e-06,
"loss": 1.7962,
"step": 20
},
{
"epoch": 0.015946843853820596,
"grad_norm": 3.375,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.6802,
"step": 21
},
{
"epoch": 0.0167062173706692,
"grad_norm": 3.875,
"learning_rate": 4.4e-06,
"loss": 1.8444,
"step": 22
},
{
"epoch": 0.0174655908875178,
"grad_norm": 3.53125,
"learning_rate": 4.600000000000001e-06,
"loss": 1.7685,
"step": 23
},
{
"epoch": 0.018224964404366398,
"grad_norm": 3.421875,
"learning_rate": 4.800000000000001e-06,
"loss": 1.6768,
"step": 24
},
{
"epoch": 0.018984337921214997,
"grad_norm": 3.6875,
"learning_rate": 5e-06,
"loss": 1.8115,
"step": 25
},
{
"epoch": 0.019743711438063596,
"grad_norm": 3.671875,
"learning_rate": 5.2e-06,
"loss": 1.7844,
"step": 26
},
{
"epoch": 0.020503084954912196,
"grad_norm": 3.75,
"learning_rate": 5.400000000000001e-06,
"loss": 1.8313,
"step": 27
},
{
"epoch": 0.0212624584717608,
"grad_norm": 3.953125,
"learning_rate": 5.600000000000001e-06,
"loss": 1.8675,
"step": 28
},
{
"epoch": 0.022021831988609398,
"grad_norm": 3.71875,
"learning_rate": 5.8e-06,
"loss": 1.7895,
"step": 29
},
{
"epoch": 0.022781205505457997,
"grad_norm": 4.03125,
"learning_rate": 6e-06,
"loss": 1.8702,
"step": 30
},
{
"epoch": 0.023540579022306597,
"grad_norm": 3.640625,
"learning_rate": 6.200000000000001e-06,
"loss": 1.6666,
"step": 31
},
{
"epoch": 0.024299952539155196,
"grad_norm": 4.125,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.9699,
"step": 32
},
{
"epoch": 0.025059326056003795,
"grad_norm": 3.578125,
"learning_rate": 6.600000000000001e-06,
"loss": 1.6828,
"step": 33
},
{
"epoch": 0.025818699572852398,
"grad_norm": 3.65625,
"learning_rate": 6.800000000000001e-06,
"loss": 1.8098,
"step": 34
},
{
"epoch": 0.026578073089700997,
"grad_norm": 3.484375,
"learning_rate": 7e-06,
"loss": 1.6943,
"step": 35
},
{
"epoch": 0.027337446606549597,
"grad_norm": 3.40625,
"learning_rate": 7.2000000000000005e-06,
"loss": 1.6835,
"step": 36
},
{
"epoch": 0.028096820123398196,
"grad_norm": 3.53125,
"learning_rate": 7.4e-06,
"loss": 1.7776,
"step": 37
},
{
"epoch": 0.028856193640246795,
"grad_norm": 3.828125,
"learning_rate": 7.600000000000001e-06,
"loss": 1.8554,
"step": 38
},
{
"epoch": 0.029615567157095395,
"grad_norm": 4.28125,
"learning_rate": 7.800000000000002e-06,
"loss": 2.0373,
"step": 39
},
{
"epoch": 0.030374940673943997,
"grad_norm": 3.4375,
"learning_rate": 8.000000000000001e-06,
"loss": 1.7848,
"step": 40
},
{
"epoch": 0.031134314190792597,
"grad_norm": 3.71875,
"learning_rate": 8.2e-06,
"loss": 1.8114,
"step": 41
},
{
"epoch": 0.03189368770764119,
"grad_norm": 4.21875,
"learning_rate": 8.400000000000001e-06,
"loss": 1.9938,
"step": 42
},
{
"epoch": 0.0326530612244898,
"grad_norm": 3.515625,
"learning_rate": 8.6e-06,
"loss": 1.6792,
"step": 43
},
{
"epoch": 0.0334124347413384,
"grad_norm": 3.6875,
"learning_rate": 8.8e-06,
"loss": 1.8027,
"step": 44
},
{
"epoch": 0.034171808258187,
"grad_norm": 4.0625,
"learning_rate": 9e-06,
"loss": 1.8655,
"step": 45
},
{
"epoch": 0.0349311817750356,
"grad_norm": 3.75,
"learning_rate": 9.200000000000002e-06,
"loss": 1.8402,
"step": 46
},
{
"epoch": 0.035690555291884196,
"grad_norm": 3.75,
"learning_rate": 9.4e-06,
"loss": 1.8507,
"step": 47
},
{
"epoch": 0.036449928808732796,
"grad_norm": 3.515625,
"learning_rate": 9.600000000000001e-06,
"loss": 1.7811,
"step": 48
},
{
"epoch": 0.037209302325581395,
"grad_norm": 3.796875,
"learning_rate": 9.800000000000001e-06,
"loss": 1.873,
"step": 49
},
{
"epoch": 0.037968675842429994,
"grad_norm": 3.40625,
"learning_rate": 1e-05,
"loss": 1.7869,
"step": 50
},
{
"epoch": 0.038728049359278593,
"grad_norm": 3.890625,
"learning_rate": 1.02e-05,
"loss": 1.847,
"step": 51
},
{
"epoch": 0.03948742287612719,
"grad_norm": 4.0,
"learning_rate": 1.04e-05,
"loss": 1.9245,
"step": 52
},
{
"epoch": 0.04024679639297579,
"grad_norm": 4.09375,
"learning_rate": 1.0600000000000002e-05,
"loss": 2.0196,
"step": 53
},
{
"epoch": 0.04100616990982439,
"grad_norm": 3.609375,
"learning_rate": 1.0800000000000002e-05,
"loss": 1.7158,
"step": 54
},
{
"epoch": 0.041765543426673,
"grad_norm": 3.453125,
"learning_rate": 1.1000000000000001e-05,
"loss": 1.7212,
"step": 55
},
{
"epoch": 0.0425249169435216,
"grad_norm": 4.0625,
"learning_rate": 1.1200000000000001e-05,
"loss": 1.9365,
"step": 56
},
{
"epoch": 0.043284290460370196,
"grad_norm": 3.65625,
"learning_rate": 1.14e-05,
"loss": 1.8307,
"step": 57
},
{
"epoch": 0.044043663977218796,
"grad_norm": 3.296875,
"learning_rate": 1.16e-05,
"loss": 1.6974,
"step": 58
},
{
"epoch": 0.044803037494067395,
"grad_norm": 3.328125,
"learning_rate": 1.18e-05,
"loss": 1.6723,
"step": 59
},
{
"epoch": 0.045562411010915994,
"grad_norm": 3.25,
"learning_rate": 1.2e-05,
"loss": 1.6466,
"step": 60
},
{
"epoch": 0.046321784527764594,
"grad_norm": 3.59375,
"learning_rate": 1.22e-05,
"loss": 1.7542,
"step": 61
},
{
"epoch": 0.04708115804461319,
"grad_norm": 3.546875,
"learning_rate": 1.2400000000000002e-05,
"loss": 1.8449,
"step": 62
},
{
"epoch": 0.04784053156146179,
"grad_norm": 3.140625,
"learning_rate": 1.2600000000000001e-05,
"loss": 1.6788,
"step": 63
},
{
"epoch": 0.04859990507831039,
"grad_norm": 3.421875,
"learning_rate": 1.2800000000000001e-05,
"loss": 1.8164,
"step": 64
},
{
"epoch": 0.04935927859515899,
"grad_norm": 3.359375,
"learning_rate": 1.3000000000000001e-05,
"loss": 1.7674,
"step": 65
},
{
"epoch": 0.05011865211200759,
"grad_norm": 3.296875,
"learning_rate": 1.3200000000000002e-05,
"loss": 1.711,
"step": 66
},
{
"epoch": 0.0508780256288562,
"grad_norm": 3.109375,
"learning_rate": 1.3400000000000002e-05,
"loss": 1.6482,
"step": 67
},
{
"epoch": 0.051637399145704796,
"grad_norm": 3.1875,
"learning_rate": 1.3600000000000002e-05,
"loss": 1.6734,
"step": 68
},
{
"epoch": 0.052396772662553395,
"grad_norm": 3.546875,
"learning_rate": 1.38e-05,
"loss": 1.793,
"step": 69
},
{
"epoch": 0.053156146179401995,
"grad_norm": 3.65625,
"learning_rate": 1.4e-05,
"loss": 1.8647,
"step": 70
},
{
"epoch": 0.053915519696250594,
"grad_norm": 3.53125,
"learning_rate": 1.4200000000000001e-05,
"loss": 1.8249,
"step": 71
},
{
"epoch": 0.05467489321309919,
"grad_norm": 3.796875,
"learning_rate": 1.4400000000000001e-05,
"loss": 1.88,
"step": 72
},
{
"epoch": 0.05543426672994779,
"grad_norm": 3.984375,
"learning_rate": 1.46e-05,
"loss": 1.9618,
"step": 73
},
{
"epoch": 0.05619364024679639,
"grad_norm": 3.46875,
"learning_rate": 1.48e-05,
"loss": 1.8017,
"step": 74
},
{
"epoch": 0.05695301376364499,
"grad_norm": 3.40625,
"learning_rate": 1.5000000000000002e-05,
"loss": 1.6894,
"step": 75
},
{
"epoch": 0.05771238728049359,
"grad_norm": 3.375,
"learning_rate": 1.5200000000000002e-05,
"loss": 1.7385,
"step": 76
},
{
"epoch": 0.05847176079734219,
"grad_norm": 3.21875,
"learning_rate": 1.54e-05,
"loss": 1.631,
"step": 77
},
{
"epoch": 0.05923113431419079,
"grad_norm": 3.421875,
"learning_rate": 1.5600000000000003e-05,
"loss": 1.7345,
"step": 78
},
{
"epoch": 0.059990507831039395,
"grad_norm": 3.25,
"learning_rate": 1.58e-05,
"loss": 1.6198,
"step": 79
},
{
"epoch": 0.060749881347887995,
"grad_norm": 3.703125,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.7695,
"step": 80
},
{
"epoch": 0.061509254864736594,
"grad_norm": 3.546875,
"learning_rate": 1.62e-05,
"loss": 1.7828,
"step": 81
},
{
"epoch": 0.062268628381585193,
"grad_norm": 4.0625,
"learning_rate": 1.64e-05,
"loss": 1.8845,
"step": 82
},
{
"epoch": 0.06302800189843379,
"grad_norm": 3.1875,
"learning_rate": 1.66e-05,
"loss": 1.698,
"step": 83
},
{
"epoch": 0.06378737541528239,
"grad_norm": 3.28125,
"learning_rate": 1.6800000000000002e-05,
"loss": 1.6712,
"step": 84
},
{
"epoch": 0.064546748932131,
"grad_norm": 3.375,
"learning_rate": 1.7e-05,
"loss": 1.7673,
"step": 85
},
{
"epoch": 0.0653061224489796,
"grad_norm": 3.609375,
"learning_rate": 1.72e-05,
"loss": 1.8005,
"step": 86
},
{
"epoch": 0.0660654959658282,
"grad_norm": 3.5,
"learning_rate": 1.7400000000000003e-05,
"loss": 1.7674,
"step": 87
},
{
"epoch": 0.0668248694826768,
"grad_norm": 3.484375,
"learning_rate": 1.76e-05,
"loss": 1.7665,
"step": 88
},
{
"epoch": 0.0675842429995254,
"grad_norm": 3.578125,
"learning_rate": 1.7800000000000002e-05,
"loss": 1.7815,
"step": 89
},
{
"epoch": 0.068343616516374,
"grad_norm": 3.25,
"learning_rate": 1.8e-05,
"loss": 1.6855,
"step": 90
},
{
"epoch": 0.0691029900332226,
"grad_norm": 3.1875,
"learning_rate": 1.8200000000000002e-05,
"loss": 1.6461,
"step": 91
},
{
"epoch": 0.0698623635500712,
"grad_norm": 3.09375,
"learning_rate": 1.8400000000000003e-05,
"loss": 1.6388,
"step": 92
},
{
"epoch": 0.07062173706691979,
"grad_norm": 3.46875,
"learning_rate": 1.86e-05,
"loss": 1.7505,
"step": 93
},
{
"epoch": 0.07138111058376839,
"grad_norm": 2.84375,
"learning_rate": 1.88e-05,
"loss": 1.6048,
"step": 94
},
{
"epoch": 0.07214048410061699,
"grad_norm": 3.34375,
"learning_rate": 1.9e-05,
"loss": 1.6765,
"step": 95
},
{
"epoch": 0.07289985761746559,
"grad_norm": 3.171875,
"learning_rate": 1.9200000000000003e-05,
"loss": 1.634,
"step": 96
},
{
"epoch": 0.07365923113431419,
"grad_norm": 3.0625,
"learning_rate": 1.94e-05,
"loss": 1.6586,
"step": 97
},
{
"epoch": 0.07441860465116279,
"grad_norm": 3.140625,
"learning_rate": 1.9600000000000002e-05,
"loss": 1.6871,
"step": 98
},
{
"epoch": 0.07517797816801139,
"grad_norm": 3.703125,
"learning_rate": 1.98e-05,
"loss": 1.7815,
"step": 99
},
{
"epoch": 0.07593735168485999,
"grad_norm": 3.671875,
"learning_rate": 2e-05,
"loss": 1.8349,
"step": 100
},
{
"epoch": 0.07669672520170859,
"grad_norm": 3.625,
"learning_rate": 1.9999966626453647e-05,
"loss": 1.8303,
"step": 101
},
{
"epoch": 0.07745609871855719,
"grad_norm": 2.921875,
"learning_rate": 1.9999866506037346e-05,
"loss": 1.5889,
"step": 102
},
{
"epoch": 0.07821547223540579,
"grad_norm": 2.984375,
"learning_rate": 1.9999699639419373e-05,
"loss": 1.5841,
"step": 103
},
{
"epoch": 0.07897484575225439,
"grad_norm": 2.96875,
"learning_rate": 1.999946602771351e-05,
"loss": 1.6492,
"step": 104
},
{
"epoch": 0.07973421926910298,
"grad_norm": 3.203125,
"learning_rate": 1.999916567247905e-05,
"loss": 1.6682,
"step": 105
},
{
"epoch": 0.08049359278595158,
"grad_norm": 2.6875,
"learning_rate": 1.9998798575720776e-05,
"loss": 1.522,
"step": 106
},
{
"epoch": 0.08125296630280018,
"grad_norm": 3.171875,
"learning_rate": 1.9998364739888954e-05,
"loss": 1.6903,
"step": 107
},
{
"epoch": 0.08201233981964878,
"grad_norm": 2.765625,
"learning_rate": 1.9997864167879313e-05,
"loss": 1.5823,
"step": 108
},
{
"epoch": 0.0827717133364974,
"grad_norm": 2.953125,
"learning_rate": 1.9997296863033018e-05,
"loss": 1.6105,
"step": 109
},
{
"epoch": 0.083531086853346,
"grad_norm": 2.90625,
"learning_rate": 1.9996662829136676e-05,
"loss": 1.5877,
"step": 110
},
{
"epoch": 0.0842904603701946,
"grad_norm": 3.515625,
"learning_rate": 1.999596207042227e-05,
"loss": 1.7453,
"step": 111
},
{
"epoch": 0.0850498338870432,
"grad_norm": 3.3125,
"learning_rate": 1.999519459156716e-05,
"loss": 1.7015,
"step": 112
},
{
"epoch": 0.0858092074038918,
"grad_norm": 3.09375,
"learning_rate": 1.999436039769405e-05,
"loss": 1.6773,
"step": 113
},
{
"epoch": 0.08656858092074039,
"grad_norm": 2.84375,
"learning_rate": 1.9993459494370938e-05,
"loss": 1.6287,
"step": 114
},
{
"epoch": 0.08732795443758899,
"grad_norm": 3.40625,
"learning_rate": 1.9992491887611095e-05,
"loss": 1.7393,
"step": 115
},
{
"epoch": 0.08808732795443759,
"grad_norm": 4.03125,
"learning_rate": 1.999145758387301e-05,
"loss": 1.9157,
"step": 116
},
{
"epoch": 0.08884670147128619,
"grad_norm": 2.734375,
"learning_rate": 1.9990356590060363e-05,
"loss": 1.6195,
"step": 117
},
{
"epoch": 0.08960607498813479,
"grad_norm": 2.71875,
"learning_rate": 1.998918891352197e-05,
"loss": 1.6428,
"step": 118
},
{
"epoch": 0.09036544850498339,
"grad_norm": 3.1875,
"learning_rate": 1.9987954562051724e-05,
"loss": 1.6772,
"step": 119
},
{
"epoch": 0.09112482202183199,
"grad_norm": 2.84375,
"learning_rate": 1.998665354388857e-05,
"loss": 1.5625,
"step": 120
},
{
"epoch": 0.09188419553868059,
"grad_norm": 2.984375,
"learning_rate": 1.9985285867716423e-05,
"loss": 1.6915,
"step": 121
},
{
"epoch": 0.09264356905552919,
"grad_norm": 2.828125,
"learning_rate": 1.9983851542664125e-05,
"loss": 1.6413,
"step": 122
},
{
"epoch": 0.09340294257237779,
"grad_norm": 2.65625,
"learning_rate": 1.998235057830538e-05,
"loss": 1.5844,
"step": 123
},
{
"epoch": 0.09416231608922639,
"grad_norm": 2.59375,
"learning_rate": 1.9980782984658682e-05,
"loss": 1.561,
"step": 124
},
{
"epoch": 0.09492168960607499,
"grad_norm": 2.921875,
"learning_rate": 1.997914877218727e-05,
"loss": 1.6305,
"step": 125
},
{
"epoch": 0.09568106312292358,
"grad_norm": 2.25,
"learning_rate": 1.9977447951799035e-05,
"loss": 1.4409,
"step": 126
},
{
"epoch": 0.09644043663977218,
"grad_norm": 2.484375,
"learning_rate": 1.9975680534846457e-05,
"loss": 1.5723,
"step": 127
},
{
"epoch": 0.09719981015662078,
"grad_norm": 3.453125,
"learning_rate": 1.9973846533126533e-05,
"loss": 1.7338,
"step": 128
},
{
"epoch": 0.09795918367346938,
"grad_norm": 2.703125,
"learning_rate": 1.997194595888069e-05,
"loss": 1.6383,
"step": 129
},
{
"epoch": 0.09871855719031798,
"grad_norm": 2.4375,
"learning_rate": 1.996997882479471e-05,
"loss": 1.5887,
"step": 130
},
{
"epoch": 0.09947793070716658,
"grad_norm": 2.4375,
"learning_rate": 1.9967945143998636e-05,
"loss": 1.5525,
"step": 131
},
{
"epoch": 0.10023730422401518,
"grad_norm": 2.359375,
"learning_rate": 1.99658449300667e-05,
"loss": 1.4995,
"step": 132
},
{
"epoch": 0.1009966777408638,
"grad_norm": 2.140625,
"learning_rate": 1.996367819701722e-05,
"loss": 1.5085,
"step": 133
},
{
"epoch": 0.1017560512577124,
"grad_norm": 2.46875,
"learning_rate": 1.996144495931251e-05,
"loss": 1.5708,
"step": 134
},
{
"epoch": 0.10251542477456099,
"grad_norm": 2.71875,
"learning_rate": 1.995914523185878e-05,
"loss": 1.623,
"step": 135
},
{
"epoch": 0.10327479829140959,
"grad_norm": 2.1875,
"learning_rate": 1.9956779030006038e-05,
"loss": 1.5378,
"step": 136
},
{
"epoch": 0.10403417180825819,
"grad_norm": 2.5,
"learning_rate": 1.9954346369548002e-05,
"loss": 1.5672,
"step": 137
},
{
"epoch": 0.10479354532510679,
"grad_norm": 2.078125,
"learning_rate": 1.995184726672197e-05,
"loss": 1.5316,
"step": 138
},
{
"epoch": 0.10555291884195539,
"grad_norm": 2.25,
"learning_rate": 1.994928173820873e-05,
"loss": 1.5776,
"step": 139
},
{
"epoch": 0.10631229235880399,
"grad_norm": 2.34375,
"learning_rate": 1.994664980113243e-05,
"loss": 1.6079,
"step": 140
},
{
"epoch": 0.10707166587565259,
"grad_norm": 2.296875,
"learning_rate": 1.9943951473060488e-05,
"loss": 1.5903,
"step": 141
},
{
"epoch": 0.10783103939250119,
"grad_norm": 2.53125,
"learning_rate": 1.9941186772003463e-05,
"loss": 1.6456,
"step": 142
},
{
"epoch": 0.10859041290934979,
"grad_norm": 2.171875,
"learning_rate": 1.9938355716414933e-05,
"loss": 1.5053,
"step": 143
},
{
"epoch": 0.10934978642619839,
"grad_norm": 2.09375,
"learning_rate": 1.9935458325191365e-05,
"loss": 1.5925,
"step": 144
},
{
"epoch": 0.11010915994304699,
"grad_norm": 2.171875,
"learning_rate": 1.9932494617672007e-05,
"loss": 1.6033,
"step": 145
},
{
"epoch": 0.11086853345989559,
"grad_norm": 2.046875,
"learning_rate": 1.992946461363874e-05,
"loss": 1.553,
"step": 146
},
{
"epoch": 0.11162790697674418,
"grad_norm": 2.21875,
"learning_rate": 1.9926368333315964e-05,
"loss": 1.5962,
"step": 147
},
{
"epoch": 0.11238728049359278,
"grad_norm": 2.203125,
"learning_rate": 1.992320579737045e-05,
"loss": 1.6159,
"step": 148
},
{
"epoch": 0.11314665401044138,
"grad_norm": 2.0625,
"learning_rate": 1.991997702691121e-05,
"loss": 1.4709,
"step": 149
},
{
"epoch": 0.11390602752728998,
"grad_norm": 2.375,
"learning_rate": 1.9916682043489337e-05,
"loss": 1.6076,
"step": 150
},
{
"epoch": 0.11466540104413858,
"grad_norm": 1.984375,
"learning_rate": 1.9913320869097897e-05,
"loss": 1.4864,
"step": 151
},
{
"epoch": 0.11542477456098718,
"grad_norm": 1.7734375,
"learning_rate": 1.9909893526171745e-05,
"loss": 1.4559,
"step": 152
},
{
"epoch": 0.11618414807783578,
"grad_norm": 1.9921875,
"learning_rate": 1.990640003758741e-05,
"loss": 1.5585,
"step": 153
},
{
"epoch": 0.11694352159468438,
"grad_norm": 1.796875,
"learning_rate": 1.9902840426662897e-05,
"loss": 1.4656,
"step": 154
},
{
"epoch": 0.11770289511153298,
"grad_norm": 1.9296875,
"learning_rate": 1.9899214717157588e-05,
"loss": 1.5357,
"step": 155
},
{
"epoch": 0.11846226862838158,
"grad_norm": 1.9296875,
"learning_rate": 1.9895522933272028e-05,
"loss": 1.5101,
"step": 156
},
{
"epoch": 0.11922164214523019,
"grad_norm": 1.9140625,
"learning_rate": 1.989176509964781e-05,
"loss": 1.5287,
"step": 157
},
{
"epoch": 0.11998101566207879,
"grad_norm": 1.9765625,
"learning_rate": 1.988794124136738e-05,
"loss": 1.6104,
"step": 158
},
{
"epoch": 0.12074038917892739,
"grad_norm": 1.9296875,
"learning_rate": 1.9884051383953876e-05,
"loss": 1.5313,
"step": 159
},
{
"epoch": 0.12149976269577599,
"grad_norm": 1.703125,
"learning_rate": 1.9880095553370967e-05,
"loss": 1.4602,
"step": 160
},
{
"epoch": 0.12225913621262459,
"grad_norm": 1.59375,
"learning_rate": 1.9876073776022676e-05,
"loss": 1.4071,
"step": 161
},
{
"epoch": 0.12301850972947319,
"grad_norm": 1.6953125,
"learning_rate": 1.987198607875319e-05,
"loss": 1.4707,
"step": 162
},
{
"epoch": 0.12377788324632179,
"grad_norm": 1.8125,
"learning_rate": 1.9867832488846702e-05,
"loss": 1.4729,
"step": 163
},
{
"epoch": 0.12453725676317039,
"grad_norm": 1.6328125,
"learning_rate": 1.9863613034027224e-05,
"loss": 1.4967,
"step": 164
},
{
"epoch": 0.12529663028001897,
"grad_norm": 1.6171875,
"learning_rate": 1.9859327742458387e-05,
"loss": 1.4463,
"step": 165
},
{
"epoch": 0.12605600379686757,
"grad_norm": 1.65625,
"learning_rate": 1.985497664274326e-05,
"loss": 1.4763,
"step": 166
},
{
"epoch": 0.12681537731371617,
"grad_norm": 1.6953125,
"learning_rate": 1.9850559763924176e-05,
"loss": 1.5175,
"step": 167
},
{
"epoch": 0.12757475083056477,
"grad_norm": 1.609375,
"learning_rate": 1.9846077135482513e-05,
"loss": 1.4363,
"step": 168
},
{
"epoch": 0.1283341243474134,
"grad_norm": 1.4609375,
"learning_rate": 1.9841528787338513e-05,
"loss": 1.3922,
"step": 169
},
{
"epoch": 0.129093497864262,
"grad_norm": 1.578125,
"learning_rate": 1.983691474985108e-05,
"loss": 1.4937,
"step": 170
},
{
"epoch": 0.1298528713811106,
"grad_norm": 1.421875,
"learning_rate": 1.983223505381757e-05,
"loss": 1.4381,
"step": 171
},
{
"epoch": 0.1306122448979592,
"grad_norm": 1.6484375,
"learning_rate": 1.9827489730473597e-05,
"loss": 1.5019,
"step": 172
},
{
"epoch": 0.1313716184148078,
"grad_norm": 1.4140625,
"learning_rate": 1.982267881149281e-05,
"loss": 1.3798,
"step": 173
},
{
"epoch": 0.1321309919316564,
"grad_norm": 1.6796875,
"learning_rate": 1.9817802328986696e-05,
"loss": 1.5623,
"step": 174
},
{
"epoch": 0.132890365448505,
"grad_norm": 1.5390625,
"learning_rate": 1.9812860315504362e-05,
"loss": 1.4497,
"step": 175
},
{
"epoch": 0.1336497389653536,
"grad_norm": 1.4921875,
"learning_rate": 1.9807852804032306e-05,
"loss": 1.4442,
"step": 176
},
{
"epoch": 0.1344091124822022,
"grad_norm": 1.734375,
"learning_rate": 1.9802779827994214e-05,
"loss": 1.5552,
"step": 177
},
{
"epoch": 0.1351684859990508,
"grad_norm": 1.421875,
"learning_rate": 1.9797641421250725e-05,
"loss": 1.4411,
"step": 178
},
{
"epoch": 0.1359278595158994,
"grad_norm": 1.3515625,
"learning_rate": 1.9792437618099215e-05,
"loss": 1.4569,
"step": 179
},
{
"epoch": 0.136687233032748,
"grad_norm": 1.40625,
"learning_rate": 1.9787168453273546e-05,
"loss": 1.4257,
"step": 180
},
{
"epoch": 0.1374466065495966,
"grad_norm": 1.359375,
"learning_rate": 1.9781833961943874e-05,
"loss": 1.417,
"step": 181
},
{
"epoch": 0.1382059800664452,
"grad_norm": 1.5625,
"learning_rate": 1.9776434179716365e-05,
"loss": 1.4831,
"step": 182
},
{
"epoch": 0.1389653535832938,
"grad_norm": 1.265625,
"learning_rate": 1.977096914263301e-05,
"loss": 1.3927,
"step": 183
},
{
"epoch": 0.1397247271001424,
"grad_norm": 1.3671875,
"learning_rate": 1.9765438887171327e-05,
"loss": 1.431,
"step": 184
},
{
"epoch": 0.140484100616991,
"grad_norm": 1.4609375,
"learning_rate": 1.975984345024418e-05,
"loss": 1.4798,
"step": 185
},
{
"epoch": 0.14124347413383959,
"grad_norm": 1.484375,
"learning_rate": 1.975418286919947e-05,
"loss": 1.4939,
"step": 186
},
{
"epoch": 0.14200284765068819,
"grad_norm": 1.390625,
"learning_rate": 1.9748457181819937e-05,
"loss": 1.4784,
"step": 187
},
{
"epoch": 0.14276222116753678,
"grad_norm": 1.2421875,
"learning_rate": 1.9742666426322877e-05,
"loss": 1.3947,
"step": 188
},
{
"epoch": 0.14352159468438538,
"grad_norm": 1.2109375,
"learning_rate": 1.97368106413599e-05,
"loss": 1.3783,
"step": 189
},
{
"epoch": 0.14428096820123398,
"grad_norm": 1.0859375,
"learning_rate": 1.9730889866016668e-05,
"loss": 1.3301,
"step": 190
},
{
"epoch": 0.14504034171808258,
"grad_norm": 1.1953125,
"learning_rate": 1.9724904139812636e-05,
"loss": 1.4403,
"step": 191
},
{
"epoch": 0.14579971523493118,
"grad_norm": 1.1484375,
"learning_rate": 1.9718853502700783e-05,
"loss": 1.4301,
"step": 192
},
{
"epoch": 0.14655908875177978,
"grad_norm": 1.0859375,
"learning_rate": 1.9712737995067357e-05,
"loss": 1.3473,
"step": 193
},
{
"epoch": 0.14731846226862838,
"grad_norm": 1.078125,
"learning_rate": 1.970655765773159e-05,
"loss": 1.3557,
"step": 194
},
{
"epoch": 0.14807783578547698,
"grad_norm": 1.0703125,
"learning_rate": 1.9700312531945444e-05,
"loss": 1.3979,
"step": 195
},
{
"epoch": 0.14883720930232558,
"grad_norm": 1.375,
"learning_rate": 1.9694002659393306e-05,
"loss": 1.5305,
"step": 196
},
{
"epoch": 0.14959658281917418,
"grad_norm": 1.1875,
"learning_rate": 1.9687628082191748e-05,
"loss": 1.5078,
"step": 197
},
{
"epoch": 0.15035595633602278,
"grad_norm": 1.265625,
"learning_rate": 1.9681188842889222e-05,
"loss": 1.4817,
"step": 198
},
{
"epoch": 0.15111532985287138,
"grad_norm": 1.0234375,
"learning_rate": 1.9674684984465774e-05,
"loss": 1.3599,
"step": 199
},
{
"epoch": 0.15187470336971998,
"grad_norm": 1.0625,
"learning_rate": 1.966811655033277e-05,
"loss": 1.384,
"step": 200
},
{
"epoch": 0.15263407688656858,
"grad_norm": 1.3515625,
"learning_rate": 1.9661483584332592e-05,
"loss": 1.514,
"step": 201
},
{
"epoch": 0.15339345040341718,
"grad_norm": 1.1328125,
"learning_rate": 1.9654786130738372e-05,
"loss": 1.3908,
"step": 202
},
{
"epoch": 0.15415282392026577,
"grad_norm": 1.015625,
"learning_rate": 1.9648024234253654e-05,
"loss": 1.336,
"step": 203
},
{
"epoch": 0.15491219743711437,
"grad_norm": 1.0078125,
"learning_rate": 1.9641197940012136e-05,
"loss": 1.3723,
"step": 204
},
{
"epoch": 0.15567157095396297,
"grad_norm": 1.015625,
"learning_rate": 1.963430729357735e-05,
"loss": 1.3784,
"step": 205
},
{
"epoch": 0.15643094447081157,
"grad_norm": 0.9375,
"learning_rate": 1.9627352340942355e-05,
"loss": 1.3541,
"step": 206
},
{
"epoch": 0.15719031798766017,
"grad_norm": 1.0546875,
"learning_rate": 1.9620333128529436e-05,
"loss": 1.3969,
"step": 207
},
{
"epoch": 0.15794969150450877,
"grad_norm": 1.1953125,
"learning_rate": 1.96132497031898e-05,
"loss": 1.4611,
"step": 208
},
{
"epoch": 0.15870906502135737,
"grad_norm": 0.96484375,
"learning_rate": 1.9606102112203243e-05,
"loss": 1.3631,
"step": 209
},
{
"epoch": 0.15946843853820597,
"grad_norm": 0.9375,
"learning_rate": 1.9598890403277867e-05,
"loss": 1.3605,
"step": 210
},
{
"epoch": 0.16022781205505457,
"grad_norm": 1.078125,
"learning_rate": 1.9591614624549724e-05,
"loss": 1.4721,
"step": 211
},
{
"epoch": 0.16098718557190317,
"grad_norm": 1.0234375,
"learning_rate": 1.958427482458253e-05,
"loss": 1.429,
"step": 212
},
{
"epoch": 0.16174655908875177,
"grad_norm": 0.94921875,
"learning_rate": 1.9576871052367307e-05,
"loss": 1.3866,
"step": 213
},
{
"epoch": 0.16250593260560037,
"grad_norm": 0.9140625,
"learning_rate": 1.956940335732209e-05,
"loss": 1.4103,
"step": 214
},
{
"epoch": 0.16326530612244897,
"grad_norm": 0.953125,
"learning_rate": 1.956187178929157e-05,
"loss": 1.3547,
"step": 215
},
{
"epoch": 0.16402467963929757,
"grad_norm": 1.0078125,
"learning_rate": 1.9554276398546767e-05,
"loss": 1.4262,
"step": 216
},
{
"epoch": 0.1647840531561462,
"grad_norm": 0.89453125,
"learning_rate": 1.9546617235784716e-05,
"loss": 1.3589,
"step": 217
},
{
"epoch": 0.1655434266729948,
"grad_norm": 0.91796875,
"learning_rate": 1.95388943521281e-05,
"loss": 1.3694,
"step": 218
},
{
"epoch": 0.1663028001898434,
"grad_norm": 0.859375,
"learning_rate": 1.953110779912492e-05,
"loss": 1.3515,
"step": 219
},
{
"epoch": 0.167062173706692,
"grad_norm": 0.96875,
"learning_rate": 1.9523257628748148e-05,
"loss": 1.419,
"step": 220
},
{
"epoch": 0.1678215472235406,
"grad_norm": 0.8515625,
"learning_rate": 1.9515343893395394e-05,
"loss": 1.3665,
"step": 221
},
{
"epoch": 0.1685809207403892,
"grad_norm": 0.7734375,
"learning_rate": 1.9507366645888544e-05,
"loss": 1.3448,
"step": 222
},
{
"epoch": 0.1693402942572378,
"grad_norm": 0.7578125,
"learning_rate": 1.9499325939473403e-05,
"loss": 1.3186,
"step": 223
},
{
"epoch": 0.1700996677740864,
"grad_norm": 0.6875,
"learning_rate": 1.9491221827819348e-05,
"loss": 1.2722,
"step": 224
},
{
"epoch": 0.170859041290935,
"grad_norm": 0.9375,
"learning_rate": 1.948305436501897e-05,
"loss": 1.4339,
"step": 225
},
{
"epoch": 0.1716184148077836,
"grad_norm": 0.796875,
"learning_rate": 1.9474823605587705e-05,
"loss": 1.3838,
"step": 226
},
{
"epoch": 0.1723777883246322,
"grad_norm": 0.98828125,
"learning_rate": 1.9466529604463484e-05,
"loss": 1.4411,
"step": 227
},
{
"epoch": 0.17313716184148079,
"grad_norm": 0.78515625,
"learning_rate": 1.9458172417006347e-05,
"loss": 1.3107,
"step": 228
},
{
"epoch": 0.17389653535832938,
"grad_norm": 0.86328125,
"learning_rate": 1.9449752098998097e-05,
"loss": 1.4422,
"step": 229
},
{
"epoch": 0.17465590887517798,
"grad_norm": 0.80078125,
"learning_rate": 1.9441268706641907e-05,
"loss": 1.3728,
"step": 230
},
{
"epoch": 0.17541528239202658,
"grad_norm": 0.9453125,
"learning_rate": 1.9432722296561954e-05,
"loss": 1.4489,
"step": 231
},
{
"epoch": 0.17617465590887518,
"grad_norm": 0.78125,
"learning_rate": 1.942411292580304e-05,
"loss": 1.3594,
"step": 232
},
{
"epoch": 0.17693402942572378,
"grad_norm": 0.7421875,
"learning_rate": 1.941544065183021e-05,
"loss": 1.3176,
"step": 233
},
{
"epoch": 0.17769340294257238,
"grad_norm": 0.71875,
"learning_rate": 1.9406705532528373e-05,
"loss": 1.3331,
"step": 234
},
{
"epoch": 0.17845277645942098,
"grad_norm": 0.73828125,
"learning_rate": 1.9397907626201915e-05,
"loss": 1.3217,
"step": 235
},
{
"epoch": 0.17921214997626958,
"grad_norm": 0.7578125,
"learning_rate": 1.9389046991574298e-05,
"loss": 1.3825,
"step": 236
},
{
"epoch": 0.17997152349311818,
"grad_norm": 0.78515625,
"learning_rate": 1.938012368778768e-05,
"loss": 1.3604,
"step": 237
},
{
"epoch": 0.18073089700996678,
"grad_norm": 0.6875,
"learning_rate": 1.9371137774402528e-05,
"loss": 1.3345,
"step": 238
},
{
"epoch": 0.18149027052681538,
"grad_norm": 0.87890625,
"learning_rate": 1.9362089311397194e-05,
"loss": 1.417,
"step": 239
},
{
"epoch": 0.18224964404366398,
"grad_norm": 0.63671875,
"learning_rate": 1.935297835916754e-05,
"loss": 1.2646,
"step": 240
},
{
"epoch": 0.18300901756051258,
"grad_norm": 0.67578125,
"learning_rate": 1.9343804978526525e-05,
"loss": 1.3089,
"step": 241
},
{
"epoch": 0.18376839107736118,
"grad_norm": 0.6328125,
"learning_rate": 1.9334569230703794e-05,
"loss": 1.2812,
"step": 242
},
{
"epoch": 0.18452776459420978,
"grad_norm": 0.76171875,
"learning_rate": 1.9325271177345284e-05,
"loss": 1.3355,
"step": 243
},
{
"epoch": 0.18528713811105837,
"grad_norm": 0.6484375,
"learning_rate": 1.9315910880512792e-05,
"loss": 1.3089,
"step": 244
},
{
"epoch": 0.18604651162790697,
"grad_norm": 0.703125,
"learning_rate": 1.9306488402683582e-05,
"loss": 1.3573,
"step": 245
},
{
"epoch": 0.18680588514475557,
"grad_norm": 0.62890625,
"learning_rate": 1.929700380674995e-05,
"loss": 1.2955,
"step": 246
},
{
"epoch": 0.18756525866160417,
"grad_norm": 0.6015625,
"learning_rate": 1.9287457156018824e-05,
"loss": 1.2819,
"step": 247
},
{
"epoch": 0.18832463217845277,
"grad_norm": 0.60546875,
"learning_rate": 1.927784851421132e-05,
"loss": 1.2677,
"step": 248
},
{
"epoch": 0.18908400569530137,
"grad_norm": 0.75,
"learning_rate": 1.926817794546232e-05,
"loss": 1.3524,
"step": 249
},
{
"epoch": 0.18984337921214997,
"grad_norm": 0.859375,
"learning_rate": 1.9258445514320064e-05,
"loss": 1.4673,
"step": 250
},
{
"epoch": 0.19060275272899857,
"grad_norm": 0.70703125,
"learning_rate": 1.9248651285745708e-05,
"loss": 1.3484,
"step": 251
},
{
"epoch": 0.19136212624584717,
"grad_norm": 0.7109375,
"learning_rate": 1.9238795325112867e-05,
"loss": 1.3565,
"step": 252
},
{
"epoch": 0.19212149976269577,
"grad_norm": 0.625,
"learning_rate": 1.9228877698207227e-05,
"loss": 1.3004,
"step": 253
},
{
"epoch": 0.19288087327954437,
"grad_norm": 0.66796875,
"learning_rate": 1.921889847122605e-05,
"loss": 1.3457,
"step": 254
},
{
"epoch": 0.19364024679639297,
"grad_norm": 0.69921875,
"learning_rate": 1.9208857710777785e-05,
"loss": 1.314,
"step": 255
},
{
"epoch": 0.19439962031324157,
"grad_norm": 0.8046875,
"learning_rate": 1.9198755483881585e-05,
"loss": 1.4202,
"step": 256
},
{
"epoch": 0.19515899383009017,
"grad_norm": 0.59375,
"learning_rate": 1.9188591857966875e-05,
"loss": 1.3255,
"step": 257
},
{
"epoch": 0.19591836734693877,
"grad_norm": 0.828125,
"learning_rate": 1.917836690087291e-05,
"loss": 1.4397,
"step": 258
},
{
"epoch": 0.19667774086378736,
"grad_norm": 0.640625,
"learning_rate": 1.91680806808483e-05,
"loss": 1.3296,
"step": 259
},
{
"epoch": 0.19743711438063596,
"grad_norm": 0.5859375,
"learning_rate": 1.9157733266550577e-05,
"loss": 1.2916,
"step": 260
},
{
"epoch": 0.19819648789748456,
"grad_norm": 0.64453125,
"learning_rate": 1.914732472704572e-05,
"loss": 1.3308,
"step": 261
},
{
"epoch": 0.19895586141433316,
"grad_norm": 0.6484375,
"learning_rate": 1.9136855131807705e-05,
"loss": 1.3426,
"step": 262
},
{
"epoch": 0.19971523493118176,
"grad_norm": 0.5390625,
"learning_rate": 1.9126324550718036e-05,
"loss": 1.2745,
"step": 263
},
{
"epoch": 0.20047460844803036,
"grad_norm": 0.6015625,
"learning_rate": 1.911573305406528e-05,
"loss": 1.3073,
"step": 264
},
{
"epoch": 0.201233981964879,
"grad_norm": 0.578125,
"learning_rate": 1.9105080712544603e-05,
"loss": 1.2674,
"step": 265
},
{
"epoch": 0.2019933554817276,
"grad_norm": 0.53515625,
"learning_rate": 1.909436759725728e-05,
"loss": 1.3087,
"step": 266
},
{
"epoch": 0.2027527289985762,
"grad_norm": 0.56640625,
"learning_rate": 1.908359377971025e-05,
"loss": 1.284,
"step": 267
},
{
"epoch": 0.2035121025154248,
"grad_norm": 0.5625,
"learning_rate": 1.9072759331815602e-05,
"loss": 1.2451,
"step": 268
},
{
"epoch": 0.20427147603227339,
"grad_norm": 0.5703125,
"learning_rate": 1.9061864325890132e-05,
"loss": 1.2624,
"step": 269
},
{
"epoch": 0.20503084954912199,
"grad_norm": 0.515625,
"learning_rate": 1.9050908834654834e-05,
"loss": 1.2392,
"step": 270
},
{
"epoch": 0.20579022306597058,
"grad_norm": 0.546875,
"learning_rate": 1.9039892931234434e-05,
"loss": 1.2405,
"step": 271
},
{
"epoch": 0.20654959658281918,
"grad_norm": 0.5625,
"learning_rate": 1.902881668915688e-05,
"loss": 1.2509,
"step": 272
},
{
"epoch": 0.20730897009966778,
"grad_norm": 0.5625,
"learning_rate": 1.9017680182352866e-05,
"loss": 1.3047,
"step": 273
},
{
"epoch": 0.20806834361651638,
"grad_norm": 0.68359375,
"learning_rate": 1.9006483485155338e-05,
"loss": 1.3492,
"step": 274
},
{
"epoch": 0.20882771713336498,
"grad_norm": 0.54296875,
"learning_rate": 1.8995226672298993e-05,
"loss": 1.2451,
"step": 275
},
{
"epoch": 0.20958709065021358,
"grad_norm": 0.62890625,
"learning_rate": 1.898390981891979e-05,
"loss": 1.3577,
"step": 276
},
{
"epoch": 0.21034646416706218,
"grad_norm": 0.58984375,
"learning_rate": 1.897253300055443e-05,
"loss": 1.3152,
"step": 277
},
{
"epoch": 0.21110583768391078,
"grad_norm": 0.58203125,
"learning_rate": 1.896109629313987e-05,
"loss": 1.3153,
"step": 278
},
{
"epoch": 0.21186521120075938,
"grad_norm": 0.60546875,
"learning_rate": 1.8949599773012808e-05,
"loss": 1.3153,
"step": 279
},
{
"epoch": 0.21262458471760798,
"grad_norm": 0.578125,
"learning_rate": 1.8938043516909173e-05,
"loss": 1.2932,
"step": 280
},
{
"epoch": 0.21338395823445658,
"grad_norm": 0.50390625,
"learning_rate": 1.892642760196361e-05,
"loss": 1.2294,
"step": 281
},
{
"epoch": 0.21414333175130518,
"grad_norm": 0.64453125,
"learning_rate": 1.891475210570898e-05,
"loss": 1.3246,
"step": 282
},
{
"epoch": 0.21490270526815378,
"grad_norm": 0.51953125,
"learning_rate": 1.890301710607582e-05,
"loss": 1.2312,
"step": 283
},
{
"epoch": 0.21566207878500238,
"grad_norm": 0.4609375,
"learning_rate": 1.8891222681391853e-05,
"loss": 1.2243,
"step": 284
},
{
"epoch": 0.21642145230185098,
"grad_norm": 0.5234375,
"learning_rate": 1.8879368910381423e-05,
"loss": 1.2593,
"step": 285
},
{
"epoch": 0.21718082581869957,
"grad_norm": 0.640625,
"learning_rate": 1.8867455872165006e-05,
"loss": 1.3375,
"step": 286
},
{
"epoch": 0.21794019933554817,
"grad_norm": 0.51171875,
"learning_rate": 1.8855483646258677e-05,
"loss": 1.2492,
"step": 287
},
{
"epoch": 0.21869957285239677,
"grad_norm": 0.61328125,
"learning_rate": 1.8843452312573557e-05,
"loss": 1.3306,
"step": 288
},
{
"epoch": 0.21945894636924537,
"grad_norm": 0.5546875,
"learning_rate": 1.8831361951415298e-05,
"loss": 1.2743,
"step": 289
},
{
"epoch": 0.22021831988609397,
"grad_norm": 0.6875,
"learning_rate": 1.881921264348355e-05,
"loss": 1.3699,
"step": 290
},
{
"epoch": 0.22097769340294257,
"grad_norm": 0.63671875,
"learning_rate": 1.880700446987141e-05,
"loss": 1.3548,
"step": 291
},
{
"epoch": 0.22173706691979117,
"grad_norm": 0.671875,
"learning_rate": 1.879473751206489e-05,
"loss": 1.3974,
"step": 292
},
{
"epoch": 0.22249644043663977,
"grad_norm": 0.5625,
"learning_rate": 1.8782411851942365e-05,
"loss": 1.29,
"step": 293
},
{
"epoch": 0.22325581395348837,
"grad_norm": 0.53125,
"learning_rate": 1.877002757177403e-05,
"loss": 1.2906,
"step": 294
},
{
"epoch": 0.22401518747033697,
"grad_norm": 0.462890625,
"learning_rate": 1.8757584754221363e-05,
"loss": 1.2135,
"step": 295
},
{
"epoch": 0.22477456098718557,
"grad_norm": 0.52734375,
"learning_rate": 1.8745083482336547e-05,
"loss": 1.3045,
"step": 296
},
{
"epoch": 0.22553393450403417,
"grad_norm": 0.5703125,
"learning_rate": 1.8732523839561934e-05,
"loss": 1.2641,
"step": 297
},
{
"epoch": 0.22629330802088277,
"grad_norm": 0.4921875,
"learning_rate": 1.8719905909729493e-05,
"loss": 1.2492,
"step": 298
},
{
"epoch": 0.22705268153773137,
"grad_norm": 0.5078125,
"learning_rate": 1.8707229777060242e-05,
"loss": 1.2867,
"step": 299
},
{
"epoch": 0.22781205505457996,
"grad_norm": 0.5390625,
"learning_rate": 1.869449552616367e-05,
"loss": 1.2946,
"step": 300
},
{
"epoch": 0.22857142857142856,
"grad_norm": 0.53125,
"learning_rate": 1.8681703242037208e-05,
"loss": 1.3014,
"step": 301
},
{
"epoch": 0.22933080208827716,
"grad_norm": 0.470703125,
"learning_rate": 1.8668853010065633e-05,
"loss": 1.2937,
"step": 302
},
{
"epoch": 0.23009017560512576,
"grad_norm": 0.57421875,
"learning_rate": 1.86559449160205e-05,
"loss": 1.2866,
"step": 303
},
{
"epoch": 0.23084954912197436,
"grad_norm": 0.51953125,
"learning_rate": 1.8642979046059595e-05,
"loss": 1.2542,
"step": 304
},
{
"epoch": 0.23160892263882296,
"grad_norm": 0.5078125,
"learning_rate": 1.8629955486726324e-05,
"loss": 1.2718,
"step": 305
},
{
"epoch": 0.23236829615567156,
"grad_norm": 0.5234375,
"learning_rate": 1.861687432494916e-05,
"loss": 1.2645,
"step": 306
},
{
"epoch": 0.23312766967252016,
"grad_norm": 0.54296875,
"learning_rate": 1.8603735648041054e-05,
"loss": 1.2895,
"step": 307
},
{
"epoch": 0.23388704318936876,
"grad_norm": 0.578125,
"learning_rate": 1.8590539543698852e-05,
"loss": 1.322,
"step": 308
},
{
"epoch": 0.23464641670621736,
"grad_norm": 0.49609375,
"learning_rate": 1.8577286100002723e-05,
"loss": 1.2584,
"step": 309
},
{
"epoch": 0.23540579022306596,
"grad_norm": 0.52734375,
"learning_rate": 1.856397540541554e-05,
"loss": 1.2814,
"step": 310
},
{
"epoch": 0.23616516373991456,
"grad_norm": 0.52734375,
"learning_rate": 1.855060754878233e-05,
"loss": 1.2865,
"step": 311
},
{
"epoch": 0.23692453725676316,
"grad_norm": 0.466796875,
"learning_rate": 1.853718261932964e-05,
"loss": 1.2597,
"step": 312
},
{
"epoch": 0.23768391077361178,
"grad_norm": 0.5078125,
"learning_rate": 1.852370070666498e-05,
"loss": 1.2556,
"step": 313
},
{
"epoch": 0.23844328429046038,
"grad_norm": 0.55078125,
"learning_rate": 1.8510161900776186e-05,
"loss": 1.304,
"step": 314
},
{
"epoch": 0.23920265780730898,
"grad_norm": 0.439453125,
"learning_rate": 1.8496566292030864e-05,
"loss": 1.2148,
"step": 315
},
{
"epoch": 0.23996203132415758,
"grad_norm": 0.515625,
"learning_rate": 1.8482913971175737e-05,
"loss": 1.2887,
"step": 316
},
{
"epoch": 0.24072140484100618,
"grad_norm": 0.57421875,
"learning_rate": 1.846920502933609e-05,
"loss": 1.3276,
"step": 317
},
{
"epoch": 0.24148077835785478,
"grad_norm": 0.474609375,
"learning_rate": 1.8455439558015117e-05,
"loss": 1.2681,
"step": 318
},
{
"epoch": 0.24224015187470338,
"grad_norm": 0.55078125,
"learning_rate": 1.8441617649093334e-05,
"loss": 1.2898,
"step": 319
},
{
"epoch": 0.24299952539155198,
"grad_norm": 0.5234375,
"learning_rate": 1.8427739394827976e-05,
"loss": 1.2785,
"step": 320
},
{
"epoch": 0.24375889890840058,
"grad_norm": 0.4765625,
"learning_rate": 1.8413804887852343e-05,
"loss": 1.1799,
"step": 321
},
{
"epoch": 0.24451827242524918,
"grad_norm": 0.45703125,
"learning_rate": 1.839981422117523e-05,
"loss": 1.1951,
"step": 322
},
{
"epoch": 0.24527764594209778,
"grad_norm": 0.546875,
"learning_rate": 1.8385767488180255e-05,
"loss": 1.3233,
"step": 323
},
{
"epoch": 0.24603701945894638,
"grad_norm": 0.451171875,
"learning_rate": 1.8371664782625287e-05,
"loss": 1.2204,
"step": 324
},
{
"epoch": 0.24679639297579498,
"grad_norm": 0.478515625,
"learning_rate": 1.8357506198641784e-05,
"loss": 1.2763,
"step": 325
},
{
"epoch": 0.24755576649264358,
"grad_norm": 0.578125,
"learning_rate": 1.8343291830734176e-05,
"loss": 1.3397,
"step": 326
},
{
"epoch": 0.24831514000949217,
"grad_norm": 0.52734375,
"learning_rate": 1.8329021773779242e-05,
"loss": 1.3029,
"step": 327
},
{
"epoch": 0.24907451352634077,
"grad_norm": 0.54296875,
"learning_rate": 1.8314696123025456e-05,
"loss": 1.2977,
"step": 328
},
{
"epoch": 0.24983388704318937,
"grad_norm": 0.5078125,
"learning_rate": 1.8300314974092372e-05,
"loss": 1.2915,
"step": 329
},
{
"epoch": 0.25059326056003794,
"grad_norm": 0.478515625,
"learning_rate": 1.8285878422969982e-05,
"loss": 1.2278,
"step": 330
},
{
"epoch": 0.25135263407688657,
"grad_norm": 0.421875,
"learning_rate": 1.827138656601807e-05,
"loss": 1.2337,
"step": 331
},
{
"epoch": 0.25211200759373514,
"grad_norm": 0.52734375,
"learning_rate": 1.825683949996556e-05,
"loss": 1.2978,
"step": 332
},
{
"epoch": 0.25287138111058377,
"grad_norm": 0.5390625,
"learning_rate": 1.8242237321909895e-05,
"loss": 1.2512,
"step": 333
},
{
"epoch": 0.25363075462743234,
"grad_norm": 0.49609375,
"learning_rate": 1.8227580129316368e-05,
"loss": 1.2702,
"step": 334
},
{
"epoch": 0.25439012814428097,
"grad_norm": 0.4609375,
"learning_rate": 1.821286802001747e-05,
"loss": 1.2253,
"step": 335
},
{
"epoch": 0.25514950166112954,
"grad_norm": 0.5390625,
"learning_rate": 1.819810109221227e-05,
"loss": 1.2708,
"step": 336
},
{
"epoch": 0.25590887517797817,
"grad_norm": 0.44140625,
"learning_rate": 1.81832794444657e-05,
"loss": 1.2157,
"step": 337
},
{
"epoch": 0.2566682486948268,
"grad_norm": 0.5703125,
"learning_rate": 1.8168403175707958e-05,
"loss": 1.3529,
"step": 338
},
{
"epoch": 0.25742762221167537,
"grad_norm": 0.390625,
"learning_rate": 1.815347238523381e-05,
"loss": 1.1796,
"step": 339
},
{
"epoch": 0.258186995728524,
"grad_norm": 0.466796875,
"learning_rate": 1.813848717270195e-05,
"loss": 1.2568,
"step": 340
},
{
"epoch": 0.25894636924537257,
"grad_norm": 0.490234375,
"learning_rate": 1.812344763813431e-05,
"loss": 1.2732,
"step": 341
},
{
"epoch": 0.2597057427622212,
"grad_norm": 0.44140625,
"learning_rate": 1.8108353881915403e-05,
"loss": 1.2737,
"step": 342
},
{
"epoch": 0.26046511627906976,
"grad_norm": 0.45703125,
"learning_rate": 1.8093206004791673e-05,
"loss": 1.2281,
"step": 343
},
{
"epoch": 0.2612244897959184,
"grad_norm": 0.546875,
"learning_rate": 1.8078004107870797e-05,
"loss": 1.3148,
"step": 344
},
{
"epoch": 0.26198386331276696,
"grad_norm": 0.44921875,
"learning_rate": 1.806274829262101e-05,
"loss": 1.2584,
"step": 345
},
{
"epoch": 0.2627432368296156,
"grad_norm": 0.5078125,
"learning_rate": 1.8047438660870447e-05,
"loss": 1.2665,
"step": 346
},
{
"epoch": 0.26350261034646416,
"grad_norm": 0.51171875,
"learning_rate": 1.803207531480645e-05,
"loss": 1.2892,
"step": 347
},
{
"epoch": 0.2642619838633128,
"grad_norm": 0.51953125,
"learning_rate": 1.8016658356974885e-05,
"loss": 1.2782,
"step": 348
},
{
"epoch": 0.26502135738016136,
"grad_norm": 0.46484375,
"learning_rate": 1.800118789027947e-05,
"loss": 1.2857,
"step": 349
},
{
"epoch": 0.26578073089701,
"grad_norm": 0.455078125,
"learning_rate": 1.798566401798106e-05,
"loss": 1.2529,
"step": 350
},
{
"epoch": 0.26654010441385856,
"grad_norm": 0.466796875,
"learning_rate": 1.7970086843697e-05,
"loss": 1.2445,
"step": 351
},
{
"epoch": 0.2672994779307072,
"grad_norm": 0.439453125,
"learning_rate": 1.7954456471400393e-05,
"loss": 1.2143,
"step": 352
},
{
"epoch": 0.26805885144755576,
"grad_norm": 0.421875,
"learning_rate": 1.793877300541944e-05,
"loss": 1.2444,
"step": 353
},
{
"epoch": 0.2688182249644044,
"grad_norm": 0.474609375,
"learning_rate": 1.7923036550436706e-05,
"loss": 1.2674,
"step": 354
},
{
"epoch": 0.26957759848125296,
"grad_norm": 0.5,
"learning_rate": 1.7907247211488456e-05,
"loss": 1.2926,
"step": 355
},
{
"epoch": 0.2703369719981016,
"grad_norm": 0.439453125,
"learning_rate": 1.789140509396394e-05,
"loss": 1.2125,
"step": 356
},
{
"epoch": 0.27109634551495015,
"grad_norm": 0.443359375,
"learning_rate": 1.7875510303604678e-05,
"loss": 1.1936,
"step": 357
},
{
"epoch": 0.2718557190317988,
"grad_norm": 0.462890625,
"learning_rate": 1.7859562946503787e-05,
"loss": 1.2251,
"step": 358
},
{
"epoch": 0.27261509254864735,
"grad_norm": 0.470703125,
"learning_rate": 1.784356312910523e-05,
"loss": 1.2829,
"step": 359
},
{
"epoch": 0.273374466065496,
"grad_norm": 0.44140625,
"learning_rate": 1.7827510958203147e-05,
"loss": 1.2277,
"step": 360
},
{
"epoch": 0.27413383958234455,
"grad_norm": 0.486328125,
"learning_rate": 1.78114065409411e-05,
"loss": 1.2715,
"step": 361
},
{
"epoch": 0.2748932130991932,
"grad_norm": 0.47265625,
"learning_rate": 1.7795249984811397e-05,
"loss": 1.2467,
"step": 362
},
{
"epoch": 0.27565258661604175,
"grad_norm": 0.455078125,
"learning_rate": 1.7779041397654355e-05,
"loss": 1.2529,
"step": 363
},
{
"epoch": 0.2764119601328904,
"grad_norm": 0.5,
"learning_rate": 1.7762780887657576e-05,
"loss": 1.2749,
"step": 364
},
{
"epoch": 0.27717133364973895,
"grad_norm": 0.40234375,
"learning_rate": 1.7746468563355243e-05,
"loss": 1.1978,
"step": 365
},
{
"epoch": 0.2779307071665876,
"grad_norm": 0.451171875,
"learning_rate": 1.773010453362737e-05,
"loss": 1.244,
"step": 366
},
{
"epoch": 0.27869008068343615,
"grad_norm": 0.53515625,
"learning_rate": 1.7713688907699107e-05,
"loss": 1.3013,
"step": 367
},
{
"epoch": 0.2794494542002848,
"grad_norm": 0.482421875,
"learning_rate": 1.769722179513998e-05,
"loss": 1.2608,
"step": 368
},
{
"epoch": 0.28020882771713335,
"grad_norm": 0.412109375,
"learning_rate": 1.7680703305863177e-05,
"loss": 1.1853,
"step": 369
},
{
"epoch": 0.280968201233982,
"grad_norm": 0.44140625,
"learning_rate": 1.7664133550124815e-05,
"loss": 1.2565,
"step": 370
},
{
"epoch": 0.28172757475083055,
"grad_norm": 0.41796875,
"learning_rate": 1.7647512638523193e-05,
"loss": 1.1891,
"step": 371
},
{
"epoch": 0.28248694826767917,
"grad_norm": 0.4375,
"learning_rate": 1.7630840681998068e-05,
"loss": 1.231,
"step": 372
},
{
"epoch": 0.28324632178452774,
"grad_norm": 0.490234375,
"learning_rate": 1.7614117791829897e-05,
"loss": 1.2935,
"step": 373
},
{
"epoch": 0.28400569530137637,
"grad_norm": 0.52734375,
"learning_rate": 1.759734407963911e-05,
"loss": 1.2953,
"step": 374
},
{
"epoch": 0.28476506881822494,
"grad_norm": 0.482421875,
"learning_rate": 1.7580519657385368e-05,
"loss": 1.2782,
"step": 375
},
{
"epoch": 0.28552444233507357,
"grad_norm": 0.55859375,
"learning_rate": 1.7563644637366786e-05,
"loss": 1.333,
"step": 376
},
{
"epoch": 0.28628381585192214,
"grad_norm": 0.515625,
"learning_rate": 1.754671913221923e-05,
"loss": 1.2813,
"step": 377
},
{
"epoch": 0.28704318936877077,
"grad_norm": 0.498046875,
"learning_rate": 1.752974325491551e-05,
"loss": 1.2581,
"step": 378
},
{
"epoch": 0.28780256288561934,
"grad_norm": 0.400390625,
"learning_rate": 1.7512717118764687e-05,
"loss": 1.2302,
"step": 379
},
{
"epoch": 0.28856193640246797,
"grad_norm": 0.466796875,
"learning_rate": 1.7495640837411265e-05,
"loss": 1.2359,
"step": 380
},
{
"epoch": 0.28932130991931654,
"grad_norm": 0.51953125,
"learning_rate": 1.747851452483445e-05,
"loss": 1.2548,
"step": 381
},
{
"epoch": 0.29008068343616517,
"grad_norm": 0.47265625,
"learning_rate": 1.7461338295347404e-05,
"loss": 1.2752,
"step": 382
},
{
"epoch": 0.29084005695301374,
"grad_norm": 0.392578125,
"learning_rate": 1.7444112263596474e-05,
"loss": 1.2092,
"step": 383
},
{
"epoch": 0.29159943046986236,
"grad_norm": 0.51171875,
"learning_rate": 1.74268365445604e-05,
"loss": 1.3045,
"step": 384
},
{
"epoch": 0.292358803986711,
"grad_norm": 0.46484375,
"learning_rate": 1.7409511253549592e-05,
"loss": 1.2586,
"step": 385
},
{
"epoch": 0.29311817750355956,
"grad_norm": 0.439453125,
"learning_rate": 1.7392136506205332e-05,
"loss": 1.1966,
"step": 386
},
{
"epoch": 0.2938775510204082,
"grad_norm": 0.408203125,
"learning_rate": 1.7374712418498997e-05,
"loss": 1.1853,
"step": 387
},
{
"epoch": 0.29463692453725676,
"grad_norm": 0.439453125,
"learning_rate": 1.735723910673132e-05,
"loss": 1.2408,
"step": 388
},
{
"epoch": 0.2953962980541054,
"grad_norm": 0.412109375,
"learning_rate": 1.7339716687531564e-05,
"loss": 1.163,
"step": 389
},
{
"epoch": 0.29615567157095396,
"grad_norm": 0.50390625,
"learning_rate": 1.7322145277856793e-05,
"loss": 1.2941,
"step": 390
},
{
"epoch": 0.2969150450878026,
"grad_norm": 0.419921875,
"learning_rate": 1.7304524994991056e-05,
"loss": 1.2504,
"step": 391
},
{
"epoch": 0.29767441860465116,
"grad_norm": 0.470703125,
"learning_rate": 1.7286855956544616e-05,
"loss": 1.2842,
"step": 392
},
{
"epoch": 0.2984337921214998,
"grad_norm": 0.41796875,
"learning_rate": 1.726913828045317e-05,
"loss": 1.2403,
"step": 393
},
{
"epoch": 0.29919316563834836,
"grad_norm": 0.498046875,
"learning_rate": 1.725137208497705e-05,
"loss": 1.254,
"step": 394
},
{
"epoch": 0.299952539155197,
"grad_norm": 0.4609375,
"learning_rate": 1.7233557488700453e-05,
"loss": 1.2395,
"step": 395
},
{
"epoch": 0.30071191267204556,
"grad_norm": 0.53125,
"learning_rate": 1.7215694610530624e-05,
"loss": 1.2705,
"step": 396
},
{
"epoch": 0.3014712861888942,
"grad_norm": 0.416015625,
"learning_rate": 1.7197783569697084e-05,
"loss": 1.2212,
"step": 397
},
{
"epoch": 0.30223065970574275,
"grad_norm": 0.51171875,
"learning_rate": 1.7179824485750824e-05,
"loss": 1.2975,
"step": 398
},
{
"epoch": 0.3029900332225914,
"grad_norm": 0.43359375,
"learning_rate": 1.7161817478563504e-05,
"loss": 1.2402,
"step": 399
},
{
"epoch": 0.30374940673943995,
"grad_norm": 0.423828125,
"learning_rate": 1.7143762668326667e-05,
"loss": 1.2287,
"step": 400
},
{
"epoch": 0.3045087802562886,
"grad_norm": 0.451171875,
"learning_rate": 1.712566017555092e-05,
"loss": 1.2097,
"step": 401
},
{
"epoch": 0.30526815377313715,
"grad_norm": 0.51953125,
"learning_rate": 1.7107510121065138e-05,
"loss": 1.3114,
"step": 402
},
{
"epoch": 0.3060275272899858,
"grad_norm": 0.423828125,
"learning_rate": 1.7089312626015663e-05,
"loss": 1.2468,
"step": 403
},
{
"epoch": 0.30678690080683435,
"grad_norm": 0.384765625,
"learning_rate": 1.7071067811865477e-05,
"loss": 1.1837,
"step": 404
},
{
"epoch": 0.307546274323683,
"grad_norm": 0.470703125,
"learning_rate": 1.7052775800393415e-05,
"loss": 1.238,
"step": 405
},
{
"epoch": 0.30830564784053155,
"grad_norm": 0.39453125,
"learning_rate": 1.703443671369333e-05,
"loss": 1.217,
"step": 406
},
{
"epoch": 0.3090650213573802,
"grad_norm": 0.384765625,
"learning_rate": 1.7016050674173304e-05,
"loss": 1.2202,
"step": 407
},
{
"epoch": 0.30982439487422875,
"grad_norm": 0.45703125,
"learning_rate": 1.69976178045548e-05,
"loss": 1.2238,
"step": 408
},
{
"epoch": 0.3105837683910774,
"grad_norm": 0.435546875,
"learning_rate": 1.6979138227871858e-05,
"loss": 1.2318,
"step": 409
},
{
"epoch": 0.31134314190792595,
"grad_norm": 0.453125,
"learning_rate": 1.696061206747029e-05,
"loss": 1.2208,
"step": 410
},
{
"epoch": 0.3121025154247746,
"grad_norm": 0.453125,
"learning_rate": 1.6942039447006823e-05,
"loss": 1.2223,
"step": 411
},
{
"epoch": 0.31286188894162315,
"grad_norm": 0.33984375,
"learning_rate": 1.6923420490448298e-05,
"loss": 1.1626,
"step": 412
},
{
"epoch": 0.3136212624584718,
"grad_norm": 0.5625,
"learning_rate": 1.6904755322070846e-05,
"loss": 1.2768,
"step": 413
},
{
"epoch": 0.31438063597532034,
"grad_norm": 0.462890625,
"learning_rate": 1.688604406645903e-05,
"loss": 1.2694,
"step": 414
},
{
"epoch": 0.31514000949216897,
"grad_norm": 0.39453125,
"learning_rate": 1.686728684850505e-05,
"loss": 1.1856,
"step": 415
},
{
"epoch": 0.31589938300901754,
"grad_norm": 0.380859375,
"learning_rate": 1.6848483793407874e-05,
"loss": 1.2184,
"step": 416
},
{
"epoch": 0.31665875652586617,
"grad_norm": 0.361328125,
"learning_rate": 1.6829635026672432e-05,
"loss": 1.1899,
"step": 417
},
{
"epoch": 0.31741813004271474,
"grad_norm": 0.44921875,
"learning_rate": 1.6810740674108763e-05,
"loss": 1.2078,
"step": 418
},
{
"epoch": 0.31817750355956337,
"grad_norm": 0.46484375,
"learning_rate": 1.6791800861831176e-05,
"loss": 1.2226,
"step": 419
},
{
"epoch": 0.31893687707641194,
"grad_norm": 0.404296875,
"learning_rate": 1.6772815716257414e-05,
"loss": 1.2044,
"step": 420
},
{
"epoch": 0.31969625059326057,
"grad_norm": 0.44921875,
"learning_rate": 1.6753785364107796e-05,
"loss": 1.2699,
"step": 421
},
{
"epoch": 0.32045562411010914,
"grad_norm": 0.37109375,
"learning_rate": 1.6734709932404404e-05,
"loss": 1.1732,
"step": 422
},
{
"epoch": 0.32121499762695777,
"grad_norm": 0.48046875,
"learning_rate": 1.6715589548470187e-05,
"loss": 1.2655,
"step": 423
},
{
"epoch": 0.32197437114380634,
"grad_norm": 0.40625,
"learning_rate": 1.6696424339928153e-05,
"loss": 1.2044,
"step": 424
},
{
"epoch": 0.32273374466065496,
"grad_norm": 0.427734375,
"learning_rate": 1.6677214434700495e-05,
"loss": 1.2083,
"step": 425
},
{
"epoch": 0.32349311817750354,
"grad_norm": 0.4453125,
"learning_rate": 1.665795996100775e-05,
"loss": 1.2273,
"step": 426
},
{
"epoch": 0.32425249169435216,
"grad_norm": 0.5,
"learning_rate": 1.663866104736793e-05,
"loss": 1.2407,
"step": 427
},
{
"epoch": 0.32501186521120073,
"grad_norm": 0.390625,
"learning_rate": 1.6619317822595666e-05,
"loss": 1.2166,
"step": 428
},
{
"epoch": 0.32577123872804936,
"grad_norm": 0.42578125,
"learning_rate": 1.6599930415801374e-05,
"loss": 1.238,
"step": 429
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.390625,
"learning_rate": 1.658049895639034e-05,
"loss": 1.1813,
"step": 430
},
{
"epoch": 0.32728998576174656,
"grad_norm": 0.423828125,
"learning_rate": 1.6561023574061925e-05,
"loss": 1.2264,
"step": 431
},
{
"epoch": 0.32804935927859513,
"grad_norm": 0.40625,
"learning_rate": 1.6541504398808633e-05,
"loss": 1.2364,
"step": 432
},
{
"epoch": 0.32880873279544376,
"grad_norm": 0.44921875,
"learning_rate": 1.6521941560915284e-05,
"loss": 1.2339,
"step": 433
},
{
"epoch": 0.3295681063122924,
"grad_norm": 0.4765625,
"learning_rate": 1.6502335190958135e-05,
"loss": 1.2952,
"step": 434
},
{
"epoch": 0.33032747982914096,
"grad_norm": 0.380859375,
"learning_rate": 1.648268541980401e-05,
"loss": 1.195,
"step": 435
},
{
"epoch": 0.3310868533459896,
"grad_norm": 0.490234375,
"learning_rate": 1.646299237860941e-05,
"loss": 1.2866,
"step": 436
},
{
"epoch": 0.33184622686283816,
"grad_norm": 0.392578125,
"learning_rate": 1.6443256198819665e-05,
"loss": 1.2219,
"step": 437
},
{
"epoch": 0.3326056003796868,
"grad_norm": 0.427734375,
"learning_rate": 1.6423477012168038e-05,
"loss": 1.2458,
"step": 438
},
{
"epoch": 0.33336497389653535,
"grad_norm": 0.384765625,
"learning_rate": 1.640365495067485e-05,
"loss": 1.21,
"step": 439
},
{
"epoch": 0.334124347413384,
"grad_norm": 0.416015625,
"learning_rate": 1.638379014664659e-05,
"loss": 1.2286,
"step": 440
},
{
"epoch": 0.33488372093023255,
"grad_norm": 0.40625,
"learning_rate": 1.636388273267506e-05,
"loss": 1.1945,
"step": 441
},
{
"epoch": 0.3356430944470812,
"grad_norm": 0.5078125,
"learning_rate": 1.6343932841636455e-05,
"loss": 1.3204,
"step": 442
},
{
"epoch": 0.33640246796392975,
"grad_norm": 0.423828125,
"learning_rate": 1.63239406066905e-05,
"loss": 1.2361,
"step": 443
},
{
"epoch": 0.3371618414807784,
"grad_norm": 0.40234375,
"learning_rate": 1.6303906161279554e-05,
"loss": 1.1951,
"step": 444
},
{
"epoch": 0.33792121499762695,
"grad_norm": 0.43359375,
"learning_rate": 1.6283829639127705e-05,
"loss": 1.2686,
"step": 445
},
{
"epoch": 0.3386805885144756,
"grad_norm": 0.482421875,
"learning_rate": 1.6263711174239914e-05,
"loss": 1.264,
"step": 446
},
{
"epoch": 0.33943996203132415,
"grad_norm": 0.4375,
"learning_rate": 1.6243550900901076e-05,
"loss": 1.2668,
"step": 447
},
{
"epoch": 0.3401993355481728,
"grad_norm": 0.408203125,
"learning_rate": 1.6223348953675163e-05,
"loss": 1.1683,
"step": 448
},
{
"epoch": 0.34095870906502135,
"grad_norm": 0.408203125,
"learning_rate": 1.6203105467404284e-05,
"loss": 1.2147,
"step": 449
},
{
"epoch": 0.34171808258187,
"grad_norm": 0.400390625,
"learning_rate": 1.6182820577207842e-05,
"loss": 1.2178,
"step": 450
},
{
"epoch": 0.34247745609871855,
"grad_norm": 0.408203125,
"learning_rate": 1.6162494418481574e-05,
"loss": 1.2321,
"step": 451
},
{
"epoch": 0.3432368296155672,
"grad_norm": 0.447265625,
"learning_rate": 1.6142127126896682e-05,
"loss": 1.2495,
"step": 452
},
{
"epoch": 0.34399620313241575,
"grad_norm": 0.38671875,
"learning_rate": 1.612171883839891e-05,
"loss": 1.1807,
"step": 453
},
{
"epoch": 0.3447555766492644,
"grad_norm": 0.38671875,
"learning_rate": 1.6101269689207656e-05,
"loss": 1.1941,
"step": 454
},
{
"epoch": 0.34551495016611294,
"grad_norm": 0.369140625,
"learning_rate": 1.6080779815815043e-05,
"loss": 1.2159,
"step": 455
},
{
"epoch": 0.34627432368296157,
"grad_norm": 0.412109375,
"learning_rate": 1.6060249354985023e-05,
"loss": 1.222,
"step": 456
},
{
"epoch": 0.34703369719981014,
"grad_norm": 0.44140625,
"learning_rate": 1.603967844375245e-05,
"loss": 1.2526,
"step": 457
},
{
"epoch": 0.34779307071665877,
"grad_norm": 0.3671875,
"learning_rate": 1.6019067219422178e-05,
"loss": 1.1691,
"step": 458
},
{
"epoch": 0.34855244423350734,
"grad_norm": 0.390625,
"learning_rate": 1.5998415819568135e-05,
"loss": 1.1933,
"step": 459
},
{
"epoch": 0.34931181775035597,
"grad_norm": 0.50390625,
"learning_rate": 1.597772438203241e-05,
"loss": 1.2525,
"step": 460
},
{
"epoch": 0.35007119126720454,
"grad_norm": 0.38671875,
"learning_rate": 1.5956993044924334e-05,
"loss": 1.2022,
"step": 461
},
{
"epoch": 0.35083056478405317,
"grad_norm": 0.470703125,
"learning_rate": 1.593622194661956e-05,
"loss": 1.2853,
"step": 462
},
{
"epoch": 0.35158993830090174,
"grad_norm": 0.466796875,
"learning_rate": 1.5915411225759122e-05,
"loss": 1.3113,
"step": 463
},
{
"epoch": 0.35234931181775037,
"grad_norm": 0.462890625,
"learning_rate": 1.5894561021248535e-05,
"loss": 1.246,
"step": 464
},
{
"epoch": 0.35310868533459894,
"grad_norm": 0.376953125,
"learning_rate": 1.5873671472256854e-05,
"loss": 1.1929,
"step": 465
},
{
"epoch": 0.35386805885144756,
"grad_norm": 0.416015625,
"learning_rate": 1.5852742718215743e-05,
"loss": 1.2469,
"step": 466
},
{
"epoch": 0.35462743236829614,
"grad_norm": 0.35546875,
"learning_rate": 1.5831774898818558e-05,
"loss": 1.1592,
"step": 467
},
{
"epoch": 0.35538680588514476,
"grad_norm": 0.40625,
"learning_rate": 1.5810768154019386e-05,
"loss": 1.2145,
"step": 468
},
{
"epoch": 0.35614617940199333,
"grad_norm": 0.400390625,
"learning_rate": 1.5789722624032143e-05,
"loss": 1.1859,
"step": 469
},
{
"epoch": 0.35690555291884196,
"grad_norm": 0.423828125,
"learning_rate": 1.576863844932963e-05,
"loss": 1.2184,
"step": 470
},
{
"epoch": 0.35766492643569053,
"grad_norm": 0.435546875,
"learning_rate": 1.5747515770642582e-05,
"loss": 1.2126,
"step": 471
},
{
"epoch": 0.35842429995253916,
"grad_norm": 0.443359375,
"learning_rate": 1.5726354728958736e-05,
"loss": 1.2569,
"step": 472
},
{
"epoch": 0.35918367346938773,
"grad_norm": 0.39453125,
"learning_rate": 1.570515546552189e-05,
"loss": 1.2173,
"step": 473
},
{
"epoch": 0.35994304698623636,
"grad_norm": 0.400390625,
"learning_rate": 1.568391812183097e-05,
"loss": 1.1995,
"step": 474
},
{
"epoch": 0.36070242050308493,
"grad_norm": 0.40234375,
"learning_rate": 1.566264283963907e-05,
"loss": 1.238,
"step": 475
},
{
"epoch": 0.36146179401993356,
"grad_norm": 0.400390625,
"learning_rate": 1.5641329760952514e-05,
"loss": 1.2179,
"step": 476
},
{
"epoch": 0.36222116753678213,
"grad_norm": 0.41015625,
"learning_rate": 1.5619979028029898e-05,
"loss": 1.2148,
"step": 477
},
{
"epoch": 0.36298054105363076,
"grad_norm": 0.3828125,
"learning_rate": 1.5598590783381165e-05,
"loss": 1.201,
"step": 478
},
{
"epoch": 0.36373991457047933,
"grad_norm": 0.40234375,
"learning_rate": 1.5577165169766627e-05,
"loss": 1.2383,
"step": 479
},
{
"epoch": 0.36449928808732796,
"grad_norm": 0.396484375,
"learning_rate": 1.5555702330196024e-05,
"loss": 1.2399,
"step": 480
},
{
"epoch": 0.3652586616041765,
"grad_norm": 0.41015625,
"learning_rate": 1.5534202407927574e-05,
"loss": 1.2565,
"step": 481
},
{
"epoch": 0.36601803512102515,
"grad_norm": 0.41796875,
"learning_rate": 1.5512665546467008e-05,
"loss": 1.2256,
"step": 482
},
{
"epoch": 0.3667774086378738,
"grad_norm": 0.38671875,
"learning_rate": 1.549109188956661e-05,
"loss": 1.1796,
"step": 483
},
{
"epoch": 0.36753678215472235,
"grad_norm": 0.404296875,
"learning_rate": 1.5469481581224274e-05,
"loss": 1.2004,
"step": 484
},
{
"epoch": 0.368296155671571,
"grad_norm": 0.369140625,
"learning_rate": 1.5447834765682515e-05,
"loss": 1.1787,
"step": 485
},
{
"epoch": 0.36905552918841955,
"grad_norm": 0.337890625,
"learning_rate": 1.5426151587427548e-05,
"loss": 1.1656,
"step": 486
},
{
"epoch": 0.3698149027052682,
"grad_norm": 0.376953125,
"learning_rate": 1.540443219118827e-05,
"loss": 1.1887,
"step": 487
},
{
"epoch": 0.37057427622211675,
"grad_norm": 0.427734375,
"learning_rate": 1.5382676721935344e-05,
"loss": 1.2309,
"step": 488
},
{
"epoch": 0.3713336497389654,
"grad_norm": 0.365234375,
"learning_rate": 1.5360885324880205e-05,
"loss": 1.1869,
"step": 489
},
{
"epoch": 0.37209302325581395,
"grad_norm": 0.4296875,
"learning_rate": 1.5339058145474086e-05,
"loss": 1.2477,
"step": 490
},
{
"epoch": 0.3728523967726626,
"grad_norm": 0.412109375,
"learning_rate": 1.5317195329407067e-05,
"loss": 1.2257,
"step": 491
},
{
"epoch": 0.37361177028951115,
"grad_norm": 0.4140625,
"learning_rate": 1.529529702260709e-05,
"loss": 1.2565,
"step": 492
},
{
"epoch": 0.3743711438063598,
"grad_norm": 0.41015625,
"learning_rate": 1.5273363371238983e-05,
"loss": 1.1869,
"step": 493
},
{
"epoch": 0.37513051732320835,
"grad_norm": 0.3828125,
"learning_rate": 1.5251394521703496e-05,
"loss": 1.2229,
"step": 494
},
{
"epoch": 0.375889890840057,
"grad_norm": 0.427734375,
"learning_rate": 1.5229390620636309e-05,
"loss": 1.2105,
"step": 495
},
{
"epoch": 0.37664926435690554,
"grad_norm": 0.37890625,
"learning_rate": 1.5207351814907068e-05,
"loss": 1.2271,
"step": 496
},
{
"epoch": 0.37740863787375417,
"grad_norm": 0.361328125,
"learning_rate": 1.5185278251618391e-05,
"loss": 1.1995,
"step": 497
},
{
"epoch": 0.37816801139060274,
"grad_norm": 0.318359375,
"learning_rate": 1.51631700781049e-05,
"loss": 1.1512,
"step": 498
},
{
"epoch": 0.37892738490745137,
"grad_norm": 0.384765625,
"learning_rate": 1.5141027441932217e-05,
"loss": 1.2129,
"step": 499
},
{
"epoch": 0.37968675842429994,
"grad_norm": 0.44921875,
"learning_rate": 1.5118850490896012e-05,
"loss": 1.2336,
"step": 500
},
{
"epoch": 0.38044613194114857,
"grad_norm": 0.388671875,
"learning_rate": 1.5096639373020976e-05,
"loss": 1.1947,
"step": 501
},
{
"epoch": 0.38120550545799714,
"grad_norm": 0.373046875,
"learning_rate": 1.5074394236559871e-05,
"loss": 1.2024,
"step": 502
},
{
"epoch": 0.38196487897484577,
"grad_norm": 0.3828125,
"learning_rate": 1.5052115229992512e-05,
"loss": 1.2024,
"step": 503
},
{
"epoch": 0.38272425249169434,
"grad_norm": 0.41796875,
"learning_rate": 1.5029802502024788e-05,
"loss": 1.2601,
"step": 504
},
{
"epoch": 0.38348362600854297,
"grad_norm": 0.373046875,
"learning_rate": 1.5007456201587676e-05,
"loss": 1.2082,
"step": 505
},
{
"epoch": 0.38424299952539154,
"grad_norm": 0.357421875,
"learning_rate": 1.4985076477836232e-05,
"loss": 1.1751,
"step": 506
},
{
"epoch": 0.38500237304224016,
"grad_norm": 0.34375,
"learning_rate": 1.4962663480148606e-05,
"loss": 1.1682,
"step": 507
},
{
"epoch": 0.38576174655908874,
"grad_norm": 0.400390625,
"learning_rate": 1.4940217358125042e-05,
"loss": 1.222,
"step": 508
},
{
"epoch": 0.38652112007593736,
"grad_norm": 0.376953125,
"learning_rate": 1.4917738261586878e-05,
"loss": 1.1834,
"step": 509
},
{
"epoch": 0.38728049359278593,
"grad_norm": 0.38671875,
"learning_rate": 1.489522634057555e-05,
"loss": 1.1874,
"step": 510
},
{
"epoch": 0.38803986710963456,
"grad_norm": 0.41015625,
"learning_rate": 1.4872681745351582e-05,
"loss": 1.2168,
"step": 511
},
{
"epoch": 0.38879924062648313,
"grad_norm": 0.44921875,
"learning_rate": 1.4850104626393598e-05,
"loss": 1.2838,
"step": 512
},
{
"epoch": 0.38955861414333176,
"grad_norm": 0.39453125,
"learning_rate": 1.4827495134397298e-05,
"loss": 1.1814,
"step": 513
},
{
"epoch": 0.39031798766018033,
"grad_norm": 0.421875,
"learning_rate": 1.4804853420274471e-05,
"loss": 1.2424,
"step": 514
},
{
"epoch": 0.39107736117702896,
"grad_norm": 0.48046875,
"learning_rate": 1.4782179635151978e-05,
"loss": 1.2785,
"step": 515
},
{
"epoch": 0.39183673469387753,
"grad_norm": 0.41015625,
"learning_rate": 1.4759473930370738e-05,
"loss": 1.2162,
"step": 516
},
{
"epoch": 0.39259610821072616,
"grad_norm": 0.3828125,
"learning_rate": 1.473673645748473e-05,
"loss": 1.2142,
"step": 517
},
{
"epoch": 0.39335548172757473,
"grad_norm": 0.3984375,
"learning_rate": 1.4713967368259981e-05,
"loss": 1.2056,
"step": 518
},
{
"epoch": 0.39411485524442336,
"grad_norm": 0.427734375,
"learning_rate": 1.469116681467353e-05,
"loss": 1.2555,
"step": 519
},
{
"epoch": 0.39487422876127193,
"grad_norm": 0.353515625,
"learning_rate": 1.4668334948912455e-05,
"loss": 1.1837,
"step": 520
},
{
"epoch": 0.39563360227812056,
"grad_norm": 0.390625,
"learning_rate": 1.4645471923372818e-05,
"loss": 1.192,
"step": 521
},
{
"epoch": 0.3963929757949691,
"grad_norm": 0.3828125,
"learning_rate": 1.4622577890658668e-05,
"loss": 1.2303,
"step": 522
},
{
"epoch": 0.39715234931181775,
"grad_norm": 0.439453125,
"learning_rate": 1.4599653003581016e-05,
"loss": 1.2871,
"step": 523
},
{
"epoch": 0.3979117228286663,
"grad_norm": 0.404296875,
"learning_rate": 1.4576697415156818e-05,
"loss": 1.2274,
"step": 524
},
{
"epoch": 0.39867109634551495,
"grad_norm": 0.408203125,
"learning_rate": 1.4553711278607953e-05,
"loss": 1.2148,
"step": 525
},
{
"epoch": 0.3994304698623635,
"grad_norm": 0.298828125,
"learning_rate": 1.4530694747360203e-05,
"loss": 1.123,
"step": 526
},
{
"epoch": 0.40018984337921215,
"grad_norm": 0.380859375,
"learning_rate": 1.4507647975042221e-05,
"loss": 1.1685,
"step": 527
},
{
"epoch": 0.4009492168960607,
"grad_norm": 0.388671875,
"learning_rate": 1.4484571115484508e-05,
"loss": 1.2304,
"step": 528
},
{
"epoch": 0.40170859041290935,
"grad_norm": 0.375,
"learning_rate": 1.44614643227184e-05,
"loss": 1.1826,
"step": 529
},
{
"epoch": 0.402467963929758,
"grad_norm": 0.369140625,
"learning_rate": 1.4438327750975009e-05,
"loss": 1.2434,
"step": 530
},
{
"epoch": 0.40322733744660655,
"grad_norm": 0.3671875,
"learning_rate": 1.4415161554684239e-05,
"loss": 1.177,
"step": 531
},
{
"epoch": 0.4039867109634552,
"grad_norm": 0.357421875,
"learning_rate": 1.4391965888473705e-05,
"loss": 1.1952,
"step": 532
},
{
"epoch": 0.40474608448030375,
"grad_norm": 0.4609375,
"learning_rate": 1.436874090716774e-05,
"loss": 1.2767,
"step": 533
},
{
"epoch": 0.4055054579971524,
"grad_norm": 0.408203125,
"learning_rate": 1.434548676578634e-05,
"loss": 1.2334,
"step": 534
},
{
"epoch": 0.40626483151400095,
"grad_norm": 0.376953125,
"learning_rate": 1.432220361954414e-05,
"loss": 1.1755,
"step": 535
},
{
"epoch": 0.4070242050308496,
"grad_norm": 0.32421875,
"learning_rate": 1.429889162384937e-05,
"loss": 1.1615,
"step": 536
},
{
"epoch": 0.40778357854769814,
"grad_norm": 0.408203125,
"learning_rate": 1.4275550934302822e-05,
"loss": 1.2221,
"step": 537
},
{
"epoch": 0.40854295206454677,
"grad_norm": 0.357421875,
"learning_rate": 1.4252181706696817e-05,
"loss": 1.2065,
"step": 538
},
{
"epoch": 0.40930232558139534,
"grad_norm": 0.388671875,
"learning_rate": 1.4228784097014156e-05,
"loss": 1.2361,
"step": 539
},
{
"epoch": 0.41006169909824397,
"grad_norm": 0.349609375,
"learning_rate": 1.4205358261427076e-05,
"loss": 1.1413,
"step": 540
},
{
"epoch": 0.41082107261509254,
"grad_norm": 0.34765625,
"learning_rate": 1.4181904356296225e-05,
"loss": 1.1597,
"step": 541
},
{
"epoch": 0.41158044613194117,
"grad_norm": 0.33984375,
"learning_rate": 1.4158422538169596e-05,
"loss": 1.1972,
"step": 542
},
{
"epoch": 0.41233981964878974,
"grad_norm": 0.365234375,
"learning_rate": 1.4134912963781501e-05,
"loss": 1.1908,
"step": 543
},
{
"epoch": 0.41309919316563837,
"grad_norm": 0.35546875,
"learning_rate": 1.4111375790051511e-05,
"loss": 1.2195,
"step": 544
},
{
"epoch": 0.41385856668248694,
"grad_norm": 0.439453125,
"learning_rate": 1.4087811174083422e-05,
"loss": 1.2675,
"step": 545
},
{
"epoch": 0.41461794019933557,
"grad_norm": 0.38671875,
"learning_rate": 1.4064219273164192e-05,
"loss": 1.2397,
"step": 546
},
{
"epoch": 0.41537731371618414,
"grad_norm": 0.37109375,
"learning_rate": 1.40406002447629e-05,
"loss": 1.1723,
"step": 547
},
{
"epoch": 0.41613668723303276,
"grad_norm": 0.361328125,
"learning_rate": 1.4016954246529697e-05,
"loss": 1.1875,
"step": 548
},
{
"epoch": 0.41689606074988134,
"grad_norm": 0.3984375,
"learning_rate": 1.3993281436294743e-05,
"loss": 1.1678,
"step": 549
},
{
"epoch": 0.41765543426672996,
"grad_norm": 0.44140625,
"learning_rate": 1.3969581972067166e-05,
"loss": 1.2402,
"step": 550
},
{
"epoch": 0.41841480778357854,
"grad_norm": 0.40625,
"learning_rate": 1.3945856012034003e-05,
"loss": 1.2136,
"step": 551
},
{
"epoch": 0.41917418130042716,
"grad_norm": 0.40625,
"learning_rate": 1.392210371455913e-05,
"loss": 1.1965,
"step": 552
},
{
"epoch": 0.41993355481727573,
"grad_norm": 0.4140625,
"learning_rate": 1.3898325238182235e-05,
"loss": 1.1927,
"step": 553
},
{
"epoch": 0.42069292833412436,
"grad_norm": 0.3671875,
"learning_rate": 1.3874520741617734e-05,
"loss": 1.2102,
"step": 554
},
{
"epoch": 0.42145230185097293,
"grad_norm": 0.41796875,
"learning_rate": 1.3850690383753718e-05,
"loss": 1.2486,
"step": 555
},
{
"epoch": 0.42221167536782156,
"grad_norm": 0.359375,
"learning_rate": 1.3826834323650899e-05,
"loss": 1.1525,
"step": 556
},
{
"epoch": 0.42297104888467013,
"grad_norm": 0.4140625,
"learning_rate": 1.3802952720541543e-05,
"loss": 1.2107,
"step": 557
},
{
"epoch": 0.42373042240151876,
"grad_norm": 0.41796875,
"learning_rate": 1.377904573382841e-05,
"loss": 1.22,
"step": 558
},
{
"epoch": 0.42448979591836733,
"grad_norm": 0.34765625,
"learning_rate": 1.3755113523083679e-05,
"loss": 1.1559,
"step": 559
},
{
"epoch": 0.42524916943521596,
"grad_norm": 0.361328125,
"learning_rate": 1.3731156248047903e-05,
"loss": 1.2233,
"step": 560
},
{
"epoch": 0.42600854295206453,
"grad_norm": 0.314453125,
"learning_rate": 1.3707174068628927e-05,
"loss": 1.1299,
"step": 561
},
{
"epoch": 0.42676791646891316,
"grad_norm": 0.361328125,
"learning_rate": 1.3683167144900833e-05,
"loss": 1.182,
"step": 562
},
{
"epoch": 0.4275272899857617,
"grad_norm": 0.400390625,
"learning_rate": 1.3659135637102845e-05,
"loss": 1.2002,
"step": 563
},
{
"epoch": 0.42828666350261035,
"grad_norm": 0.375,
"learning_rate": 1.3635079705638298e-05,
"loss": 1.2027,
"step": 564
},
{
"epoch": 0.4290460370194589,
"grad_norm": 0.359375,
"learning_rate": 1.3610999511073544e-05,
"loss": 1.1353,
"step": 565
},
{
"epoch": 0.42980541053630755,
"grad_norm": 0.349609375,
"learning_rate": 1.3586895214136875e-05,
"loss": 1.1544,
"step": 566
},
{
"epoch": 0.4305647840531561,
"grad_norm": 0.318359375,
"learning_rate": 1.3562766975717468e-05,
"loss": 1.1621,
"step": 567
},
{
"epoch": 0.43132415757000475,
"grad_norm": 0.30859375,
"learning_rate": 1.3538614956864297e-05,
"loss": 1.1351,
"step": 568
},
{
"epoch": 0.4320835310868533,
"grad_norm": 0.3828125,
"learning_rate": 1.3514439318785067e-05,
"loss": 1.2011,
"step": 569
},
{
"epoch": 0.43284290460370195,
"grad_norm": 0.34375,
"learning_rate": 1.3490240222845139e-05,
"loss": 1.1835,
"step": 570
},
{
"epoch": 0.4336022781205505,
"grad_norm": 0.392578125,
"learning_rate": 1.3466017830566433e-05,
"loss": 1.1919,
"step": 571
},
{
"epoch": 0.43436165163739915,
"grad_norm": 0.33203125,
"learning_rate": 1.3441772303626387e-05,
"loss": 1.1314,
"step": 572
},
{
"epoch": 0.4351210251542477,
"grad_norm": 0.34375,
"learning_rate": 1.3417503803856835e-05,
"loss": 1.1481,
"step": 573
},
{
"epoch": 0.43588039867109635,
"grad_norm": 0.36328125,
"learning_rate": 1.3393212493242964e-05,
"loss": 1.2217,
"step": 574
},
{
"epoch": 0.4366397721879449,
"grad_norm": 0.39453125,
"learning_rate": 1.3368898533922202e-05,
"loss": 1.1553,
"step": 575
},
{
"epoch": 0.43739914570479355,
"grad_norm": 0.41015625,
"learning_rate": 1.3344562088183166e-05,
"loss": 1.2189,
"step": 576
},
{
"epoch": 0.4381585192216421,
"grad_norm": 0.330078125,
"learning_rate": 1.3320203318464552e-05,
"loss": 1.1301,
"step": 577
},
{
"epoch": 0.43891789273849074,
"grad_norm": 0.40234375,
"learning_rate": 1.3295822387354071e-05,
"loss": 1.2088,
"step": 578
},
{
"epoch": 0.43967726625533937,
"grad_norm": 0.337890625,
"learning_rate": 1.3271419457587344e-05,
"loss": 1.1475,
"step": 579
},
{
"epoch": 0.44043663977218794,
"grad_norm": 0.33203125,
"learning_rate": 1.3246994692046837e-05,
"loss": 1.16,
"step": 580
},
{
"epoch": 0.44119601328903657,
"grad_norm": 0.384765625,
"learning_rate": 1.3222548253760756e-05,
"loss": 1.1764,
"step": 581
},
{
"epoch": 0.44195538680588514,
"grad_norm": 0.41015625,
"learning_rate": 1.319808030590197e-05,
"loss": 1.206,
"step": 582
},
{
"epoch": 0.44271476032273377,
"grad_norm": 0.32421875,
"learning_rate": 1.3173591011786917e-05,
"loss": 1.1696,
"step": 583
},
{
"epoch": 0.44347413383958234,
"grad_norm": 0.359375,
"learning_rate": 1.3149080534874519e-05,
"loss": 1.1935,
"step": 584
},
{
"epoch": 0.44423350735643097,
"grad_norm": 0.384765625,
"learning_rate": 1.3124549038765078e-05,
"loss": 1.1915,
"step": 585
},
{
"epoch": 0.44499288087327954,
"grad_norm": 0.326171875,
"learning_rate": 1.3099996687199203e-05,
"loss": 1.159,
"step": 586
},
{
"epoch": 0.44575225439012817,
"grad_norm": 0.39453125,
"learning_rate": 1.3075423644056699e-05,
"loss": 1.2283,
"step": 587
},
{
"epoch": 0.44651162790697674,
"grad_norm": 0.361328125,
"learning_rate": 1.305083007335549e-05,
"loss": 1.1949,
"step": 588
},
{
"epoch": 0.44727100142382537,
"grad_norm": 0.3359375,
"learning_rate": 1.3026216139250505e-05,
"loss": 1.1641,
"step": 589
},
{
"epoch": 0.44803037494067394,
"grad_norm": 0.375,
"learning_rate": 1.3001582006032601e-05,
"loss": 1.2071,
"step": 590
},
{
"epoch": 0.44878974845752256,
"grad_norm": 0.3671875,
"learning_rate": 1.2976927838127453e-05,
"loss": 1.16,
"step": 591
},
{
"epoch": 0.44954912197437114,
"grad_norm": 0.38671875,
"learning_rate": 1.2952253800094467e-05,
"loss": 1.2239,
"step": 592
},
{
"epoch": 0.45030849549121976,
"grad_norm": 0.361328125,
"learning_rate": 1.2927560056625672e-05,
"loss": 1.1955,
"step": 593
},
{
"epoch": 0.45106786900806833,
"grad_norm": 0.34375,
"learning_rate": 1.2902846772544625e-05,
"loss": 1.1833,
"step": 594
},
{
"epoch": 0.45182724252491696,
"grad_norm": 0.36328125,
"learning_rate": 1.2878114112805315e-05,
"loss": 1.212,
"step": 595
},
{
"epoch": 0.45258661604176553,
"grad_norm": 0.38671875,
"learning_rate": 1.2853362242491054e-05,
"loss": 1.1979,
"step": 596
},
{
"epoch": 0.45334598955861416,
"grad_norm": 0.3203125,
"learning_rate": 1.2828591326813382e-05,
"loss": 1.1222,
"step": 597
},
{
"epoch": 0.45410536307546273,
"grad_norm": 0.375,
"learning_rate": 1.2803801531110956e-05,
"loss": 1.1922,
"step": 598
},
{
"epoch": 0.45486473659231136,
"grad_norm": 0.361328125,
"learning_rate": 1.2778993020848457e-05,
"loss": 1.1596,
"step": 599
},
{
"epoch": 0.45562411010915993,
"grad_norm": 0.392578125,
"learning_rate": 1.2754165961615482e-05,
"loss": 1.2171,
"step": 600
},
{
"epoch": 0.45638348362600856,
"grad_norm": 0.376953125,
"learning_rate": 1.2729320519125426e-05,
"loss": 1.1937,
"step": 601
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.34375,
"learning_rate": 1.2704456859214397e-05,
"loss": 1.1604,
"step": 602
},
{
"epoch": 0.45790223065970576,
"grad_norm": 0.380859375,
"learning_rate": 1.2679575147840102e-05,
"loss": 1.1724,
"step": 603
},
{
"epoch": 0.4586616041765543,
"grad_norm": 0.353515625,
"learning_rate": 1.2654675551080724e-05,
"loss": 1.1699,
"step": 604
},
{
"epoch": 0.45942097769340295,
"grad_norm": 0.3203125,
"learning_rate": 1.2629758235133838e-05,
"loss": 1.1697,
"step": 605
},
{
"epoch": 0.4601803512102515,
"grad_norm": 0.376953125,
"learning_rate": 1.2604823366315273e-05,
"loss": 1.1973,
"step": 606
},
{
"epoch": 0.46093972472710015,
"grad_norm": 0.451171875,
"learning_rate": 1.2579871111058042e-05,
"loss": 1.2494,
"step": 607
},
{
"epoch": 0.4616990982439487,
"grad_norm": 0.3359375,
"learning_rate": 1.2554901635911188e-05,
"loss": 1.1515,
"step": 608
},
{
"epoch": 0.46245847176079735,
"grad_norm": 0.353515625,
"learning_rate": 1.2529915107538698e-05,
"loss": 1.1638,
"step": 609
},
{
"epoch": 0.4632178452776459,
"grad_norm": 0.40234375,
"learning_rate": 1.2504911692718387e-05,
"loss": 1.2225,
"step": 610
},
{
"epoch": 0.46397721879449455,
"grad_norm": 0.365234375,
"learning_rate": 1.2479891558340777e-05,
"loss": 1.1996,
"step": 611
},
{
"epoch": 0.4647365923113431,
"grad_norm": 0.466796875,
"learning_rate": 1.2454854871407993e-05,
"loss": 1.2728,
"step": 612
},
{
"epoch": 0.46549596582819175,
"grad_norm": 0.31640625,
"learning_rate": 1.242980179903264e-05,
"loss": 1.1579,
"step": 613
},
{
"epoch": 0.4662553393450403,
"grad_norm": 0.380859375,
"learning_rate": 1.2404732508436693e-05,
"loss": 1.2026,
"step": 614
},
{
"epoch": 0.46701471286188895,
"grad_norm": 0.4140625,
"learning_rate": 1.2379647166950381e-05,
"loss": 1.1719,
"step": 615
},
{
"epoch": 0.4677740863787375,
"grad_norm": 0.353515625,
"learning_rate": 1.2354545942011058e-05,
"loss": 1.1853,
"step": 616
},
{
"epoch": 0.46853345989558615,
"grad_norm": 0.318359375,
"learning_rate": 1.2329429001162114e-05,
"loss": 1.1524,
"step": 617
},
{
"epoch": 0.4692928334124347,
"grad_norm": 0.419921875,
"learning_rate": 1.2304296512051814e-05,
"loss": 1.2056,
"step": 618
},
{
"epoch": 0.47005220692928334,
"grad_norm": 0.33984375,
"learning_rate": 1.2279148642432229e-05,
"loss": 1.187,
"step": 619
},
{
"epoch": 0.4708115804461319,
"grad_norm": 0.330078125,
"learning_rate": 1.2253985560158064e-05,
"loss": 1.1578,
"step": 620
},
{
"epoch": 0.47157095396298054,
"grad_norm": 0.3203125,
"learning_rate": 1.2228807433185588e-05,
"loss": 1.1355,
"step": 621
},
{
"epoch": 0.4723303274798291,
"grad_norm": 0.326171875,
"learning_rate": 1.2203614429571475e-05,
"loss": 1.1617,
"step": 622
},
{
"epoch": 0.47308970099667774,
"grad_norm": 0.416015625,
"learning_rate": 1.2178406717471702e-05,
"loss": 1.1254,
"step": 623
},
{
"epoch": 0.4738490745135263,
"grad_norm": 0.3828125,
"learning_rate": 1.2153184465140413e-05,
"loss": 1.1904,
"step": 624
},
{
"epoch": 0.47460844803037494,
"grad_norm": 0.337890625,
"learning_rate": 1.2127947840928816e-05,
"loss": 1.158,
"step": 625
},
{
"epoch": 0.47536782154722357,
"grad_norm": 0.36328125,
"learning_rate": 1.2102697013284035e-05,
"loss": 1.1188,
"step": 626
},
{
"epoch": 0.47612719506407214,
"grad_norm": 0.333984375,
"learning_rate": 1.207743215074801e-05,
"loss": 1.1458,
"step": 627
},
{
"epoch": 0.47688656858092077,
"grad_norm": 0.33203125,
"learning_rate": 1.2052153421956343e-05,
"loss": 1.1472,
"step": 628
},
{
"epoch": 0.47764594209776934,
"grad_norm": 0.3828125,
"learning_rate": 1.2026860995637211e-05,
"loss": 1.2092,
"step": 629
},
{
"epoch": 0.47840531561461797,
"grad_norm": 0.3828125,
"learning_rate": 1.2001555040610197e-05,
"loss": 1.1966,
"step": 630
},
{
"epoch": 0.47916468913146654,
"grad_norm": 0.3203125,
"learning_rate": 1.1976235725785202e-05,
"loss": 1.094,
"step": 631
},
{
"epoch": 0.47992406264831516,
"grad_norm": 0.34765625,
"learning_rate": 1.1950903220161286e-05,
"loss": 1.1493,
"step": 632
},
{
"epoch": 0.48068343616516374,
"grad_norm": 0.39453125,
"learning_rate": 1.1925557692825558e-05,
"loss": 1.2334,
"step": 633
},
{
"epoch": 0.48144280968201236,
"grad_norm": 0.310546875,
"learning_rate": 1.1900199312952047e-05,
"loss": 1.1418,
"step": 634
},
{
"epoch": 0.48220218319886093,
"grad_norm": 0.359375,
"learning_rate": 1.1874828249800565e-05,
"loss": 1.144,
"step": 635
},
{
"epoch": 0.48296155671570956,
"grad_norm": 0.353515625,
"learning_rate": 1.1849444672715587e-05,
"loss": 1.1465,
"step": 636
},
{
"epoch": 0.48372093023255813,
"grad_norm": 0.404296875,
"learning_rate": 1.1824048751125101e-05,
"loss": 1.2054,
"step": 637
},
{
"epoch": 0.48448030374940676,
"grad_norm": 0.310546875,
"learning_rate": 1.1798640654539511e-05,
"loss": 1.1376,
"step": 638
},
{
"epoch": 0.48523967726625533,
"grad_norm": 0.30859375,
"learning_rate": 1.1773220552550463e-05,
"loss": 1.1574,
"step": 639
},
{
"epoch": 0.48599905078310396,
"grad_norm": 0.4140625,
"learning_rate": 1.1747788614829758e-05,
"loss": 1.2302,
"step": 640
},
{
"epoch": 0.48675842429995253,
"grad_norm": 0.3046875,
"learning_rate": 1.1722345011128183e-05,
"loss": 1.1259,
"step": 641
},
{
"epoch": 0.48751779781680116,
"grad_norm": 0.32421875,
"learning_rate": 1.1696889911274394e-05,
"loss": 1.1542,
"step": 642
},
{
"epoch": 0.48827717133364973,
"grad_norm": 0.37109375,
"learning_rate": 1.1671423485173783e-05,
"loss": 1.23,
"step": 643
},
{
"epoch": 0.48903654485049836,
"grad_norm": 0.44921875,
"learning_rate": 1.164594590280734e-05,
"loss": 1.2568,
"step": 644
},
{
"epoch": 0.4897959183673469,
"grad_norm": 0.341796875,
"learning_rate": 1.162045733423052e-05,
"loss": 1.1619,
"step": 645
},
{
"epoch": 0.49055529188419555,
"grad_norm": 0.40234375,
"learning_rate": 1.159495794957211e-05,
"loss": 1.2003,
"step": 646
},
{
"epoch": 0.4913146654010441,
"grad_norm": 0.412109375,
"learning_rate": 1.1569447919033086e-05,
"loss": 1.2507,
"step": 647
},
{
"epoch": 0.49207403891789275,
"grad_norm": 0.337890625,
"learning_rate": 1.1543927412885489e-05,
"loss": 1.1381,
"step": 648
},
{
"epoch": 0.4928334124347413,
"grad_norm": 0.3515625,
"learning_rate": 1.1518396601471273e-05,
"loss": 1.1715,
"step": 649
},
{
"epoch": 0.49359278595158995,
"grad_norm": 0.359375,
"learning_rate": 1.149285565520119e-05,
"loss": 1.1947,
"step": 650
},
{
"epoch": 0.4943521594684385,
"grad_norm": 0.3515625,
"learning_rate": 1.1467304744553618e-05,
"loss": 1.1499,
"step": 651
},
{
"epoch": 0.49511153298528715,
"grad_norm": 0.35546875,
"learning_rate": 1.1441744040073469e-05,
"loss": 1.1873,
"step": 652
},
{
"epoch": 0.4958709065021357,
"grad_norm": 0.3203125,
"learning_rate": 1.1416173712371008e-05,
"loss": 1.1398,
"step": 653
},
{
"epoch": 0.49663028001898435,
"grad_norm": 0.388671875,
"learning_rate": 1.1390593932120742e-05,
"loss": 1.2044,
"step": 654
},
{
"epoch": 0.4973896535358329,
"grad_norm": 0.349609375,
"learning_rate": 1.1365004870060266e-05,
"loss": 1.1856,
"step": 655
},
{
"epoch": 0.49814902705268155,
"grad_norm": 0.345703125,
"learning_rate": 1.1339406696989128e-05,
"loss": 1.1601,
"step": 656
},
{
"epoch": 0.4989084005695301,
"grad_norm": 0.408203125,
"learning_rate": 1.1313799583767693e-05,
"loss": 1.2261,
"step": 657
},
{
"epoch": 0.49966777408637875,
"grad_norm": 0.3515625,
"learning_rate": 1.1288183701315996e-05,
"loss": 1.1504,
"step": 658
},
{
"epoch": 0.5004271476032274,
"grad_norm": 0.361328125,
"learning_rate": 1.1262559220612602e-05,
"loss": 1.1967,
"step": 659
},
{
"epoch": 0.5011865211200759,
"grad_norm": 0.359375,
"learning_rate": 1.123692631269348e-05,
"loss": 1.1724,
"step": 660
},
{
"epoch": 0.5019458946369245,
"grad_norm": 0.326171875,
"learning_rate": 1.1211285148650826e-05,
"loss": 1.158,
"step": 661
},
{
"epoch": 0.5027052681537731,
"grad_norm": 0.36328125,
"learning_rate": 1.1185635899631963e-05,
"loss": 1.1994,
"step": 662
},
{
"epoch": 0.5034646416706218,
"grad_norm": 0.3515625,
"learning_rate": 1.1159978736838169e-05,
"loss": 1.1844,
"step": 663
},
{
"epoch": 0.5042240151874703,
"grad_norm": 0.322265625,
"learning_rate": 1.1134313831523547e-05,
"loss": 1.151,
"step": 664
},
{
"epoch": 0.5049833887043189,
"grad_norm": 0.390625,
"learning_rate": 1.1108641354993876e-05,
"loss": 1.1455,
"step": 665
},
{
"epoch": 0.5057427622211675,
"grad_norm": 0.373046875,
"learning_rate": 1.1082961478605476e-05,
"loss": 1.1656,
"step": 666
},
{
"epoch": 0.5065021357380162,
"grad_norm": 0.328125,
"learning_rate": 1.1057274373764056e-05,
"loss": 1.141,
"step": 667
},
{
"epoch": 0.5072615092548647,
"grad_norm": 0.302734375,
"learning_rate": 1.103158021192357e-05,
"loss": 1.136,
"step": 668
},
{
"epoch": 0.5080208827717133,
"grad_norm": 0.361328125,
"learning_rate": 1.1005879164585083e-05,
"loss": 1.1902,
"step": 669
},
{
"epoch": 0.5087802562885619,
"grad_norm": 0.345703125,
"learning_rate": 1.098017140329561e-05,
"loss": 1.1535,
"step": 670
},
{
"epoch": 0.5095396298054106,
"grad_norm": 0.390625,
"learning_rate": 1.0954457099646981e-05,
"loss": 1.1909,
"step": 671
},
{
"epoch": 0.5102990033222591,
"grad_norm": 0.40625,
"learning_rate": 1.0928736425274702e-05,
"loss": 1.1445,
"step": 672
},
{
"epoch": 0.5110583768391077,
"grad_norm": 0.326171875,
"learning_rate": 1.0903009551856795e-05,
"loss": 1.1776,
"step": 673
},
{
"epoch": 0.5118177503559563,
"grad_norm": 0.330078125,
"learning_rate": 1.0877276651112662e-05,
"loss": 1.1799,
"step": 674
},
{
"epoch": 0.512577123872805,
"grad_norm": 0.451171875,
"learning_rate": 1.0851537894801935e-05,
"loss": 1.2681,
"step": 675
},
{
"epoch": 0.5133364973896536,
"grad_norm": 0.392578125,
"learning_rate": 1.0825793454723325e-05,
"loss": 1.1858,
"step": 676
},
{
"epoch": 0.5140958709065021,
"grad_norm": 0.388671875,
"learning_rate": 1.0800043502713486e-05,
"loss": 1.2268,
"step": 677
},
{
"epoch": 0.5148552444233507,
"grad_norm": 0.375,
"learning_rate": 1.0774288210645862e-05,
"loss": 1.1628,
"step": 678
},
{
"epoch": 0.5156146179401994,
"grad_norm": 0.400390625,
"learning_rate": 1.0748527750429545e-05,
"loss": 1.2508,
"step": 679
},
{
"epoch": 0.516373991457048,
"grad_norm": 0.373046875,
"learning_rate": 1.0722762294008107e-05,
"loss": 1.1958,
"step": 680
},
{
"epoch": 0.5171333649738965,
"grad_norm": 0.326171875,
"learning_rate": 1.069699201335849e-05,
"loss": 1.13,
"step": 681
},
{
"epoch": 0.5178927384907451,
"grad_norm": 0.365234375,
"learning_rate": 1.0671217080489816e-05,
"loss": 1.2132,
"step": 682
},
{
"epoch": 0.5186521120075938,
"grad_norm": 0.408203125,
"learning_rate": 1.0645437667442273e-05,
"loss": 1.2433,
"step": 683
},
{
"epoch": 0.5194114855244424,
"grad_norm": 0.328125,
"learning_rate": 1.0619653946285948e-05,
"loss": 1.1013,
"step": 684
},
{
"epoch": 0.5201708590412909,
"grad_norm": 0.365234375,
"learning_rate": 1.0593866089119683e-05,
"loss": 1.171,
"step": 685
},
{
"epoch": 0.5209302325581395,
"grad_norm": 0.375,
"learning_rate": 1.0568074268069928e-05,
"loss": 1.1771,
"step": 686
},
{
"epoch": 0.5216896060749882,
"grad_norm": 0.396484375,
"learning_rate": 1.0542278655289588e-05,
"loss": 1.1808,
"step": 687
},
{
"epoch": 0.5224489795918368,
"grad_norm": 0.357421875,
"learning_rate": 1.0516479422956882e-05,
"loss": 1.1398,
"step": 688
},
{
"epoch": 0.5232083531086853,
"grad_norm": 0.38671875,
"learning_rate": 1.0490676743274181e-05,
"loss": 1.1954,
"step": 689
},
{
"epoch": 0.5239677266255339,
"grad_norm": 0.37890625,
"learning_rate": 1.0464870788466875e-05,
"loss": 1.1792,
"step": 690
},
{
"epoch": 0.5247271001423826,
"grad_norm": 0.3359375,
"learning_rate": 1.0439061730782207e-05,
"loss": 1.1585,
"step": 691
},
{
"epoch": 0.5254864736592312,
"grad_norm": 0.3203125,
"learning_rate": 1.0413249742488132e-05,
"loss": 1.1658,
"step": 692
},
{
"epoch": 0.5262458471760797,
"grad_norm": 0.337890625,
"learning_rate": 1.0387434995872174e-05,
"loss": 1.1443,
"step": 693
},
{
"epoch": 0.5270052206929283,
"grad_norm": 0.376953125,
"learning_rate": 1.0361617663240253e-05,
"loss": 1.176,
"step": 694
},
{
"epoch": 0.527764594209777,
"grad_norm": 0.345703125,
"learning_rate": 1.0335797916915568e-05,
"loss": 1.2121,
"step": 695
},
{
"epoch": 0.5285239677266256,
"grad_norm": 0.38671875,
"learning_rate": 1.0309975929237408e-05,
"loss": 1.209,
"step": 696
},
{
"epoch": 0.5292833412434741,
"grad_norm": 0.3203125,
"learning_rate": 1.0284151872560042e-05,
"loss": 1.1629,
"step": 697
},
{
"epoch": 0.5300427147603227,
"grad_norm": 0.376953125,
"learning_rate": 1.0258325919251537e-05,
"loss": 1.1606,
"step": 698
},
{
"epoch": 0.5308020882771713,
"grad_norm": 0.30078125,
"learning_rate": 1.0232498241692625e-05,
"loss": 1.1405,
"step": 699
},
{
"epoch": 0.53156146179402,
"grad_norm": 0.41796875,
"learning_rate": 1.0206669012275546e-05,
"loss": 1.1829,
"step": 700
},
{
"epoch": 0.5323208353108685,
"grad_norm": 0.33984375,
"learning_rate": 1.018083840340289e-05,
"loss": 1.1182,
"step": 701
},
{
"epoch": 0.5330802088277171,
"grad_norm": 0.380859375,
"learning_rate": 1.0155006587486468e-05,
"loss": 1.2416,
"step": 702
},
{
"epoch": 0.5338395823445657,
"grad_norm": 0.349609375,
"learning_rate": 1.0129173736946143e-05,
"loss": 1.1733,
"step": 703
},
{
"epoch": 0.5345989558614144,
"grad_norm": 0.333984375,
"learning_rate": 1.0103340024208674e-05,
"loss": 1.1117,
"step": 704
},
{
"epoch": 0.5353583293782629,
"grad_norm": 0.353515625,
"learning_rate": 1.007750562170659e-05,
"loss": 1.2096,
"step": 705
},
{
"epoch": 0.5361177028951115,
"grad_norm": 0.3515625,
"learning_rate": 1.0051670701877011e-05,
"loss": 1.1615,
"step": 706
},
{
"epoch": 0.5368770764119601,
"grad_norm": 0.322265625,
"learning_rate": 1.0025835437160523e-05,
"loss": 1.181,
"step": 707
},
{
"epoch": 0.5376364499288088,
"grad_norm": 0.40625,
"learning_rate": 1e-05,
"loss": 1.2599,
"step": 708
},
{
"epoch": 0.5383958234456573,
"grad_norm": 0.306640625,
"learning_rate": 9.97416456283948e-06,
"loss": 1.1557,
"step": 709
},
{
"epoch": 0.5391551969625059,
"grad_norm": 0.333984375,
"learning_rate": 9.948329298122989e-06,
"loss": 1.1486,
"step": 710
},
{
"epoch": 0.5399145704793545,
"grad_norm": 0.32421875,
"learning_rate": 9.922494378293414e-06,
"loss": 1.146,
"step": 711
},
{
"epoch": 0.5406739439962032,
"grad_norm": 0.375,
"learning_rate": 9.89665997579133e-06,
"loss": 1.1826,
"step": 712
},
{
"epoch": 0.5414333175130517,
"grad_norm": 0.353515625,
"learning_rate": 9.870826263053859e-06,
"loss": 1.1607,
"step": 713
},
{
"epoch": 0.5421926910299003,
"grad_norm": 0.318359375,
"learning_rate": 9.844993412513533e-06,
"loss": 1.1287,
"step": 714
},
{
"epoch": 0.5429520645467489,
"grad_norm": 0.466796875,
"learning_rate": 9.819161596597112e-06,
"loss": 1.3019,
"step": 715
},
{
"epoch": 0.5437114380635976,
"grad_norm": 0.330078125,
"learning_rate": 9.79333098772446e-06,
"loss": 1.1456,
"step": 716
},
{
"epoch": 0.5444708115804461,
"grad_norm": 0.333984375,
"learning_rate": 9.767501758307376e-06,
"loss": 1.1532,
"step": 717
},
{
"epoch": 0.5452301850972947,
"grad_norm": 0.275390625,
"learning_rate": 9.741674080748465e-06,
"loss": 1.1244,
"step": 718
},
{
"epoch": 0.5459895586141433,
"grad_norm": 0.333984375,
"learning_rate": 9.715848127439958e-06,
"loss": 1.1617,
"step": 719
},
{
"epoch": 0.546748932130992,
"grad_norm": 0.369140625,
"learning_rate": 9.690024070762597e-06,
"loss": 1.2031,
"step": 720
},
{
"epoch": 0.5475083056478405,
"grad_norm": 0.35546875,
"learning_rate": 9.664202083084437e-06,
"loss": 1.1701,
"step": 721
},
{
"epoch": 0.5482676791646891,
"grad_norm": 0.341796875,
"learning_rate": 9.638382336759749e-06,
"loss": 1.1756,
"step": 722
},
{
"epoch": 0.5490270526815377,
"grad_norm": 0.34375,
"learning_rate": 9.612565004127828e-06,
"loss": 1.192,
"step": 723
},
{
"epoch": 0.5497864261983864,
"grad_norm": 0.341796875,
"learning_rate": 9.586750257511868e-06,
"loss": 1.1673,
"step": 724
},
{
"epoch": 0.550545799715235,
"grad_norm": 0.3359375,
"learning_rate": 9.560938269217798e-06,
"loss": 1.1835,
"step": 725
},
{
"epoch": 0.5513051732320835,
"grad_norm": 0.34375,
"learning_rate": 9.53512921153313e-06,
"loss": 1.2177,
"step": 726
},
{
"epoch": 0.5520645467489321,
"grad_norm": 0.33984375,
"learning_rate": 9.50932325672582e-06,
"loss": 1.1675,
"step": 727
},
{
"epoch": 0.5528239202657808,
"grad_norm": 0.275390625,
"learning_rate": 9.483520577043121e-06,
"loss": 1.104,
"step": 728
},
{
"epoch": 0.5535832937826294,
"grad_norm": 0.31640625,
"learning_rate": 9.457721344710412e-06,
"loss": 1.126,
"step": 729
},
{
"epoch": 0.5543426672994779,
"grad_norm": 0.392578125,
"learning_rate": 9.431925731930079e-06,
"loss": 1.1852,
"step": 730
},
{
"epoch": 0.5551020408163265,
"grad_norm": 0.330078125,
"learning_rate": 9.406133910880319e-06,
"loss": 1.1576,
"step": 731
},
{
"epoch": 0.5558614143331752,
"grad_norm": 0.291015625,
"learning_rate": 9.380346053714055e-06,
"loss": 1.0863,
"step": 732
},
{
"epoch": 0.5566207878500238,
"grad_norm": 0.318359375,
"learning_rate": 9.354562332557728e-06,
"loss": 1.1338,
"step": 733
},
{
"epoch": 0.5573801613668723,
"grad_norm": 0.37890625,
"learning_rate": 9.328782919510186e-06,
"loss": 1.2238,
"step": 734
},
{
"epoch": 0.5581395348837209,
"grad_norm": 0.330078125,
"learning_rate": 9.303007986641515e-06,
"loss": 1.1432,
"step": 735
},
{
"epoch": 0.5588989084005695,
"grad_norm": 0.302734375,
"learning_rate": 9.277237705991895e-06,
"loss": 1.15,
"step": 736
},
{
"epoch": 0.5596582819174182,
"grad_norm": 0.283203125,
"learning_rate": 9.251472249570458e-06,
"loss": 1.1075,
"step": 737
},
{
"epoch": 0.5604176554342667,
"grad_norm": 0.33203125,
"learning_rate": 9.225711789354138e-06,
"loss": 1.1256,
"step": 738
},
{
"epoch": 0.5611770289511153,
"grad_norm": 0.357421875,
"learning_rate": 9.199956497286517e-06,
"loss": 1.1923,
"step": 739
},
{
"epoch": 0.561936402467964,
"grad_norm": 0.287109375,
"learning_rate": 9.174206545276678e-06,
"loss": 1.1069,
"step": 740
},
{
"epoch": 0.5626957759848126,
"grad_norm": 0.375,
"learning_rate": 9.148462105198068e-06,
"loss": 1.2118,
"step": 741
},
{
"epoch": 0.5634551495016611,
"grad_norm": 0.392578125,
"learning_rate": 9.12272334888734e-06,
"loss": 1.2203,
"step": 742
},
{
"epoch": 0.5642145230185097,
"grad_norm": 0.375,
"learning_rate": 9.096990448143203e-06,
"loss": 1.1714,
"step": 743
},
{
"epoch": 0.5649738965353583,
"grad_norm": 0.32421875,
"learning_rate": 9.0712635747253e-06,
"loss": 1.1562,
"step": 744
},
{
"epoch": 0.565733270052207,
"grad_norm": 0.3125,
"learning_rate": 9.045542900353022e-06,
"loss": 1.138,
"step": 745
},
{
"epoch": 0.5664926435690555,
"grad_norm": 0.376953125,
"learning_rate": 9.019828596704394e-06,
"loss": 1.2036,
"step": 746
},
{
"epoch": 0.5672520170859041,
"grad_norm": 0.3984375,
"learning_rate": 8.99412083541492e-06,
"loss": 1.2011,
"step": 747
},
{
"epoch": 0.5680113906027527,
"grad_norm": 0.3828125,
"learning_rate": 8.968419788076431e-06,
"loss": 1.2146,
"step": 748
},
{
"epoch": 0.5687707641196014,
"grad_norm": 0.3125,
"learning_rate": 8.942725626235949e-06,
"loss": 1.1499,
"step": 749
},
{
"epoch": 0.5695301376364499,
"grad_norm": 0.357421875,
"learning_rate": 8.917038521394526e-06,
"loss": 1.1884,
"step": 750
},
{
"epoch": 0.5702895111532985,
"grad_norm": 0.3359375,
"learning_rate": 8.891358645006126e-06,
"loss": 1.1455,
"step": 751
},
{
"epoch": 0.5710488846701471,
"grad_norm": 0.2578125,
"learning_rate": 8.865686168476458e-06,
"loss": 1.1044,
"step": 752
},
{
"epoch": 0.5718082581869958,
"grad_norm": 0.3671875,
"learning_rate": 8.840021263161831e-06,
"loss": 1.1989,
"step": 753
},
{
"epoch": 0.5725676317038443,
"grad_norm": 0.328125,
"learning_rate": 8.81436410036804e-06,
"loss": 1.1432,
"step": 754
},
{
"epoch": 0.5733270052206929,
"grad_norm": 0.30078125,
"learning_rate": 8.788714851349177e-06,
"loss": 1.1265,
"step": 755
},
{
"epoch": 0.5740863787375415,
"grad_norm": 0.326171875,
"learning_rate": 8.763073687306523e-06,
"loss": 1.1427,
"step": 756
},
{
"epoch": 0.5748457522543902,
"grad_norm": 0.345703125,
"learning_rate": 8.737440779387398e-06,
"loss": 1.1363,
"step": 757
},
{
"epoch": 0.5756051257712387,
"grad_norm": 0.326171875,
"learning_rate": 8.711816298684011e-06,
"loss": 1.1628,
"step": 758
},
{
"epoch": 0.5763644992880873,
"grad_norm": 0.4140625,
"learning_rate": 8.686200416232314e-06,
"loss": 1.2075,
"step": 759
},
{
"epoch": 0.5771238728049359,
"grad_norm": 0.3203125,
"learning_rate": 8.660593303010876e-06,
"loss": 1.1384,
"step": 760
},
{
"epoch": 0.5778832463217846,
"grad_norm": 0.3046875,
"learning_rate": 8.634995129939737e-06,
"loss": 1.1354,
"step": 761
},
{
"epoch": 0.5786426198386331,
"grad_norm": 0.390625,
"learning_rate": 8.609406067879258e-06,
"loss": 1.1626,
"step": 762
},
{
"epoch": 0.5794019933554817,
"grad_norm": 0.34765625,
"learning_rate": 8.583826287628996e-06,
"loss": 1.2072,
"step": 763
},
{
"epoch": 0.5801613668723303,
"grad_norm": 0.328125,
"learning_rate": 8.558255959926533e-06,
"loss": 1.1492,
"step": 764
},
{
"epoch": 0.580920740389179,
"grad_norm": 0.40234375,
"learning_rate": 8.532695255446384e-06,
"loss": 1.1948,
"step": 765
},
{
"epoch": 0.5816801139060275,
"grad_norm": 0.345703125,
"learning_rate": 8.507144344798814e-06,
"loss": 1.1786,
"step": 766
},
{
"epoch": 0.5824394874228761,
"grad_norm": 0.34765625,
"learning_rate": 8.481603398528727e-06,
"loss": 1.172,
"step": 767
},
{
"epoch": 0.5831988609397247,
"grad_norm": 0.322265625,
"learning_rate": 8.456072587114516e-06,
"loss": 1.1431,
"step": 768
},
{
"epoch": 0.5839582344565734,
"grad_norm": 0.3671875,
"learning_rate": 8.430552080966918e-06,
"loss": 1.2079,
"step": 769
},
{
"epoch": 0.584717607973422,
"grad_norm": 0.349609375,
"learning_rate": 8.405042050427891e-06,
"loss": 1.1885,
"step": 770
},
{
"epoch": 0.5854769814902705,
"grad_norm": 0.390625,
"learning_rate": 8.37954266576948e-06,
"loss": 1.1858,
"step": 771
},
{
"epoch": 0.5862363550071191,
"grad_norm": 0.380859375,
"learning_rate": 8.35405409719266e-06,
"loss": 1.2242,
"step": 772
},
{
"epoch": 0.5869957285239678,
"grad_norm": 0.369140625,
"learning_rate": 8.328576514826222e-06,
"loss": 1.1984,
"step": 773
},
{
"epoch": 0.5877551020408164,
"grad_norm": 0.29296875,
"learning_rate": 8.30311008872561e-06,
"loss": 1.1178,
"step": 774
},
{
"epoch": 0.5885144755576649,
"grad_norm": 0.2890625,
"learning_rate": 8.277654988871819e-06,
"loss": 1.1126,
"step": 775
},
{
"epoch": 0.5892738490745135,
"grad_norm": 0.337890625,
"learning_rate": 8.252211385170242e-06,
"loss": 1.1394,
"step": 776
},
{
"epoch": 0.5900332225913621,
"grad_norm": 0.341796875,
"learning_rate": 8.226779447449538e-06,
"loss": 1.1999,
"step": 777
},
{
"epoch": 0.5907925961082108,
"grad_norm": 0.328125,
"learning_rate": 8.201359345460496e-06,
"loss": 1.1602,
"step": 778
},
{
"epoch": 0.5915519696250593,
"grad_norm": 0.38671875,
"learning_rate": 8.175951248874902e-06,
"loss": 1.1864,
"step": 779
},
{
"epoch": 0.5923113431419079,
"grad_norm": 0.302734375,
"learning_rate": 8.150555327284417e-06,
"loss": 1.1053,
"step": 780
},
{
"epoch": 0.5930707166587565,
"grad_norm": 0.275390625,
"learning_rate": 8.125171750199436e-06,
"loss": 1.1004,
"step": 781
},
{
"epoch": 0.5938300901756052,
"grad_norm": 0.36328125,
"learning_rate": 8.099800687047958e-06,
"loss": 1.1189,
"step": 782
},
{
"epoch": 0.5945894636924537,
"grad_norm": 0.400390625,
"learning_rate": 8.074442307174445e-06,
"loss": 1.2653,
"step": 783
},
{
"epoch": 0.5953488372093023,
"grad_norm": 0.298828125,
"learning_rate": 8.04909677983872e-06,
"loss": 1.1253,
"step": 784
},
{
"epoch": 0.5961082107261509,
"grad_norm": 0.373046875,
"learning_rate": 8.023764274214802e-06,
"loss": 1.1351,
"step": 785
},
{
"epoch": 0.5968675842429996,
"grad_norm": 0.33984375,
"learning_rate": 7.998444959389803e-06,
"loss": 1.145,
"step": 786
},
{
"epoch": 0.5976269577598481,
"grad_norm": 0.302734375,
"learning_rate": 7.973139004362794e-06,
"loss": 1.1679,
"step": 787
},
{
"epoch": 0.5983863312766967,
"grad_norm": 0.3203125,
"learning_rate": 7.947846578043658e-06,
"loss": 1.1475,
"step": 788
},
{
"epoch": 0.5991457047935453,
"grad_norm": 0.34765625,
"learning_rate": 7.922567849251995e-06,
"loss": 1.1941,
"step": 789
},
{
"epoch": 0.599905078310394,
"grad_norm": 0.365234375,
"learning_rate": 7.897302986715967e-06,
"loss": 1.1754,
"step": 790
},
{
"epoch": 0.6006644518272425,
"grad_norm": 0.37890625,
"learning_rate": 7.872052159071186e-06,
"loss": 1.1762,
"step": 791
},
{
"epoch": 0.6014238253440911,
"grad_norm": 0.302734375,
"learning_rate": 7.846815534859592e-06,
"loss": 1.1361,
"step": 792
},
{
"epoch": 0.6021831988609397,
"grad_norm": 0.41015625,
"learning_rate": 7.821593282528301e-06,
"loss": 1.2727,
"step": 793
},
{
"epoch": 0.6029425723777884,
"grad_norm": 0.291015625,
"learning_rate": 7.796385570428527e-06,
"loss": 1.1568,
"step": 794
},
{
"epoch": 0.6037019458946369,
"grad_norm": 0.396484375,
"learning_rate": 7.771192566814412e-06,
"loss": 1.2494,
"step": 795
},
{
"epoch": 0.6044613194114855,
"grad_norm": 0.3828125,
"learning_rate": 7.746014439841941e-06,
"loss": 1.223,
"step": 796
},
{
"epoch": 0.6052206929283341,
"grad_norm": 0.337890625,
"learning_rate": 7.720851357567778e-06,
"loss": 1.1366,
"step": 797
},
{
"epoch": 0.6059800664451828,
"grad_norm": 0.3203125,
"learning_rate": 7.69570348794819e-06,
"loss": 1.1451,
"step": 798
},
{
"epoch": 0.6067394399620313,
"grad_norm": 0.29296875,
"learning_rate": 7.670570998837889e-06,
"loss": 1.1189,
"step": 799
},
{
"epoch": 0.6074988134788799,
"grad_norm": 0.25390625,
"learning_rate": 7.645454057988942e-06,
"loss": 1.1005,
"step": 800
},
{
"epoch": 0.6082581869957285,
"grad_norm": 0.31640625,
"learning_rate": 7.6203528330496245e-06,
"loss": 1.1741,
"step": 801
},
{
"epoch": 0.6090175605125772,
"grad_norm": 0.2734375,
"learning_rate": 7.595267491563311e-06,
"loss": 1.1124,
"step": 802
},
{
"epoch": 0.6097769340294257,
"grad_norm": 0.33984375,
"learning_rate": 7.570198200967363e-06,
"loss": 1.1459,
"step": 803
},
{
"epoch": 0.6105363075462743,
"grad_norm": 0.36328125,
"learning_rate": 7.545145128592009e-06,
"loss": 1.1668,
"step": 804
},
{
"epoch": 0.6112956810631229,
"grad_norm": 0.29296875,
"learning_rate": 7.520108441659223e-06,
"loss": 1.1384,
"step": 805
},
{
"epoch": 0.6120550545799716,
"grad_norm": 0.314453125,
"learning_rate": 7.495088307281619e-06,
"loss": 1.1462,
"step": 806
},
{
"epoch": 0.6128144280968201,
"grad_norm": 0.330078125,
"learning_rate": 7.470084892461305e-06,
"loss": 1.1645,
"step": 807
},
{
"epoch": 0.6135738016136687,
"grad_norm": 0.3359375,
"learning_rate": 7.445098364088815e-06,
"loss": 1.1709,
"step": 808
},
{
"epoch": 0.6143331751305173,
"grad_norm": 0.298828125,
"learning_rate": 7.420128888941958e-06,
"loss": 1.1914,
"step": 809
},
{
"epoch": 0.615092548647366,
"grad_norm": 0.4140625,
"learning_rate": 7.395176633684726e-06,
"loss": 1.2529,
"step": 810
},
{
"epoch": 0.6158519221642145,
"grad_norm": 0.30859375,
"learning_rate": 7.370241764866169e-06,
"loss": 1.1245,
"step": 811
},
{
"epoch": 0.6166112956810631,
"grad_norm": 0.359375,
"learning_rate": 7.34532444891928e-06,
"loss": 1.1952,
"step": 812
},
{
"epoch": 0.6173706691979117,
"grad_norm": 0.296875,
"learning_rate": 7.3204248521599e-06,
"loss": 1.1247,
"step": 813
},
{
"epoch": 0.6181300427147604,
"grad_norm": 0.33203125,
"learning_rate": 7.295543140785604e-06,
"loss": 1.1417,
"step": 814
},
{
"epoch": 0.6188894162316089,
"grad_norm": 0.27734375,
"learning_rate": 7.27067948087458e-06,
"loss": 1.1264,
"step": 815
},
{
"epoch": 0.6196487897484575,
"grad_norm": 0.322265625,
"learning_rate": 7.245834038384523e-06,
"loss": 1.176,
"step": 816
},
{
"epoch": 0.6204081632653061,
"grad_norm": 0.314453125,
"learning_rate": 7.221006979151546e-06,
"loss": 1.1171,
"step": 817
},
{
"epoch": 0.6211675367821547,
"grad_norm": 0.3828125,
"learning_rate": 7.196198468889047e-06,
"loss": 1.1906,
"step": 818
},
{
"epoch": 0.6219269102990034,
"grad_norm": 0.3046875,
"learning_rate": 7.171408673186619e-06,
"loss": 1.1394,
"step": 819
},
{
"epoch": 0.6226862838158519,
"grad_norm": 0.3125,
"learning_rate": 7.14663775750895e-06,
"loss": 1.1334,
"step": 820
},
{
"epoch": 0.6234456573327005,
"grad_norm": 0.3359375,
"learning_rate": 7.1218858871946885e-06,
"loss": 1.149,
"step": 821
},
{
"epoch": 0.6242050308495491,
"grad_norm": 0.3359375,
"learning_rate": 7.097153227455379e-06,
"loss": 1.1593,
"step": 822
},
{
"epoch": 0.6249644043663978,
"grad_norm": 0.3984375,
"learning_rate": 7.072439943374331e-06,
"loss": 1.1399,
"step": 823
},
{
"epoch": 0.6257237778832463,
"grad_norm": 0.376953125,
"learning_rate": 7.0477461999055365e-06,
"loss": 1.2022,
"step": 824
},
{
"epoch": 0.6264831514000949,
"grad_norm": 0.337890625,
"learning_rate": 7.023072161872551e-06,
"loss": 1.1374,
"step": 825
},
{
"epoch": 0.6272425249169435,
"grad_norm": 0.2734375,
"learning_rate": 6.998417993967403e-06,
"loss": 1.1267,
"step": 826
},
{
"epoch": 0.6280018984337922,
"grad_norm": 0.330078125,
"learning_rate": 6.973783860749499e-06,
"loss": 1.179,
"step": 827
},
{
"epoch": 0.6287612719506407,
"grad_norm": 0.349609375,
"learning_rate": 6.949169926644513e-06,
"loss": 1.1685,
"step": 828
},
{
"epoch": 0.6295206454674893,
"grad_norm": 0.365234375,
"learning_rate": 6.9245763559432996e-06,
"loss": 1.2012,
"step": 829
},
{
"epoch": 0.6302800189843379,
"grad_norm": 0.353515625,
"learning_rate": 6.9000033128008e-06,
"loss": 1.187,
"step": 830
},
{
"epoch": 0.6310393925011866,
"grad_norm": 0.373046875,
"learning_rate": 6.875450961234924e-06,
"loss": 1.1949,
"step": 831
},
{
"epoch": 0.6317987660180351,
"grad_norm": 0.3515625,
"learning_rate": 6.8509194651254825e-06,
"loss": 1.1995,
"step": 832
},
{
"epoch": 0.6325581395348837,
"grad_norm": 0.376953125,
"learning_rate": 6.826408988213083e-06,
"loss": 1.1705,
"step": 833
},
{
"epoch": 0.6333175130517323,
"grad_norm": 0.326171875,
"learning_rate": 6.801919694098034e-06,
"loss": 1.1469,
"step": 834
},
{
"epoch": 0.634076886568581,
"grad_norm": 0.357421875,
"learning_rate": 6.777451746239249e-06,
"loss": 1.1363,
"step": 835
},
{
"epoch": 0.6348362600854295,
"grad_norm": 0.33984375,
"learning_rate": 6.7530053079531664e-06,
"loss": 1.1968,
"step": 836
},
{
"epoch": 0.6355956336022781,
"grad_norm": 0.376953125,
"learning_rate": 6.7285805424126585e-06,
"loss": 1.2189,
"step": 837
},
{
"epoch": 0.6363550071191267,
"grad_norm": 0.298828125,
"learning_rate": 6.70417761264593e-06,
"loss": 1.1232,
"step": 838
},
{
"epoch": 0.6371143806359754,
"grad_norm": 0.33984375,
"learning_rate": 6.679796681535451e-06,
"loss": 1.1898,
"step": 839
},
{
"epoch": 0.6378737541528239,
"grad_norm": 0.296875,
"learning_rate": 6.655437911816838e-06,
"loss": 1.1666,
"step": 840
},
{
"epoch": 0.6386331276696725,
"grad_norm": 0.296875,
"learning_rate": 6.631101466077801e-06,
"loss": 1.146,
"step": 841
},
{
"epoch": 0.6393925011865211,
"grad_norm": 0.419921875,
"learning_rate": 6.60678750675704e-06,
"loss": 1.1723,
"step": 842
},
{
"epoch": 0.6401518747033698,
"grad_norm": 0.34375,
"learning_rate": 6.582496196143167e-06,
"loss": 1.1488,
"step": 843
},
{
"epoch": 0.6409112482202183,
"grad_norm": 0.3203125,
"learning_rate": 6.558227696373617e-06,
"loss": 1.1899,
"step": 844
},
{
"epoch": 0.6416706217370669,
"grad_norm": 0.3515625,
"learning_rate": 6.533982169433568e-06,
"loss": 1.1478,
"step": 845
},
{
"epoch": 0.6424299952539155,
"grad_norm": 0.333984375,
"learning_rate": 6.509759777154864e-06,
"loss": 1.1353,
"step": 846
},
{
"epoch": 0.6431893687707642,
"grad_norm": 0.28515625,
"learning_rate": 6.485560681214933e-06,
"loss": 1.1481,
"step": 847
},
{
"epoch": 0.6439487422876127,
"grad_norm": 0.298828125,
"learning_rate": 6.461385043135704e-06,
"loss": 1.1222,
"step": 848
},
{
"epoch": 0.6447081158044613,
"grad_norm": 0.328125,
"learning_rate": 6.437233024282538e-06,
"loss": 1.1029,
"step": 849
},
{
"epoch": 0.6454674893213099,
"grad_norm": 0.376953125,
"learning_rate": 6.413104785863128e-06,
"loss": 1.192,
"step": 850
},
{
"epoch": 0.6462268628381586,
"grad_norm": 0.36328125,
"learning_rate": 6.389000488926459e-06,
"loss": 1.2227,
"step": 851
},
{
"epoch": 0.6469862363550071,
"grad_norm": 0.279296875,
"learning_rate": 6.364920294361701e-06,
"loss": 1.0898,
"step": 852
},
{
"epoch": 0.6477456098718557,
"grad_norm": 0.375,
"learning_rate": 6.3408643628971585e-06,
"loss": 1.1882,
"step": 853
},
{
"epoch": 0.6485049833887043,
"grad_norm": 0.33984375,
"learning_rate": 6.316832855099173e-06,
"loss": 1.1572,
"step": 854
},
{
"epoch": 0.649264356905553,
"grad_norm": 0.296875,
"learning_rate": 6.292825931371075e-06,
"loss": 1.1056,
"step": 855
},
{
"epoch": 0.6500237304224015,
"grad_norm": 0.298828125,
"learning_rate": 6.2688437519521e-06,
"loss": 1.1232,
"step": 856
},
{
"epoch": 0.6507831039392501,
"grad_norm": 0.373046875,
"learning_rate": 6.244886476916325e-06,
"loss": 1.1479,
"step": 857
},
{
"epoch": 0.6515424774560987,
"grad_norm": 0.294921875,
"learning_rate": 6.220954266171597e-06,
"loss": 1.1355,
"step": 858
},
{
"epoch": 0.6523018509729474,
"grad_norm": 0.357421875,
"learning_rate": 6.197047279458459e-06,
"loss": 1.185,
"step": 859
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.341796875,
"learning_rate": 6.173165676349103e-06,
"loss": 1.141,
"step": 860
},
{
"epoch": 0.6538205980066445,
"grad_norm": 0.314453125,
"learning_rate": 6.149309616246285e-06,
"loss": 1.129,
"step": 861
},
{
"epoch": 0.6545799715234931,
"grad_norm": 0.34375,
"learning_rate": 6.125479258382268e-06,
"loss": 1.1517,
"step": 862
},
{
"epoch": 0.6553393450403417,
"grad_norm": 0.326171875,
"learning_rate": 6.101674761817769e-06,
"loss": 1.0984,
"step": 863
},
{
"epoch": 0.6560987185571903,
"grad_norm": 0.341796875,
"learning_rate": 6.077896285440874e-06,
"loss": 1.175,
"step": 864
},
{
"epoch": 0.6568580920740389,
"grad_norm": 0.34375,
"learning_rate": 6.054143987966001e-06,
"loss": 1.1625,
"step": 865
},
{
"epoch": 0.6576174655908875,
"grad_norm": 0.357421875,
"learning_rate": 6.030418027932835e-06,
"loss": 1.2025,
"step": 866
},
{
"epoch": 0.6583768391077361,
"grad_norm": 0.3671875,
"learning_rate": 6.006718563705258e-06,
"loss": 1.1843,
"step": 867
},
{
"epoch": 0.6591362126245848,
"grad_norm": 0.3671875,
"learning_rate": 5.983045753470308e-06,
"loss": 1.1775,
"step": 868
},
{
"epoch": 0.6598955861414333,
"grad_norm": 0.3984375,
"learning_rate": 5.959399755237103e-06,
"loss": 1.1727,
"step": 869
},
{
"epoch": 0.6606549596582819,
"grad_norm": 0.353515625,
"learning_rate": 5.935780726835811e-06,
"loss": 1.1502,
"step": 870
},
{
"epoch": 0.6614143331751305,
"grad_norm": 0.3515625,
"learning_rate": 5.91218882591658e-06,
"loss": 1.1346,
"step": 871
},
{
"epoch": 0.6621737066919792,
"grad_norm": 0.41796875,
"learning_rate": 5.888624209948495e-06,
"loss": 1.1899,
"step": 872
},
{
"epoch": 0.6629330802088277,
"grad_norm": 0.345703125,
"learning_rate": 5.865087036218504e-06,
"loss": 1.1826,
"step": 873
},
{
"epoch": 0.6636924537256763,
"grad_norm": 0.337890625,
"learning_rate": 5.841577461830408e-06,
"loss": 1.1627,
"step": 874
},
{
"epoch": 0.6644518272425249,
"grad_norm": 0.33984375,
"learning_rate": 5.818095643703779e-06,
"loss": 1.1732,
"step": 875
},
{
"epoch": 0.6652112007593736,
"grad_norm": 0.294921875,
"learning_rate": 5.794641738572925e-06,
"loss": 1.1294,
"step": 876
},
{
"epoch": 0.6659705742762221,
"grad_norm": 0.271484375,
"learning_rate": 5.771215902985848e-06,
"loss": 1.1594,
"step": 877
},
{
"epoch": 0.6667299477930707,
"grad_norm": 0.279296875,
"learning_rate": 5.747818293303185e-06,
"loss": 1.1273,
"step": 878
},
{
"epoch": 0.6674893213099193,
"grad_norm": 0.3359375,
"learning_rate": 5.724449065697182e-06,
"loss": 1.1463,
"step": 879
},
{
"epoch": 0.668248694826768,
"grad_norm": 0.333984375,
"learning_rate": 5.701108376150635e-06,
"loss": 1.1557,
"step": 880
},
{
"epoch": 0.6690080683436165,
"grad_norm": 0.35546875,
"learning_rate": 5.677796380455862e-06,
"loss": 1.1537,
"step": 881
},
{
"epoch": 0.6697674418604651,
"grad_norm": 0.30859375,
"learning_rate": 5.654513234213663e-06,
"loss": 1.1203,
"step": 882
},
{
"epoch": 0.6705268153773137,
"grad_norm": 0.33203125,
"learning_rate": 5.631259092832265e-06,
"loss": 1.1744,
"step": 883
},
{
"epoch": 0.6712861888941624,
"grad_norm": 0.35546875,
"learning_rate": 5.608034111526298e-06,
"loss": 1.1531,
"step": 884
},
{
"epoch": 0.6720455624110109,
"grad_norm": 0.37109375,
"learning_rate": 5.584838445315764e-06,
"loss": 1.1989,
"step": 885
},
{
"epoch": 0.6728049359278595,
"grad_norm": 0.39453125,
"learning_rate": 5.561672249024988e-06,
"loss": 1.2282,
"step": 886
},
{
"epoch": 0.6735643094447081,
"grad_norm": 0.36328125,
"learning_rate": 5.538535677281608e-06,
"loss": 1.186,
"step": 887
},
{
"epoch": 0.6743236829615568,
"grad_norm": 0.357421875,
"learning_rate": 5.515428884515495e-06,
"loss": 1.1552,
"step": 888
},
{
"epoch": 0.6750830564784053,
"grad_norm": 0.349609375,
"learning_rate": 5.492352024957781e-06,
"loss": 1.1389,
"step": 889
},
{
"epoch": 0.6758424299952539,
"grad_norm": 0.33984375,
"learning_rate": 5.4693052526397965e-06,
"loss": 1.133,
"step": 890
},
{
"epoch": 0.6766018035121025,
"grad_norm": 0.365234375,
"learning_rate": 5.446288721392048e-06,
"loss": 1.2011,
"step": 891
},
{
"epoch": 0.6773611770289512,
"grad_norm": 0.30859375,
"learning_rate": 5.423302584843186e-06,
"loss": 1.1344,
"step": 892
},
{
"epoch": 0.6781205505457997,
"grad_norm": 0.328125,
"learning_rate": 5.400346996418988e-06,
"loss": 1.161,
"step": 893
},
{
"epoch": 0.6788799240626483,
"grad_norm": 0.2734375,
"learning_rate": 5.377422109341332e-06,
"loss": 1.1067,
"step": 894
},
{
"epoch": 0.6796392975794969,
"grad_norm": 0.306640625,
"learning_rate": 5.354528076627185e-06,
"loss": 1.1321,
"step": 895
},
{
"epoch": 0.6803986710963456,
"grad_norm": 0.37109375,
"learning_rate": 5.331665051087549e-06,
"loss": 1.1952,
"step": 896
},
{
"epoch": 0.6811580446131941,
"grad_norm": 0.267578125,
"learning_rate": 5.308833185326472e-06,
"loss": 1.1063,
"step": 897
},
{
"epoch": 0.6819174181300427,
"grad_norm": 0.357421875,
"learning_rate": 5.286032631740023e-06,
"loss": 1.19,
"step": 898
},
{
"epoch": 0.6826767916468913,
"grad_norm": 0.365234375,
"learning_rate": 5.263263542515273e-06,
"loss": 1.1727,
"step": 899
},
{
"epoch": 0.68343616516374,
"grad_norm": 0.3046875,
"learning_rate": 5.240526069629265e-06,
"loss": 1.172,
"step": 900
},
{
"epoch": 0.6841955386805885,
"grad_norm": 0.357421875,
"learning_rate": 5.217820364848027e-06,
"loss": 1.1787,
"step": 901
},
{
"epoch": 0.6849549121974371,
"grad_norm": 0.390625,
"learning_rate": 5.19514657972553e-06,
"loss": 1.2442,
"step": 902
},
{
"epoch": 0.6857142857142857,
"grad_norm": 0.337890625,
"learning_rate": 5.172504865602701e-06,
"loss": 1.1876,
"step": 903
},
{
"epoch": 0.6864736592311343,
"grad_norm": 0.37109375,
"learning_rate": 5.149895373606405e-06,
"loss": 1.2092,
"step": 904
},
{
"epoch": 0.6872330327479829,
"grad_norm": 0.265625,
"learning_rate": 5.127318254648418e-06,
"loss": 1.1086,
"step": 905
},
{
"epoch": 0.6879924062648315,
"grad_norm": 0.328125,
"learning_rate": 5.104773659424453e-06,
"loss": 1.1276,
"step": 906
},
{
"epoch": 0.6887517797816801,
"grad_norm": 0.369140625,
"learning_rate": 5.082261738413124e-06,
"loss": 1.2118,
"step": 907
},
{
"epoch": 0.6895111532985287,
"grad_norm": 0.33203125,
"learning_rate": 5.059782641874962e-06,
"loss": 1.1634,
"step": 908
},
{
"epoch": 0.6902705268153773,
"grad_norm": 0.33203125,
"learning_rate": 5.037336519851397e-06,
"loss": 1.1525,
"step": 909
},
{
"epoch": 0.6910299003322259,
"grad_norm": 0.306640625,
"learning_rate": 5.014923522163773e-06,
"loss": 1.1586,
"step": 910
},
{
"epoch": 0.6917892738490745,
"grad_norm": 0.318359375,
"learning_rate": 4.992543798412327e-06,
"loss": 1.185,
"step": 911
},
{
"epoch": 0.6925486473659231,
"grad_norm": 0.328125,
"learning_rate": 4.970197497975216e-06,
"loss": 1.1233,
"step": 912
},
{
"epoch": 0.6933080208827717,
"grad_norm": 0.337890625,
"learning_rate": 4.947884770007491e-06,
"loss": 1.1646,
"step": 913
},
{
"epoch": 0.6940673943996203,
"grad_norm": 0.373046875,
"learning_rate": 4.92560576344013e-06,
"loss": 1.1766,
"step": 914
},
{
"epoch": 0.6948267679164689,
"grad_norm": 0.337890625,
"learning_rate": 4.903360626979026e-06,
"loss": 1.1797,
"step": 915
},
{
"epoch": 0.6955861414333175,
"grad_norm": 0.291015625,
"learning_rate": 4.881149509103993e-06,
"loss": 1.1327,
"step": 916
},
{
"epoch": 0.6963455149501662,
"grad_norm": 0.3125,
"learning_rate": 4.858972558067784e-06,
"loss": 1.1353,
"step": 917
},
{
"epoch": 0.6971048884670147,
"grad_norm": 0.33984375,
"learning_rate": 4.836829921895103e-06,
"loss": 1.1603,
"step": 918
},
{
"epoch": 0.6978642619838633,
"grad_norm": 0.3359375,
"learning_rate": 4.814721748381608e-06,
"loss": 1.1768,
"step": 919
},
{
"epoch": 0.6986236355007119,
"grad_norm": 0.349609375,
"learning_rate": 4.7926481850929376e-06,
"loss": 1.1515,
"step": 920
},
{
"epoch": 0.6993830090175606,
"grad_norm": 0.380859375,
"learning_rate": 4.770609379363694e-06,
"loss": 1.2258,
"step": 921
},
{
"epoch": 0.7001423825344091,
"grad_norm": 0.3515625,
"learning_rate": 4.748605478296508e-06,
"loss": 1.1553,
"step": 922
},
{
"epoch": 0.7009017560512577,
"grad_norm": 0.380859375,
"learning_rate": 4.726636628761018e-06,
"loss": 1.1856,
"step": 923
},
{
"epoch": 0.7016611295681063,
"grad_norm": 0.33203125,
"learning_rate": 4.704702977392914e-06,
"loss": 1.172,
"step": 924
},
{
"epoch": 0.702420503084955,
"grad_norm": 0.318359375,
"learning_rate": 4.682804670592937e-06,
"loss": 1.145,
"step": 925
},
{
"epoch": 0.7031798766018035,
"grad_norm": 0.341796875,
"learning_rate": 4.660941854525917e-06,
"loss": 1.1645,
"step": 926
},
{
"epoch": 0.7039392501186521,
"grad_norm": 0.314453125,
"learning_rate": 4.639114675119797e-06,
"loss": 1.1369,
"step": 927
},
{
"epoch": 0.7046986236355007,
"grad_norm": 0.291015625,
"learning_rate": 4.617323278064657e-06,
"loss": 1.1206,
"step": 928
},
{
"epoch": 0.7054579971523494,
"grad_norm": 0.267578125,
"learning_rate": 4.595567808811735e-06,
"loss": 1.1056,
"step": 929
},
{
"epoch": 0.7062173706691979,
"grad_norm": 0.40234375,
"learning_rate": 4.573848412572458e-06,
"loss": 1.1796,
"step": 930
},
{
"epoch": 0.7069767441860465,
"grad_norm": 0.341796875,
"learning_rate": 4.552165234317486e-06,
"loss": 1.1623,
"step": 931
},
{
"epoch": 0.7077361177028951,
"grad_norm": 0.345703125,
"learning_rate": 4.530518418775734e-06,
"loss": 1.1729,
"step": 932
},
{
"epoch": 0.7084954912197438,
"grad_norm": 0.33984375,
"learning_rate": 4.508908110433393e-06,
"loss": 1.1316,
"step": 933
},
{
"epoch": 0.7092548647365923,
"grad_norm": 0.3515625,
"learning_rate": 4.487334453532998e-06,
"loss": 1.198,
"step": 934
},
{
"epoch": 0.7100142382534409,
"grad_norm": 0.369140625,
"learning_rate": 4.465797592072428e-06,
"loss": 1.2132,
"step": 935
},
{
"epoch": 0.7107736117702895,
"grad_norm": 0.341796875,
"learning_rate": 4.444297669803981e-06,
"loss": 1.1731,
"step": 936
},
{
"epoch": 0.7115329852871382,
"grad_norm": 0.298828125,
"learning_rate": 4.422834830233378e-06,
"loss": 1.119,
"step": 937
},
{
"epoch": 0.7122923588039867,
"grad_norm": 0.29296875,
"learning_rate": 4.4014092166188375e-06,
"loss": 1.1435,
"step": 938
},
{
"epoch": 0.7130517323208353,
"grad_norm": 0.3671875,
"learning_rate": 4.3800209719701055e-06,
"loss": 1.1884,
"step": 939
},
{
"epoch": 0.7138111058376839,
"grad_norm": 0.369140625,
"learning_rate": 4.35867023904749e-06,
"loss": 1.1715,
"step": 940
},
{
"epoch": 0.7145704793545326,
"grad_norm": 0.33203125,
"learning_rate": 4.337357160360931e-06,
"loss": 1.1819,
"step": 941
},
{
"epoch": 0.7153298528713811,
"grad_norm": 0.326171875,
"learning_rate": 4.3160818781690286e-06,
"loss": 1.165,
"step": 942
},
{
"epoch": 0.7160892263882297,
"grad_norm": 0.302734375,
"learning_rate": 4.294844534478107e-06,
"loss": 1.0917,
"step": 943
},
{
"epoch": 0.7168485999050783,
"grad_norm": 0.322265625,
"learning_rate": 4.2736452710412645e-06,
"loss": 1.1302,
"step": 944
},
{
"epoch": 0.717607973421927,
"grad_norm": 0.365234375,
"learning_rate": 4.25248422935742e-06,
"loss": 1.1528,
"step": 945
},
{
"epoch": 0.7183673469387755,
"grad_norm": 0.341796875,
"learning_rate": 4.2313615506703685e-06,
"loss": 1.1557,
"step": 946
},
{
"epoch": 0.7191267204556241,
"grad_norm": 0.37109375,
"learning_rate": 4.210277375967855e-06,
"loss": 1.2004,
"step": 947
},
{
"epoch": 0.7198860939724727,
"grad_norm": 0.326171875,
"learning_rate": 4.189231845980618e-06,
"loss": 1.1886,
"step": 948
},
{
"epoch": 0.7206454674893213,
"grad_norm": 0.33984375,
"learning_rate": 4.168225101181449e-06,
"loss": 1.1163,
"step": 949
},
{
"epoch": 0.7214048410061699,
"grad_norm": 0.287109375,
"learning_rate": 4.147257281784257e-06,
"loss": 1.1078,
"step": 950
},
{
"epoch": 0.7221642145230185,
"grad_norm": 0.306640625,
"learning_rate": 4.1263285277431465e-06,
"loss": 1.1385,
"step": 951
},
{
"epoch": 0.7229235880398671,
"grad_norm": 0.3515625,
"learning_rate": 4.105438978751465e-06,
"loss": 1.1829,
"step": 952
},
{
"epoch": 0.7236829615567157,
"grad_norm": 0.31640625,
"learning_rate": 4.084588774240884e-06,
"loss": 1.1458,
"step": 953
},
{
"epoch": 0.7244423350735643,
"grad_norm": 0.31640625,
"learning_rate": 4.063778053380446e-06,
"loss": 1.1388,
"step": 954
},
{
"epoch": 0.7252017085904129,
"grad_norm": 0.3125,
"learning_rate": 4.043006955075667e-06,
"loss": 1.1234,
"step": 955
},
{
"epoch": 0.7259610821072615,
"grad_norm": 0.3359375,
"learning_rate": 4.0222756179675915e-06,
"loss": 1.171,
"step": 956
},
{
"epoch": 0.7267204556241101,
"grad_norm": 0.30078125,
"learning_rate": 4.001584180431869e-06,
"loss": 1.1435,
"step": 957
},
{
"epoch": 0.7274798291409587,
"grad_norm": 0.2578125,
"learning_rate": 3.980932780577827e-06,
"loss": 1.1021,
"step": 958
},
{
"epoch": 0.7282392026578073,
"grad_norm": 0.357421875,
"learning_rate": 3.960321556247552e-06,
"loss": 1.1885,
"step": 959
},
{
"epoch": 0.7289985761746559,
"grad_norm": 0.29296875,
"learning_rate": 3.939750645014977e-06,
"loss": 1.1244,
"step": 960
},
{
"epoch": 0.7297579496915045,
"grad_norm": 0.3125,
"learning_rate": 3.919220184184959e-06,
"loss": 1.1245,
"step": 961
},
{
"epoch": 0.730517323208353,
"grad_norm": 0.314453125,
"learning_rate": 3.898730310792346e-06,
"loss": 1.1353,
"step": 962
},
{
"epoch": 0.7312766967252017,
"grad_norm": 0.29296875,
"learning_rate": 3.878281161601094e-06,
"loss": 1.1653,
"step": 963
},
{
"epoch": 0.7320360702420503,
"grad_norm": 0.294921875,
"learning_rate": 3.857872873103322e-06,
"loss": 1.1238,
"step": 964
},
{
"epoch": 0.7327954437588989,
"grad_norm": 0.380859375,
"learning_rate": 3.837505581518429e-06,
"loss": 1.1952,
"step": 965
},
{
"epoch": 0.7335548172757476,
"grad_norm": 0.380859375,
"learning_rate": 3.8171794227921585e-06,
"loss": 1.2425,
"step": 966
},
{
"epoch": 0.7343141907925961,
"grad_norm": 0.33203125,
"learning_rate": 3.7968945325957175e-06,
"loss": 1.099,
"step": 967
},
{
"epoch": 0.7350735643094447,
"grad_norm": 0.35546875,
"learning_rate": 3.776651046324843e-06,
"loss": 1.151,
"step": 968
},
{
"epoch": 0.7358329378262933,
"grad_norm": 0.287109375,
"learning_rate": 3.7564490990989276e-06,
"loss": 1.1206,
"step": 969
},
{
"epoch": 0.736592311343142,
"grad_norm": 0.302734375,
"learning_rate": 3.7362888257600894e-06,
"loss": 1.1203,
"step": 970
},
{
"epoch": 0.7373516848599905,
"grad_norm": 0.3671875,
"learning_rate": 3.716170360872294e-06,
"loss": 1.19,
"step": 971
},
{
"epoch": 0.7381110583768391,
"grad_norm": 0.365234375,
"learning_rate": 3.69609383872045e-06,
"loss": 1.1872,
"step": 972
},
{
"epoch": 0.7388704318936877,
"grad_norm": 0.32421875,
"learning_rate": 3.676059393309499e-06,
"loss": 1.1264,
"step": 973
},
{
"epoch": 0.7396298054105364,
"grad_norm": 0.392578125,
"learning_rate": 3.6560671583635467e-06,
"loss": 1.1832,
"step": 974
},
{
"epoch": 0.7403891789273849,
"grad_norm": 0.30859375,
"learning_rate": 3.636117267324941e-06,
"loss": 1.1855,
"step": 975
},
{
"epoch": 0.7411485524442335,
"grad_norm": 0.373046875,
"learning_rate": 3.6162098533534095e-06,
"loss": 1.2236,
"step": 976
},
{
"epoch": 0.7419079259610821,
"grad_norm": 0.30078125,
"learning_rate": 3.5963450493251552e-06,
"loss": 1.1248,
"step": 977
},
{
"epoch": 0.7426672994779308,
"grad_norm": 0.283203125,
"learning_rate": 3.576522987831965e-06,
"loss": 1.0895,
"step": 978
},
{
"epoch": 0.7434266729947793,
"grad_norm": 0.322265625,
"learning_rate": 3.5567438011803356e-06,
"loss": 1.1789,
"step": 979
},
{
"epoch": 0.7441860465116279,
"grad_norm": 0.283203125,
"learning_rate": 3.5370076213905904e-06,
"loss": 1.1332,
"step": 980
},
{
"epoch": 0.7449454200284765,
"grad_norm": 0.33203125,
"learning_rate": 3.5173145801959942e-06,
"loss": 1.1575,
"step": 981
},
{
"epoch": 0.7457047935453252,
"grad_norm": 0.29296875,
"learning_rate": 3.4976648090418685e-06,
"loss": 1.1542,
"step": 982
},
{
"epoch": 0.7464641670621737,
"grad_norm": 0.376953125,
"learning_rate": 3.4780584390847193e-06,
"loss": 1.2163,
"step": 983
},
{
"epoch": 0.7472235405790223,
"grad_norm": 0.341796875,
"learning_rate": 3.4584956011913693e-06,
"loss": 1.1658,
"step": 984
},
{
"epoch": 0.7479829140958709,
"grad_norm": 0.3125,
"learning_rate": 3.4389764259380754e-06,
"loss": 1.1344,
"step": 985
},
{
"epoch": 0.7487422876127195,
"grad_norm": 0.3203125,
"learning_rate": 3.4195010436096622e-06,
"loss": 1.1608,
"step": 986
},
{
"epoch": 0.7495016611295681,
"grad_norm": 0.38671875,
"learning_rate": 3.400069584198633e-06,
"loss": 1.2214,
"step": 987
},
{
"epoch": 0.7502610346464167,
"grad_norm": 0.353515625,
"learning_rate": 3.380682177404335e-06,
"loss": 1.1724,
"step": 988
},
{
"epoch": 0.7510204081632653,
"grad_norm": 0.333984375,
"learning_rate": 3.361338952632074e-06,
"loss": 1.1665,
"step": 989
},
{
"epoch": 0.751779781680114,
"grad_norm": 0.375,
"learning_rate": 3.3420400389922535e-06,
"loss": 1.2119,
"step": 990
},
{
"epoch": 0.7525391551969625,
"grad_norm": 0.296875,
"learning_rate": 3.32278556529951e-06,
"loss": 1.1508,
"step": 991
},
{
"epoch": 0.7532985287138111,
"grad_norm": 0.328125,
"learning_rate": 3.3035756600718515e-06,
"loss": 1.1584,
"step": 992
},
{
"epoch": 0.7540579022306597,
"grad_norm": 0.322265625,
"learning_rate": 3.284410451529816e-06,
"loss": 1.1329,
"step": 993
},
{
"epoch": 0.7548172757475083,
"grad_norm": 0.3515625,
"learning_rate": 3.2652900675956e-06,
"loss": 1.1675,
"step": 994
},
{
"epoch": 0.7555766492643569,
"grad_norm": 0.32421875,
"learning_rate": 3.2462146358922033e-06,
"loss": 1.1203,
"step": 995
},
{
"epoch": 0.7563360227812055,
"grad_norm": 0.2890625,
"learning_rate": 3.2271842837425917e-06,
"loss": 1.1085,
"step": 996
},
{
"epoch": 0.7570953962980541,
"grad_norm": 0.29296875,
"learning_rate": 3.208199138168826e-06,
"loss": 1.1281,
"step": 997
},
{
"epoch": 0.7578547698149027,
"grad_norm": 0.375,
"learning_rate": 3.1892593258912407e-06,
"loss": 1.1927,
"step": 998
},
{
"epoch": 0.7586141433317513,
"grad_norm": 0.34375,
"learning_rate": 3.1703649733275697e-06,
"loss": 1.1877,
"step": 999
},
{
"epoch": 0.7593735168485999,
"grad_norm": 0.326171875,
"learning_rate": 3.151516206592128e-06,
"loss": 1.1486,
"step": 1000
},
{
"epoch": 0.7601328903654485,
"grad_norm": 0.373046875,
"learning_rate": 3.132713151494955e-06,
"loss": 1.1856,
"step": 1001
},
{
"epoch": 0.7608922638822971,
"grad_norm": 0.30859375,
"learning_rate": 3.113955933540973e-06,
"loss": 1.1627,
"step": 1002
},
{
"epoch": 0.7616516373991457,
"grad_norm": 0.33203125,
"learning_rate": 3.0952446779291577e-06,
"loss": 1.1441,
"step": 1003
},
{
"epoch": 0.7624110109159943,
"grad_norm": 0.33203125,
"learning_rate": 3.0765795095517026e-06,
"loss": 1.1066,
"step": 1004
},
{
"epoch": 0.7631703844328429,
"grad_norm": 0.341796875,
"learning_rate": 3.0579605529931832e-06,
"loss": 1.1927,
"step": 1005
},
{
"epoch": 0.7639297579496915,
"grad_norm": 0.34375,
"learning_rate": 3.0393879325297136e-06,
"loss": 1.1468,
"step": 1006
},
{
"epoch": 0.76468913146654,
"grad_norm": 0.333984375,
"learning_rate": 3.020861772128145e-06,
"loss": 1.1106,
"step": 1007
},
{
"epoch": 0.7654485049833887,
"grad_norm": 0.326171875,
"learning_rate": 3.0023821954452036e-06,
"loss": 1.1217,
"step": 1008
},
{
"epoch": 0.7662078785002373,
"grad_norm": 0.318359375,
"learning_rate": 2.983949325826696e-06,
"loss": 1.156,
"step": 1009
},
{
"epoch": 0.7669672520170859,
"grad_norm": 0.33984375,
"learning_rate": 2.9655632863066696e-06,
"loss": 1.1315,
"step": 1010
},
{
"epoch": 0.7677266255339346,
"grad_norm": 0.328125,
"learning_rate": 2.9472241996065897e-06,
"loss": 1.1651,
"step": 1011
},
{
"epoch": 0.7684859990507831,
"grad_norm": 0.291015625,
"learning_rate": 2.9289321881345257e-06,
"loss": 1.1209,
"step": 1012
},
{
"epoch": 0.7692453725676317,
"grad_norm": 0.2890625,
"learning_rate": 2.910687373984339e-06,
"loss": 1.1137,
"step": 1013
},
{
"epoch": 0.7700047460844803,
"grad_norm": 0.326171875,
"learning_rate": 2.8924898789348645e-06,
"loss": 1.1695,
"step": 1014
},
{
"epoch": 0.770764119601329,
"grad_norm": 0.33984375,
"learning_rate": 2.874339824449085e-06,
"loss": 1.1603,
"step": 1015
},
{
"epoch": 0.7715234931181775,
"grad_norm": 0.296875,
"learning_rate": 2.856237331673336e-06,
"loss": 1.1263,
"step": 1016
},
{
"epoch": 0.7722828666350261,
"grad_norm": 0.30859375,
"learning_rate": 2.838182521436498e-06,
"loss": 1.1512,
"step": 1017
},
{
"epoch": 0.7730422401518747,
"grad_norm": 0.40234375,
"learning_rate": 2.8201755142491814e-06,
"loss": 1.2103,
"step": 1018
},
{
"epoch": 0.7738016136687234,
"grad_norm": 0.330078125,
"learning_rate": 2.8022164303029186e-06,
"loss": 1.1234,
"step": 1019
},
{
"epoch": 0.7745609871855719,
"grad_norm": 0.296875,
"learning_rate": 2.7843053894693805e-06,
"loss": 1.1291,
"step": 1020
},
{
"epoch": 0.7753203607024205,
"grad_norm": 0.3046875,
"learning_rate": 2.76644251129955e-06,
"loss": 1.1616,
"step": 1021
},
{
"epoch": 0.7760797342192691,
"grad_norm": 0.31640625,
"learning_rate": 2.74862791502295e-06,
"loss": 1.1467,
"step": 1022
},
{
"epoch": 0.7768391077361178,
"grad_norm": 0.314453125,
"learning_rate": 2.7308617195468336e-06,
"loss": 1.1435,
"step": 1023
},
{
"epoch": 0.7775984812529663,
"grad_norm": 0.353515625,
"learning_rate": 2.713144043455388e-06,
"loss": 1.1323,
"step": 1024
},
{
"epoch": 0.7783578547698149,
"grad_norm": 0.322265625,
"learning_rate": 2.695475005008946e-06,
"loss": 1.1765,
"step": 1025
},
{
"epoch": 0.7791172282866635,
"grad_norm": 0.30859375,
"learning_rate": 2.6778547221432063e-06,
"loss": 1.1441,
"step": 1026
},
{
"epoch": 0.7798766018035121,
"grad_norm": 0.31640625,
"learning_rate": 2.660283312468438e-06,
"loss": 1.1428,
"step": 1027
},
{
"epoch": 0.7806359753203607,
"grad_norm": 0.298828125,
"learning_rate": 2.642760893268684e-06,
"loss": 1.1243,
"step": 1028
},
{
"epoch": 0.7813953488372093,
"grad_norm": 0.349609375,
"learning_rate": 2.625287581501006e-06,
"loss": 1.1824,
"step": 1029
},
{
"epoch": 0.7821547223540579,
"grad_norm": 0.359375,
"learning_rate": 2.6078634937946724e-06,
"loss": 1.1663,
"step": 1030
},
{
"epoch": 0.7829140958709065,
"grad_norm": 0.365234375,
"learning_rate": 2.5904887464504115e-06,
"loss": 1.1911,
"step": 1031
},
{
"epoch": 0.7836734693877551,
"grad_norm": 0.359375,
"learning_rate": 2.573163455439601e-06,
"loss": 1.1811,
"step": 1032
},
{
"epoch": 0.7844328429046037,
"grad_norm": 0.37109375,
"learning_rate": 2.5558877364035286e-06,
"loss": 1.2266,
"step": 1033
},
{
"epoch": 0.7851922164214523,
"grad_norm": 0.333984375,
"learning_rate": 2.538661704652595e-06,
"loss": 1.1456,
"step": 1034
},
{
"epoch": 0.7859515899383009,
"grad_norm": 0.33203125,
"learning_rate": 2.521485475165555e-06,
"loss": 1.177,
"step": 1035
},
{
"epoch": 0.7867109634551495,
"grad_norm": 0.341796875,
"learning_rate": 2.504359162588741e-06,
"loss": 1.18,
"step": 1036
},
{
"epoch": 0.7874703369719981,
"grad_norm": 0.345703125,
"learning_rate": 2.4872828812353146e-06,
"loss": 1.1414,
"step": 1037
},
{
"epoch": 0.7882297104888467,
"grad_norm": 0.384765625,
"learning_rate": 2.470256745084488e-06,
"loss": 1.1995,
"step": 1038
},
{
"epoch": 0.7889890840056953,
"grad_norm": 0.349609375,
"learning_rate": 2.4532808677807772e-06,
"loss": 1.1283,
"step": 1039
},
{
"epoch": 0.7897484575225439,
"grad_norm": 0.345703125,
"learning_rate": 2.4363553626332157e-06,
"loss": 1.1844,
"step": 1040
},
{
"epoch": 0.7905078310393925,
"grad_norm": 0.369140625,
"learning_rate": 2.419480342614635e-06,
"loss": 1.1947,
"step": 1041
},
{
"epoch": 0.7912672045562411,
"grad_norm": 0.35546875,
"learning_rate": 2.402655920360889e-06,
"loss": 1.1751,
"step": 1042
},
{
"epoch": 0.7920265780730897,
"grad_norm": 0.365234375,
"learning_rate": 2.385882208170106e-06,
"loss": 1.1976,
"step": 1043
},
{
"epoch": 0.7927859515899383,
"grad_norm": 0.36328125,
"learning_rate": 2.369159318001937e-06,
"loss": 1.1705,
"step": 1044
},
{
"epoch": 0.7935453251067869,
"grad_norm": 0.30078125,
"learning_rate": 2.3524873614768085e-06,
"loss": 1.1149,
"step": 1045
},
{
"epoch": 0.7943046986236355,
"grad_norm": 0.3203125,
"learning_rate": 2.335866449875185e-06,
"loss": 1.1556,
"step": 1046
},
{
"epoch": 0.7950640721404841,
"grad_norm": 0.322265625,
"learning_rate": 2.3192966941368247e-06,
"loss": 1.1266,
"step": 1047
},
{
"epoch": 0.7958234456573327,
"grad_norm": 0.28515625,
"learning_rate": 2.3027782048600247e-06,
"loss": 1.0954,
"step": 1048
},
{
"epoch": 0.7965828191741813,
"grad_norm": 0.310546875,
"learning_rate": 2.2863110923008958e-06,
"loss": 1.1715,
"step": 1049
},
{
"epoch": 0.7973421926910299,
"grad_norm": 0.40234375,
"learning_rate": 2.26989546637263e-06,
"loss": 1.2394,
"step": 1050
},
{
"epoch": 0.7981015662078785,
"grad_norm": 0.37109375,
"learning_rate": 2.2535314366447625e-06,
"loss": 1.1812,
"step": 1051
},
{
"epoch": 0.798860939724727,
"grad_norm": 0.330078125,
"learning_rate": 2.237219112342426e-06,
"loss": 1.146,
"step": 1052
},
{
"epoch": 0.7996203132415757,
"grad_norm": 0.3046875,
"learning_rate": 2.2209586023456495e-06,
"loss": 1.1245,
"step": 1053
},
{
"epoch": 0.8003796867584243,
"grad_norm": 0.3359375,
"learning_rate": 2.2047500151886047e-06,
"loss": 1.1608,
"step": 1054
},
{
"epoch": 0.8011390602752729,
"grad_norm": 0.341796875,
"learning_rate": 2.1885934590589008e-06,
"loss": 1.1919,
"step": 1055
},
{
"epoch": 0.8018984337921214,
"grad_norm": 0.314453125,
"learning_rate": 2.172489041796856e-06,
"loss": 1.1411,
"step": 1056
},
{
"epoch": 0.8026578073089701,
"grad_norm": 0.3203125,
"learning_rate": 2.156436870894767e-06,
"loss": 1.1685,
"step": 1057
},
{
"epoch": 0.8034171808258187,
"grad_norm": 0.341796875,
"learning_rate": 2.140437053496214e-06,
"loss": 1.1709,
"step": 1058
},
{
"epoch": 0.8041765543426673,
"grad_norm": 0.353515625,
"learning_rate": 2.124489696395321e-06,
"loss": 1.1552,
"step": 1059
},
{
"epoch": 0.804935927859516,
"grad_norm": 0.328125,
"learning_rate": 2.1085949060360654e-06,
"loss": 1.1587,
"step": 1060
},
{
"epoch": 0.8056953013763645,
"grad_norm": 0.30859375,
"learning_rate": 2.092752788511546e-06,
"loss": 1.1752,
"step": 1061
},
{
"epoch": 0.8064546748932131,
"grad_norm": 0.3125,
"learning_rate": 2.0769634495632986e-06,
"loss": 1.1594,
"step": 1062
},
{
"epoch": 0.8072140484100617,
"grad_norm": 0.28515625,
"learning_rate": 2.061226994580563e-06,
"loss": 1.1164,
"step": 1063
},
{
"epoch": 0.8079734219269104,
"grad_norm": 0.294921875,
"learning_rate": 2.045543528599607e-06,
"loss": 1.0982,
"step": 1064
},
{
"epoch": 0.8087327954437589,
"grad_norm": 0.33984375,
"learning_rate": 2.0299131563030016e-06,
"loss": 1.1587,
"step": 1065
},
{
"epoch": 0.8094921689606075,
"grad_norm": 0.388671875,
"learning_rate": 2.0143359820189403e-06,
"loss": 1.1613,
"step": 1066
},
{
"epoch": 0.8102515424774561,
"grad_norm": 0.30078125,
"learning_rate": 1.998812109720535e-06,
"loss": 1.1486,
"step": 1067
},
{
"epoch": 0.8110109159943047,
"grad_norm": 0.349609375,
"learning_rate": 1.983341643025117e-06,
"loss": 1.1652,
"step": 1068
},
{
"epoch": 0.8117702895111533,
"grad_norm": 0.31640625,
"learning_rate": 1.967924685193552e-06,
"loss": 1.1593,
"step": 1069
},
{
"epoch": 0.8125296630280019,
"grad_norm": 0.34375,
"learning_rate": 1.952561339129554e-06,
"loss": 1.1904,
"step": 1070
},
{
"epoch": 0.8132890365448505,
"grad_norm": 0.32421875,
"learning_rate": 1.93725170737899e-06,
"loss": 1.151,
"step": 1071
},
{
"epoch": 0.8140484100616991,
"grad_norm": 0.29296875,
"learning_rate": 1.921995892129208e-06,
"loss": 1.1097,
"step": 1072
},
{
"epoch": 0.8148077835785477,
"grad_norm": 0.375,
"learning_rate": 1.906793995208328e-06,
"loss": 1.1875,
"step": 1073
},
{
"epoch": 0.8155671570953963,
"grad_norm": 0.400390625,
"learning_rate": 1.8916461180845968e-06,
"loss": 1.2437,
"step": 1074
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.375,
"learning_rate": 1.8765523618656923e-06,
"loss": 1.1949,
"step": 1075
},
{
"epoch": 0.8170859041290935,
"grad_norm": 0.33203125,
"learning_rate": 1.861512827298051e-06,
"loss": 1.1321,
"step": 1076
},
{
"epoch": 0.8178452776459421,
"grad_norm": 0.328125,
"learning_rate": 1.8465276147661905e-06,
"loss": 1.1811,
"step": 1077
},
{
"epoch": 0.8186046511627907,
"grad_norm": 0.35546875,
"learning_rate": 1.8315968242920446e-06,
"loss": 1.2074,
"step": 1078
},
{
"epoch": 0.8193640246796393,
"grad_norm": 0.345703125,
"learning_rate": 1.8167205555343027e-06,
"loss": 1.1378,
"step": 1079
},
{
"epoch": 0.8201233981964879,
"grad_norm": 0.314453125,
"learning_rate": 1.8018989077877368e-06,
"loss": 1.1401,
"step": 1080
},
{
"epoch": 0.8208827717133365,
"grad_norm": 0.3203125,
"learning_rate": 1.7871319799825316e-06,
"loss": 1.1455,
"step": 1081
},
{
"epoch": 0.8216421452301851,
"grad_norm": 0.365234375,
"learning_rate": 1.7724198706836372e-06,
"loss": 1.1678,
"step": 1082
},
{
"epoch": 0.8224015187470337,
"grad_norm": 0.447265625,
"learning_rate": 1.757762678090107e-06,
"loss": 1.1541,
"step": 1083
},
{
"epoch": 0.8231608922638823,
"grad_norm": 0.365234375,
"learning_rate": 1.743160500034443e-06,
"loss": 1.1924,
"step": 1084
},
{
"epoch": 0.8239202657807309,
"grad_norm": 0.30859375,
"learning_rate": 1.7286134339819337e-06,
"loss": 1.1414,
"step": 1085
},
{
"epoch": 0.8246796392975795,
"grad_norm": 0.322265625,
"learning_rate": 1.7141215770300202e-06,
"loss": 1.1341,
"step": 1086
},
{
"epoch": 0.8254390128144281,
"grad_norm": 0.359375,
"learning_rate": 1.6996850259076303e-06,
"loss": 1.1874,
"step": 1087
},
{
"epoch": 0.8261983863312767,
"grad_norm": 0.3359375,
"learning_rate": 1.6853038769745466e-06,
"loss": 1.1982,
"step": 1088
},
{
"epoch": 0.8269577598481253,
"grad_norm": 0.369140625,
"learning_rate": 1.670978226220762e-06,
"loss": 1.2065,
"step": 1089
},
{
"epoch": 0.8277171333649739,
"grad_norm": 0.322265625,
"learning_rate": 1.6567081692658238e-06,
"loss": 1.148,
"step": 1090
},
{
"epoch": 0.8284765068818225,
"grad_norm": 0.3046875,
"learning_rate": 1.642493801358218e-06,
"loss": 1.1179,
"step": 1091
},
{
"epoch": 0.8292358803986711,
"grad_norm": 0.3359375,
"learning_rate": 1.6283352173747148e-06,
"loss": 1.1411,
"step": 1092
},
{
"epoch": 0.8299952539155196,
"grad_norm": 0.369140625,
"learning_rate": 1.6142325118197488e-06,
"loss": 1.1431,
"step": 1093
},
{
"epoch": 0.8307546274323683,
"grad_norm": 0.32421875,
"learning_rate": 1.6001857788247755e-06,
"loss": 1.1494,
"step": 1094
},
{
"epoch": 0.8315140009492169,
"grad_norm": 0.365234375,
"learning_rate": 1.5861951121476571e-06,
"loss": 1.1864,
"step": 1095
},
{
"epoch": 0.8322733744660655,
"grad_norm": 0.26171875,
"learning_rate": 1.5722606051720268e-06,
"loss": 1.1363,
"step": 1096
},
{
"epoch": 0.833032747982914,
"grad_norm": 0.322265625,
"learning_rate": 1.5583823509066665e-06,
"loss": 1.1366,
"step": 1097
},
{
"epoch": 0.8337921214997627,
"grad_norm": 0.275390625,
"learning_rate": 1.5445604419848858e-06,
"loss": 1.1422,
"step": 1098
},
{
"epoch": 0.8345514950166113,
"grad_norm": 0.380859375,
"learning_rate": 1.5307949706639114e-06,
"loss": 1.1861,
"step": 1099
},
{
"epoch": 0.8353108685334599,
"grad_norm": 0.408203125,
"learning_rate": 1.5170860288242638e-06,
"loss": 1.1732,
"step": 1100
},
{
"epoch": 0.8360702420503084,
"grad_norm": 0.349609375,
"learning_rate": 1.503433707969142e-06,
"loss": 1.1638,
"step": 1101
},
{
"epoch": 0.8368296155671571,
"grad_norm": 0.310546875,
"learning_rate": 1.489838099223816e-06,
"loss": 1.1235,
"step": 1102
},
{
"epoch": 0.8375889890840057,
"grad_norm": 0.318359375,
"learning_rate": 1.476299293335024e-06,
"loss": 1.1356,
"step": 1103
},
{
"epoch": 0.8383483626008543,
"grad_norm": 0.27734375,
"learning_rate": 1.4628173806703594e-06,
"loss": 1.1142,
"step": 1104
},
{
"epoch": 0.8391077361177028,
"grad_norm": 0.30859375,
"learning_rate": 1.4493924512176748e-06,
"loss": 1.1373,
"step": 1105
},
{
"epoch": 0.8398671096345515,
"grad_norm": 0.40625,
"learning_rate": 1.436024594584461e-06,
"loss": 1.2117,
"step": 1106
},
{
"epoch": 0.8406264831514001,
"grad_norm": 0.248046875,
"learning_rate": 1.4227138999972801e-06,
"loss": 1.077,
"step": 1107
},
{
"epoch": 0.8413858566682487,
"grad_norm": 0.353515625,
"learning_rate": 1.409460456301147e-06,
"loss": 1.1294,
"step": 1108
},
{
"epoch": 0.8421452301850973,
"grad_norm": 0.31640625,
"learning_rate": 1.3962643519589502e-06,
"loss": 1.1354,
"step": 1109
},
{
"epoch": 0.8429046037019459,
"grad_norm": 0.412109375,
"learning_rate": 1.3831256750508449e-06,
"loss": 1.1973,
"step": 1110
},
{
"epoch": 0.8436639772187945,
"grad_norm": 0.30078125,
"learning_rate": 1.3700445132736795e-06,
"loss": 1.1396,
"step": 1111
},
{
"epoch": 0.8444233507356431,
"grad_norm": 0.302734375,
"learning_rate": 1.3570209539404067e-06,
"loss": 1.1354,
"step": 1112
},
{
"epoch": 0.8451827242524917,
"grad_norm": 0.322265625,
"learning_rate": 1.3440550839795008e-06,
"loss": 1.1847,
"step": 1113
},
{
"epoch": 0.8459420977693403,
"grad_norm": 0.306640625,
"learning_rate": 1.3311469899343698e-06,
"loss": 1.1425,
"step": 1114
},
{
"epoch": 0.8467014712861889,
"grad_norm": 0.298828125,
"learning_rate": 1.3182967579627948e-06,
"loss": 1.1266,
"step": 1115
},
{
"epoch": 0.8474608448030375,
"grad_norm": 0.318359375,
"learning_rate": 1.305504473836331e-06,
"loss": 1.1409,
"step": 1116
},
{
"epoch": 0.8482202183198861,
"grad_norm": 0.341796875,
"learning_rate": 1.2927702229397633e-06,
"loss": 1.1686,
"step": 1117
},
{
"epoch": 0.8489795918367347,
"grad_norm": 0.36328125,
"learning_rate": 1.2800940902705072e-06,
"loss": 1.1655,
"step": 1118
},
{
"epoch": 0.8497389653535833,
"grad_norm": 0.322265625,
"learning_rate": 1.2674761604380692e-06,
"loss": 1.1476,
"step": 1119
},
{
"epoch": 0.8504983388704319,
"grad_norm": 0.388671875,
"learning_rate": 1.2549165176634582e-06,
"loss": 1.2241,
"step": 1120
},
{
"epoch": 0.8512577123872805,
"grad_norm": 0.3203125,
"learning_rate": 1.2424152457786408e-06,
"loss": 1.1283,
"step": 1121
},
{
"epoch": 0.8520170859041291,
"grad_norm": 0.330078125,
"learning_rate": 1.2299724282259685e-06,
"loss": 1.1519,
"step": 1122
},
{
"epoch": 0.8527764594209777,
"grad_norm": 0.31640625,
"learning_rate": 1.2175881480576347e-06,
"loss": 1.1268,
"step": 1123
},
{
"epoch": 0.8535358329378263,
"grad_norm": 0.30859375,
"learning_rate": 1.2052624879351105e-06,
"loss": 1.0941,
"step": 1124
},
{
"epoch": 0.8542952064546749,
"grad_norm": 0.3359375,
"learning_rate": 1.1929955301285889e-06,
"loss": 1.1533,
"step": 1125
},
{
"epoch": 0.8550545799715235,
"grad_norm": 0.365234375,
"learning_rate": 1.1807873565164507e-06,
"loss": 1.1927,
"step": 1126
},
{
"epoch": 0.8558139534883721,
"grad_norm": 0.361328125,
"learning_rate": 1.1686380485847027e-06,
"loss": 1.1902,
"step": 1127
},
{
"epoch": 0.8565733270052207,
"grad_norm": 0.287109375,
"learning_rate": 1.1565476874264448e-06,
"loss": 1.1152,
"step": 1128
},
{
"epoch": 0.8573327005220693,
"grad_norm": 0.330078125,
"learning_rate": 1.144516353741324e-06,
"loss": 1.1328,
"step": 1129
},
{
"epoch": 0.8580920740389179,
"grad_norm": 0.333984375,
"learning_rate": 1.1325441278349935e-06,
"loss": 1.1626,
"step": 1130
},
{
"epoch": 0.8588514475557665,
"grad_norm": 0.3671875,
"learning_rate": 1.120631089618579e-06,
"loss": 1.1927,
"step": 1131
},
{
"epoch": 0.8596108210726151,
"grad_norm": 0.365234375,
"learning_rate": 1.1087773186081474e-06,
"loss": 1.2139,
"step": 1132
},
{
"epoch": 0.8603701945894637,
"grad_norm": 0.33984375,
"learning_rate": 1.0969828939241779e-06,
"loss": 1.1491,
"step": 1133
},
{
"epoch": 0.8611295681063122,
"grad_norm": 0.341796875,
"learning_rate": 1.0852478942910228e-06,
"loss": 1.156,
"step": 1134
},
{
"epoch": 0.8618889416231609,
"grad_norm": 0.33984375,
"learning_rate": 1.0735723980363921e-06,
"loss": 1.1736,
"step": 1135
},
{
"epoch": 0.8626483151400095,
"grad_norm": 0.365234375,
"learning_rate": 1.0619564830908303e-06,
"loss": 1.1818,
"step": 1136
},
{
"epoch": 0.8634076886568581,
"grad_norm": 0.3515625,
"learning_rate": 1.0504002269871927e-06,
"loss": 1.1886,
"step": 1137
},
{
"epoch": 0.8641670621737066,
"grad_norm": 0.357421875,
"learning_rate": 1.0389037068601325e-06,
"loss": 1.2172,
"step": 1138
},
{
"epoch": 0.8649264356905553,
"grad_norm": 0.302734375,
"learning_rate": 1.027466999445572e-06,
"loss": 1.1286,
"step": 1139
},
{
"epoch": 0.8656858092074039,
"grad_norm": 0.32421875,
"learning_rate": 1.0160901810802114e-06,
"loss": 1.1688,
"step": 1140
},
{
"epoch": 0.8664451827242525,
"grad_norm": 0.36328125,
"learning_rate": 1.0047733277010064e-06,
"loss": 1.2127,
"step": 1141
},
{
"epoch": 0.867204556241101,
"grad_norm": 0.35546875,
"learning_rate": 9.935165148446658e-07,
"loss": 1.1628,
"step": 1142
},
{
"epoch": 0.8679639297579497,
"grad_norm": 0.30859375,
"learning_rate": 9.823198176471381e-07,
"loss": 1.1454,
"step": 1143
},
{
"epoch": 0.8687233032747983,
"grad_norm": 0.306640625,
"learning_rate": 9.711833108431234e-07,
"loss": 1.1546,
"step": 1144
},
{
"epoch": 0.8694826767916469,
"grad_norm": 0.34765625,
"learning_rate": 9.601070687655667e-07,
"loss": 1.1958,
"step": 1145
},
{
"epoch": 0.8702420503084954,
"grad_norm": 0.30859375,
"learning_rate": 9.490911653451651e-07,
"loss": 1.1511,
"step": 1146
},
{
"epoch": 0.8710014238253441,
"grad_norm": 0.3125,
"learning_rate": 9.381356741098702e-07,
"loss": 1.148,
"step": 1147
},
{
"epoch": 0.8717607973421927,
"grad_norm": 0.328125,
"learning_rate": 9.272406681844015e-07,
"loss": 1.1383,
"step": 1148
},
{
"epoch": 0.8725201708590413,
"grad_norm": 0.345703125,
"learning_rate": 9.164062202897539e-07,
"loss": 1.137,
"step": 1149
},
{
"epoch": 0.8732795443758898,
"grad_norm": 0.33984375,
"learning_rate": 9.05632402742721e-07,
"loss": 1.1381,
"step": 1150
},
{
"epoch": 0.8740389178927385,
"grad_norm": 0.365234375,
"learning_rate": 8.949192874553991e-07,
"loss": 1.1854,
"step": 1151
},
{
"epoch": 0.8747982914095871,
"grad_norm": 0.42578125,
"learning_rate": 8.842669459347186e-07,
"loss": 1.199,
"step": 1152
},
{
"epoch": 0.8755576649264357,
"grad_norm": 0.35546875,
"learning_rate": 8.736754492819655e-07,
"loss": 1.1787,
"step": 1153
},
{
"epoch": 0.8763170384432842,
"grad_norm": 0.36328125,
"learning_rate": 8.631448681922994e-07,
"loss": 1.1742,
"step": 1154
},
{
"epoch": 0.8770764119601329,
"grad_norm": 0.3359375,
"learning_rate": 8.526752729542831e-07,
"loss": 1.1326,
"step": 1155
},
{
"epoch": 0.8778357854769815,
"grad_norm": 0.365234375,
"learning_rate": 8.42266733449425e-07,
"loss": 1.1984,
"step": 1156
},
{
"epoch": 0.8785951589938301,
"grad_norm": 0.296875,
"learning_rate": 8.319193191517016e-07,
"loss": 1.1403,
"step": 1157
},
{
"epoch": 0.8793545325106787,
"grad_norm": 0.333984375,
"learning_rate": 8.216330991270916e-07,
"loss": 1.1532,
"step": 1158
},
{
"epoch": 0.8801139060275273,
"grad_norm": 0.283203125,
"learning_rate": 8.114081420331266e-07,
"loss": 1.1398,
"step": 1159
},
{
"epoch": 0.8808732795443759,
"grad_norm": 0.283203125,
"learning_rate": 8.012445161184179e-07,
"loss": 1.1201,
"step": 1160
},
{
"epoch": 0.8816326530612245,
"grad_norm": 0.306640625,
"learning_rate": 7.911422892222165e-07,
"loss": 1.1367,
"step": 1161
},
{
"epoch": 0.8823920265780731,
"grad_norm": 0.36328125,
"learning_rate": 7.81101528773951e-07,
"loss": 1.1888,
"step": 1162
},
{
"epoch": 0.8831514000949217,
"grad_norm": 0.373046875,
"learning_rate": 7.711223017927783e-07,
"loss": 1.1283,
"step": 1163
},
{
"epoch": 0.8839107736117703,
"grad_norm": 0.298828125,
"learning_rate": 7.612046748871327e-07,
"loss": 1.114,
"step": 1164
},
{
"epoch": 0.8846701471286189,
"grad_norm": 0.330078125,
"learning_rate": 7.513487142542941e-07,
"loss": 1.1995,
"step": 1165
},
{
"epoch": 0.8854295206454675,
"grad_norm": 0.302734375,
"learning_rate": 7.415544856799362e-07,
"loss": 1.1137,
"step": 1166
},
{
"epoch": 0.886188894162316,
"grad_norm": 0.33984375,
"learning_rate": 7.318220545376842e-07,
"loss": 1.1919,
"step": 1167
},
{
"epoch": 0.8869482676791647,
"grad_norm": 0.298828125,
"learning_rate": 7.221514857886857e-07,
"loss": 1.1217,
"step": 1168
},
{
"epoch": 0.8877076411960133,
"grad_norm": 0.3046875,
"learning_rate": 7.125428439811765e-07,
"loss": 1.1266,
"step": 1169
},
{
"epoch": 0.8884670147128619,
"grad_norm": 0.318359375,
"learning_rate": 7.029961932500506e-07,
"loss": 1.159,
"step": 1170
},
{
"epoch": 0.8892263882297105,
"grad_norm": 0.33984375,
"learning_rate": 6.935115973164208e-07,
"loss": 1.1782,
"step": 1171
},
{
"epoch": 0.8899857617465591,
"grad_norm": 0.3046875,
"learning_rate": 6.840891194872112e-07,
"loss": 1.109,
"step": 1172
},
{
"epoch": 0.8907451352634077,
"grad_norm": 0.341796875,
"learning_rate": 6.7472882265472e-07,
"loss": 1.2068,
"step": 1173
},
{
"epoch": 0.8915045087802563,
"grad_norm": 0.296875,
"learning_rate": 6.65430769296207e-07,
"loss": 1.1619,
"step": 1174
},
{
"epoch": 0.8922638822971048,
"grad_norm": 0.306640625,
"learning_rate": 6.56195021473478e-07,
"loss": 1.1534,
"step": 1175
},
{
"epoch": 0.8930232558139535,
"grad_norm": 0.384765625,
"learning_rate": 6.470216408324626e-07,
"loss": 1.1999,
"step": 1176
},
{
"epoch": 0.8937826293308021,
"grad_norm": 0.3046875,
"learning_rate": 6.379106886028086e-07,
"loss": 1.1417,
"step": 1177
},
{
"epoch": 0.8945420028476507,
"grad_norm": 0.328125,
"learning_rate": 6.288622255974741e-07,
"loss": 1.1552,
"step": 1178
},
{
"epoch": 0.8953013763644992,
"grad_norm": 0.341796875,
"learning_rate": 6.198763122123208e-07,
"loss": 1.1639,
"step": 1179
},
{
"epoch": 0.8960607498813479,
"grad_norm": 0.2890625,
"learning_rate": 6.109530084257043e-07,
"loss": 1.1234,
"step": 1180
},
{
"epoch": 0.8968201233981965,
"grad_norm": 0.353515625,
"learning_rate": 6.020923737980877e-07,
"loss": 1.1633,
"step": 1181
},
{
"epoch": 0.8975794969150451,
"grad_norm": 0.318359375,
"learning_rate": 5.932944674716279e-07,
"loss": 1.1606,
"step": 1182
},
{
"epoch": 0.8983388704318936,
"grad_norm": 0.322265625,
"learning_rate": 5.845593481697931e-07,
"loss": 1.1113,
"step": 1183
},
{
"epoch": 0.8990982439487423,
"grad_norm": 0.341796875,
"learning_rate": 5.758870741969635e-07,
"loss": 1.1429,
"step": 1184
},
{
"epoch": 0.8998576174655909,
"grad_norm": 0.306640625,
"learning_rate": 5.672777034380483e-07,
"loss": 1.1521,
"step": 1185
},
{
"epoch": 0.9006169909824395,
"grad_norm": 0.30078125,
"learning_rate": 5.587312933580946e-07,
"loss": 1.1341,
"step": 1186
},
{
"epoch": 0.901376364499288,
"grad_norm": 0.318359375,
"learning_rate": 5.502479010019046e-07,
"loss": 1.143,
"step": 1187
},
{
"epoch": 0.9021357380161367,
"grad_norm": 0.337890625,
"learning_rate": 5.418275829936537e-07,
"loss": 1.1586,
"step": 1188
},
{
"epoch": 0.9028951115329853,
"grad_norm": 0.33203125,
"learning_rate": 5.334703955365183e-07,
"loss": 1.1349,
"step": 1189
},
{
"epoch": 0.9036544850498339,
"grad_norm": 0.3671875,
"learning_rate": 5.251763944122956e-07,
"loss": 1.2187,
"step": 1190
},
{
"epoch": 0.9044138585666824,
"grad_norm": 0.349609375,
"learning_rate": 5.169456349810342e-07,
"loss": 1.2073,
"step": 1191
},
{
"epoch": 0.9051732320835311,
"grad_norm": 0.369140625,
"learning_rate": 5.087781721806539e-07,
"loss": 1.162,
"step": 1192
},
{
"epoch": 0.9059326056003797,
"grad_norm": 0.36328125,
"learning_rate": 5.00674060526598e-07,
"loss": 1.1938,
"step": 1193
},
{
"epoch": 0.9066919791172283,
"grad_norm": 0.345703125,
"learning_rate": 4.926333541114558e-07,
"loss": 1.1564,
"step": 1194
},
{
"epoch": 0.9074513526340768,
"grad_norm": 0.412109375,
"learning_rate": 4.846561066046063e-07,
"loss": 1.2107,
"step": 1195
},
{
"epoch": 0.9082107261509255,
"grad_norm": 0.380859375,
"learning_rate": 4.7674237125185597e-07,
"loss": 1.2019,
"step": 1196
},
{
"epoch": 0.9089700996677741,
"grad_norm": 0.37109375,
"learning_rate": 4.6889220087508514e-07,
"loss": 1.1731,
"step": 1197
},
{
"epoch": 0.9097294731846227,
"grad_norm": 0.341796875,
"learning_rate": 4.611056478719023e-07,
"loss": 1.1591,
"step": 1198
},
{
"epoch": 0.9104888467014712,
"grad_norm": 0.349609375,
"learning_rate": 4.5338276421528435e-07,
"loss": 1.1698,
"step": 1199
},
{
"epoch": 0.9112482202183199,
"grad_norm": 0.3359375,
"learning_rate": 4.45723601453234e-07,
"loss": 1.179,
"step": 1200
},
{
"epoch": 0.9120075937351685,
"grad_norm": 0.35546875,
"learning_rate": 4.3812821070843394e-07,
"loss": 1.1383,
"step": 1201
},
{
"epoch": 0.9127669672520171,
"grad_norm": 0.326171875,
"learning_rate": 4.305966426779118e-07,
"loss": 1.118,
"step": 1202
},
{
"epoch": 0.9135263407688657,
"grad_norm": 0.267578125,
"learning_rate": 4.2312894763269385e-07,
"loss": 1.1147,
"step": 1203
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.39453125,
"learning_rate": 4.1572517541747294e-07,
"loss": 1.2228,
"step": 1204
},
{
"epoch": 0.9150450878025629,
"grad_norm": 0.326171875,
"learning_rate": 4.0838537545027755e-07,
"loss": 1.144,
"step": 1205
},
{
"epoch": 0.9158044613194115,
"grad_norm": 0.32421875,
"learning_rate": 4.0110959672213676e-07,
"loss": 1.1403,
"step": 1206
},
{
"epoch": 0.9165638348362601,
"grad_norm": 0.341796875,
"learning_rate": 3.9389788779675806e-07,
"loss": 1.1552,
"step": 1207
},
{
"epoch": 0.9173232083531087,
"grad_norm": 0.349609375,
"learning_rate": 3.867502968102055e-07,
"loss": 1.1785,
"step": 1208
},
{
"epoch": 0.9180825818699573,
"grad_norm": 0.357421875,
"learning_rate": 3.7966687147056533e-07,
"loss": 1.1487,
"step": 1209
},
{
"epoch": 0.9188419553868059,
"grad_norm": 0.3125,
"learning_rate": 3.7264765905764776e-07,
"loss": 1.1304,
"step": 1210
},
{
"epoch": 0.9196013289036545,
"grad_norm": 0.28515625,
"learning_rate": 3.656927064226512e-07,
"loss": 1.1109,
"step": 1211
},
{
"epoch": 0.920360702420503,
"grad_norm": 0.298828125,
"learning_rate": 3.588020599878639e-07,
"loss": 1.148,
"step": 1212
},
{
"epoch": 0.9211200759373517,
"grad_norm": 0.33984375,
"learning_rate": 3.519757657463474e-07,
"loss": 1.1745,
"step": 1213
},
{
"epoch": 0.9218794494542003,
"grad_norm": 0.34765625,
"learning_rate": 3.4521386926163134e-07,
"loss": 1.1452,
"step": 1214
},
{
"epoch": 0.9226388229710489,
"grad_norm": 0.330078125,
"learning_rate": 3.3851641566740813e-07,
"loss": 1.1598,
"step": 1215
},
{
"epoch": 0.9233981964878974,
"grad_norm": 0.365234375,
"learning_rate": 3.3188344966723516e-07,
"loss": 1.1889,
"step": 1216
},
{
"epoch": 0.9241575700047461,
"grad_norm": 0.345703125,
"learning_rate": 3.2531501553422884e-07,
"loss": 1.1822,
"step": 1217
},
{
"epoch": 0.9249169435215947,
"grad_norm": 0.318359375,
"learning_rate": 3.1881115711077994e-07,
"loss": 1.1675,
"step": 1218
},
{
"epoch": 0.9256763170384433,
"grad_norm": 0.294921875,
"learning_rate": 3.123719178082529e-07,
"loss": 1.1539,
"step": 1219
},
{
"epoch": 0.9264356905552918,
"grad_norm": 0.3671875,
"learning_rate": 3.059973406066963e-07,
"loss": 1.1554,
"step": 1220
},
{
"epoch": 0.9271950640721405,
"grad_norm": 0.361328125,
"learning_rate": 2.996874680545603e-07,
"loss": 1.1506,
"step": 1221
},
{
"epoch": 0.9279544375889891,
"grad_norm": 0.34765625,
"learning_rate": 2.9344234226840964e-07,
"loss": 1.167,
"step": 1222
},
{
"epoch": 0.9287138111058377,
"grad_norm": 0.302734375,
"learning_rate": 2.872620049326436e-07,
"loss": 1.1533,
"step": 1223
},
{
"epoch": 0.9294731846226862,
"grad_norm": 0.345703125,
"learning_rate": 2.811464972992195e-07,
"loss": 1.1686,
"step": 1224
},
{
"epoch": 0.9302325581395349,
"grad_norm": 0.361328125,
"learning_rate": 2.7509586018736764e-07,
"loss": 1.1638,
"step": 1225
},
{
"epoch": 0.9309919316563835,
"grad_norm": 0.361328125,
"learning_rate": 2.6911013398333464e-07,
"loss": 1.1969,
"step": 1226
},
{
"epoch": 0.9317513051732321,
"grad_norm": 0.333984375,
"learning_rate": 2.6318935864010133e-07,
"loss": 1.1527,
"step": 1227
},
{
"epoch": 0.9325106786900806,
"grad_norm": 0.357421875,
"learning_rate": 2.573335736771254e-07,
"loss": 1.1725,
"step": 1228
},
{
"epoch": 0.9332700522069293,
"grad_norm": 0.259765625,
"learning_rate": 2.51542818180065e-07,
"loss": 1.0826,
"step": 1229
},
{
"epoch": 0.9340294257237779,
"grad_norm": 0.314453125,
"learning_rate": 2.458171308005308e-07,
"loss": 1.1372,
"step": 1230
},
{
"epoch": 0.9347887992406265,
"grad_norm": 0.29296875,
"learning_rate": 2.4015654975582225e-07,
"loss": 1.1359,
"step": 1231
},
{
"epoch": 0.935548172757475,
"grad_norm": 0.294921875,
"learning_rate": 2.3456111282867178e-07,
"loss": 1.1214,
"step": 1232
},
{
"epoch": 0.9363075462743237,
"grad_norm": 0.28125,
"learning_rate": 2.2903085736699414e-07,
"loss": 1.0865,
"step": 1233
},
{
"epoch": 0.9370669197911723,
"grad_norm": 0.3828125,
"learning_rate": 2.2356582028363548e-07,
"loss": 1.1849,
"step": 1234
},
{
"epoch": 0.9378262933080209,
"grad_norm": 0.28125,
"learning_rate": 2.1816603805613012e-07,
"loss": 1.137,
"step": 1235
},
{
"epoch": 0.9385856668248694,
"grad_norm": 0.30859375,
"learning_rate": 2.1283154672645522e-07,
"loss": 1.1179,
"step": 1236
},
{
"epoch": 0.9393450403417181,
"grad_norm": 0.333984375,
"learning_rate": 2.0756238190078991e-07,
"loss": 1.1576,
"step": 1237
},
{
"epoch": 0.9401044138585667,
"grad_norm": 0.3359375,
"learning_rate": 2.0235857874927655e-07,
"loss": 1.1685,
"step": 1238
},
{
"epoch": 0.9408637873754153,
"grad_norm": 0.359375,
"learning_rate": 1.9722017200578757e-07,
"loss": 1.167,
"step": 1239
},
{
"epoch": 0.9416231608922638,
"grad_norm": 0.302734375,
"learning_rate": 1.921471959676957e-07,
"loss": 1.0967,
"step": 1240
},
{
"epoch": 0.9423825344091125,
"grad_norm": 0.35546875,
"learning_rate": 1.8713968449564079e-07,
"loss": 1.185,
"step": 1241
},
{
"epoch": 0.9431419079259611,
"grad_norm": 0.265625,
"learning_rate": 1.8219767101330442e-07,
"loss": 1.1248,
"step": 1242
},
{
"epoch": 0.9439012814428097,
"grad_norm": 0.353515625,
"learning_rate": 1.7732118850719237e-07,
"loss": 1.1056,
"step": 1243
},
{
"epoch": 0.9446606549596582,
"grad_norm": 0.322265625,
"learning_rate": 1.7251026952640583e-07,
"loss": 1.1053,
"step": 1244
},
{
"epoch": 0.9454200284765069,
"grad_norm": 0.328125,
"learning_rate": 1.6776494618243156e-07,
"loss": 1.1511,
"step": 1245
},
{
"epoch": 0.9461794019933555,
"grad_norm": 0.341796875,
"learning_rate": 1.6308525014892217e-07,
"loss": 1.1568,
"step": 1246
},
{
"epoch": 0.9469387755102041,
"grad_norm": 0.357421875,
"learning_rate": 1.5847121266148847e-07,
"loss": 1.1354,
"step": 1247
},
{
"epoch": 0.9476981490270526,
"grad_norm": 0.345703125,
"learning_rate": 1.539228645174895e-07,
"loss": 1.2015,
"step": 1248
},
{
"epoch": 0.9484575225439013,
"grad_norm": 0.29296875,
"learning_rate": 1.4944023607582737e-07,
"loss": 1.1045,
"step": 1249
},
{
"epoch": 0.9492168960607499,
"grad_norm": 0.318359375,
"learning_rate": 1.4502335725674165e-07,
"loss": 1.1576,
"step": 1250
},
{
"epoch": 0.9499762695775985,
"grad_norm": 0.32421875,
"learning_rate": 1.406722575416164e-07,
"loss": 1.1525,
"step": 1251
},
{
"epoch": 0.9507356430944471,
"grad_norm": 0.384765625,
"learning_rate": 1.3638696597277678e-07,
"loss": 1.1828,
"step": 1252
},
{
"epoch": 0.9514950166112957,
"grad_norm": 0.322265625,
"learning_rate": 1.3216751115329718e-07,
"loss": 1.1428,
"step": 1253
},
{
"epoch": 0.9522543901281443,
"grad_norm": 0.294921875,
"learning_rate": 1.2801392124681233e-07,
"loss": 1.1528,
"step": 1254
},
{
"epoch": 0.9530137636449929,
"grad_norm": 0.318359375,
"learning_rate": 1.2392622397732756e-07,
"loss": 1.1491,
"step": 1255
},
{
"epoch": 0.9537731371618415,
"grad_norm": 0.326171875,
"learning_rate": 1.1990444662903445e-07,
"loss": 1.2012,
"step": 1256
},
{
"epoch": 0.95453251067869,
"grad_norm": 0.275390625,
"learning_rate": 1.159486160461265e-07,
"loss": 1.1128,
"step": 1257
},
{
"epoch": 0.9552918841955387,
"grad_norm": 0.33203125,
"learning_rate": 1.1205875863262272e-07,
"loss": 1.1725,
"step": 1258
},
{
"epoch": 0.9560512577123873,
"grad_norm": 0.359375,
"learning_rate": 1.0823490035218986e-07,
"loss": 1.1942,
"step": 1259
},
{
"epoch": 0.9568106312292359,
"grad_norm": 0.33984375,
"learning_rate": 1.0447706672797264e-07,
"loss": 1.1906,
"step": 1260
},
{
"epoch": 0.9575700047460844,
"grad_norm": 0.3671875,
"learning_rate": 1.0078528284241606e-07,
"loss": 1.1831,
"step": 1261
},
{
"epoch": 0.9583293782629331,
"grad_norm": 0.388671875,
"learning_rate": 9.715957333710447e-08,
"loss": 1.1504,
"step": 1262
},
{
"epoch": 0.9590887517797817,
"grad_norm": 0.322265625,
"learning_rate": 9.359996241259384e-08,
"loss": 1.1406,
"step": 1263
},
{
"epoch": 0.9598481252966303,
"grad_norm": 0.330078125,
"learning_rate": 9.010647382825421e-08,
"loss": 1.1464,
"step": 1264
},
{
"epoch": 0.9606074988134788,
"grad_norm": 0.341796875,
"learning_rate": 8.667913090210534e-08,
"loss": 1.1418,
"step": 1265
},
{
"epoch": 0.9613668723303275,
"grad_norm": 0.40234375,
"learning_rate": 8.331795651066455e-08,
"loss": 1.1785,
"step": 1266
},
{
"epoch": 0.9621262458471761,
"grad_norm": 0.333984375,
"learning_rate": 8.002297308879359e-08,
"loss": 1.1703,
"step": 1267
},
{
"epoch": 0.9628856193640247,
"grad_norm": 0.349609375,
"learning_rate": 7.679420262954984e-08,
"loss": 1.1569,
"step": 1268
},
{
"epoch": 0.9636449928808732,
"grad_norm": 0.3125,
"learning_rate": 7.363166668403643e-08,
"loss": 1.1488,
"step": 1269
},
{
"epoch": 0.9644043663977219,
"grad_norm": 0.37890625,
"learning_rate": 7.053538636126123e-08,
"loss": 1.1948,
"step": 1270
},
{
"epoch": 0.9651637399145705,
"grad_norm": 0.341796875,
"learning_rate": 6.750538232799586e-08,
"loss": 1.1496,
"step": 1271
},
{
"epoch": 0.9659231134314191,
"grad_norm": 0.330078125,
"learning_rate": 6.454167480863694e-08,
"loss": 1.1463,
"step": 1272
},
{
"epoch": 0.9666824869482676,
"grad_norm": 0.302734375,
"learning_rate": 6.164428358506947e-08,
"loss": 1.1507,
"step": 1273
},
{
"epoch": 0.9674418604651163,
"grad_norm": 0.369140625,
"learning_rate": 5.881322799653699e-08,
"loss": 1.1549,
"step": 1274
},
{
"epoch": 0.9682012339819649,
"grad_norm": 0.3203125,
"learning_rate": 5.6048526939512794e-08,
"loss": 1.1406,
"step": 1275
},
{
"epoch": 0.9689606074988135,
"grad_norm": 0.30859375,
"learning_rate": 5.3350198867574424e-08,
"loss": 1.1267,
"step": 1276
},
{
"epoch": 0.969719981015662,
"grad_norm": 0.31640625,
"learning_rate": 5.0718261791274924e-08,
"loss": 1.147,
"step": 1277
},
{
"epoch": 0.9704793545325107,
"grad_norm": 0.30078125,
"learning_rate": 4.815273327803183e-08,
"loss": 1.1504,
"step": 1278
},
{
"epoch": 0.9712387280493593,
"grad_norm": 0.29296875,
"learning_rate": 4.5653630451998335e-08,
"loss": 1.1471,
"step": 1279
},
{
"epoch": 0.9719981015662079,
"grad_norm": 0.3203125,
"learning_rate": 4.32209699939623e-08,
"loss": 1.1204,
"step": 1280
},
{
"epoch": 0.9727574750830564,
"grad_norm": 0.353515625,
"learning_rate": 4.085476814122413e-08,
"loss": 1.1692,
"step": 1281
},
{
"epoch": 0.9735168485999051,
"grad_norm": 0.310546875,
"learning_rate": 3.8555040687493494e-08,
"loss": 1.1089,
"step": 1282
},
{
"epoch": 0.9742762221167537,
"grad_norm": 0.279296875,
"learning_rate": 3.632180298278165e-08,
"loss": 1.0833,
"step": 1283
},
{
"epoch": 0.9750355956336023,
"grad_norm": 0.322265625,
"learning_rate": 3.4155069933301535e-08,
"loss": 1.1362,
"step": 1284
},
{
"epoch": 0.9757949691504508,
"grad_norm": 0.365234375,
"learning_rate": 3.2054856001366706e-08,
"loss": 1.2,
"step": 1285
},
{
"epoch": 0.9765543426672995,
"grad_norm": 0.439453125,
"learning_rate": 3.0021175205294794e-08,
"loss": 1.2642,
"step": 1286
},
{
"epoch": 0.9773137161841481,
"grad_norm": 0.365234375,
"learning_rate": 2.805404111931198e-08,
"loss": 1.1712,
"step": 1287
},
{
"epoch": 0.9780730897009967,
"grad_norm": 0.373046875,
"learning_rate": 2.6153466873468646e-08,
"loss": 1.1773,
"step": 1288
},
{
"epoch": 0.9788324632178452,
"grad_norm": 0.314453125,
"learning_rate": 2.4319465153543886e-08,
"loss": 1.1556,
"step": 1289
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.326171875,
"learning_rate": 2.255204820096668e-08,
"loss": 1.1467,
"step": 1290
},
{
"epoch": 0.9803512102515425,
"grad_norm": 0.34375,
"learning_rate": 2.0851227812731523e-08,
"loss": 1.1793,
"step": 1291
},
{
"epoch": 0.9811105837683911,
"grad_norm": 0.326171875,
"learning_rate": 1.9217015341318478e-08,
"loss": 1.1366,
"step": 1292
},
{
"epoch": 0.9818699572852396,
"grad_norm": 0.33984375,
"learning_rate": 1.764942169462325e-08,
"loss": 1.1893,
"step": 1293
},
{
"epoch": 0.9826293308020883,
"grad_norm": 0.291015625,
"learning_rate": 1.6148457335876112e-08,
"loss": 1.1308,
"step": 1294
},
{
"epoch": 0.9833887043189369,
"grad_norm": 0.4453125,
"learning_rate": 1.4714132283577543e-08,
"loss": 1.2597,
"step": 1295
},
{
"epoch": 0.9841480778357855,
"grad_norm": 0.294921875,
"learning_rate": 1.3346456111430484e-08,
"loss": 1.1048,
"step": 1296
},
{
"epoch": 0.984907451352634,
"grad_norm": 0.30078125,
"learning_rate": 1.2045437948275952e-08,
"loss": 1.1165,
"step": 1297
},
{
"epoch": 0.9856668248694826,
"grad_norm": 0.365234375,
"learning_rate": 1.0811086478031973e-08,
"loss": 1.1419,
"step": 1298
},
{
"epoch": 0.9864261983863313,
"grad_norm": 0.330078125,
"learning_rate": 9.643409939636972e-09,
"loss": 1.1656,
"step": 1299
},
{
"epoch": 0.9871855719031799,
"grad_norm": 0.318359375,
"learning_rate": 8.542416126989805e-09,
"loss": 1.1344,
"step": 1300
},
{
"epoch": 0.9879449454200285,
"grad_norm": 0.34375,
"learning_rate": 7.508112388905363e-09,
"loss": 1.1509,
"step": 1301
},
{
"epoch": 0.988704318936877,
"grad_norm": 0.365234375,
"learning_rate": 6.540505629061278e-09,
"loss": 1.1836,
"step": 1302
},
{
"epoch": 0.9894636924537257,
"grad_norm": 0.3359375,
"learning_rate": 5.639602305950176e-09,
"loss": 1.1659,
"step": 1303
},
{
"epoch": 0.9902230659705743,
"grad_norm": 0.32421875,
"learning_rate": 4.80540843283972e-09,
"loss": 1.1539,
"step": 1304
},
{
"epoch": 0.9909824394874229,
"grad_norm": 0.298828125,
"learning_rate": 4.037929577732636e-09,
"loss": 1.1051,
"step": 1305
},
{
"epoch": 0.9917418130042714,
"grad_norm": 0.3203125,
"learning_rate": 3.3371708633267443e-09,
"loss": 1.153,
"step": 1306
},
{
"epoch": 0.9925011865211201,
"grad_norm": 0.3828125,
"learning_rate": 2.7031369669816566e-09,
"loss": 1.1997,
"step": 1307
},
{
"epoch": 0.9932605600379687,
"grad_norm": 0.283203125,
"learning_rate": 2.1358321206899067e-09,
"loss": 1.1305,
"step": 1308
},
{
"epoch": 0.9940199335548173,
"grad_norm": 0.265625,
"learning_rate": 1.6352601110469768e-09,
"loss": 1.0931,
"step": 1309
},
{
"epoch": 0.9947793070716658,
"grad_norm": 0.333984375,
"learning_rate": 1.20142427922465e-09,
"loss": 1.1754,
"step": 1310
},
{
"epoch": 0.9955386805885145,
"grad_norm": 0.39453125,
"learning_rate": 8.343275209521384e-10,
"loss": 1.2122,
"step": 1311
},
{
"epoch": 0.9962980541053631,
"grad_norm": 0.345703125,
"learning_rate": 5.339722864927677e-10,
"loss": 1.1428,
"step": 1312
},
{
"epoch": 0.9970574276222117,
"grad_norm": 0.326171875,
"learning_rate": 3.003605806306542e-10,
"loss": 1.1282,
"step": 1313
},
{
"epoch": 0.9978168011390602,
"grad_norm": 0.322265625,
"learning_rate": 1.3349396265516235e-10,
"loss": 1.1608,
"step": 1314
},
{
"epoch": 0.9985761746559089,
"grad_norm": 0.3203125,
"learning_rate": 3.3373546353132614e-11,
"loss": 1.1562,
"step": 1315
},
{
"epoch": 0.9993355481727575,
"grad_norm": 0.28515625,
"learning_rate": 0.0,
"loss": 1.1401,
"step": 1316
},
{
"epoch": 0.9993355481727575,
"eval_loss": 1.151589274406433,
"eval_runtime": 640.5297,
"eval_samples_per_second": 92.364,
"eval_steps_per_second": 7.698,
"step": 1316
}
],
"logging_steps": 1,
"max_steps": 1316,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.953779885289767e+18,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}