{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.7915611293012912, "eval_steps": 500, "global_step": 14500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.5140399932861328, "learning_rate": 3.722126480018763e-05, "loss": 1.6355, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.0, "grad_norm": 1.3762837648391724, "learning_rate": 4.8425981981596005e-05, "loss": 1.3277, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.0, "grad_norm": 1.4097976684570312, "learning_rate": 5.4980321363905964e-05, "loss": 1.2103, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.0, "grad_norm": 0.8763718605041504, "learning_rate": 5.963069916300439e-05, "loss": 1.1981, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 0.7860246300697327, "learning_rate": 6.323781241896688e-05, "loss": 1.1533, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 0.8660722970962524, "learning_rate": 6.618503854531432e-05, "loss": 1.1533, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 0.9666918516159058, "learning_rate": 6.867688272967785e-05, "loss": 1.1299, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.17, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 0.9307446479797363, "learning_rate": 7.083541634441276e-05, "loss": 1.1127, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 1.1029677391052246, "learning_rate": 7.273937792762429e-05, "loss": 1.0781, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 1.121899127960205, "learning_rate": 7.444252960037526e-05, "loss": 1.1192, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 0.9134292602539062, "learning_rate": 7.598321769544036e-05, "loss": 1.1036, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.01, "grad_norm": 1.0284743309020996, "learning_rate": 7.738975572672271e-05, "loss": 1.0748, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.9237496852874756, "learning_rate": 7.86836452888091e-05, "loss": 1.083, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 1.0437508821487427, "learning_rate": 7.988159991108621e-05, "loss": 1.041, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 1.0131335258483887, "learning_rate": 8.09968689826852e-05, "loss": 1.0847, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.9940443634986877, "learning_rate": 8.204013352582114e-05, "loss": 1.0519, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.9474312663078308, "learning_rate": 8.302012992591362e-05, "loss": 1.0354, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 1.0124878883361816, "learning_rate": 8.394409510903266e-05, "loss": 1.0626, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.8570514917373657, "learning_rate": 8.481809119544634e-05, "loss": 1.0267, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.8720101118087769, "learning_rate": 8.564724678178364e-05, "loss": 1.0212, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.8913308382034302, "learning_rate": 8.643593929339616e-05, "loss": 1.0065, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.7943713068962097, "learning_rate": 8.718793487684874e-05, "loss": 1.0079, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.9145928025245667, "learning_rate": 8.790649717038493e-05, "loss": 1.0043, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 1.0095473527908325, "learning_rate": 8.859447290813108e-05, "loss": 1.0172, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.8466660380363464, "learning_rate": 8.925436003774613e-05, "loss": 1.0382, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.8690722584724426, "learning_rate": 8.988836247021747e-05, "loss": 0.9887, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.901485025882721, "learning_rate": 9.04984344913426e-05, "loss": 1.0159, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.7459328174591064, "learning_rate": 9.108631709249458e-05, "loss": 0.9864, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.8639645576477051, "learning_rate": 9.16535679232489e-05, "loss": 0.9763, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.9529710412025452, "learning_rate": 9.220158616409358e-05, "loss": 0.9864, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.697185754776001, "learning_rate": 9.273163331924945e-05, "loss": 0.9927, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.7157967686653137, "learning_rate": 9.324485070722952e-05, "loss": 1.017, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.848825991153717, "learning_rate": 9.374227425915866e-05, "loss": 1.0055, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.8817904591560364, "learning_rate": 9.4224847107322e-05, "loss": 0.9788, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.7796025276184082, "learning_rate": 9.469343034845709e-05, "loss": 0.999, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 1.0431816577911377, "learning_rate": 9.514881229044104e-05, "loss": 0.969, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.8022053241729736, "learning_rate": 9.559171643179602e-05, "loss": 1.0107, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.7660630345344543, "learning_rate": 9.602280837685472e-05, "loss": 0.9832, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.7798706293106079, "learning_rate": 9.644270185252743e-05, "loss": 0.9871, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.6778541207313538, "learning_rate": 9.685196396319201e-05, "loss": 0.9831, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.7524168491363525, "learning_rate": 9.725111979662076e-05, "loss": 0.9801, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.7357635498046875, "learning_rate": 9.764065647480455e-05, "loss": 0.9778, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.7956339716911316, "learning_rate": 9.802102672806894e-05, "loss": 0.981, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.6962536573410034, "learning_rate": 9.83926520582571e-05, "loss": 0.9459, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.7492054104804993, "learning_rate": 9.875592554640354e-05, "loss": 0.9785, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.6673711538314819, "learning_rate": 9.911121435179331e-05, "loss": 0.985, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.6954076290130615, "learning_rate": 9.945886194223949e-05, "loss": 0.975, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.7642614841461182, "learning_rate": 9.979919008953946e-05, "loss": 0.9806, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.7033060193061829, "learning_rate": 9.998089171974523e-05, "loss": 0.9694, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.6798336505889893, "learning_rate": 9.99171974522293e-05, "loss": 0.9697, "max_memory_allocated (GB)": 83.34, "memory_allocated (GB)": 14.97, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.7565618753433228, "learning_rate": 9.985350318471339e-05, "loss": 0.9813, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.738100528717041, "learning_rate": 9.978980891719746e-05, "loss": 0.9562, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.6972020864486694, "learning_rate": 9.972611464968154e-05, "loss": 0.9444, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.7740529179573059, "learning_rate": 9.966242038216561e-05, "loss": 0.9693, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.713945209980011, "learning_rate": 9.95987261146497e-05, "loss": 0.9554, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.6673797965049744, "learning_rate": 9.953503184713376e-05, "loss": 0.9745, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.8715893626213074, "learning_rate": 9.947133757961783e-05, "loss": 0.9689, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.7116804718971252, "learning_rate": 9.940764331210192e-05, "loss": 0.9527, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.6342630386352539, "learning_rate": 9.934394904458599e-05, "loss": 0.9637, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.6425337791442871, "learning_rate": 9.928025477707007e-05, "loss": 0.9807, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.72014981508255, "learning_rate": 9.921656050955414e-05, "loss": 0.9873, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.660266637802124, "learning_rate": 9.915286624203823e-05, "loss": 0.9898, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.6311970949172974, "learning_rate": 9.90891719745223e-05, "loss": 0.9508, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.92966628074646, "learning_rate": 9.902547770700638e-05, "loss": 0.9427, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.6435150504112244, "learning_rate": 9.896178343949045e-05, "loss": 0.9434, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.6682836413383484, "learning_rate": 9.889808917197453e-05, "loss": 0.9619, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 1.0358749628067017, "learning_rate": 9.88343949044586e-05, "loss": 0.9774, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.6212781667709351, "learning_rate": 9.877070063694269e-05, "loss": 0.9387, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.5973750948905945, "learning_rate": 9.870700636942676e-05, "loss": 0.9479, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6477555632591248, "learning_rate": 9.864331210191083e-05, "loss": 0.972, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6448289752006531, "learning_rate": 9.857961783439491e-05, "loss": 0.9639, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6806017160415649, "learning_rate": 9.851592356687898e-05, "loss": 0.9501, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6851417422294617, "learning_rate": 9.845222929936306e-05, "loss": 0.9411, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6251152753829956, "learning_rate": 9.838853503184714e-05, "loss": 0.9818, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.6990805864334106, "learning_rate": 9.832484076433122e-05, "loss": 0.961, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.7680927515029907, "learning_rate": 9.826114649681529e-05, "loss": 0.9617, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6304144263267517, "learning_rate": 9.819745222929937e-05, "loss": 0.9731, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6871405839920044, "learning_rate": 9.813375796178344e-05, "loss": 0.9498, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.644216775894165, "learning_rate": 9.807006369426753e-05, "loss": 0.9698, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6013162732124329, "learning_rate": 9.80063694267516e-05, "loss": 0.9843, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.8040651082992554, "learning_rate": 9.794267515923568e-05, "loss": 0.9436, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.7139380574226379, "learning_rate": 9.787898089171975e-05, "loss": 0.9364, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6355264782905579, "learning_rate": 9.781528662420383e-05, "loss": 0.9567, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.7494617700576782, "learning_rate": 9.77515923566879e-05, "loss": 0.9189, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.6250610947608948, "learning_rate": 9.768789808917197e-05, "loss": 0.9525, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.7264392971992493, "learning_rate": 9.762420382165606e-05, "loss": 0.9183, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.626250684261322, "learning_rate": 9.756050955414013e-05, "loss": 0.938, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.5986000299453735, "learning_rate": 9.749681528662421e-05, "loss": 0.9471, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.6429228782653809, "learning_rate": 9.743312101910828e-05, "loss": 0.9328, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.5998768210411072, "learning_rate": 9.736942675159237e-05, "loss": 0.9378, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.7069288492202759, "learning_rate": 9.730573248407644e-05, "loss": 0.9636, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.6847216486930847, "learning_rate": 9.724203821656052e-05, "loss": 0.954, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.5854921936988831, "learning_rate": 9.717834394904459e-05, "loss": 0.9331, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.693719208240509, "learning_rate": 9.711464968152867e-05, "loss": 0.9195, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.6119904518127441, "learning_rate": 9.705095541401274e-05, "loss": 0.9364, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.6169638633728027, "learning_rate": 9.698726114649683e-05, "loss": 0.9508, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.6071642637252808, "learning_rate": 9.69235668789809e-05, "loss": 0.9629, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.5701698660850525, "learning_rate": 9.685987261146498e-05, "loss": 0.9256, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.6308379173278809, "learning_rate": 9.679617834394905e-05, "loss": 0.949, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.5775448083877563, "learning_rate": 9.673248407643312e-05, "loss": 0.9397, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.6638918519020081, "learning_rate": 9.66687898089172e-05, "loss": 0.9468, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6384360194206238, "learning_rate": 9.660509554140128e-05, "loss": 0.9562, "max_memory_allocated (GB)": 94.36, "memory_allocated (GB)": 14.97, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6402448415756226, "learning_rate": 9.654140127388536e-05, "loss": 0.9366, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6450813412666321, "learning_rate": 9.647770700636943e-05, "loss": 0.9163, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6314339637756348, "learning_rate": 9.641401273885351e-05, "loss": 0.9425, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.5695529580116272, "learning_rate": 9.635031847133758e-05, "loss": 0.938, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6315151453018188, "learning_rate": 9.628662420382167e-05, "loss": 0.9495, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6656625866889954, "learning_rate": 9.622292993630574e-05, "loss": 0.9412, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.6286743879318237, "learning_rate": 9.615923566878982e-05, "loss": 0.9577, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.596004843711853, "learning_rate": 9.609554140127389e-05, "loss": 0.9758, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.5832968354225159, "learning_rate": 9.603184713375797e-05, "loss": 0.9375, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.5997422933578491, "learning_rate": 9.596815286624204e-05, "loss": 0.905, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.6267014145851135, "learning_rate": 9.590445859872613e-05, "loss": 0.9496, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.6150197386741638, "learning_rate": 9.58407643312102e-05, "loss": 0.9399, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.5876752734184265, "learning_rate": 9.577707006369427e-05, "loss": 0.9193, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.5847297310829163, "learning_rate": 9.571337579617835e-05, "loss": 0.9299, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.6613140106201172, "learning_rate": 9.564968152866242e-05, "loss": 0.9367, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.625577986240387, "learning_rate": 9.55859872611465e-05, "loss": 0.9678, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6265081167221069, "learning_rate": 9.552229299363058e-05, "loss": 0.9267, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6331568956375122, "learning_rate": 9.545859872611466e-05, "loss": 0.9222, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.5775994658470154, "learning_rate": 9.539490445859873e-05, "loss": 0.9283, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6138973236083984, "learning_rate": 9.533121019108281e-05, "loss": 0.9296, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6381206512451172, "learning_rate": 9.526751592356688e-05, "loss": 0.9388, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.604809045791626, "learning_rate": 9.520382165605097e-05, "loss": 0.9428, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6210758686065674, "learning_rate": 9.514012738853504e-05, "loss": 0.9438, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6653516292572021, "learning_rate": 9.507643312101912e-05, "loss": 0.9396, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.5696619153022766, "learning_rate": 9.501273885350319e-05, "loss": 0.946, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6504731178283691, "learning_rate": 9.494904458598726e-05, "loss": 0.9603, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6198241710662842, "learning_rate": 9.488535031847135e-05, "loss": 0.9091, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6397411823272705, "learning_rate": 9.482165605095542e-05, "loss": 0.933, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6273577809333801, "learning_rate": 9.47579617834395e-05, "loss": 0.9171, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.677662193775177, "learning_rate": 9.469426751592357e-05, "loss": 0.9361, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.6163740754127502, "learning_rate": 9.463057324840765e-05, "loss": 0.9396, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6019936203956604, "learning_rate": 9.456687898089172e-05, "loss": 0.9417, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6324911117553711, "learning_rate": 9.45031847133758e-05, "loss": 0.9254, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.5347614288330078, "learning_rate": 9.443949044585988e-05, "loss": 0.9259, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.5964996814727783, "learning_rate": 9.437579617834396e-05, "loss": 0.9164, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6841687560081482, "learning_rate": 9.431210191082803e-05, "loss": 0.9043, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6945049166679382, "learning_rate": 9.424840764331211e-05, "loss": 0.9339, "max_memory_allocated (GB)": 94.43, "memory_allocated (GB)": 14.97, "step": 1390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6683288812637329, "learning_rate": 9.418471337579618e-05, "loss": 0.929, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.5797848701477051, "learning_rate": 9.412101910828027e-05, "loss": 0.9099, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6015276312828064, "learning_rate": 9.405732484076434e-05, "loss": 0.9411, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6140068769454956, "learning_rate": 9.399363057324841e-05, "loss": 0.9213, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6447374820709229, "learning_rate": 9.392993630573249e-05, "loss": 0.8924, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6505595445632935, "learning_rate": 9.386624203821656e-05, "loss": 0.9281, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6256375312805176, "learning_rate": 9.380254777070065e-05, "loss": 0.9577, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6421294808387756, "learning_rate": 9.373885350318472e-05, "loss": 0.9125, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.7359905242919922, "learning_rate": 9.36751592356688e-05, "loss": 0.909, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.6073474884033203, "learning_rate": 9.361146496815287e-05, "loss": 0.9433, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.5646604299545288, "learning_rate": 9.354777070063695e-05, "loss": 0.9174, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.7164282202720642, "learning_rate": 9.348407643312102e-05, "loss": 0.934, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6293795108795166, "learning_rate": 9.342038216560511e-05, "loss": 0.9134, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6097316145896912, "learning_rate": 9.335668789808918e-05, "loss": 0.913, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6106476783752441, "learning_rate": 9.329299363057326e-05, "loss": 0.9029, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6438948512077332, "learning_rate": 9.322929936305733e-05, "loss": 0.9444, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.633876621723175, "learning_rate": 9.316560509554141e-05, "loss": 0.9239, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6850993633270264, "learning_rate": 9.310191082802549e-05, "loss": 0.9173, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.5955223441123962, "learning_rate": 9.303821656050956e-05, "loss": 0.9402, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.6091153025627136, "learning_rate": 9.297452229299364e-05, "loss": 0.9753, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.5890401005744934, "learning_rate": 9.291082802547771e-05, "loss": 0.9372, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.6389315724372864, "learning_rate": 9.284713375796179e-05, "loss": 0.8853, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.5620580315589905, "learning_rate": 9.278343949044586e-05, "loss": 0.942, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.5825024843215942, "learning_rate": 9.271974522292995e-05, "loss": 0.9107, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.544722318649292, "learning_rate": 9.265605095541402e-05, "loss": 0.907, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.6667870879173279, "learning_rate": 9.25923566878981e-05, "loss": 0.9207, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.599666953086853, "learning_rate": 9.252866242038217e-05, "loss": 0.9117, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.6073837280273438, "learning_rate": 9.246496815286625e-05, "loss": 0.93, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.6553378701210022, "learning_rate": 9.240127388535032e-05, "loss": 0.9076, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.6511078476905823, "learning_rate": 9.233757961783441e-05, "loss": 0.9015, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.5960680246353149, "learning_rate": 9.227388535031848e-05, "loss": 0.915, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.6395820379257202, "learning_rate": 9.221019108280255e-05, "loss": 0.9336, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.6158560514450073, "learning_rate": 9.214649681528663e-05, "loss": 0.9315, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.5883167386054993, "learning_rate": 9.20828025477707e-05, "loss": 0.9243, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.5660105347633362, "learning_rate": 9.201910828025479e-05, "loss": 0.9141, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.5553457736968994, "learning_rate": 9.195541401273886e-05, "loss": 0.9309, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.6037126183509827, "learning_rate": 9.189171974522294e-05, "loss": 0.9239, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.6285903453826904, "learning_rate": 9.182802547770701e-05, "loss": 0.9223, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.5817345976829529, "learning_rate": 9.17643312101911e-05, "loss": 0.9434, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.6466053128242493, "learning_rate": 9.170063694267516e-05, "loss": 0.928, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.6852598190307617, "learning_rate": 9.163694267515925e-05, "loss": 0.9415, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.5549845099449158, "learning_rate": 9.157324840764332e-05, "loss": 0.902, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.5786306262016296, "learning_rate": 9.15095541401274e-05, "loss": 0.9417, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.6237860321998596, "learning_rate": 9.144585987261147e-05, "loss": 0.9348, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.6448583006858826, "learning_rate": 9.138216560509555e-05, "loss": 0.972, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.5415520668029785, "learning_rate": 9.131847133757963e-05, "loss": 0.9277, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.5853114128112793, "learning_rate": 9.12547770700637e-05, "loss": 0.9421, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.5774340033531189, "learning_rate": 9.119108280254778e-05, "loss": 0.9241, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.6107351779937744, "learning_rate": 9.112738853503185e-05, "loss": 0.9133, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.6424444317817688, "learning_rate": 9.106369426751593e-05, "loss": 0.9546, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.5167615413665771, "learning_rate": 9.1e-05, "loss": 0.9397, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6501650214195251, "learning_rate": 9.093630573248409e-05, "loss": 0.9138, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6346274614334106, "learning_rate": 9.087261146496816e-05, "loss": 0.9447, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6025852560997009, "learning_rate": 9.080891719745224e-05, "loss": 0.8942, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.5486747026443481, "learning_rate": 9.074522292993631e-05, "loss": 0.9147, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6243796348571777, "learning_rate": 9.06815286624204e-05, "loss": 0.9298, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6881577372550964, "learning_rate": 9.061783439490446e-05, "loss": 0.9086, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.6005951166152954, "learning_rate": 9.055414012738855e-05, "loss": 0.9158, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.5638641119003296, "learning_rate": 9.049044585987262e-05, "loss": 0.9297, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.505285918712616, "learning_rate": 9.04267515923567e-05, "loss": 0.9313, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 1990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.6503742337226868, "learning_rate": 9.036305732484077e-05, "loss": 0.8957, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.5132340788841248, "learning_rate": 9.029936305732484e-05, "loss": 0.9292, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.6249287724494934, "learning_rate": 9.023566878980893e-05, "loss": 0.9238, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.7638124823570251, "learning_rate": 9.0171974522293e-05, "loss": 0.9347, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.5246148705482483, "learning_rate": 9.010828025477708e-05, "loss": 0.9245, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.7826683521270752, "learning_rate": 9.004458598726115e-05, "loss": 0.9292, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.5587345361709595, "learning_rate": 8.998089171974523e-05, "loss": 0.928, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5445036292076111, "learning_rate": 8.99171974522293e-05, "loss": 0.8869, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5840720534324646, "learning_rate": 8.985350318471339e-05, "loss": 0.9201, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.649189829826355, "learning_rate": 8.978980891719746e-05, "loss": 0.9178, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.6557360291481018, "learning_rate": 8.972611464968154e-05, "loss": 0.9426, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.6620033383369446, "learning_rate": 8.966242038216561e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.6042346358299255, "learning_rate": 8.95987261146497e-05, "loss": 0.9214, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5340855121612549, "learning_rate": 8.953503184713377e-05, "loss": 0.9448, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5642148852348328, "learning_rate": 8.947133757961785e-05, "loss": 0.9163, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5716170072555542, "learning_rate": 8.940764331210192e-05, "loss": 0.9159, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5505129098892212, "learning_rate": 8.934394904458599e-05, "loss": 0.9149, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.6169817447662354, "learning_rate": 8.928025477707007e-05, "loss": 0.9258, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5575990676879883, "learning_rate": 8.921656050955414e-05, "loss": 0.916, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5870167016983032, "learning_rate": 8.915286624203823e-05, "loss": 0.8911, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5575961470603943, "learning_rate": 8.90891719745223e-05, "loss": 0.9301, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.6070531010627747, "learning_rate": 8.902547770700638e-05, "loss": 0.9031, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5783848762512207, "learning_rate": 8.896178343949045e-05, "loss": 0.8998, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.6437045931816101, "learning_rate": 8.889808917197453e-05, "loss": 0.8992, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.608791708946228, "learning_rate": 8.88343949044586e-05, "loss": 0.9362, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.6209393739700317, "learning_rate": 8.877070063694269e-05, "loss": 0.92, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.5823927521705627, "learning_rate": 8.870700636942676e-05, "loss": 0.9126, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.6524648666381836, "learning_rate": 8.864331210191084e-05, "loss": 0.9168, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.5724565982818604, "learning_rate": 8.857961783439491e-05, "loss": 0.9302, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.6382232904434204, "learning_rate": 8.851592356687898e-05, "loss": 0.9029, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.6507483720779419, "learning_rate": 8.845222929936307e-05, "loss": 0.9335, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.6949073076248169, "learning_rate": 8.838853503184714e-05, "loss": 0.9127, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5478443503379822, "learning_rate": 8.832484076433122e-05, "loss": 0.9072, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5452962517738342, "learning_rate": 8.826114649681529e-05, "loss": 0.8968, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5772174000740051, "learning_rate": 8.819745222929937e-05, "loss": 0.9237, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5888592004776001, "learning_rate": 8.813375796178344e-05, "loss": 0.9021, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5698549747467041, "learning_rate": 8.807006369426753e-05, "loss": 0.9173, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.5790499448776245, "learning_rate": 8.80063694267516e-05, "loss": 0.9431, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.6478803157806396, "learning_rate": 8.794267515923568e-05, "loss": 0.9167, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6243005394935608, "learning_rate": 8.787898089171975e-05, "loss": 0.9052, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6000833511352539, "learning_rate": 8.781528662420383e-05, "loss": 0.8992, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.5554850101470947, "learning_rate": 8.77515923566879e-05, "loss": 0.9277, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6706589460372925, "learning_rate": 8.768789808917199e-05, "loss": 0.9232, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6054849624633789, "learning_rate": 8.762420382165606e-05, "loss": 0.8939, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.5416914224624634, "learning_rate": 8.756050955414013e-05, "loss": 0.8967, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6584843993186951, "learning_rate": 8.749681528662421e-05, "loss": 0.9156, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.6535277366638184, "learning_rate": 8.743312101910828e-05, "loss": 0.9269, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.6150842905044556, "learning_rate": 8.736942675159237e-05, "loss": 0.9336, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.5612450838088989, "learning_rate": 8.730573248407644e-05, "loss": 0.897, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.5459498167037964, "learning_rate": 8.724203821656052e-05, "loss": 0.9181, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.5725070238113403, "learning_rate": 8.717834394904459e-05, "loss": 0.9039, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.5893815755844116, "learning_rate": 8.711464968152867e-05, "loss": 0.8934, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.6075538992881775, "learning_rate": 8.705095541401274e-05, "loss": 0.9199, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.7179542183876038, "learning_rate": 8.698726114649683e-05, "loss": 0.8964, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.6916159987449646, "learning_rate": 8.69235668789809e-05, "loss": 0.9228, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5973563194274902, "learning_rate": 8.685987261146498e-05, "loss": 0.9109, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5797328352928162, "learning_rate": 8.679617834394905e-05, "loss": 0.9006, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.6468456983566284, "learning_rate": 8.673248407643314e-05, "loss": 0.9306, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5766948461532593, "learning_rate": 8.66687898089172e-05, "loss": 0.8813, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5691495537757874, "learning_rate": 8.660509554140128e-05, "loss": 0.9204, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.6611936092376709, "learning_rate": 8.654140127388536e-05, "loss": 0.935, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5880815386772156, "learning_rate": 8.647770700636943e-05, "loss": 0.9257, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.581061840057373, "learning_rate": 8.641401273885351e-05, "loss": 0.8615, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.5449854731559753, "learning_rate": 8.635031847133758e-05, "loss": 0.8968, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.5454363822937012, "learning_rate": 8.628662420382167e-05, "loss": 0.9069, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.567973792552948, "learning_rate": 8.622292993630574e-05, "loss": 0.9045, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.5044744610786438, "learning_rate": 8.615923566878982e-05, "loss": 0.9307, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.5225447416305542, "learning_rate": 8.609554140127389e-05, "loss": 0.9395, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.6234666109085083, "learning_rate": 8.603184713375798e-05, "loss": 0.9077, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.5319868922233582, "learning_rate": 8.596815286624205e-05, "loss": 0.9213, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.5291190147399902, "learning_rate": 8.590445859872613e-05, "loss": 0.909, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.6539480686187744, "learning_rate": 8.58407643312102e-05, "loss": 0.8908, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.6586701273918152, "learning_rate": 8.577707006369428e-05, "loss": 0.9112, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5719462633132935, "learning_rate": 8.571337579617835e-05, "loss": 0.8926, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.6857839822769165, "learning_rate": 8.564968152866242e-05, "loss": 0.9244, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5097509026527405, "learning_rate": 8.55859872611465e-05, "loss": 0.9195, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.626815676689148, "learning_rate": 8.552229299363058e-05, "loss": 0.9034, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5483399033546448, "learning_rate": 8.545859872611466e-05, "loss": 0.9295, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5154463648796082, "learning_rate": 8.539490445859873e-05, "loss": 0.936, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.5392872095108032, "learning_rate": 8.533121019108281e-05, "loss": 0.914, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5366926193237305, "learning_rate": 8.526751592356688e-05, "loss": 0.9067, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5744665861129761, "learning_rate": 8.520382165605097e-05, "loss": 0.9105, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5835170745849609, "learning_rate": 8.514012738853504e-05, "loss": 0.917, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.6016770601272583, "learning_rate": 8.507643312101912e-05, "loss": 0.9023, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5826542973518372, "learning_rate": 8.501273885350319e-05, "loss": 0.8775, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5647284388542175, "learning_rate": 8.494904458598728e-05, "loss": 0.8926, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5165654420852661, "learning_rate": 8.488535031847135e-05, "loss": 0.9084, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.5151524543762207, "learning_rate": 8.482165605095542e-05, "loss": 0.9008, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.581031322479248, "learning_rate": 8.47579617834395e-05, "loss": 0.9088, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.5811273455619812, "learning_rate": 8.469426751592357e-05, "loss": 0.8841, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.5163042545318604, "learning_rate": 8.463057324840765e-05, "loss": 0.8814, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.5550446510314941, "learning_rate": 8.456687898089172e-05, "loss": 0.8873, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.6043127775192261, "learning_rate": 8.450318471337581e-05, "loss": 0.9096, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.5609890818595886, "learning_rate": 8.443949044585988e-05, "loss": 0.9039, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.578622579574585, "learning_rate": 8.437579617834396e-05, "loss": 0.9134, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.5179211497306824, "learning_rate": 8.431210191082803e-05, "loss": 0.8848, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5544730424880981, "learning_rate": 8.424840764331212e-05, "loss": 0.881, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5601953864097595, "learning_rate": 8.418471337579619e-05, "loss": 0.9211, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5997374653816223, "learning_rate": 8.412101910828027e-05, "loss": 0.9003, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5613746643066406, "learning_rate": 8.405732484076434e-05, "loss": 0.922, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 2990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5812062621116638, "learning_rate": 8.399363057324842e-05, "loss": 0.9153, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.6788537502288818, "learning_rate": 8.392993630573249e-05, "loss": 0.898, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.5436647534370422, "learning_rate": 8.386624203821656e-05, "loss": 0.9003, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.7214387655258179, "learning_rate": 8.380254777070065e-05, "loss": 0.912, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.5388379096984863, "learning_rate": 8.373885350318472e-05, "loss": 0.9196, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.5177743434906006, "learning_rate": 8.36751592356688e-05, "loss": 0.8901, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.584418535232544, "learning_rate": 8.361146496815287e-05, "loss": 0.8822, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.6588314771652222, "learning_rate": 8.354777070063695e-05, "loss": 0.9292, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.6969806551933289, "learning_rate": 8.348407643312102e-05, "loss": 0.897, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.567183256149292, "learning_rate": 8.342038216560511e-05, "loss": 0.9072, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.5422699451446533, "learning_rate": 8.335668789808918e-05, "loss": 0.9206, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.606003999710083, "learning_rate": 8.329299363057326e-05, "loss": 0.9029, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.5588945150375366, "learning_rate": 8.322929936305733e-05, "loss": 0.8914, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.7723147869110107, "learning_rate": 8.316560509554142e-05, "loss": 0.9052, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.5690342783927917, "learning_rate": 8.310191082802549e-05, "loss": 0.8959, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.4744355082511902, "learning_rate": 8.303821656050957e-05, "loss": 0.9225, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.5398902893066406, "learning_rate": 8.297452229299364e-05, "loss": 0.8994, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.5458796620368958, "learning_rate": 8.291082802547771e-05, "loss": 0.9018, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.6232591867446899, "learning_rate": 8.28471337579618e-05, "loss": 0.9131, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.5887508988380432, "learning_rate": 8.278343949044586e-05, "loss": 0.8985, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5530678629875183, "learning_rate": 8.271974522292995e-05, "loss": 0.8922, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5570492744445801, "learning_rate": 8.265605095541402e-05, "loss": 0.9188, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5902305841445923, "learning_rate": 8.25923566878981e-05, "loss": 0.8917, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5945755243301392, "learning_rate": 8.252866242038217e-05, "loss": 0.8966, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5670562982559204, "learning_rate": 8.246496815286626e-05, "loss": 0.9239, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.542846143245697, "learning_rate": 8.240127388535033e-05, "loss": 0.9198, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5747506618499756, "learning_rate": 8.233757961783441e-05, "loss": 0.8817, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.5135393738746643, "learning_rate": 8.227388535031848e-05, "loss": 0.9082, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5435426831245422, "learning_rate": 8.221019108280256e-05, "loss": 0.9095, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5716527700424194, "learning_rate": 8.214649681528663e-05, "loss": 0.9126, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5149152278900146, "learning_rate": 8.20828025477707e-05, "loss": 0.8981, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.6820017695426941, "learning_rate": 8.201910828025479e-05, "loss": 0.8949, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5794596076011658, "learning_rate": 8.195541401273886e-05, "loss": 0.9184, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5630435943603516, "learning_rate": 8.189171974522294e-05, "loss": 0.9107, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5356283187866211, "learning_rate": 8.182802547770701e-05, "loss": 0.909, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.5827937126159668, "learning_rate": 8.17643312101911e-05, "loss": 0.9021, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.7308997511863708, "learning_rate": 8.170063694267516e-05, "loss": 0.8983, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.5804486274719238, "learning_rate": 8.163694267515925e-05, "loss": 0.8959, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.6027332544326782, "learning_rate": 8.157324840764332e-05, "loss": 0.9061, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.5659801959991455, "learning_rate": 8.15095541401274e-05, "loss": 0.893, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.6081441044807434, "learning_rate": 8.144585987261147e-05, "loss": 0.8899, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.703420102596283, "learning_rate": 8.138216560509556e-05, "loss": 0.9179, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.5405531525611877, "learning_rate": 8.131847133757963e-05, "loss": 0.9216, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.6257489919662476, "learning_rate": 8.125477707006371e-05, "loss": 0.9134, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5303512215614319, "learning_rate": 8.119108280254778e-05, "loss": 0.8945, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.7373655438423157, "learning_rate": 8.112738853503185e-05, "loss": 0.8916, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5460899472236633, "learning_rate": 8.106369426751593e-05, "loss": 0.8931, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5521990060806274, "learning_rate": 8.1e-05, "loss": 0.8566, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5547364950180054, "learning_rate": 8.093630573248409e-05, "loss": 0.897, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5535370111465454, "learning_rate": 8.087261146496816e-05, "loss": 0.8899, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5962079763412476, "learning_rate": 8.080891719745224e-05, "loss": 0.9031, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.5219169855117798, "learning_rate": 8.074522292993631e-05, "loss": 0.9025, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.6031842827796936, "learning_rate": 8.06815286624204e-05, "loss": 0.8659, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5150040984153748, "learning_rate": 8.061783439490447e-05, "loss": 0.9043, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5902099013328552, "learning_rate": 8.055414012738855e-05, "loss": 0.9039, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5089321732521057, "learning_rate": 8.049044585987262e-05, "loss": 0.8955, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5322747826576233, "learning_rate": 8.04267515923567e-05, "loss": 0.896, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5516270995140076, "learning_rate": 8.036305732484077e-05, "loss": 0.9095, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.5034855604171753, "learning_rate": 8.029936305732486e-05, "loss": 0.9156, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.6564231514930725, "learning_rate": 8.023566878980893e-05, "loss": 0.8812, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.530149519443512, "learning_rate": 8.0171974522293e-05, "loss": 0.9207, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5480339527130127, "learning_rate": 8.010828025477708e-05, "loss": 0.8734, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5565770864486694, "learning_rate": 8.004458598726115e-05, "loss": 0.8867, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5027381181716919, "learning_rate": 7.998089171974522e-05, "loss": 0.8744, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5091252326965332, "learning_rate": 7.991719745222929e-05, "loss": 0.9363, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5606957077980042, "learning_rate": 7.985350318471337e-05, "loss": 0.9081, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5584906935691833, "learning_rate": 7.978980891719744e-05, "loss": 0.8972, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5510685443878174, "learning_rate": 7.972611464968153e-05, "loss": 0.9095, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.5009828209877014, "learning_rate": 7.96624203821656e-05, "loss": 0.8797, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5698838829994202, "learning_rate": 7.959872611464968e-05, "loss": 0.9026, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5882912874221802, "learning_rate": 7.953503184713375e-05, "loss": 0.9035, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5969718098640442, "learning_rate": 7.947133757961784e-05, "loss": 0.8707, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5464320182800293, "learning_rate": 7.94076433121019e-05, "loss": 0.8946, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5218807458877563, "learning_rate": 7.934394904458599e-05, "loss": 0.893, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5073594450950623, "learning_rate": 7.928025477707006e-05, "loss": 0.9295, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.5315717458724976, "learning_rate": 7.921656050955414e-05, "loss": 0.874, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.640521764755249, "learning_rate": 7.915286624203821e-05, "loss": 0.8927, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.6283209323883057, "learning_rate": 7.90891719745223e-05, "loss": 0.8859, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.5683282017707825, "learning_rate": 7.902547770700637e-05, "loss": 0.922, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.5616042613983154, "learning_rate": 7.896178343949044e-05, "loss": 0.9092, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.582125186920166, "learning_rate": 7.889808917197452e-05, "loss": 0.8862, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.5669243335723877, "learning_rate": 7.883439490445859e-05, "loss": 0.9283, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.6402415633201599, "learning_rate": 7.877070063694268e-05, "loss": 0.9037, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.5741037726402283, "learning_rate": 7.870700636942675e-05, "loss": 0.9033, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.5755082368850708, "learning_rate": 7.864331210191083e-05, "loss": 0.8608, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5075112581253052, "learning_rate": 7.85796178343949e-05, "loss": 0.8774, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5267348289489746, "learning_rate": 7.851592356687898e-05, "loss": 0.8916, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5770332217216492, "learning_rate": 7.845222929936305e-05, "loss": 0.8453, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5677030086517334, "learning_rate": 7.838853503184714e-05, "loss": 0.895, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.577979564666748, "learning_rate": 7.832484076433121e-05, "loss": 0.9107, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5809403657913208, "learning_rate": 7.826114649681529e-05, "loss": 0.8957, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5649895668029785, "learning_rate": 7.819745222929936e-05, "loss": 0.8991, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.5361667275428772, "learning_rate": 7.813375796178344e-05, "loss": 0.8966, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.5365725159645081, "learning_rate": 7.807006369426751e-05, "loss": 0.8991, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.5496806502342224, "learning_rate": 7.800636942675158e-05, "loss": 0.8738, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.5392926931381226, "learning_rate": 7.794267515923567e-05, "loss": 0.8847, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.5666927099227905, "learning_rate": 7.787898089171974e-05, "loss": 0.8732, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.5134681463241577, "learning_rate": 7.781528662420382e-05, "loss": 0.896, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.4999007284641266, "learning_rate": 7.775159235668789e-05, "loss": 0.8835, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.6310585141181946, "learning_rate": 7.768789808917198e-05, "loss": 0.8467, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 3990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.6398172974586487, "learning_rate": 7.762420382165605e-05, "loss": 0.9006, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5285815000534058, "learning_rate": 7.756050955414013e-05, "loss": 0.8968, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5276922583580017, "learning_rate": 7.74968152866242e-05, "loss": 0.8926, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5247613787651062, "learning_rate": 7.743312101910828e-05, "loss": 0.8891, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5284489393234253, "learning_rate": 7.736942675159235e-05, "loss": 0.9014, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.6317757964134216, "learning_rate": 7.730573248407644e-05, "loss": 0.8714, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5911426544189453, "learning_rate": 7.724203821656051e-05, "loss": 0.8982, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5255340337753296, "learning_rate": 7.717834394904459e-05, "loss": 0.8884, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.5683048963546753, "learning_rate": 7.711464968152866e-05, "loss": 0.8908, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5523961782455444, "learning_rate": 7.705095541401273e-05, "loss": 0.8745, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5336757302284241, "learning_rate": 7.698726114649682e-05, "loss": 0.8728, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5282894372940063, "learning_rate": 7.692356687898089e-05, "loss": 0.8826, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5190914869308472, "learning_rate": 7.685987261146497e-05, "loss": 0.8914, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5775402188301086, "learning_rate": 7.679617834394904e-05, "loss": 0.9145, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.5857258439064026, "learning_rate": 7.673248407643312e-05, "loss": 0.8854, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.567958652973175, "learning_rate": 7.666878980891719e-05, "loss": 0.8926, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.576958954334259, "learning_rate": 7.660509554140128e-05, "loss": 0.906, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5861126184463501, "learning_rate": 7.654140127388535e-05, "loss": 0.898, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5804855227470398, "learning_rate": 7.647770700636943e-05, "loss": 0.8925, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5505447387695312, "learning_rate": 7.64140127388535e-05, "loss": 0.8918, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5158912539482117, "learning_rate": 7.635031847133758e-05, "loss": 0.9067, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.6285148859024048, "learning_rate": 7.628662420382165e-05, "loss": 0.8937, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.60187828540802, "learning_rate": 7.622292993630572e-05, "loss": 0.9046, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5804048776626587, "learning_rate": 7.615923566878981e-05, "loss": 0.8849, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.5304928421974182, "learning_rate": 7.609554140127388e-05, "loss": 0.89, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5056673288345337, "learning_rate": 7.603184713375796e-05, "loss": 0.9253, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5457956194877625, "learning_rate": 7.596815286624203e-05, "loss": 0.8704, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5564907789230347, "learning_rate": 7.590445859872612e-05, "loss": 0.8779, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5742987990379333, "learning_rate": 7.584076433121019e-05, "loss": 0.8948, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5531176328659058, "learning_rate": 7.577707006369427e-05, "loss": 0.875, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5973535180091858, "learning_rate": 7.571337579617834e-05, "loss": 0.8809, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5450004935264587, "learning_rate": 7.564968152866242e-05, "loss": 0.8827, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.6022359728813171, "learning_rate": 7.55859872611465e-05, "loss": 0.8934, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.5830252170562744, "learning_rate": 7.552229299363058e-05, "loss": 0.9135, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.6169658303260803, "learning_rate": 7.545859872611465e-05, "loss": 0.8866, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.5571846961975098, "learning_rate": 7.539490445859873e-05, "loss": 0.8698, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.6459663510322571, "learning_rate": 7.53312101910828e-05, "loss": 0.8953, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.629196047782898, "learning_rate": 7.526751592356687e-05, "loss": 0.8968, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.6803416013717651, "learning_rate": 7.520382165605096e-05, "loss": 0.8984, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.5971508026123047, "learning_rate": 7.514012738853503e-05, "loss": 0.9216, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.5492663979530334, "learning_rate": 7.507643312101911e-05, "loss": 0.8535, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.581967294216156, "learning_rate": 7.501273885350318e-05, "loss": 0.8945, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5171147584915161, "learning_rate": 7.494904458598726e-05, "loss": 0.8509, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5407620668411255, "learning_rate": 7.488535031847133e-05, "loss": 0.879, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5820145606994629, "learning_rate": 7.482165605095542e-05, "loss": 0.9072, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5734186768531799, "learning_rate": 7.475796178343949e-05, "loss": 0.9084, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5551985502243042, "learning_rate": 7.469426751592357e-05, "loss": 0.869, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.6070141196250916, "learning_rate": 7.463057324840764e-05, "loss": 0.8956, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5512224435806274, "learning_rate": 7.456687898089172e-05, "loss": 0.8824, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.5932292342185974, "learning_rate": 7.45031847133758e-05, "loss": 0.8911, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.5633782744407654, "learning_rate": 7.443949044585988e-05, "loss": 0.884, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.5297742486000061, "learning_rate": 7.437579617834395e-05, "loss": 0.8981, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.49679142236709595, "learning_rate": 7.431210191082802e-05, "loss": 0.8798, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.6201618909835815, "learning_rate": 7.42484076433121e-05, "loss": 0.8898, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.5749346017837524, "learning_rate": 7.418471337579617e-05, "loss": 0.8833, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.539679229259491, "learning_rate": 7.412101910828026e-05, "loss": 0.9201, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.5480098128318787, "learning_rate": 7.405732484076433e-05, "loss": 0.8727, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.6261775493621826, "learning_rate": 7.399363057324841e-05, "loss": 0.9153, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.5050274729728699, "learning_rate": 7.392993630573248e-05, "loss": 0.9331, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.6069965958595276, "learning_rate": 7.386624203821656e-05, "loss": 0.8867, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.573829174041748, "learning_rate": 7.380254777070063e-05, "loss": 0.9034, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.5090081095695496, "learning_rate": 7.373885350318472e-05, "loss": 0.8707, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.5630531311035156, "learning_rate": 7.367515923566879e-05, "loss": 0.8904, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.559716522693634, "learning_rate": 7.361146496815287e-05, "loss": 0.8998, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.49673840403556824, "learning_rate": 7.354777070063694e-05, "loss": 0.9104, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.5938435792922974, "learning_rate": 7.348407643312101e-05, "loss": 0.8874, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5486466884613037, "learning_rate": 7.34203821656051e-05, "loss": 0.8756, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5997244119644165, "learning_rate": 7.335668789808917e-05, "loss": 0.8907, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5366829633712769, "learning_rate": 7.329299363057325e-05, "loss": 0.9097, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5445237755775452, "learning_rate": 7.322929936305732e-05, "loss": 0.8628, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5116376876831055, "learning_rate": 7.31656050955414e-05, "loss": 0.86, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.579694390296936, "learning_rate": 7.310191082802547e-05, "loss": 0.9075, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5973986387252808, "learning_rate": 7.303821656050956e-05, "loss": 0.8966, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.5258294343948364, "learning_rate": 7.297452229299363e-05, "loss": 0.8845, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.4957127571105957, "learning_rate": 7.291082802547771e-05, "loss": 0.8919, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.5788044929504395, "learning_rate": 7.284713375796178e-05, "loss": 0.867, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.6266525983810425, "learning_rate": 7.278343949044586e-05, "loss": 0.9157, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.5797058939933777, "learning_rate": 7.271974522292993e-05, "loss": 0.8995, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.5284995436668396, "learning_rate": 7.265605095541402e-05, "loss": 0.8798, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.5411540269851685, "learning_rate": 7.259235668789809e-05, "loss": 0.86, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.7692059874534607, "learning_rate": 7.252866242038216e-05, "loss": 0.8797, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.5444372892379761, "learning_rate": 7.246496815286624e-05, "loss": 0.9047, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.511956512928009, "learning_rate": 7.240127388535031e-05, "loss": 0.8824, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.6188622713088989, "learning_rate": 7.23375796178344e-05, "loss": 0.87, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.5659939050674438, "learning_rate": 7.227388535031847e-05, "loss": 0.8822, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.5688533186912537, "learning_rate": 7.221019108280255e-05, "loss": 0.892, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.627356767654419, "learning_rate": 7.214649681528662e-05, "loss": 0.8911, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.6712976098060608, "learning_rate": 7.20828025477707e-05, "loss": 0.8704, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.5349586606025696, "learning_rate": 7.201910828025477e-05, "loss": 0.8951, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.608233630657196, "learning_rate": 7.195541401273886e-05, "loss": 0.8927, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5030810832977295, "learning_rate": 7.189171974522293e-05, "loss": 0.8674, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5219730734825134, "learning_rate": 7.182802547770701e-05, "loss": 0.893, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5737751722335815, "learning_rate": 7.176433121019108e-05, "loss": 0.9008, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5733808875083923, "learning_rate": 7.170063694267517e-05, "loss": 0.8723, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5448349118232727, "learning_rate": 7.163694267515924e-05, "loss": 0.9006, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.6214298009872437, "learning_rate": 7.15732484076433e-05, "loss": 0.8829, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5896222591400146, "learning_rate": 7.150955414012739e-05, "loss": 0.9047, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.5450072884559631, "learning_rate": 7.144585987261146e-05, "loss": 0.8964, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.5411213636398315, "learning_rate": 7.138216560509554e-05, "loss": 0.8717, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.4791530966758728, "learning_rate": 7.131847133757961e-05, "loss": 0.8823, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 4990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.5541684627532959, "learning_rate": 7.12547770700637e-05, "loss": 0.8912, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.6349590420722961, "learning_rate": 7.119108280254777e-05, "loss": 0.9002, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.5731371641159058, "learning_rate": 7.112738853503185e-05, "loss": 0.8675, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.5998538732528687, "learning_rate": 7.106369426751592e-05, "loss": 0.8972, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.605619490146637, "learning_rate": 7.1e-05, "loss": 0.8887, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.5330061316490173, "learning_rate": 7.093630573248407e-05, "loss": 0.8896, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5728380084037781, "learning_rate": 7.087261146496816e-05, "loss": 0.8993, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.6582812070846558, "learning_rate": 7.080891719745223e-05, "loss": 0.8896, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5117426514625549, "learning_rate": 7.074522292993631e-05, "loss": 0.9062, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5359607338905334, "learning_rate": 7.068152866242038e-05, "loss": 0.8734, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5526896715164185, "learning_rate": 7.061783439490445e-05, "loss": 0.8754, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5385130643844604, "learning_rate": 7.055414012738854e-05, "loss": 0.8882, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.5222037434577942, "learning_rate": 7.04904458598726e-05, "loss": 0.8818, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.6482069492340088, "learning_rate": 7.042675159235669e-05, "loss": 0.8702, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.504509687423706, "learning_rate": 7.036305732484076e-05, "loss": 0.8957, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5588439106941223, "learning_rate": 7.029936305732484e-05, "loss": 0.8873, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.610141396522522, "learning_rate": 7.023566878980891e-05, "loss": 0.897, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5777451992034912, "learning_rate": 7.0171974522293e-05, "loss": 0.9118, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.6056225895881653, "learning_rate": 7.010828025477707e-05, "loss": 0.9095, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5970057845115662, "learning_rate": 7.004458598726115e-05, "loss": 0.8801, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.6030189394950867, "learning_rate": 6.998089171974522e-05, "loss": 0.8945, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5925199389457703, "learning_rate": 6.99171974522293e-05, "loss": 0.8782, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5551109313964844, "learning_rate": 6.985350318471338e-05, "loss": 0.8821, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5022830963134766, "learning_rate": 6.978980891719745e-05, "loss": 0.8675, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5868439674377441, "learning_rate": 6.972611464968153e-05, "loss": 0.8728, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5532885193824768, "learning_rate": 6.96624203821656e-05, "loss": 0.8811, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5532572865486145, "learning_rate": 6.959872611464968e-05, "loss": 0.901, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.594550371170044, "learning_rate": 6.953503184713375e-05, "loss": 0.9012, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5424590110778809, "learning_rate": 6.947133757961784e-05, "loss": 0.8559, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.5972351431846619, "learning_rate": 6.940764331210191e-05, "loss": 0.8713, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.6569409966468811, "learning_rate": 6.934394904458599e-05, "loss": 0.8878, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.597029447555542, "learning_rate": 6.928025477707006e-05, "loss": 0.8804, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.5740005373954773, "learning_rate": 6.921656050955414e-05, "loss": 0.8933, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.548782467842102, "learning_rate": 6.915286624203821e-05, "loss": 0.8706, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.5242761969566345, "learning_rate": 6.90891719745223e-05, "loss": 0.8872, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.6307296752929688, "learning_rate": 6.902547770700637e-05, "loss": 0.8865, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.5386359095573425, "learning_rate": 6.896178343949045e-05, "loss": 0.8894, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.5430437326431274, "learning_rate": 6.889808917197452e-05, "loss": 0.8823, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.49671536684036255, "learning_rate": 6.883439490445859e-05, "loss": 0.8665, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.6163560748100281, "learning_rate": 6.877070063694268e-05, "loss": 0.8845, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.5099320411682129, "learning_rate": 6.870700636942675e-05, "loss": 0.8801, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.5513092279434204, "learning_rate": 6.864331210191083e-05, "loss": 0.8994, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.49879205226898193, "learning_rate": 6.85796178343949e-05, "loss": 0.8943, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.5449738502502441, "learning_rate": 6.851592356687898e-05, "loss": 0.8962, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.46554479002952576, "learning_rate": 6.845222929936305e-05, "loss": 0.888, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.5366774201393127, "learning_rate": 6.838853503184714e-05, "loss": 0.8937, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.614962637424469, "learning_rate": 6.832484076433121e-05, "loss": 0.8691, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.4922376275062561, "learning_rate": 6.826114649681529e-05, "loss": 0.8842, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.6051047444343567, "learning_rate": 6.819745222929936e-05, "loss": 0.9041, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.5461781024932861, "learning_rate": 6.813375796178345e-05, "loss": 0.9112, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.5873090028762817, "learning_rate": 6.807006369426752e-05, "loss": 0.888, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.5473777651786804, "learning_rate": 6.80063694267516e-05, "loss": 0.9067, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.5596609115600586, "learning_rate": 6.794267515923567e-05, "loss": 0.8925, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.48069074749946594, "learning_rate": 6.787898089171974e-05, "loss": 0.8738, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.6374889016151428, "learning_rate": 6.781528662420382e-05, "loss": 0.8667, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.5413127541542053, "learning_rate": 6.77515923566879e-05, "loss": 0.886, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.4677344858646393, "learning_rate": 6.768789808917198e-05, "loss": 0.8752, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.5758306980133057, "learning_rate": 6.762420382165605e-05, "loss": 0.9012, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.4969116151332855, "learning_rate": 6.756050955414013e-05, "loss": 0.9112, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.5981295108795166, "learning_rate": 6.74968152866242e-05, "loss": 0.9015, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.5720081329345703, "learning_rate": 6.743312101910828e-05, "loss": 0.9096, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.569769024848938, "learning_rate": 6.736942675159235e-05, "loss": 0.8706, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.5617003440856934, "learning_rate": 6.730573248407644e-05, "loss": 0.87, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5824927687644958, "learning_rate": 6.724203821656051e-05, "loss": 0.9072, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5539324283599854, "learning_rate": 6.717834394904459e-05, "loss": 0.8934, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5366997122764587, "learning_rate": 6.711464968152866e-05, "loss": 0.8784, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5396209359169006, "learning_rate": 6.705095541401275e-05, "loss": 0.9027, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5348963141441345, "learning_rate": 6.698726114649682e-05, "loss": 0.866, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.5780596137046814, "learning_rate": 6.692356687898089e-05, "loss": 0.8894, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.6637961268424988, "learning_rate": 6.685987261146497e-05, "loss": 0.8851, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.864047110080719, "learning_rate": 6.679617834394904e-05, "loss": 0.8916, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5552239418029785, "learning_rate": 6.673248407643312e-05, "loss": 0.9218, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5573702454566956, "learning_rate": 6.66687898089172e-05, "loss": 0.8956, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5623142719268799, "learning_rate": 6.660509554140128e-05, "loss": 0.8905, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5609655380249023, "learning_rate": 6.654140127388535e-05, "loss": 0.8929, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5581201314926147, "learning_rate": 6.647770700636943e-05, "loss": 0.8699, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5425609946250916, "learning_rate": 6.64140127388535e-05, "loss": 0.8707, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5254411697387695, "learning_rate": 6.635031847133759e-05, "loss": 0.8608, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.5528599619865417, "learning_rate": 6.628662420382166e-05, "loss": 0.8677, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5470523238182068, "learning_rate": 6.622292993630574e-05, "loss": 0.9024, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5529558062553406, "learning_rate": 6.615923566878981e-05, "loss": 0.888, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.6355565786361694, "learning_rate": 6.609554140127388e-05, "loss": 0.8635, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5301648378372192, "learning_rate": 6.603184713375796e-05, "loss": 0.9049, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5182161927223206, "learning_rate": 6.596815286624203e-05, "loss": 0.8601, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.6221626400947571, "learning_rate": 6.590445859872612e-05, "loss": 0.8746, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5320441722869873, "learning_rate": 6.584076433121019e-05, "loss": 0.892, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.5502296686172485, "learning_rate": 6.577707006369427e-05, "loss": 0.8951, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5433371067047119, "learning_rate": 6.571337579617834e-05, "loss": 0.881, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.565592885017395, "learning_rate": 6.564968152866242e-05, "loss": 0.8857, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5991478562355042, "learning_rate": 6.55859872611465e-05, "loss": 0.8834, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5779725909233093, "learning_rate": 6.552229299363058e-05, "loss": 0.9006, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5620351433753967, "learning_rate": 6.545859872611465e-05, "loss": 0.8641, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5702232718467712, "learning_rate": 6.539490445859873e-05, "loss": 0.8617, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5302250385284424, "learning_rate": 6.53312101910828e-05, "loss": 0.8962, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.5041064620018005, "learning_rate": 6.526751592356689e-05, "loss": 0.8696, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5761558413505554, "learning_rate": 6.520382165605096e-05, "loss": 0.862, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5102734565734863, "learning_rate": 6.514012738853503e-05, "loss": 0.8937, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.49469760060310364, "learning_rate": 6.507643312101911e-05, "loss": 0.8656, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5822766423225403, "learning_rate": 6.501273885350318e-05, "loss": 0.8728, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5382155776023865, "learning_rate": 6.494904458598726e-05, "loss": 0.884, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 5990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5767959952354431, "learning_rate": 6.488535031847133e-05, "loss": 0.8785, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5036946535110474, "learning_rate": 6.482165605095542e-05, "loss": 0.891, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.5634387135505676, "learning_rate": 6.475796178343949e-05, "loss": 0.8866, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.6301183700561523, "learning_rate": 6.469426751592357e-05, "loss": 0.8764, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.6013332009315491, "learning_rate": 6.463057324840764e-05, "loss": 0.9015, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.5528202056884766, "learning_rate": 6.456687898089173e-05, "loss": 0.8977, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.568086564540863, "learning_rate": 6.45031847133758e-05, "loss": 0.8669, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.6520410776138306, "learning_rate": 6.443949044585988e-05, "loss": 0.8907, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.5794298648834229, "learning_rate": 6.437579617834395e-05, "loss": 0.8712, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.5090157985687256, "learning_rate": 6.431210191082803e-05, "loss": 0.878, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.6202682852745056, "learning_rate": 6.42484076433121e-05, "loss": 0.8534, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.5463171005249023, "learning_rate": 6.418471337579617e-05, "loss": 0.852, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5335412621498108, "learning_rate": 6.412101910828026e-05, "loss": 0.8868, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5212587118148804, "learning_rate": 6.405732484076433e-05, "loss": 0.9003, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5295570492744446, "learning_rate": 6.399363057324841e-05, "loss": 0.9077, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5488661527633667, "learning_rate": 6.392993630573248e-05, "loss": 0.9139, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.548369824886322, "learning_rate": 6.386624203821656e-05, "loss": 0.8591, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5016814470291138, "learning_rate": 6.380254777070063e-05, "loss": 0.8944, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.5105929374694824, "learning_rate": 6.373885350318472e-05, "loss": 0.8983, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.521321177482605, "learning_rate": 6.367515923566879e-05, "loss": 0.8745, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5385405421257019, "learning_rate": 6.361146496815287e-05, "loss": 0.8836, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5338440537452698, "learning_rate": 6.354777070063694e-05, "loss": 0.9225, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5258084535598755, "learning_rate": 6.348407643312103e-05, "loss": 0.8843, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5688164830207825, "learning_rate": 6.34203821656051e-05, "loss": 0.8958, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5619838237762451, "learning_rate": 6.335668789808917e-05, "loss": 0.8578, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5111443400382996, "learning_rate": 6.329299363057325e-05, "loss": 0.8763, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5403845310211182, "learning_rate": 6.322929936305732e-05, "loss": 0.8788, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.5215356945991516, "learning_rate": 6.31656050955414e-05, "loss": 0.8656, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5903708934783936, "learning_rate": 6.310191082802547e-05, "loss": 0.8882, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.6022163033485413, "learning_rate": 6.303821656050956e-05, "loss": 0.8575, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5367187857627869, "learning_rate": 6.297452229299363e-05, "loss": 0.9121, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5830124020576477, "learning_rate": 6.291082802547771e-05, "loss": 0.8605, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5332449078559875, "learning_rate": 6.284713375796178e-05, "loss": 0.9164, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5586867928504944, "learning_rate": 6.278343949044587e-05, "loss": 0.8642, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.5963526964187622, "learning_rate": 6.271974522292994e-05, "loss": 0.8693, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.6327101588249207, "learning_rate": 6.265605095541402e-05, "loss": 0.8843, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5778951048851013, "learning_rate": 6.259235668789809e-05, "loss": 0.8837, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5711691379547119, "learning_rate": 6.252866242038217e-05, "loss": 0.9106, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5818186402320862, "learning_rate": 6.246496815286624e-05, "loss": 0.8736, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.6049737930297852, "learning_rate": 6.240127388535031e-05, "loss": 0.9108, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5459161996841431, "learning_rate": 6.23375796178344e-05, "loss": 0.8905, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.510424017906189, "learning_rate": 6.227388535031847e-05, "loss": 0.9056, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5309520363807678, "learning_rate": 6.221019108280255e-05, "loss": 0.8693, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.5280246138572693, "learning_rate": 6.214649681528662e-05, "loss": 0.8727, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5652052164077759, "learning_rate": 6.20828025477707e-05, "loss": 0.8704, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5129522681236267, "learning_rate": 6.201910828025477e-05, "loss": 0.8761, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5963137149810791, "learning_rate": 6.195541401273886e-05, "loss": 0.8544, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5989205241203308, "learning_rate": 6.189171974522293e-05, "loss": 0.89, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5194454193115234, "learning_rate": 6.182802547770701e-05, "loss": 0.8731, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.4959360957145691, "learning_rate": 6.176433121019108e-05, "loss": 0.8929, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.551125705242157, "learning_rate": 6.170063694267517e-05, "loss": 0.8616, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5451139211654663, "learning_rate": 6.163694267515924e-05, "loss": 0.8643, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5444925427436829, "learning_rate": 6.157324840764332e-05, "loss": 0.8831, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5570912957191467, "learning_rate": 6.150955414012739e-05, "loss": 0.8836, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5151851773262024, "learning_rate": 6.144585987261146e-05, "loss": 0.8639, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.49056312441825867, "learning_rate": 6.138216560509554e-05, "loss": 0.8785, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.4749782979488373, "learning_rate": 6.131847133757961e-05, "loss": 0.8987, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5096744298934937, "learning_rate": 6.12547770700637e-05, "loss": 0.8636, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5247639417648315, "learning_rate": 6.119108280254777e-05, "loss": 0.8822, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.5125365257263184, "learning_rate": 6.112738853503185e-05, "loss": 0.8612, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.6495028138160706, "learning_rate": 6.106369426751592e-05, "loss": 0.8804, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.6130849123001099, "learning_rate": 6.1e-05, "loss": 0.8603, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.570441484451294, "learning_rate": 6.0936305732484076e-05, "loss": 0.8976, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.5690892934799194, "learning_rate": 6.087261146496815e-05, "loss": 0.843, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.6043858528137207, "learning_rate": 6.080891719745223e-05, "loss": 0.8872, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.521172821521759, "learning_rate": 6.0745222929936306e-05, "loss": 0.8623, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.6279696822166443, "learning_rate": 6.068152866242038e-05, "loss": 0.8774, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.5914869904518127, "learning_rate": 6.061783439490446e-05, "loss": 0.8738, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.5309963822364807, "learning_rate": 6.055414012738854e-05, "loss": 0.8961, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.49851086735725403, "learning_rate": 6.0490445859872614e-05, "loss": 0.8677, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.5877674221992493, "learning_rate": 6.042675159235669e-05, "loss": 0.8684, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.48666176199913025, "learning_rate": 6.036305732484077e-05, "loss": 0.8774, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.6143956184387207, "learning_rate": 6.0299363057324845e-05, "loss": 0.8747, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.5709465742111206, "learning_rate": 6.023566878980892e-05, "loss": 0.8844, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.56462162733078, "learning_rate": 6.0171974522293e-05, "loss": 0.8559, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.5849558711051941, "learning_rate": 6.010828025477707e-05, "loss": 0.8786, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.5224260091781616, "learning_rate": 6.0044585987261146e-05, "loss": 0.8532, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.5706215500831604, "learning_rate": 5.998089171974522e-05, "loss": 0.8549, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.5041067004203796, "learning_rate": 5.99171974522293e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.6147697567939758, "learning_rate": 5.9853503184713376e-05, "loss": 0.8786, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.6441134214401245, "learning_rate": 5.978980891719745e-05, "loss": 0.8992, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.6264499425888062, "learning_rate": 5.972611464968153e-05, "loss": 0.8845, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.5924432277679443, "learning_rate": 5.966242038216561e-05, "loss": 0.8828, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.6159273982048035, "learning_rate": 5.9598726114649684e-05, "loss": 0.8686, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5710685849189758, "learning_rate": 5.953503184713376e-05, "loss": 0.894, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5524258613586426, "learning_rate": 5.947133757961784e-05, "loss": 0.8899, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5787354111671448, "learning_rate": 5.9407643312101915e-05, "loss": 0.8546, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5477069020271301, "learning_rate": 5.934394904458599e-05, "loss": 0.8787, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.6147220730781555, "learning_rate": 5.928025477707007e-05, "loss": 0.8879, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5261998176574707, "learning_rate": 5.921656050955414e-05, "loss": 0.9033, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.5160650610923767, "learning_rate": 5.9152866242038216e-05, "loss": 0.8578, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.6226661205291748, "learning_rate": 5.908917197452229e-05, "loss": 0.8895, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.4986201226711273, "learning_rate": 5.902547770700637e-05, "loss": 0.8696, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5445514917373657, "learning_rate": 5.8961783439490446e-05, "loss": 0.8936, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5744483470916748, "learning_rate": 5.889808917197452e-05, "loss": 0.8541, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5101067423820496, "learning_rate": 5.88343949044586e-05, "loss": 0.8972, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.54083651304245, "learning_rate": 5.877070063694268e-05, "loss": 0.8575, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5288860201835632, "learning_rate": 5.8707006369426754e-05, "loss": 0.8859, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5304664373397827, "learning_rate": 5.864331210191083e-05, "loss": 0.9006, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.5717320442199707, "learning_rate": 5.857961783439491e-05, "loss": 0.8684, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 6990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.521751880645752, "learning_rate": 5.8515923566878985e-05, "loss": 0.8773, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5460343360900879, "learning_rate": 5.845222929936306e-05, "loss": 0.8676, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5551382303237915, "learning_rate": 5.838853503184714e-05, "loss": 0.8459, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5307554602622986, "learning_rate": 5.8324840764331216e-05, "loss": 0.8972, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5830961465835571, "learning_rate": 5.8261146496815286e-05, "loss": 0.8601, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5745844841003418, "learning_rate": 5.819745222929936e-05, "loss": 0.8742, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7050, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.5107027888298035, "learning_rate": 5.813375796178344e-05, "loss": 0.878, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7060, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.7025989890098572, "learning_rate": 5.8070063694267516e-05, "loss": 0.8642, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7070, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.6303359866142273, "learning_rate": 5.800636942675159e-05, "loss": 0.8602, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7080, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5137262344360352, "learning_rate": 5.794267515923567e-05, "loss": 0.8707, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7090, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.554009735584259, "learning_rate": 5.787898089171975e-05, "loss": 0.8572, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5526527166366577, "learning_rate": 5.7815286624203824e-05, "loss": 0.8688, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.9742128252983093, "learning_rate": 5.77515923566879e-05, "loss": 0.8824, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5232354998588562, "learning_rate": 5.768789808917198e-05, "loss": 0.8901, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5452825427055359, "learning_rate": 5.7624203821656055e-05, "loss": 0.872, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5305787324905396, "learning_rate": 5.756050955414013e-05, "loss": 0.9046, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5473122000694275, "learning_rate": 5.749681528662421e-05, "loss": 0.8804, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.6258863210678101, "learning_rate": 5.7433121019108286e-05, "loss": 0.8933, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5017375946044922, "learning_rate": 5.7369426751592356e-05, "loss": 0.8921, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5204535126686096, "learning_rate": 5.730573248407643e-05, "loss": 0.8694, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5876609086990356, "learning_rate": 5.724203821656051e-05, "loss": 0.8849, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5704731345176697, "learning_rate": 5.7178343949044586e-05, "loss": 0.8844, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.49567875266075134, "learning_rate": 5.711464968152866e-05, "loss": 0.872, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5774997472763062, "learning_rate": 5.705095541401274e-05, "loss": 0.8693, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.5102982521057129, "learning_rate": 5.698726114649682e-05, "loss": 0.8591, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.5977953672409058, "learning_rate": 5.6923566878980894e-05, "loss": 0.8624, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.664761483669281, "learning_rate": 5.685987261146497e-05, "loss": 0.8862, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.5394580960273743, "learning_rate": 5.679617834394905e-05, "loss": 0.8796, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.5266260504722595, "learning_rate": 5.6732484076433125e-05, "loss": 0.8768, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.571178138256073, "learning_rate": 5.66687898089172e-05, "loss": 0.8949, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.5399868488311768, "learning_rate": 5.660509554140128e-05, "loss": 0.8656, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.6034677028656006, "learning_rate": 5.6541401273885356e-05, "loss": 0.8687, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.537738025188446, "learning_rate": 5.647770700636943e-05, "loss": 0.8712, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5704653263092041, "learning_rate": 5.64140127388535e-05, "loss": 0.8606, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5335469245910645, "learning_rate": 5.635031847133758e-05, "loss": 0.8853, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5269140005111694, "learning_rate": 5.6286624203821656e-05, "loss": 0.8588, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.602112889289856, "learning_rate": 5.622292993630573e-05, "loss": 0.8652, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5989587903022766, "learning_rate": 5.615923566878981e-05, "loss": 0.8425, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.6427887082099915, "learning_rate": 5.609554140127389e-05, "loss": 0.8702, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5564777255058289, "learning_rate": 5.6031847133757964e-05, "loss": 0.8633, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.5418772101402283, "learning_rate": 5.596815286624204e-05, "loss": 0.8957, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5809254050254822, "learning_rate": 5.590445859872612e-05, "loss": 0.8711, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5313619375228882, "learning_rate": 5.5840764331210195e-05, "loss": 0.8698, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5611067414283752, "learning_rate": 5.577707006369427e-05, "loss": 0.8674, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5353450775146484, "learning_rate": 5.571337579617835e-05, "loss": 0.8638, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5202521681785583, "learning_rate": 5.5649681528662426e-05, "loss": 0.8724, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5684260725975037, "learning_rate": 5.55859872611465e-05, "loss": 0.8813, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.5583212375640869, "learning_rate": 5.552229299363057e-05, "loss": 0.8937, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.569209098815918, "learning_rate": 5.545859872611465e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5366013050079346, "learning_rate": 5.5394904458598726e-05, "loss": 0.8725, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5401418805122375, "learning_rate": 5.53312101910828e-05, "loss": 0.8772, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5705187320709229, "learning_rate": 5.526751592356688e-05, "loss": 0.8893, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5963625907897949, "learning_rate": 5.520382165605096e-05, "loss": 0.8797, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5229465961456299, "learning_rate": 5.5140127388535034e-05, "loss": 0.8889, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5574226975440979, "learning_rate": 5.507643312101911e-05, "loss": 0.8738, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.6087568998336792, "learning_rate": 5.501273885350319e-05, "loss": 0.8904, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.5513929128646851, "learning_rate": 5.4949044585987265e-05, "loss": 0.9023, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5194180011749268, "learning_rate": 5.488535031847134e-05, "loss": 0.8888, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5172723531723022, "learning_rate": 5.482165605095542e-05, "loss": 0.8731, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5943834185600281, "learning_rate": 5.4757961783439496e-05, "loss": 0.8772, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.536495566368103, "learning_rate": 5.469426751592357e-05, "loss": 0.8665, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5841646194458008, "learning_rate": 5.463057324840764e-05, "loss": 0.8712, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5669993162155151, "learning_rate": 5.456687898089172e-05, "loss": 0.8597, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.6329278349876404, "learning_rate": 5.4503184713375796e-05, "loss": 0.8751, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.5109385251998901, "learning_rate": 5.443949044585987e-05, "loss": 0.8942, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.5742037892341614, "learning_rate": 5.437579617834395e-05, "loss": 0.9001, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.661705493927002, "learning_rate": 5.431210191082803e-05, "loss": 0.8458, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.5366435050964355, "learning_rate": 5.4248407643312104e-05, "loss": 0.86, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.6003596186637878, "learning_rate": 5.418471337579618e-05, "loss": 0.8696, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.533245325088501, "learning_rate": 5.412101910828026e-05, "loss": 0.8522, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.5306530594825745, "learning_rate": 5.4057324840764335e-05, "loss": 0.8847, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.6527922749519348, "learning_rate": 5.399363057324841e-05, "loss": 0.8789, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.595000147819519, "learning_rate": 5.392993630573249e-05, "loss": 0.8355, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5394546985626221, "learning_rate": 5.3866242038216566e-05, "loss": 0.8548, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5733621716499329, "learning_rate": 5.380254777070064e-05, "loss": 0.8653, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5508468747138977, "learning_rate": 5.373885350318472e-05, "loss": 0.8945, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.610894501209259, "learning_rate": 5.367515923566879e-05, "loss": 0.8976, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5553873777389526, "learning_rate": 5.3611464968152866e-05, "loss": 0.867, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5115483999252319, "learning_rate": 5.354777070063694e-05, "loss": 0.8523, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.513729989528656, "learning_rate": 5.348407643312102e-05, "loss": 0.8988, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5983762741088867, "learning_rate": 5.34203821656051e-05, "loss": 0.8499, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.5823088884353638, "learning_rate": 5.3356687898089174e-05, "loss": 0.866, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.573600709438324, "learning_rate": 5.329299363057325e-05, "loss": 0.8701, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5541461706161499, "learning_rate": 5.322929936305733e-05, "loss": 0.8724, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5558524131774902, "learning_rate": 5.3165605095541405e-05, "loss": 0.8657, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5285508036613464, "learning_rate": 5.310191082802548e-05, "loss": 0.8425, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5635260939598083, "learning_rate": 5.303821656050956e-05, "loss": 0.89, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5565703511238098, "learning_rate": 5.2974522292993636e-05, "loss": 0.8612, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5692863464355469, "learning_rate": 5.291082802547771e-05, "loss": 0.8719, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.5936442613601685, "learning_rate": 5.284713375796179e-05, "loss": 0.8786, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5127562284469604, "learning_rate": 5.278343949044586e-05, "loss": 0.8467, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5491020083427429, "learning_rate": 5.2719745222929936e-05, "loss": 0.8602, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5560590624809265, "learning_rate": 5.2656050955414013e-05, "loss": 0.8702, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.6465674638748169, "learning_rate": 5.259235668789809e-05, "loss": 0.8869, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.6076213717460632, "learning_rate": 5.252866242038217e-05, "loss": 0.8791, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5880044102668762, "learning_rate": 5.2464968152866244e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5579984784126282, "learning_rate": 5.240127388535032e-05, "loss": 0.8722, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.5295410752296448, "learning_rate": 5.23375796178344e-05, "loss": 0.8454, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.5681312680244446, "learning_rate": 5.2273885350318475e-05, "loss": 0.8592, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.6963818669319153, "learning_rate": 5.221019108280255e-05, "loss": 0.8483, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 7990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.6498535871505737, "learning_rate": 5.214649681528663e-05, "loss": 0.8913, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8000, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.5272806286811829, "learning_rate": 5.2082802547770706e-05, "loss": 0.8522, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8010, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.524757444858551, "learning_rate": 5.201910828025478e-05, "loss": 0.9146, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8020, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.8040810227394104, "learning_rate": 5.195541401273886e-05, "loss": 0.8646, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8030, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.637153148651123, "learning_rate": 5.1891719745222936e-05, "loss": 0.8595, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8040, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.6900390386581421, "learning_rate": 5.1828025477707006e-05, "loss": 0.8787, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.5754966139793396, "learning_rate": 5.1764331210191083e-05, "loss": 0.8605, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.5973451137542725, "learning_rate": 5.170063694267516e-05, "loss": 0.8691, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.6348568797111511, "learning_rate": 5.163694267515924e-05, "loss": 0.8745, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.540838360786438, "learning_rate": 5.1573248407643314e-05, "loss": 0.8585, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.6029226183891296, "learning_rate": 5.150955414012739e-05, "loss": 0.857, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.5446135997772217, "learning_rate": 5.144585987261147e-05, "loss": 0.8467, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.5528589487075806, "learning_rate": 5.1382165605095545e-05, "loss": 0.8552, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.5417741537094116, "learning_rate": 5.131847133757962e-05, "loss": 0.8544, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.5020217895507812, "learning_rate": 5.12547770700637e-05, "loss": 0.8615, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.5379871726036072, "learning_rate": 5.1191082802547776e-05, "loss": 0.853, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.6144492030143738, "learning_rate": 5.112738853503185e-05, "loss": 0.8443, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.566996693611145, "learning_rate": 5.106369426751593e-05, "loss": 0.8595, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.5716440677642822, "learning_rate": 5.1000000000000006e-05, "loss": 0.8758, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.54944908618927, "learning_rate": 5.0936305732484077e-05, "loss": 0.8385, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.5852215886116028, "learning_rate": 5.0872611464968153e-05, "loss": 0.874, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.6428478360176086, "learning_rate": 5.080891719745223e-05, "loss": 0.8527, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.5332466959953308, "learning_rate": 5.074522292993631e-05, "loss": 0.8697, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.6330180168151855, "learning_rate": 5.0681528662420384e-05, "loss": 0.8447, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.6093716025352478, "learning_rate": 5.061783439490446e-05, "loss": 0.8576, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.5464934706687927, "learning_rate": 5.055414012738854e-05, "loss": 0.8563, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.5934207439422607, "learning_rate": 5.0490445859872615e-05, "loss": 0.8379, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.5612914562225342, "learning_rate": 5.042675159235669e-05, "loss": 0.8502, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.545253574848175, "learning_rate": 5.036305732484077e-05, "loss": 0.8509, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.631450891494751, "learning_rate": 5.0299363057324846e-05, "loss": 0.8606, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5951800346374512, "learning_rate": 5.023566878980892e-05, "loss": 0.8716, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.6067357063293457, "learning_rate": 5.0171974522293e-05, "loss": 0.8929, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5289549827575684, "learning_rate": 5.0108280254777076e-05, "loss": 0.865, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5379547476768494, "learning_rate": 5.004458598726115e-05, "loss": 0.8649, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5237485766410828, "learning_rate": 4.9980891719745223e-05, "loss": 0.8835, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5215060710906982, "learning_rate": 4.99171974522293e-05, "loss": 0.8549, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.7047269940376282, "learning_rate": 4.985350318471338e-05, "loss": 0.8595, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.5435574054718018, "learning_rate": 4.9789808917197454e-05, "loss": 0.8762, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5348605513572693, "learning_rate": 4.972611464968153e-05, "loss": 0.8524, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.6164429187774658, "learning_rate": 4.966242038216561e-05, "loss": 0.8243, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5418780446052551, "learning_rate": 4.9598726114649685e-05, "loss": 0.8464, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5288802981376648, "learning_rate": 4.953503184713376e-05, "loss": 0.834, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5178465843200684, "learning_rate": 4.947133757961784e-05, "loss": 0.869, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5777456760406494, "learning_rate": 4.9407643312101916e-05, "loss": 0.8598, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.6265929937362671, "learning_rate": 4.934394904458599e-05, "loss": 0.8382, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.5676342248916626, "learning_rate": 4.928025477707007e-05, "loss": 0.8545, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.6321595311164856, "learning_rate": 4.9216560509554146e-05, "loss": 0.8541, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5599589943885803, "learning_rate": 4.915286624203822e-05, "loss": 0.8798, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5524100661277771, "learning_rate": 4.9089171974522293e-05, "loss": 0.8318, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5421915650367737, "learning_rate": 4.902547770700637e-05, "loss": 0.8652, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.7382321357727051, "learning_rate": 4.896178343949045e-05, "loss": 0.8511, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5415564179420471, "learning_rate": 4.8898089171974524e-05, "loss": 0.8542, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5908517241477966, "learning_rate": 4.88343949044586e-05, "loss": 0.8717, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.5226035714149475, "learning_rate": 4.877070063694268e-05, "loss": 0.8306, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5351910591125488, "learning_rate": 4.8707006369426755e-05, "loss": 0.841, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5828281044960022, "learning_rate": 4.864331210191083e-05, "loss": 0.8322, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5739461779594421, "learning_rate": 4.857961783439491e-05, "loss": 0.8474, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5811489820480347, "learning_rate": 4.8515923566878986e-05, "loss": 0.8686, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5632668137550354, "learning_rate": 4.845222929936306e-05, "loss": 0.8386, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.6513923406600952, "learning_rate": 4.838853503184714e-05, "loss": 0.8776, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5276843309402466, "learning_rate": 4.8324840764331216e-05, "loss": 0.847, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.5792731642723083, "learning_rate": 4.826114649681529e-05, "loss": 0.8615, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5763053894042969, "learning_rate": 4.819745222929937e-05, "loss": 0.8468, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5236169099807739, "learning_rate": 4.813375796178344e-05, "loss": 0.828, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5243390798568726, "learning_rate": 4.807006369426752e-05, "loss": 0.8745, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5692167282104492, "learning_rate": 4.8006369426751594e-05, "loss": 0.8349, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.7056101560592651, "learning_rate": 4.794267515923567e-05, "loss": 0.8303, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5863968133926392, "learning_rate": 4.787898089171975e-05, "loss": 0.8409, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5797470808029175, "learning_rate": 4.7815286624203825e-05, "loss": 0.8504, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.6207801699638367, "learning_rate": 4.77515923566879e-05, "loss": 0.8596, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.5625149011611938, "learning_rate": 4.768789808917198e-05, "loss": 0.836, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5690678358078003, "learning_rate": 4.7624203821656056e-05, "loss": 0.8604, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.518943727016449, "learning_rate": 4.756050955414013e-05, "loss": 0.8846, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5701858401298523, "learning_rate": 4.749681528662421e-05, "loss": 0.8752, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5501466989517212, "learning_rate": 4.7433121019108286e-05, "loss": 0.8431, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.692132830619812, "learning_rate": 4.736942675159236e-05, "loss": 0.8645, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5666806697845459, "learning_rate": 4.730573248407644e-05, "loss": 0.8563, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5708613991737366, "learning_rate": 4.724203821656051e-05, "loss": 0.8322, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.5461627244949341, "learning_rate": 4.717834394904459e-05, "loss": 0.8367, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.506532609462738, "learning_rate": 4.7114649681528664e-05, "loss": 0.8346, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.5624059438705444, "learning_rate": 4.705095541401274e-05, "loss": 0.8164, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.5525660514831543, "learning_rate": 4.698726114649682e-05, "loss": 0.8566, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.5464274883270264, "learning_rate": 4.6923566878980895e-05, "loss": 0.8471, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.5622477531433105, "learning_rate": 4.685987261146497e-05, "loss": 0.8667, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.608081042766571, "learning_rate": 4.679617834394905e-05, "loss": 0.8832, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.608669638633728, "learning_rate": 4.6732484076433126e-05, "loss": 0.8612, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.6040840148925781, "learning_rate": 4.66687898089172e-05, "loss": 0.8529, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.6103033423423767, "learning_rate": 4.660509554140128e-05, "loss": 0.8572, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5756964683532715, "learning_rate": 4.6541401273885356e-05, "loss": 0.8712, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5669918656349182, "learning_rate": 4.647770700636943e-05, "loss": 0.8262, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.6642317175865173, "learning_rate": 4.641401273885351e-05, "loss": 0.8717, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5726878643035889, "learning_rate": 4.635031847133758e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5667121410369873, "learning_rate": 4.628662420382166e-05, "loss": 0.8469, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5571203231811523, "learning_rate": 4.6222929936305734e-05, "loss": 0.8476, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.5069302916526794, "learning_rate": 4.615923566878981e-05, "loss": 0.8566, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.517417311668396, "learning_rate": 4.609554140127389e-05, "loss": 0.8732, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.5386157631874084, "learning_rate": 4.6031847133757965e-05, "loss": 0.8324, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.6298375725746155, "learning_rate": 4.596815286624204e-05, "loss": 0.7908, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.5578718185424805, "learning_rate": 4.590445859872612e-05, "loss": 0.8724, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.5550695657730103, "learning_rate": 4.5840764331210196e-05, "loss": 0.8522, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 8990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.5744197964668274, "learning_rate": 4.577707006369427e-05, "loss": 0.8519, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.5899156332015991, "learning_rate": 4.571337579617835e-05, "loss": 0.8519, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.6079444289207458, "learning_rate": 4.5649681528662426e-05, "loss": 0.8505, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.5901761054992676, "learning_rate": 4.55859872611465e-05, "loss": 0.8574, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.5695390701293945, "learning_rate": 4.552229299363058e-05, "loss": 0.85, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.5685015320777893, "learning_rate": 4.545859872611466e-05, "loss": 0.8223, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.6257502436637878, "learning_rate": 4.539490445859873e-05, "loss": 0.8676, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.5832564234733582, "learning_rate": 4.5331210191082804e-05, "loss": 0.8564, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.6030582785606384, "learning_rate": 4.526751592356688e-05, "loss": 0.8492, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.6871100068092346, "learning_rate": 4.520382165605096e-05, "loss": 0.8662, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.53432297706604, "learning_rate": 4.5140127388535035e-05, "loss": 0.8542, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.5585580468177795, "learning_rate": 4.507643312101911e-05, "loss": 0.7867, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.5414518117904663, "learning_rate": 4.501273885350319e-05, "loss": 0.8376, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.6098955273628235, "learning_rate": 4.4949044585987266e-05, "loss": 0.8725, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.5669853091239929, "learning_rate": 4.488535031847134e-05, "loss": 0.8332, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.5340268611907959, "learning_rate": 4.482165605095542e-05, "loss": 0.8553, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.5587529540061951, "learning_rate": 4.4757961783439496e-05, "loss": 0.8343, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.6109263896942139, "learning_rate": 4.469426751592357e-05, "loss": 0.8252, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.6285218596458435, "learning_rate": 4.463057324840765e-05, "loss": 0.8346, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.5180321931838989, "learning_rate": 4.456687898089173e-05, "loss": 0.8289, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.6208270788192749, "learning_rate": 4.45031847133758e-05, "loss": 0.8531, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.6124671697616577, "learning_rate": 4.4439490445859874e-05, "loss": 0.8379, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.5522865653038025, "learning_rate": 4.437579617834395e-05, "loss": 0.8407, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.6094708442687988, "learning_rate": 4.431210191082803e-05, "loss": 0.8587, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.5936129689216614, "learning_rate": 4.4248407643312105e-05, "loss": 0.8488, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.515078604221344, "learning_rate": 4.418471337579618e-05, "loss": 0.8551, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.622717022895813, "learning_rate": 4.412101910828026e-05, "loss": 0.8645, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5956246256828308, "learning_rate": 4.4057324840764336e-05, "loss": 0.8643, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5683963894844055, "learning_rate": 4.399363057324841e-05, "loss": 0.8389, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5116027593612671, "learning_rate": 4.392993630573249e-05, "loss": 0.8504, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5620011687278748, "learning_rate": 4.3866242038216566e-05, "loss": 0.8764, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5658126473426819, "learning_rate": 4.380254777070064e-05, "loss": 0.8733, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.6153251528739929, "learning_rate": 4.373885350318472e-05, "loss": 0.8496, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.6150373220443726, "learning_rate": 4.36751592356688e-05, "loss": 0.8285, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.5434516072273254, "learning_rate": 4.3611464968152874e-05, "loss": 0.863, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.6302858591079712, "learning_rate": 4.3547770700636944e-05, "loss": 0.8682, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.61929851770401, "learning_rate": 4.348407643312102e-05, "loss": 0.886, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.5373562574386597, "learning_rate": 4.34203821656051e-05, "loss": 0.8298, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.5400745868682861, "learning_rate": 4.3356687898089175e-05, "loss": 0.838, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.5738040804862976, "learning_rate": 4.329299363057325e-05, "loss": 0.8631, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.5795983672142029, "learning_rate": 4.322929936305733e-05, "loss": 0.8565, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.55351722240448, "learning_rate": 4.3165605095541406e-05, "loss": 0.8902, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.5806913375854492, "learning_rate": 4.310191082802548e-05, "loss": 0.881, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.6037645936012268, "learning_rate": 4.303821656050956e-05, "loss": 0.8624, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.5471616387367249, "learning_rate": 4.2974522292993636e-05, "loss": 0.8388, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.5801625847816467, "learning_rate": 4.2910828025477713e-05, "loss": 0.8382, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.544641375541687, "learning_rate": 4.284713375796179e-05, "loss": 0.8442, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.5814592838287354, "learning_rate": 4.278343949044587e-05, "loss": 0.8339, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.5295001268386841, "learning_rate": 4.2719745222929944e-05, "loss": 0.8476, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.6271728277206421, "learning_rate": 4.2656050955414014e-05, "loss": 0.8626, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.6040265560150146, "learning_rate": 4.259235668789809e-05, "loss": 0.8506, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5334275960922241, "learning_rate": 4.252866242038217e-05, "loss": 0.8227, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.647208034992218, "learning_rate": 4.246496815286624e-05, "loss": 0.8627, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5936146378517151, "learning_rate": 4.2401273885350315e-05, "loss": 0.888, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5416791439056396, "learning_rate": 4.233757961783439e-05, "loss": 0.8823, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5779086351394653, "learning_rate": 4.227388535031847e-05, "loss": 0.8465, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5653147101402283, "learning_rate": 4.2210191082802546e-05, "loss": 0.8227, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5841782093048096, "learning_rate": 4.214649681528662e-05, "loss": 0.8703, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5924214720726013, "learning_rate": 4.20828025477707e-05, "loss": 0.8601, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.5881546139717102, "learning_rate": 4.201910828025478e-05, "loss": 0.8651, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5617165565490723, "learning_rate": 4.1955414012738854e-05, "loss": 0.8447, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5984334945678711, "learning_rate": 4.189171974522293e-05, "loss": 0.8694, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5753591656684875, "learning_rate": 4.182802547770701e-05, "loss": 0.8598, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5675076246261597, "learning_rate": 4.1764331210191084e-05, "loss": 0.8652, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5429391860961914, "learning_rate": 4.170063694267516e-05, "loss": 0.8722, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.6187253594398499, "learning_rate": 4.163694267515924e-05, "loss": 0.8256, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5988272428512573, "learning_rate": 4.157324840764331e-05, "loss": 0.8266, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.5375120043754578, "learning_rate": 4.1509554140127385e-05, "loss": 0.8702, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.584695041179657, "learning_rate": 4.144585987261146e-05, "loss": 0.8497, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.5349233150482178, "learning_rate": 4.138216560509554e-05, "loss": 0.8539, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.6189013719558716, "learning_rate": 4.1318471337579616e-05, "loss": 0.8756, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.5804276466369629, "learning_rate": 4.125477707006369e-05, "loss": 0.8553, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.6997498869895935, "learning_rate": 4.119108280254777e-05, "loss": 0.87, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.5317782759666443, "learning_rate": 4.112738853503185e-05, "loss": 0.8476, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.5482212901115417, "learning_rate": 4.1063694267515924e-05, "loss": 0.8163, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.5931942462921143, "learning_rate": 4.1e-05, "loss": 0.8261, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.5837103724479675, "learning_rate": 4.093630573248408e-05, "loss": 0.8701, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.5434841513633728, "learning_rate": 4.0872611464968154e-05, "loss": 0.8242, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.629984974861145, "learning_rate": 4.080891719745223e-05, "loss": 0.8705, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.6990328431129456, "learning_rate": 4.074522292993631e-05, "loss": 0.8586, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.5907762050628662, "learning_rate": 4.0681528662420385e-05, "loss": 0.8655, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.5725825428962708, "learning_rate": 4.0617834394904455e-05, "loss": 0.8504, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.5525303483009338, "learning_rate": 4.055414012738853e-05, "loss": 0.8422, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.601482629776001, "learning_rate": 4.049044585987261e-05, "loss": 0.8686, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5497685670852661, "learning_rate": 4.0426751592356686e-05, "loss": 0.8397, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5859342813491821, "learning_rate": 4.036305732484076e-05, "loss": 0.8906, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5428025722503662, "learning_rate": 4.029936305732484e-05, "loss": 0.8413, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.6050729751586914, "learning_rate": 4.023566878980892e-05, "loss": 0.8755, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5653709173202515, "learning_rate": 4.0171974522292994e-05, "loss": 0.8666, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5361793041229248, "learning_rate": 4.010828025477707e-05, "loss": 0.8218, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.6140942573547363, "learning_rate": 4.004458598726115e-05, "loss": 0.8487, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.5644840002059937, "learning_rate": 3.9980891719745224e-05, "loss": 0.857, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.5795630812644958, "learning_rate": 3.99171974522293e-05, "loss": 0.8377, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.5633305311203003, "learning_rate": 3.985350318471338e-05, "loss": 0.8679, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.60102778673172, "learning_rate": 3.9789808917197455e-05, "loss": 0.8748, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.6072971224784851, "learning_rate": 3.9726114649681525e-05, "loss": 0.8537, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.6720110774040222, "learning_rate": 3.96624203821656e-05, "loss": 0.8485, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.6156752705574036, "learning_rate": 3.959872611464968e-05, "loss": 0.8478, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.5758206844329834, "learning_rate": 3.9535031847133756e-05, "loss": 0.8449, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.5701586008071899, "learning_rate": 3.947133757961783e-05, "loss": 0.8346, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 9990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.52237868309021, "learning_rate": 3.940764331210191e-05, "loss": 0.8253, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.5953871607780457, "learning_rate": 3.934394904458599e-05, "loss": 0.8402, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.6187193989753723, "learning_rate": 3.9280254777070064e-05, "loss": 0.84, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.5820570588111877, "learning_rate": 3.921656050955414e-05, "loss": 0.8452, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.5865253806114197, "learning_rate": 3.915286624203822e-05, "loss": 0.8622, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.594710648059845, "learning_rate": 3.9089171974522294e-05, "loss": 0.8575, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.5631176233291626, "learning_rate": 3.902547770700637e-05, "loss": 0.8374, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.5939119458198547, "learning_rate": 3.896178343949045e-05, "loss": 0.8763, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.573266327381134, "learning_rate": 3.8898089171974525e-05, "loss": 0.8529, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.49202847480773926, "learning_rate": 3.88343949044586e-05, "loss": 0.8263, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5387163162231445, "learning_rate": 3.877070063694267e-05, "loss": 0.8625, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5848548412322998, "learning_rate": 3.870700636942675e-05, "loss": 0.8307, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5548748970031738, "learning_rate": 3.8643312101910826e-05, "loss": 0.8285, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5727143287658691, "learning_rate": 3.85796178343949e-05, "loss": 0.8509, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5815598964691162, "learning_rate": 3.851592356687898e-05, "loss": 0.8836, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.5907566547393799, "learning_rate": 3.845222929936306e-05, "loss": 0.8398, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.6278606653213501, "learning_rate": 3.8388535031847134e-05, "loss": 0.8435, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.5442222952842712, "learning_rate": 3.832484076433121e-05, "loss": 0.8505, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.5859748721122742, "learning_rate": 3.826114649681529e-05, "loss": 0.8566, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.6682499647140503, "learning_rate": 3.8197452229299364e-05, "loss": 0.8712, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.5633233785629272, "learning_rate": 3.813375796178344e-05, "loss": 0.8656, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.6651773452758789, "learning_rate": 3.807006369426752e-05, "loss": 0.8707, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.5781067609786987, "learning_rate": 3.8006369426751595e-05, "loss": 0.8598, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.6810473799705505, "learning_rate": 3.794267515923567e-05, "loss": 0.8373, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.6265770792961121, "learning_rate": 3.787898089171974e-05, "loss": 0.8729, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5550416707992554, "learning_rate": 3.781528662420382e-05, "loss": 0.861, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5630444884300232, "learning_rate": 3.7751592356687896e-05, "loss": 0.8614, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5981015563011169, "learning_rate": 3.768789808917197e-05, "loss": 0.8417, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5597262978553772, "learning_rate": 3.762420382165605e-05, "loss": 0.8552, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5736472606658936, "learning_rate": 3.756050955414013e-05, "loss": 0.8444, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.5704269409179688, "learning_rate": 3.7496815286624204e-05, "loss": 0.8333, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.549435555934906, "learning_rate": 3.743312101910828e-05, "loss": 0.849, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5624418258666992, "learning_rate": 3.736942675159236e-05, "loss": 0.84, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5569450259208679, "learning_rate": 3.7305732484076434e-05, "loss": 0.82, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5782328844070435, "learning_rate": 3.724203821656051e-05, "loss": 0.8365, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.6573158502578735, "learning_rate": 3.717834394904459e-05, "loss": 0.835, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5627766847610474, "learning_rate": 3.7114649681528665e-05, "loss": 0.8338, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5935114026069641, "learning_rate": 3.705095541401274e-05, "loss": 0.8528, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.6130989193916321, "learning_rate": 3.698726114649681e-05, "loss": 0.8499, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5816967487335205, "learning_rate": 3.692356687898089e-05, "loss": 0.8541, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5552312135696411, "learning_rate": 3.6859872611464966e-05, "loss": 0.8494, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.5926358103752136, "learning_rate": 3.679617834394904e-05, "loss": 0.8701, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.6585802435874939, "learning_rate": 3.673248407643312e-05, "loss": 0.8697, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.5160651206970215, "learning_rate": 3.66687898089172e-05, "loss": 0.8637, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.8365841507911682, "learning_rate": 3.6605095541401274e-05, "loss": 0.8572, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.6559211611747742, "learning_rate": 3.654140127388535e-05, "loss": 0.8221, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.603316068649292, "learning_rate": 3.647770700636943e-05, "loss": 0.8237, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.5332236289978027, "learning_rate": 3.6414012738853504e-05, "loss": 0.8604, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.6046949028968811, "learning_rate": 3.635031847133758e-05, "loss": 0.8606, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.6080541014671326, "learning_rate": 3.628662420382166e-05, "loss": 0.8231, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.5540225505828857, "learning_rate": 3.6222929936305735e-05, "loss": 0.8622, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.6116626858711243, "learning_rate": 3.615923566878981e-05, "loss": 0.8452, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.5268499255180359, "learning_rate": 3.609554140127389e-05, "loss": 0.8643, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.593361496925354, "learning_rate": 3.603184713375796e-05, "loss": 0.8679, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.6413514614105225, "learning_rate": 3.5968152866242036e-05, "loss": 0.8436, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.6277458667755127, "learning_rate": 3.590445859872611e-05, "loss": 0.8149, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.5620959401130676, "learning_rate": 3.584076433121019e-05, "loss": 0.8427, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.7740691900253296, "learning_rate": 3.577707006369427e-05, "loss": 0.8194, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.5390883088111877, "learning_rate": 3.5713375796178344e-05, "loss": 0.8462, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.6066960096359253, "learning_rate": 3.564968152866242e-05, "loss": 0.8438, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.5721805691719055, "learning_rate": 3.55859872611465e-05, "loss": 0.8852, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.5543267130851746, "learning_rate": 3.5522292993630574e-05, "loss": 0.8418, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.551352858543396, "learning_rate": 3.545859872611465e-05, "loss": 0.8575, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.5895906090736389, "learning_rate": 3.539490445859873e-05, "loss": 0.8322, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.6208714842796326, "learning_rate": 3.5331210191082805e-05, "loss": 0.8592, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.6484043002128601, "learning_rate": 3.526751592356688e-05, "loss": 0.8437, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.5603862404823303, "learning_rate": 3.520382165605096e-05, "loss": 0.8414, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.6064278483390808, "learning_rate": 3.514012738853503e-05, "loss": 0.851, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.5677962303161621, "learning_rate": 3.5076433121019106e-05, "loss": 0.8463, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.5643174648284912, "learning_rate": 3.501273885350318e-05, "loss": 0.8446, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.5769677758216858, "learning_rate": 3.494904458598726e-05, "loss": 0.8244, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.6470271348953247, "learning_rate": 3.488535031847134e-05, "loss": 0.8484, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.5943115949630737, "learning_rate": 3.4821656050955414e-05, "loss": 0.8719, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.5173988342285156, "learning_rate": 3.475796178343949e-05, "loss": 0.8202, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.5133519172668457, "learning_rate": 3.469426751592357e-05, "loss": 0.8607, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.6479659676551819, "learning_rate": 3.4630573248407644e-05, "loss": 0.8357, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.6060785055160522, "learning_rate": 3.456687898089172e-05, "loss": 0.8443, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.5761645436286926, "learning_rate": 3.45031847133758e-05, "loss": 0.8721, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.5355122685432434, "learning_rate": 3.4439490445859875e-05, "loss": 0.8414, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.6006566286087036, "learning_rate": 3.437579617834395e-05, "loss": 0.8738, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.5849606394767761, "learning_rate": 3.431210191082803e-05, "loss": 0.8332, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.5841776132583618, "learning_rate": 3.4248407643312106e-05, "loss": 0.852, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.5938459634780884, "learning_rate": 3.4184713375796176e-05, "loss": 0.8796, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.5698296427726746, "learning_rate": 3.412101910828025e-05, "loss": 0.8333, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.580596923828125, "learning_rate": 3.405732484076433e-05, "loss": 0.8534, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.5333631038665771, "learning_rate": 3.399363057324841e-05, "loss": 0.8348, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.6496769189834595, "learning_rate": 3.3929936305732484e-05, "loss": 0.8932, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.7200677394866943, "learning_rate": 3.386624203821656e-05, "loss": 0.8346, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.5436977744102478, "learning_rate": 3.380254777070064e-05, "loss": 0.8464, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5795146226882935, "learning_rate": 3.3738853503184714e-05, "loss": 0.8072, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5177848935127258, "learning_rate": 3.367515923566879e-05, "loss": 0.8545, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.6202844381332397, "learning_rate": 3.361146496815287e-05, "loss": 0.851, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5789759159088135, "learning_rate": 3.3547770700636945e-05, "loss": 0.8693, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5165707468986511, "learning_rate": 3.348407643312102e-05, "loss": 0.8541, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5957597494125366, "learning_rate": 3.34203821656051e-05, "loss": 0.8739, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.5551866888999939, "learning_rate": 3.3356687898089176e-05, "loss": 0.8696, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.6256672739982605, "learning_rate": 3.3292993630573246e-05, "loss": 0.8506, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.5564112663269043, "learning_rate": 3.322929936305732e-05, "loss": 0.8337, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.5981550216674805, "learning_rate": 3.31656050955414e-05, "loss": 0.8578, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.6769008040428162, "learning_rate": 3.310191082802548e-05, "loss": 0.8429, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 10990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.5731686353683472, "learning_rate": 3.3038216560509554e-05, "loss": 0.8609, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.6229621767997742, "learning_rate": 3.297452229299363e-05, "loss": 0.8435, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.6300867199897766, "learning_rate": 3.291082802547771e-05, "loss": 0.8797, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.6082947850227356, "learning_rate": 3.2847133757961784e-05, "loss": 0.8697, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.5586636662483215, "learning_rate": 3.278343949044586e-05, "loss": 0.8422, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.6251539587974548, "learning_rate": 3.271974522292994e-05, "loss": 0.8124, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.6733857989311218, "learning_rate": 3.2656050955414015e-05, "loss": 0.8382, "max_memory_allocated (GB)": 94.49, "memory_allocated (GB)": 14.97, "step": 11060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.601640522480011, "learning_rate": 3.259235668789809e-05, "loss": 0.8371, "max_memory_allocated (GB)": 94.55, "memory_allocated (GB)": 14.97, "step": 11070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.5989189147949219, "learning_rate": 3.252866242038217e-05, "loss": 0.8841, "max_memory_allocated (GB)": 94.55, "memory_allocated (GB)": 14.97, "step": 11080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.5607017278671265, "learning_rate": 3.2464968152866246e-05, "loss": 0.8503, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.5420870780944824, "learning_rate": 3.240127388535032e-05, "loss": 0.8607, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.6338402032852173, "learning_rate": 3.233757961783439e-05, "loss": 0.8442, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.5808389782905579, "learning_rate": 3.227388535031847e-05, "loss": 0.8361, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.5183432698249817, "learning_rate": 3.221019108280255e-05, "loss": 0.8474, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.6294568777084351, "learning_rate": 3.2146496815286624e-05, "loss": 0.8316, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.5947890877723694, "learning_rate": 3.20828025477707e-05, "loss": 0.821, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.6249579787254333, "learning_rate": 3.201910828025478e-05, "loss": 0.8496, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.5387454032897949, "learning_rate": 3.1955414012738854e-05, "loss": 0.8555, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.5490169525146484, "learning_rate": 3.189171974522293e-05, "loss": 0.8417, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.6309676170349121, "learning_rate": 3.182802547770701e-05, "loss": 0.83, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.6202732920646667, "learning_rate": 3.1764331210191085e-05, "loss": 0.8344, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.6259014010429382, "learning_rate": 3.170063694267516e-05, "loss": 0.8042, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.5590731501579285, "learning_rate": 3.163694267515924e-05, "loss": 0.831, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.6487732529640198, "learning_rate": 3.1573248407643316e-05, "loss": 0.8368, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.6921071410179138, "learning_rate": 3.150955414012739e-05, "loss": 0.855, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.592799186706543, "learning_rate": 3.144585987261146e-05, "loss": 0.8247, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.6593350172042847, "learning_rate": 3.138216560509554e-05, "loss": 0.8399, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.5151501297950745, "learning_rate": 3.131847133757962e-05, "loss": 0.8577, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.5430037975311279, "learning_rate": 3.1254777070063694e-05, "loss": 0.8286, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.5675473809242249, "learning_rate": 3.119108280254777e-05, "loss": 0.8413, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.6474435925483704, "learning_rate": 3.112738853503185e-05, "loss": 0.8435, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.6244317293167114, "learning_rate": 3.1063694267515924e-05, "loss": 0.815, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.5554672479629517, "learning_rate": 3.1e-05, "loss": 0.849, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.5643844604492188, "learning_rate": 3.093630573248408e-05, "loss": 0.849, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.5386666655540466, "learning_rate": 3.0872611464968155e-05, "loss": 0.8601, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.6414317488670349, "learning_rate": 3.080891719745223e-05, "loss": 0.8351, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.5491330623626709, "learning_rate": 3.074522292993631e-05, "loss": 0.8409, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.6073495745658875, "learning_rate": 3.0681528662420386e-05, "loss": 0.8514, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.5687406659126282, "learning_rate": 3.061783439490446e-05, "loss": 0.847, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.6221336126327515, "learning_rate": 3.055414012738854e-05, "loss": 0.86, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.6338231563568115, "learning_rate": 3.0490445859872613e-05, "loss": 0.8505, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.6311108469963074, "learning_rate": 3.042675159235669e-05, "loss": 0.8503, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.5485032796859741, "learning_rate": 3.0363057324840767e-05, "loss": 0.8311, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.8035585880279541, "learning_rate": 3.029936305732484e-05, "loss": 0.8514, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.5874172449111938, "learning_rate": 3.0235668789808918e-05, "loss": 0.8374, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.5448861122131348, "learning_rate": 3.0171974522292994e-05, "loss": 0.8591, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.6101541519165039, "learning_rate": 3.010828025477707e-05, "loss": 0.808, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.644763708114624, "learning_rate": 3.0044585987261148e-05, "loss": 0.8493, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.6366714239120483, "learning_rate": 2.9980891719745225e-05, "loss": 0.8473, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.6037055850028992, "learning_rate": 2.9917197452229302e-05, "loss": 0.8512, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.5885847210884094, "learning_rate": 2.9853503184713376e-05, "loss": 0.7974, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.5361979007720947, "learning_rate": 2.9789808917197453e-05, "loss": 0.8347, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.5696050524711609, "learning_rate": 2.972611464968153e-05, "loss": 0.8567, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.5758958458900452, "learning_rate": 2.9662420382165606e-05, "loss": 0.8504, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.613055944442749, "learning_rate": 2.9598726114649683e-05, "loss": 0.8399, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.6125010251998901, "learning_rate": 2.953503184713376e-05, "loss": 0.843, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.622986376285553, "learning_rate": 2.9471337579617837e-05, "loss": 0.822, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.5726191401481628, "learning_rate": 2.9407643312101914e-05, "loss": 0.8118, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.6309647560119629, "learning_rate": 2.9343949044585988e-05, "loss": 0.8407, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.5828757286071777, "learning_rate": 2.9280254777070064e-05, "loss": 0.8288, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.6801642179489136, "learning_rate": 2.921656050955414e-05, "loss": 0.8244, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.6346856355667114, "learning_rate": 2.9152866242038218e-05, "loss": 0.8625, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.6004509329795837, "learning_rate": 2.9089171974522295e-05, "loss": 0.8064, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.5791013836860657, "learning_rate": 2.9025477707006372e-05, "loss": 0.8469, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.6295746564865112, "learning_rate": 2.896178343949045e-05, "loss": 0.8381, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.5519148707389832, "learning_rate": 2.8898089171974523e-05, "loss": 0.8291, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.6016730666160583, "learning_rate": 2.88343949044586e-05, "loss": 0.8671, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.584075391292572, "learning_rate": 2.8770700636942676e-05, "loss": 0.8405, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.5806698799133301, "learning_rate": 2.8707006369426753e-05, "loss": 0.8348, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.6517837047576904, "learning_rate": 2.864331210191083e-05, "loss": 0.8385, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.5573362112045288, "learning_rate": 2.8579617834394907e-05, "loss": 0.8797, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.6296159029006958, "learning_rate": 2.8515923566878984e-05, "loss": 0.8179, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.5505685210227966, "learning_rate": 2.8452229299363058e-05, "loss": 0.8069, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.5616992115974426, "learning_rate": 2.8388535031847134e-05, "loss": 0.8455, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.5606623291969299, "learning_rate": 2.832484076433121e-05, "loss": 0.8513, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.5369701981544495, "learning_rate": 2.8261146496815288e-05, "loss": 0.8493, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.6416178345680237, "learning_rate": 2.8197452229299365e-05, "loss": 0.8528, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.517137885093689, "learning_rate": 2.8133757961783442e-05, "loss": 0.8564, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.5944896936416626, "learning_rate": 2.807006369426752e-05, "loss": 0.8143, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.5841711759567261, "learning_rate": 2.8006369426751593e-05, "loss": 0.852, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.5721305012702942, "learning_rate": 2.794267515923567e-05, "loss": 0.8448, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.6192442774772644, "learning_rate": 2.7878980891719746e-05, "loss": 0.834, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.669075071811676, "learning_rate": 2.7815286624203823e-05, "loss": 0.8484, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.5460517406463623, "learning_rate": 2.77515923566879e-05, "loss": 0.8693, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.5798333883285522, "learning_rate": 2.7687898089171977e-05, "loss": 0.8223, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.7171079516410828, "learning_rate": 2.7624203821656054e-05, "loss": 0.8239, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5694504976272583, "learning_rate": 2.7560509554140128e-05, "loss": 0.8415, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5613299608230591, "learning_rate": 2.7496815286624204e-05, "loss": 0.8432, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5206958651542664, "learning_rate": 2.743312101910828e-05, "loss": 0.8241, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5295774340629578, "learning_rate": 2.736942675159236e-05, "loss": 0.8602, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.64802086353302, "learning_rate": 2.7305732484076435e-05, "loss": 0.8171, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5394118428230286, "learning_rate": 2.7242038216560512e-05, "loss": 0.8296, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.5935512781143188, "learning_rate": 2.717834394904459e-05, "loss": 0.8535, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.6226528882980347, "learning_rate": 2.7114649681528666e-05, "loss": 0.8272, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.6066229343414307, "learning_rate": 2.705095541401274e-05, "loss": 0.8279, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.5835513472557068, "learning_rate": 2.6987261146496816e-05, "loss": 0.8512, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.5614724159240723, "learning_rate": 2.6923566878980893e-05, "loss": 0.8342, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.5533426403999329, "learning_rate": 2.685987261146497e-05, "loss": 0.8695, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.552813708782196, "learning_rate": 2.6796178343949047e-05, "loss": 0.8521, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.609652042388916, "learning_rate": 2.6732484076433124e-05, "loss": 0.8395, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 11990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.6274034976959229, "learning_rate": 2.66687898089172e-05, "loss": 0.7963, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.614339292049408, "learning_rate": 2.6605095541401274e-05, "loss": 0.848, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.5958476066589355, "learning_rate": 2.654140127388535e-05, "loss": 0.86, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.6635801792144775, "learning_rate": 2.647770700636943e-05, "loss": 0.828, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.6173161268234253, "learning_rate": 2.6414012738853505e-05, "loss": 0.8657, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.621300995349884, "learning_rate": 2.6350318471337582e-05, "loss": 0.8653, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.6438680291175842, "learning_rate": 2.628662420382166e-05, "loss": 0.8388, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.5824630856513977, "learning_rate": 2.6222929936305736e-05, "loss": 0.8658, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.5131130814552307, "learning_rate": 2.615923566878981e-05, "loss": 0.8505, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.6061905026435852, "learning_rate": 2.6095541401273886e-05, "loss": 0.8219, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.6551744341850281, "learning_rate": 2.6031847133757963e-05, "loss": 0.8619, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.6331368088722229, "learning_rate": 2.596815286624204e-05, "loss": 0.8692, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.644982099533081, "learning_rate": 2.5904458598726117e-05, "loss": 0.8454, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.48831579089164734, "learning_rate": 2.5840764331210194e-05, "loss": 0.8423, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.5983461737632751, "learning_rate": 2.577707006369427e-05, "loss": 0.8453, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.6347223520278931, "learning_rate": 2.5713375796178345e-05, "loss": 0.8363, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.6169897317886353, "learning_rate": 2.564968152866242e-05, "loss": 0.8327, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.6166051030158997, "learning_rate": 2.55859872611465e-05, "loss": 0.858, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5, "grad_norm": 0.5569881200790405, "learning_rate": 2.5522292993630575e-05, "loss": 0.8209, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.5771543979644775, "learning_rate": 2.5458598726114652e-05, "loss": 0.8205, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.5933178067207336, "learning_rate": 2.539490445859873e-05, "loss": 0.8283, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.5819362998008728, "learning_rate": 2.5331210191082806e-05, "loss": 0.8343, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.6056768298149109, "learning_rate": 2.5267515923566883e-05, "loss": 0.8516, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.5443101525306702, "learning_rate": 2.5203821656050956e-05, "loss": 0.8524, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.6425015330314636, "learning_rate": 2.5140127388535033e-05, "loss": 0.8496, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.6068335175514221, "learning_rate": 2.507643312101911e-05, "loss": 0.8592, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.51, "grad_norm": 0.5697997212409973, "learning_rate": 2.5012738853503187e-05, "loss": 0.8089, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.6474873423576355, "learning_rate": 2.4949044585987264e-05, "loss": 0.8211, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.5467383861541748, "learning_rate": 2.488535031847134e-05, "loss": 0.813, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.7291449904441833, "learning_rate": 2.4821656050955418e-05, "loss": 0.8483, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.5775637626647949, "learning_rate": 2.475796178343949e-05, "loss": 0.8565, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.5808078050613403, "learning_rate": 2.469426751592357e-05, "loss": 0.8448, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.5515039563179016, "learning_rate": 2.4630573248407645e-05, "loss": 0.8158, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.6430584788322449, "learning_rate": 2.4566878980891722e-05, "loss": 0.8353, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.52, "grad_norm": 0.5688273310661316, "learning_rate": 2.45031847133758e-05, "loss": 0.8614, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.5493985414505005, "learning_rate": 2.4439490445859876e-05, "loss": 0.7994, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.5262490510940552, "learning_rate": 2.4375796178343953e-05, "loss": 0.8378, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.5832075476646423, "learning_rate": 2.4312101910828026e-05, "loss": 0.8549, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.7101218104362488, "learning_rate": 2.4248407643312103e-05, "loss": 0.8381, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.5871150493621826, "learning_rate": 2.418471337579618e-05, "loss": 0.8461, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.6132311224937439, "learning_rate": 2.4121019108280257e-05, "loss": 0.8333, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.6356542706489563, "learning_rate": 2.4057324840764334e-05, "loss": 0.855, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.53, "grad_norm": 0.5895008444786072, "learning_rate": 2.399363057324841e-05, "loss": 0.8649, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.5542967319488525, "learning_rate": 2.3929936305732488e-05, "loss": 0.8778, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.6304234266281128, "learning_rate": 2.386624203821656e-05, "loss": 0.8381, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.561419665813446, "learning_rate": 2.380254777070064e-05, "loss": 0.8467, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.660615086555481, "learning_rate": 2.3738853503184712e-05, "loss": 0.8296, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.5472921133041382, "learning_rate": 2.367515923566879e-05, "loss": 0.8705, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.5949731469154358, "learning_rate": 2.3611464968152866e-05, "loss": 0.849, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.5476264953613281, "learning_rate": 2.3547770700636943e-05, "loss": 0.8479, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.5857336521148682, "learning_rate": 2.348407643312102e-05, "loss": 0.8332, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.5827224850654602, "learning_rate": 2.3420382165605096e-05, "loss": 0.8449, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.6607509851455688, "learning_rate": 2.3356687898089173e-05, "loss": 0.8388, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.6671987771987915, "learning_rate": 2.3292993630573247e-05, "loss": 0.8314, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.6405848264694214, "learning_rate": 2.3229299363057324e-05, "loss": 0.8386, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.6223413348197937, "learning_rate": 2.31656050955414e-05, "loss": 0.8247, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.5649201273918152, "learning_rate": 2.3101910828025478e-05, "loss": 0.8382, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.5927321910858154, "learning_rate": 2.3038216560509555e-05, "loss": 0.843, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.55, "grad_norm": 0.6821181774139404, "learning_rate": 2.297452229299363e-05, "loss": 0.8801, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.6321542859077454, "learning_rate": 2.291082802547771e-05, "loss": 0.837, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.5311897993087769, "learning_rate": 2.2847133757961782e-05, "loss": 0.8429, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.5849913358688354, "learning_rate": 2.278343949044586e-05, "loss": 0.8539, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.570947527885437, "learning_rate": 2.2719745222929936e-05, "loss": 0.8271, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.6576603651046753, "learning_rate": 2.2656050955414013e-05, "loss": 0.8354, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.5636218190193176, "learning_rate": 2.259235668789809e-05, "loss": 0.8522, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.5577971339225769, "learning_rate": 2.2528662420382166e-05, "loss": 0.819, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.56, "grad_norm": 0.6458442211151123, "learning_rate": 2.2464968152866243e-05, "loss": 0.8263, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5527836084365845, "learning_rate": 2.2401273885350317e-05, "loss": 0.8409, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5587246417999268, "learning_rate": 2.2337579617834394e-05, "loss": 0.8321, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5923686623573303, "learning_rate": 2.227388535031847e-05, "loss": 0.8603, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.6047967076301575, "learning_rate": 2.2210191082802548e-05, "loss": 0.8504, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5479164123535156, "learning_rate": 2.2146496815286625e-05, "loss": 0.8074, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5567662119865417, "learning_rate": 2.20828025477707e-05, "loss": 0.844, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.5728209614753723, "learning_rate": 2.201910828025478e-05, "loss": 0.8236, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.57, "grad_norm": 0.592235267162323, "learning_rate": 2.1955414012738852e-05, "loss": 0.8385, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6159526109695435, "learning_rate": 2.189171974522293e-05, "loss": 0.8438, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.7097925543785095, "learning_rate": 2.1828025477707006e-05, "loss": 0.8409, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6174066662788391, "learning_rate": 2.1764331210191083e-05, "loss": 0.8257, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6272559762001038, "learning_rate": 2.170063694267516e-05, "loss": 0.855, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6165140271186829, "learning_rate": 2.1636942675159236e-05, "loss": 0.8345, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.5537904500961304, "learning_rate": 2.1573248407643313e-05, "loss": 0.8403, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6132100820541382, "learning_rate": 2.150955414012739e-05, "loss": 0.8394, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.58, "grad_norm": 0.6333381533622742, "learning_rate": 2.1445859872611464e-05, "loss": 0.8373, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.5513577461242676, "learning_rate": 2.138216560509554e-05, "loss": 0.8583, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.552699625492096, "learning_rate": 2.1318471337579618e-05, "loss": 0.8555, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.636202871799469, "learning_rate": 2.1254777070063695e-05, "loss": 0.825, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.6300379037857056, "learning_rate": 2.119108280254777e-05, "loss": 0.8203, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.6774061918258667, "learning_rate": 2.112738853503185e-05, "loss": 0.8373, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.6430328488349915, "learning_rate": 2.1063694267515925e-05, "loss": 0.8227, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.6144772171974182, "learning_rate": 2.1e-05, "loss": 0.8402, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 0.5309426188468933, "learning_rate": 2.0936305732484076e-05, "loss": 0.8686, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5501454472541809, "learning_rate": 2.0872611464968153e-05, "loss": 0.8418, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.6468961238861084, "learning_rate": 2.080891719745223e-05, "loss": 0.8638, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.6229689717292786, "learning_rate": 2.0745222929936306e-05, "loss": 0.8216, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5711995363235474, "learning_rate": 2.0681528662420383e-05, "loss": 0.8599, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.6419293880462646, "learning_rate": 2.061783439490446e-05, "loss": 0.8342, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5840780138969421, "learning_rate": 2.0554140127388534e-05, "loss": 0.8778, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5459557771682739, "learning_rate": 2.049044585987261e-05, "loss": 0.8165, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5893337726593018, "learning_rate": 2.0426751592356688e-05, "loss": 0.8327, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6, "grad_norm": 0.5610139966011047, "learning_rate": 2.0363057324840765e-05, "loss": 0.8046, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 12990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.6202096939086914, "learning_rate": 2.029936305732484e-05, "loss": 0.8562, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.6327053308486938, "learning_rate": 2.023566878980892e-05, "loss": 0.8477, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.5899960398674011, "learning_rate": 2.0171974522292995e-05, "loss": 0.829, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.5683597922325134, "learning_rate": 2.010828025477707e-05, "loss": 0.8641, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.5415876507759094, "learning_rate": 2.0044585987261146e-05, "loss": 0.8223, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.6744927763938904, "learning_rate": 1.9980891719745223e-05, "loss": 0.8265, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.6498674154281616, "learning_rate": 1.99171974522293e-05, "loss": 0.8558, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.61, "grad_norm": 0.561626136302948, "learning_rate": 1.9853503184713377e-05, "loss": 0.8205, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.590421199798584, "learning_rate": 1.9789808917197453e-05, "loss": 0.8283, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.5505582094192505, "learning_rate": 1.972611464968153e-05, "loss": 0.8293, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.6818318963050842, "learning_rate": 1.9662420382165607e-05, "loss": 0.8231, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.6751272082328796, "learning_rate": 1.959872611464968e-05, "loss": 0.8447, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.6087584495544434, "learning_rate": 1.9535031847133758e-05, "loss": 0.8527, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.5452800989151001, "learning_rate": 1.9471337579617835e-05, "loss": 0.8528, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.5750834345817566, "learning_rate": 1.940764331210191e-05, "loss": 0.8695, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.62, "grad_norm": 0.5984904170036316, "learning_rate": 1.934394904458599e-05, "loss": 0.8419, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.5514894723892212, "learning_rate": 1.9280254777070065e-05, "loss": 0.8175, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.5440700650215149, "learning_rate": 1.9216560509554142e-05, "loss": 0.8151, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.6085617542266846, "learning_rate": 1.9152866242038216e-05, "loss": 0.8153, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.5366870164871216, "learning_rate": 1.9089171974522293e-05, "loss": 0.839, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.5688049793243408, "learning_rate": 1.902547770700637e-05, "loss": 0.8257, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.6022163033485413, "learning_rate": 1.8961783439490447e-05, "loss": 0.8301, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.6411393880844116, "learning_rate": 1.8898089171974523e-05, "loss": 0.8222, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.63, "grad_norm": 0.5853878855705261, "learning_rate": 1.88343949044586e-05, "loss": 0.8393, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.7118549942970276, "learning_rate": 1.8770700636942677e-05, "loss": 0.8529, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.6585959196090698, "learning_rate": 1.870700636942675e-05, "loss": 0.8589, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.6305626034736633, "learning_rate": 1.8643312101910828e-05, "loss": 0.8297, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.5811955332756042, "learning_rate": 1.8579617834394905e-05, "loss": 0.8325, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.640131413936615, "learning_rate": 1.851592356687898e-05, "loss": 0.8724, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.556799054145813, "learning_rate": 1.845222929936306e-05, "loss": 0.853, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.5864406228065491, "learning_rate": 1.8388535031847135e-05, "loss": 0.8407, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 0.5744174122810364, "learning_rate": 1.8324840764331212e-05, "loss": 0.8251, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.6507772207260132, "learning_rate": 1.8261146496815286e-05, "loss": 0.8154, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.8050383925437927, "learning_rate": 1.8197452229299363e-05, "loss": 0.847, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.5747182369232178, "learning_rate": 1.813375796178344e-05, "loss": 0.8322, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.5775649547576904, "learning_rate": 1.8070063694267517e-05, "loss": 0.8501, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.60750812292099, "learning_rate": 1.8006369426751593e-05, "loss": 0.8755, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.5230236649513245, "learning_rate": 1.794267515923567e-05, "loss": 0.8329, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.5921688079833984, "learning_rate": 1.7878980891719747e-05, "loss": 0.8172, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.65, "grad_norm": 0.49644508957862854, "learning_rate": 1.781528662420382e-05, "loss": 0.8517, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.6654114723205566, "learning_rate": 1.7751592356687898e-05, "loss": 0.8535, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.6532041430473328, "learning_rate": 1.7687898089171975e-05, "loss": 0.8325, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.5903627872467041, "learning_rate": 1.762420382165605e-05, "loss": 0.8121, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.6084394454956055, "learning_rate": 1.756050955414013e-05, "loss": 0.8374, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.6949676275253296, "learning_rate": 1.7496815286624205e-05, "loss": 0.7976, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.5996536612510681, "learning_rate": 1.7433121019108282e-05, "loss": 0.8629, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.5606485605239868, "learning_rate": 1.736942675159236e-05, "loss": 0.8258, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.66, "grad_norm": 0.6412463784217834, "learning_rate": 1.7305732484076433e-05, "loss": 0.8287, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.7513819932937622, "learning_rate": 1.724203821656051e-05, "loss": 0.842, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.5666155219078064, "learning_rate": 1.7178343949044587e-05, "loss": 0.8332, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.5912254452705383, "learning_rate": 1.7114649681528663e-05, "loss": 0.8699, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13500, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.6989673376083374, "learning_rate": 1.705095541401274e-05, "loss": 0.8553, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13510, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.5839787125587463, "learning_rate": 1.6987261146496817e-05, "loss": 0.8446, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13520, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.6057241559028625, "learning_rate": 1.6923566878980894e-05, "loss": 0.8351, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13530, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.6739611625671387, "learning_rate": 1.6859872611464968e-05, "loss": 0.8358, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13540, "total_memory_available (GB)": 94.62 }, { "epoch": 1.67, "grad_norm": 0.5866987705230713, "learning_rate": 1.6796178343949045e-05, "loss": 0.8403, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13550, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.6147080659866333, "learning_rate": 1.673248407643312e-05, "loss": 0.8591, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13560, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.5726975798606873, "learning_rate": 1.66687898089172e-05, "loss": 0.8434, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13570, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.6229400038719177, "learning_rate": 1.6605095541401275e-05, "loss": 0.8634, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13580, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.7118831276893616, "learning_rate": 1.6541401273885352e-05, "loss": 0.8281, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13590, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.5629773736000061, "learning_rate": 1.647770700636943e-05, "loss": 0.8435, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13600, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.6835569143295288, "learning_rate": 1.6414012738853503e-05, "loss": 0.8255, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13610, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.5470395088195801, "learning_rate": 1.635031847133758e-05, "loss": 0.8411, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13620, "total_memory_available (GB)": 94.62 }, { "epoch": 1.68, "grad_norm": 0.65526282787323, "learning_rate": 1.6286624203821657e-05, "loss": 0.8264, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13630, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.6258267760276794, "learning_rate": 1.6222929936305733e-05, "loss": 0.8449, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13640, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5335571765899658, "learning_rate": 1.615923566878981e-05, "loss": 0.8419, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13650, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5841179490089417, "learning_rate": 1.6095541401273887e-05, "loss": 0.8447, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13660, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5307532548904419, "learning_rate": 1.6031847133757964e-05, "loss": 0.8448, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13670, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.550396203994751, "learning_rate": 1.5968152866242038e-05, "loss": 0.8213, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13680, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5528126955032349, "learning_rate": 1.5904458598726115e-05, "loss": 0.8329, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13690, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5814225673675537, "learning_rate": 1.584076433121019e-05, "loss": 0.8762, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13700, "total_memory_available (GB)": 94.62 }, { "epoch": 1.69, "grad_norm": 0.5877110958099365, "learning_rate": 1.577707006369427e-05, "loss": 0.7988, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13710, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.5498624444007874, "learning_rate": 1.5713375796178345e-05, "loss": 0.839, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13720, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.5788300633430481, "learning_rate": 1.5649681528662422e-05, "loss": 0.8461, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13730, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.552385687828064, "learning_rate": 1.55859872611465e-05, "loss": 0.8353, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13740, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.612408459186554, "learning_rate": 1.5522292993630573e-05, "loss": 0.8423, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13750, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.5637169480323792, "learning_rate": 1.545859872611465e-05, "loss": 0.8301, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13760, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.5684060454368591, "learning_rate": 1.5394904458598727e-05, "loss": 0.8462, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13770, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.5924950242042542, "learning_rate": 1.5331210191082803e-05, "loss": 0.8544, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13780, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 0.6607167720794678, "learning_rate": 1.526751592356688e-05, "loss": 0.849, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13790, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.6007324457168579, "learning_rate": 1.5203821656050957e-05, "loss": 0.8168, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13800, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.592782199382782, "learning_rate": 1.5140127388535033e-05, "loss": 0.8577, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13810, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.559945821762085, "learning_rate": 1.507643312101911e-05, "loss": 0.8375, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13820, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.5329104661941528, "learning_rate": 1.5012738853503186e-05, "loss": 0.8273, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13830, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.5675476789474487, "learning_rate": 1.4949044585987263e-05, "loss": 0.8517, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13840, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.7505592703819275, "learning_rate": 1.4885350318471338e-05, "loss": 0.8623, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13850, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.6333470940589905, "learning_rate": 1.4821656050955415e-05, "loss": 0.8293, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13860, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.5732998251914978, "learning_rate": 1.4757961783439492e-05, "loss": 0.8065, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13870, "total_memory_available (GB)": 94.62 }, { "epoch": 1.71, "grad_norm": 0.762664794921875, "learning_rate": 1.4694267515923568e-05, "loss": 0.8442, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13880, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.6154226660728455, "learning_rate": 1.4630573248407644e-05, "loss": 0.8316, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13890, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.6072494387626648, "learning_rate": 1.4566878980891721e-05, "loss": 0.8282, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13900, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.5880212783813477, "learning_rate": 1.4503184713375798e-05, "loss": 0.8255, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13910, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.690006673336029, "learning_rate": 1.4439490445859873e-05, "loss": 0.8143, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13920, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.7075156569480896, "learning_rate": 1.437579617834395e-05, "loss": 0.8531, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13930, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.6131463646888733, "learning_rate": 1.4312101910828027e-05, "loss": 0.8441, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13940, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.5739683508872986, "learning_rate": 1.4248407643312103e-05, "loss": 0.8214, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13950, "total_memory_available (GB)": 94.62 }, { "epoch": 1.72, "grad_norm": 0.6377345323562622, "learning_rate": 1.418471337579618e-05, "loss": 0.8444, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13960, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.6518220901489258, "learning_rate": 1.4121019108280256e-05, "loss": 0.7968, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13970, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.607457160949707, "learning_rate": 1.4057324840764333e-05, "loss": 0.8544, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13980, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.6501799821853638, "learning_rate": 1.3993630573248408e-05, "loss": 0.816, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 13990, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.622092604637146, "learning_rate": 1.3929936305732485e-05, "loss": 0.821, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.6852797269821167, "learning_rate": 1.3866242038216562e-05, "loss": 0.8772, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.5740447640419006, "learning_rate": 1.380254777070064e-05, "loss": 0.8134, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.599518895149231, "learning_rate": 1.3738853503184714e-05, "loss": 0.8318, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.73, "grad_norm": 0.6162179708480835, "learning_rate": 1.3675159235668791e-05, "loss": 0.8333, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.6470344066619873, "learning_rate": 1.3611464968152868e-05, "loss": 0.8437, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.6147076487541199, "learning_rate": 1.3547770700636944e-05, "loss": 0.8394, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.5687283873558044, "learning_rate": 1.348407643312102e-05, "loss": 0.8219, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.5249031186103821, "learning_rate": 1.3420382165605097e-05, "loss": 0.8563, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.5840549468994141, "learning_rate": 1.3356687898089174e-05, "loss": 0.8249, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.6300859451293945, "learning_rate": 1.329299363057325e-05, "loss": 0.8453, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.6822474002838135, "learning_rate": 1.3229299363057326e-05, "loss": 0.8445, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.74, "grad_norm": 0.5768283605575562, "learning_rate": 1.3165605095541403e-05, "loss": 0.8067, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.6939039826393127, "learning_rate": 1.3101910828025479e-05, "loss": 0.8396, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.5618141889572144, "learning_rate": 1.3038216560509555e-05, "loss": 0.8161, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.6244920492172241, "learning_rate": 1.2974522292993632e-05, "loss": 0.8396, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.5430182814598083, "learning_rate": 1.291082802547771e-05, "loss": 0.8311, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.6174887418746948, "learning_rate": 1.2847133757961784e-05, "loss": 0.833, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.5842854976654053, "learning_rate": 1.2783439490445861e-05, "loss": 0.8638, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.6127358675003052, "learning_rate": 1.2719745222929938e-05, "loss": 0.8324, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 0.7250066995620728, "learning_rate": 1.2656050955414015e-05, "loss": 0.8343, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.5589357614517212, "learning_rate": 1.259235668789809e-05, "loss": 0.8547, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.5304135084152222, "learning_rate": 1.2528662420382167e-05, "loss": 0.8325, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.560059666633606, "learning_rate": 1.2464968152866243e-05, "loss": 0.8474, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.5708820223808289, "learning_rate": 1.240127388535032e-05, "loss": 0.8501, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.6215733885765076, "learning_rate": 1.2337579617834395e-05, "loss": 0.819, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.6077948212623596, "learning_rate": 1.2273885350318472e-05, "loss": 0.8235, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.5893474817276001, "learning_rate": 1.2210191082802549e-05, "loss": 0.8284, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.76, "grad_norm": 0.5367608070373535, "learning_rate": 1.2146496815286624e-05, "loss": 0.815, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.5464059114456177, "learning_rate": 1.20828025477707e-05, "loss": 0.8038, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6341402530670166, "learning_rate": 1.2019108280254778e-05, "loss": 0.8415, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6049284934997559, "learning_rate": 1.1955414012738854e-05, "loss": 0.8316, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6299666166305542, "learning_rate": 1.189171974522293e-05, "loss": 0.8407, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6380517482757568, "learning_rate": 1.1828025477707007e-05, "loss": 0.8387, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6077808737754822, "learning_rate": 1.1764331210191084e-05, "loss": 0.8465, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.6068784594535828, "learning_rate": 1.170063694267516e-05, "loss": 0.8281, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.77, "grad_norm": 0.5653278827667236, "learning_rate": 1.1636942675159236e-05, "loss": 0.8244, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.6187404990196228, "learning_rate": 1.1573248407643313e-05, "loss": 0.8249, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.5997905731201172, "learning_rate": 1.150955414012739e-05, "loss": 0.8258, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.571714460849762, "learning_rate": 1.1445859872611465e-05, "loss": 0.8185, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.6664509177207947, "learning_rate": 1.1382165605095542e-05, "loss": 0.8445, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.5972470641136169, "learning_rate": 1.1318471337579619e-05, "loss": 0.8292, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.6396761536598206, "learning_rate": 1.1254777070063695e-05, "loss": 0.8392, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.6399985551834106, "learning_rate": 1.119108280254777e-05, "loss": 0.8365, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.78, "grad_norm": 0.7280391454696655, "learning_rate": 1.1127388535031848e-05, "loss": 0.862, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.542850136756897, "learning_rate": 1.1063694267515924e-05, "loss": 0.8261, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.5785331726074219, "learning_rate": 1.1000000000000001e-05, "loss": 0.8261, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.5825355052947998, "learning_rate": 1.0936305732484077e-05, "loss": 0.8389, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.5818856954574585, "learning_rate": 1.0872611464968154e-05, "loss": 0.82, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.5814858078956604, "learning_rate": 1.080891719745223e-05, "loss": 0.8477, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.79, "grad_norm": 0.6301888823509216, "learning_rate": 1.0745222929936306e-05, "loss": 0.8416, "max_memory_allocated (GB)": 94.6, "memory_allocated (GB)": 14.97, "step": 14500, "total_memory_available (GB)": 94.62 } ], "logging_steps": 10, "max_steps": 16186, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 4.062009042959073e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }