{ "best_metric": 0.9794921875, "best_model_checkpoint": "resnet-Alzheimer/checkpoint-3600", "epoch": 50.0, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "grad_norm": 2.1754038333892822, "learning_rate": 5e-05, "loss": 1.3812, "step": 10 }, { "epoch": 0.25, "grad_norm": 3.91097092628479, "learning_rate": 0.0001, "loss": 1.34, "step": 20 }, { "epoch": 0.38, "grad_norm": 4.134509563446045, "learning_rate": 0.00015, "loss": 1.2635, "step": 30 }, { "epoch": 0.5, "grad_norm": 6.377187252044678, "learning_rate": 0.0002, "loss": 1.1824, "step": 40 }, { "epoch": 0.62, "grad_norm": 7.713193416595459, "learning_rate": 0.00025, "loss": 1.1226, "step": 50 }, { "epoch": 0.75, "grad_norm": 3.569382429122925, "learning_rate": 0.0003, "loss": 1.0308, "step": 60 }, { "epoch": 0.88, "grad_norm": 5.650737285614014, "learning_rate": 0.00035, "loss": 1.0115, "step": 70 }, { "epoch": 1.0, "grad_norm": 5.970870494842529, "learning_rate": 0.0004, "loss": 1.0127, "step": 80 }, { "epoch": 1.0, "eval_accuracy": 0.5087890625, "eval_loss": 0.9888483285903931, "eval_runtime": 5.4153, "eval_samples_per_second": 189.093, "eval_steps_per_second": 11.818, "step": 80 }, { "epoch": 1.12, "grad_norm": 2.92154860496521, "learning_rate": 0.00045000000000000004, "loss": 1.0175, "step": 90 }, { "epoch": 1.25, "grad_norm": 4.131512641906738, "learning_rate": 0.0005, "loss": 0.918, "step": 100 }, { "epoch": 1.38, "grad_norm": 2.9838943481445312, "learning_rate": 0.00055, "loss": 0.9613, "step": 110 }, { "epoch": 1.5, "grad_norm": 2.4230854511260986, "learning_rate": 0.0006, "loss": 0.9188, "step": 120 }, { "epoch": 1.62, "grad_norm": 3.774385690689087, "learning_rate": 0.0006500000000000001, "loss": 0.9245, "step": 130 }, { "epoch": 1.75, "grad_norm": 3.175440549850464, "learning_rate": 0.0007, "loss": 0.8713, "step": 140 }, { "epoch": 1.88, "grad_norm": 2.7985305786132812, "learning_rate": 0.00075, "loss": 0.9264, "step": 150 }, { "epoch": 2.0, "grad_norm": 1.5870747566223145, "learning_rate": 0.0008, "loss": 0.9345, "step": 160 }, { "epoch": 2.0, "eval_accuracy": 0.5302734375, "eval_loss": 0.942151665687561, "eval_runtime": 5.1954, "eval_samples_per_second": 197.099, "eval_steps_per_second": 12.319, "step": 160 }, { "epoch": 2.12, "grad_norm": 1.3990691900253296, "learning_rate": 0.00085, "loss": 0.8851, "step": 170 }, { "epoch": 2.25, "grad_norm": 1.4066977500915527, "learning_rate": 0.0009000000000000001, "loss": 0.9081, "step": 180 }, { "epoch": 2.38, "grad_norm": 1.5531185865402222, "learning_rate": 0.00095, "loss": 0.8888, "step": 190 }, { "epoch": 2.5, "grad_norm": 1.960204839706421, "learning_rate": 0.001, "loss": 0.9206, "step": 200 }, { "epoch": 2.62, "grad_norm": 1.0218795537948608, "learning_rate": 0.0010500000000000002, "loss": 0.8735, "step": 210 }, { "epoch": 2.75, "grad_norm": 1.9216176271438599, "learning_rate": 0.0011, "loss": 0.924, "step": 220 }, { "epoch": 2.88, "grad_norm": 1.9017541408538818, "learning_rate": 0.00115, "loss": 0.9327, "step": 230 }, { "epoch": 3.0, "grad_norm": 1.556686282157898, "learning_rate": 0.0012, "loss": 0.8889, "step": 240 }, { "epoch": 3.0, "eval_accuracy": 0.578125, "eval_loss": 0.8723889589309692, "eval_runtime": 4.9225, "eval_samples_per_second": 208.026, "eval_steps_per_second": 13.002, "step": 240 }, { "epoch": 3.12, "grad_norm": 0.9459726810455322, "learning_rate": 0.00125, "loss": 0.8643, "step": 250 }, { "epoch": 3.25, "grad_norm": 0.749912679195404, "learning_rate": 0.0013000000000000002, "loss": 0.8978, "step": 260 }, { "epoch": 3.38, "grad_norm": 0.9228126406669617, "learning_rate": 0.00135, "loss": 0.8838, "step": 270 }, { "epoch": 3.5, "grad_norm": 1.0743939876556396, "learning_rate": 0.0014, "loss": 0.8868, "step": 280 }, { "epoch": 3.62, "grad_norm": 0.997053325176239, "learning_rate": 0.00145, "loss": 0.8632, "step": 290 }, { "epoch": 3.75, "grad_norm": 0.5891302227973938, "learning_rate": 0.0015, "loss": 0.8501, "step": 300 }, { "epoch": 3.88, "grad_norm": 1.0819345712661743, "learning_rate": 0.0015500000000000002, "loss": 0.884, "step": 310 }, { "epoch": 4.0, "grad_norm": 0.5622245073318481, "learning_rate": 0.0016, "loss": 0.8843, "step": 320 }, { "epoch": 4.0, "eval_accuracy": 0.5888671875, "eval_loss": 0.8535706996917725, "eval_runtime": 6.5338, "eval_samples_per_second": 156.725, "eval_steps_per_second": 9.795, "step": 320 }, { "epoch": 4.12, "grad_norm": 0.7301604747772217, "learning_rate": 0.00165, "loss": 0.8241, "step": 330 }, { "epoch": 4.25, "grad_norm": 1.222732424736023, "learning_rate": 0.0017, "loss": 0.8929, "step": 340 }, { "epoch": 4.38, "grad_norm": 0.8520879745483398, "learning_rate": 0.00175, "loss": 0.8726, "step": 350 }, { "epoch": 4.5, "grad_norm": 0.6151734590530396, "learning_rate": 0.0018000000000000002, "loss": 0.8851, "step": 360 }, { "epoch": 4.62, "grad_norm": 0.6786526441574097, "learning_rate": 0.00185, "loss": 0.8544, "step": 370 }, { "epoch": 4.75, "grad_norm": 0.8025469779968262, "learning_rate": 0.0019, "loss": 0.8432, "step": 380 }, { "epoch": 4.88, "grad_norm": 1.0158729553222656, "learning_rate": 0.00195, "loss": 0.8261, "step": 390 }, { "epoch": 5.0, "grad_norm": 0.7815405130386353, "learning_rate": 0.002, "loss": 0.8397, "step": 400 }, { "epoch": 5.0, "eval_accuracy": 0.615234375, "eval_loss": 0.8353910446166992, "eval_runtime": 5.3022, "eval_samples_per_second": 193.129, "eval_steps_per_second": 12.071, "step": 400 }, { "epoch": 5.12, "grad_norm": 0.9535025954246521, "learning_rate": 0.0019944444444444445, "loss": 0.8636, "step": 410 }, { "epoch": 5.25, "grad_norm": 0.43990448117256165, "learning_rate": 0.001988888888888889, "loss": 0.7931, "step": 420 }, { "epoch": 5.38, "grad_norm": 0.6062633395195007, "learning_rate": 0.0019833333333333335, "loss": 0.8345, "step": 430 }, { "epoch": 5.5, "grad_norm": 0.6349042057991028, "learning_rate": 0.001977777777777778, "loss": 0.8593, "step": 440 }, { "epoch": 5.62, "grad_norm": 0.6786915063858032, "learning_rate": 0.0019722222222222224, "loss": 0.8435, "step": 450 }, { "epoch": 5.75, "grad_norm": 0.7090786695480347, "learning_rate": 0.0019666666666666665, "loss": 0.8008, "step": 460 }, { "epoch": 5.88, "grad_norm": 0.6538481712341309, "learning_rate": 0.001961111111111111, "loss": 0.763, "step": 470 }, { "epoch": 6.0, "grad_norm": 0.4316461682319641, "learning_rate": 0.0019555555555555554, "loss": 0.8624, "step": 480 }, { "epoch": 6.0, "eval_accuracy": 0.5380859375, "eval_loss": 0.9221189022064209, "eval_runtime": 4.9173, "eval_samples_per_second": 208.245, "eval_steps_per_second": 13.015, "step": 480 }, { "epoch": 6.12, "grad_norm": 0.5696819424629211, "learning_rate": 0.00195, "loss": 0.8065, "step": 490 }, { "epoch": 6.25, "grad_norm": 0.6260728240013123, "learning_rate": 0.0019444444444444444, "loss": 0.7873, "step": 500 }, { "epoch": 6.38, "grad_norm": 0.6295855045318604, "learning_rate": 0.0019388888888888889, "loss": 0.7802, "step": 510 }, { "epoch": 6.5, "grad_norm": 0.6074417233467102, "learning_rate": 0.0019333333333333333, "loss": 0.7907, "step": 520 }, { "epoch": 6.62, "grad_norm": 0.6099679470062256, "learning_rate": 0.0019277777777777778, "loss": 0.7391, "step": 530 }, { "epoch": 6.75, "grad_norm": 0.9349565505981445, "learning_rate": 0.0019222222222222223, "loss": 0.7749, "step": 540 }, { "epoch": 6.88, "grad_norm": 0.6923946142196655, "learning_rate": 0.0019166666666666668, "loss": 0.8232, "step": 550 }, { "epoch": 7.0, "grad_norm": 0.5967056751251221, "learning_rate": 0.0019111111111111113, "loss": 0.7543, "step": 560 }, { "epoch": 7.0, "eval_accuracy": 0.6474609375, "eval_loss": 0.7568103671073914, "eval_runtime": 5.3014, "eval_samples_per_second": 193.156, "eval_steps_per_second": 12.072, "step": 560 }, { "epoch": 7.12, "grad_norm": 0.7925052642822266, "learning_rate": 0.0019055555555555555, "loss": 0.7086, "step": 570 }, { "epoch": 7.25, "grad_norm": 0.715761125087738, "learning_rate": 0.0019, "loss": 0.7901, "step": 580 }, { "epoch": 7.38, "grad_norm": 0.6602711081504822, "learning_rate": 0.0018944444444444445, "loss": 0.7375, "step": 590 }, { "epoch": 7.5, "grad_norm": 0.5104066729545593, "learning_rate": 0.001888888888888889, "loss": 0.7805, "step": 600 }, { "epoch": 7.62, "grad_norm": 0.6333702802658081, "learning_rate": 0.0018833333333333334, "loss": 0.7017, "step": 610 }, { "epoch": 7.75, "grad_norm": 0.5703239440917969, "learning_rate": 0.001877777777777778, "loss": 0.7086, "step": 620 }, { "epoch": 7.88, "grad_norm": 0.8939486742019653, "learning_rate": 0.0018722222222222222, "loss": 0.7399, "step": 630 }, { "epoch": 8.0, "grad_norm": 0.6808524131774902, "learning_rate": 0.0018666666666666666, "loss": 0.6993, "step": 640 }, { "epoch": 8.0, "eval_accuracy": 0.61328125, "eval_loss": 0.8830391764640808, "eval_runtime": 4.9073, "eval_samples_per_second": 208.667, "eval_steps_per_second": 13.042, "step": 640 }, { "epoch": 8.12, "grad_norm": 0.7670312523841858, "learning_rate": 0.0018611111111111111, "loss": 0.7304, "step": 650 }, { "epoch": 8.25, "grad_norm": 0.518883466720581, "learning_rate": 0.0018555555555555556, "loss": 0.6759, "step": 660 }, { "epoch": 8.38, "grad_norm": 0.6331384778022766, "learning_rate": 0.00185, "loss": 0.7323, "step": 670 }, { "epoch": 8.5, "grad_norm": 0.5934571027755737, "learning_rate": 0.0018444444444444446, "loss": 0.7109, "step": 680 }, { "epoch": 8.62, "grad_norm": 0.5555841326713562, "learning_rate": 0.0018388888888888888, "loss": 0.7361, "step": 690 }, { "epoch": 8.75, "grad_norm": 0.45028582215309143, "learning_rate": 0.0018333333333333333, "loss": 0.7209, "step": 700 }, { "epoch": 8.88, "grad_norm": 0.4313984811306, "learning_rate": 0.0018277777777777778, "loss": 0.692, "step": 710 }, { "epoch": 9.0, "grad_norm": 0.6221916675567627, "learning_rate": 0.0018222222222222223, "loss": 0.7045, "step": 720 }, { "epoch": 9.0, "eval_accuracy": 0.658203125, "eval_loss": 0.7372878789901733, "eval_runtime": 4.9218, "eval_samples_per_second": 208.053, "eval_steps_per_second": 13.003, "step": 720 }, { "epoch": 9.12, "grad_norm": 0.9794626832008362, "learning_rate": 0.0018166666666666667, "loss": 0.641, "step": 730 }, { "epoch": 9.25, "grad_norm": 0.8530990481376648, "learning_rate": 0.0018111111111111112, "loss": 0.6616, "step": 740 }, { "epoch": 9.38, "grad_norm": 0.5696712136268616, "learning_rate": 0.0018055555555555557, "loss": 0.6685, "step": 750 }, { "epoch": 9.5, "grad_norm": 0.6695945858955383, "learning_rate": 0.0018000000000000002, "loss": 0.6079, "step": 760 }, { "epoch": 9.62, "grad_norm": 0.9470874667167664, "learning_rate": 0.0017944444444444446, "loss": 0.6362, "step": 770 }, { "epoch": 9.75, "grad_norm": 1.0435755252838135, "learning_rate": 0.001788888888888889, "loss": 0.7036, "step": 780 }, { "epoch": 9.88, "grad_norm": 0.4934737980365753, "learning_rate": 0.0017833333333333334, "loss": 0.6955, "step": 790 }, { "epoch": 10.0, "grad_norm": 0.49625110626220703, "learning_rate": 0.0017777777777777776, "loss": 0.6557, "step": 800 }, { "epoch": 10.0, "eval_accuracy": 0.7451171875, "eval_loss": 0.6075544357299805, "eval_runtime": 5.2806, "eval_samples_per_second": 193.918, "eval_steps_per_second": 12.12, "step": 800 }, { "epoch": 10.12, "grad_norm": 0.49739229679107666, "learning_rate": 0.0017722222222222221, "loss": 0.605, "step": 810 }, { "epoch": 10.25, "grad_norm": 0.6317277550697327, "learning_rate": 0.0017666666666666666, "loss": 0.5332, "step": 820 }, { "epoch": 10.38, "grad_norm": 0.756879985332489, "learning_rate": 0.001761111111111111, "loss": 0.5619, "step": 830 }, { "epoch": 10.5, "grad_norm": 0.6143298149108887, "learning_rate": 0.0017555555555555556, "loss": 0.601, "step": 840 }, { "epoch": 10.62, "grad_norm": 0.7249147891998291, "learning_rate": 0.00175, "loss": 0.5935, "step": 850 }, { "epoch": 10.75, "grad_norm": 0.4532654285430908, "learning_rate": 0.0017444444444444445, "loss": 0.5988, "step": 860 }, { "epoch": 10.88, "grad_norm": 0.5738415718078613, "learning_rate": 0.001738888888888889, "loss": 0.6634, "step": 870 }, { "epoch": 11.0, "grad_norm": 0.5514868497848511, "learning_rate": 0.0017333333333333335, "loss": 0.5876, "step": 880 }, { "epoch": 11.0, "eval_accuracy": 0.69921875, "eval_loss": 0.7281272411346436, "eval_runtime": 4.8994, "eval_samples_per_second": 209.004, "eval_steps_per_second": 13.063, "step": 880 }, { "epoch": 11.12, "grad_norm": 0.7158863544464111, "learning_rate": 0.001727777777777778, "loss": 0.606, "step": 890 }, { "epoch": 11.25, "grad_norm": 0.7355363368988037, "learning_rate": 0.0017222222222222224, "loss": 0.5923, "step": 900 }, { "epoch": 11.38, "grad_norm": 0.7794367671012878, "learning_rate": 0.0017166666666666667, "loss": 0.5935, "step": 910 }, { "epoch": 11.5, "grad_norm": 0.9755826592445374, "learning_rate": 0.0017111111111111112, "loss": 0.644, "step": 920 }, { "epoch": 11.62, "grad_norm": 0.6257722973823547, "learning_rate": 0.0017055555555555554, "loss": 0.617, "step": 930 }, { "epoch": 11.75, "grad_norm": 0.8550503253936768, "learning_rate": 0.0017, "loss": 0.5854, "step": 940 }, { "epoch": 11.88, "grad_norm": 0.7347137928009033, "learning_rate": 0.0016944444444444444, "loss": 0.6358, "step": 950 }, { "epoch": 12.0, "grad_norm": 0.7867416739463806, "learning_rate": 0.0016888888888888889, "loss": 0.5732, "step": 960 }, { "epoch": 12.0, "eval_accuracy": 0.7509765625, "eval_loss": 0.5769097208976746, "eval_runtime": 5.275, "eval_samples_per_second": 194.122, "eval_steps_per_second": 12.133, "step": 960 }, { "epoch": 12.12, "grad_norm": 0.6022630333900452, "learning_rate": 0.0016833333333333333, "loss": 0.5643, "step": 970 }, { "epoch": 12.25, "grad_norm": 0.599958062171936, "learning_rate": 0.0016777777777777778, "loss": 0.5438, "step": 980 }, { "epoch": 12.38, "grad_norm": 0.6484814286231995, "learning_rate": 0.0016722222222222223, "loss": 0.5208, "step": 990 }, { "epoch": 12.5, "grad_norm": 0.8167735934257507, "learning_rate": 0.0016666666666666668, "loss": 0.5369, "step": 1000 }, { "epoch": 12.62, "grad_norm": 0.49088793992996216, "learning_rate": 0.0016611111111111113, "loss": 0.4803, "step": 1010 }, { "epoch": 12.75, "grad_norm": 0.6817615628242493, "learning_rate": 0.0016555555555555555, "loss": 0.5102, "step": 1020 }, { "epoch": 12.88, "grad_norm": 0.8656439781188965, "learning_rate": 0.00165, "loss": 0.5287, "step": 1030 }, { "epoch": 13.0, "grad_norm": 0.5195401310920715, "learning_rate": 0.0016444444444444445, "loss": 0.4864, "step": 1040 }, { "epoch": 13.0, "eval_accuracy": 0.8310546875, "eval_loss": 0.445728600025177, "eval_runtime": 4.9054, "eval_samples_per_second": 208.751, "eval_steps_per_second": 13.047, "step": 1040 }, { "epoch": 13.12, "grad_norm": 0.7564366459846497, "learning_rate": 0.001638888888888889, "loss": 0.4715, "step": 1050 }, { "epoch": 13.25, "grad_norm": 0.6976212859153748, "learning_rate": 0.0016333333333333334, "loss": 0.471, "step": 1060 }, { "epoch": 13.38, "grad_norm": 0.7652568817138672, "learning_rate": 0.001627777777777778, "loss": 0.4821, "step": 1070 }, { "epoch": 13.5, "grad_norm": 0.7834269404411316, "learning_rate": 0.0016222222222222222, "loss": 0.5091, "step": 1080 }, { "epoch": 13.62, "grad_norm": 0.8186032176017761, "learning_rate": 0.0016166666666666666, "loss": 0.4611, "step": 1090 }, { "epoch": 13.75, "grad_norm": 0.7720199227333069, "learning_rate": 0.0016111111111111111, "loss": 0.5397, "step": 1100 }, { "epoch": 13.88, "grad_norm": 0.6797453165054321, "learning_rate": 0.0016055555555555556, "loss": 0.5135, "step": 1110 }, { "epoch": 14.0, "grad_norm": 0.726184606552124, "learning_rate": 0.0016, "loss": 0.5175, "step": 1120 }, { "epoch": 14.0, "eval_accuracy": 0.7841796875, "eval_loss": 0.5278125405311584, "eval_runtime": 4.8906, "eval_samples_per_second": 209.383, "eval_steps_per_second": 13.086, "step": 1120 }, { "epoch": 14.12, "grad_norm": 0.6777172088623047, "learning_rate": 0.0015944444444444446, "loss": 0.4831, "step": 1130 }, { "epoch": 14.25, "grad_norm": 0.6228752732276917, "learning_rate": 0.0015888888888888888, "loss": 0.4657, "step": 1140 }, { "epoch": 14.38, "grad_norm": 0.7296370267868042, "learning_rate": 0.0015833333333333333, "loss": 0.5084, "step": 1150 }, { "epoch": 14.5, "grad_norm": 0.7809439897537231, "learning_rate": 0.0015777777777777778, "loss": 0.4749, "step": 1160 }, { "epoch": 14.62, "grad_norm": 0.4627506136894226, "learning_rate": 0.0015722222222222223, "loss": 0.4157, "step": 1170 }, { "epoch": 14.75, "grad_norm": 0.465811163187027, "learning_rate": 0.0015666666666666667, "loss": 0.4192, "step": 1180 }, { "epoch": 14.88, "grad_norm": 0.636384129524231, "learning_rate": 0.0015611111111111112, "loss": 0.4627, "step": 1190 }, { "epoch": 15.0, "grad_norm": 0.8339561223983765, "learning_rate": 0.0015555555555555557, "loss": 0.4865, "step": 1200 }, { "epoch": 15.0, "eval_accuracy": 0.837890625, "eval_loss": 0.4163576364517212, "eval_runtime": 5.3394, "eval_samples_per_second": 191.782, "eval_steps_per_second": 11.986, "step": 1200 }, { "epoch": 15.12, "grad_norm": 0.5218497514724731, "learning_rate": 0.0015500000000000002, "loss": 0.4253, "step": 1210 }, { "epoch": 15.25, "grad_norm": 0.6273193359375, "learning_rate": 0.0015444444444444446, "loss": 0.4474, "step": 1220 }, { "epoch": 15.38, "grad_norm": 0.6019622087478638, "learning_rate": 0.001538888888888889, "loss": 0.4008, "step": 1230 }, { "epoch": 15.5, "grad_norm": 0.7020573616027832, "learning_rate": 0.0015333333333333334, "loss": 0.3768, "step": 1240 }, { "epoch": 15.62, "grad_norm": 0.577691376209259, "learning_rate": 0.0015277777777777776, "loss": 0.4108, "step": 1250 }, { "epoch": 15.75, "grad_norm": 0.8489026427268982, "learning_rate": 0.0015222222222222221, "loss": 0.3994, "step": 1260 }, { "epoch": 15.88, "grad_norm": 0.42233309149742126, "learning_rate": 0.0015166666666666666, "loss": 0.4292, "step": 1270 }, { "epoch": 16.0, "grad_norm": 0.48867735266685486, "learning_rate": 0.001511111111111111, "loss": 0.4049, "step": 1280 }, { "epoch": 16.0, "eval_accuracy": 0.830078125, "eval_loss": 0.4204105734825134, "eval_runtime": 4.8855, "eval_samples_per_second": 209.602, "eval_steps_per_second": 13.1, "step": 1280 }, { "epoch": 16.12, "grad_norm": 0.6492818593978882, "learning_rate": 0.0015055555555555556, "loss": 0.3885, "step": 1290 }, { "epoch": 16.25, "grad_norm": 0.4546281397342682, "learning_rate": 0.0015, "loss": 0.4096, "step": 1300 }, { "epoch": 16.38, "grad_norm": 0.6827344298362732, "learning_rate": 0.0014944444444444445, "loss": 0.3618, "step": 1310 }, { "epoch": 16.5, "grad_norm": 0.454326868057251, "learning_rate": 0.001488888888888889, "loss": 0.3863, "step": 1320 }, { "epoch": 16.62, "grad_norm": 0.6911420226097107, "learning_rate": 0.0014833333333333335, "loss": 0.4264, "step": 1330 }, { "epoch": 16.75, "grad_norm": 0.6122339367866516, "learning_rate": 0.001477777777777778, "loss": 0.4205, "step": 1340 }, { "epoch": 16.88, "grad_norm": 0.5123728513717651, "learning_rate": 0.0014722222222222224, "loss": 0.4419, "step": 1350 }, { "epoch": 17.0, "grad_norm": 1.0908498764038086, "learning_rate": 0.0014666666666666667, "loss": 0.4167, "step": 1360 }, { "epoch": 17.0, "eval_accuracy": 0.828125, "eval_loss": 0.47203314304351807, "eval_runtime": 5.3495, "eval_samples_per_second": 191.421, "eval_steps_per_second": 11.964, "step": 1360 }, { "epoch": 17.12, "grad_norm": 0.42975571751594543, "learning_rate": 0.0014611111111111112, "loss": 0.4006, "step": 1370 }, { "epoch": 17.25, "grad_norm": 0.6392154693603516, "learning_rate": 0.0014555555555555554, "loss": 0.3581, "step": 1380 }, { "epoch": 17.38, "grad_norm": 0.6548070907592773, "learning_rate": 0.00145, "loss": 0.3672, "step": 1390 }, { "epoch": 17.5, "grad_norm": 0.6939528584480286, "learning_rate": 0.0014444444444444444, "loss": 0.3514, "step": 1400 }, { "epoch": 17.62, "grad_norm": 0.6098494529724121, "learning_rate": 0.0014388888888888889, "loss": 0.3835, "step": 1410 }, { "epoch": 17.75, "grad_norm": 0.5356572866439819, "learning_rate": 0.0014333333333333333, "loss": 0.3326, "step": 1420 }, { "epoch": 17.88, "grad_norm": 0.6472760438919067, "learning_rate": 0.0014277777777777778, "loss": 0.3829, "step": 1430 }, { "epoch": 18.0, "grad_norm": 0.67198646068573, "learning_rate": 0.0014222222222222223, "loss": 0.36, "step": 1440 }, { "epoch": 18.0, "eval_accuracy": 0.81640625, "eval_loss": 0.4660454988479614, "eval_runtime": 4.9124, "eval_samples_per_second": 208.451, "eval_steps_per_second": 13.028, "step": 1440 }, { "epoch": 18.12, "grad_norm": 0.4594449996948242, "learning_rate": 0.0014166666666666668, "loss": 0.3549, "step": 1450 }, { "epoch": 18.25, "grad_norm": 0.4456086754798889, "learning_rate": 0.0014111111111111112, "loss": 0.2899, "step": 1460 }, { "epoch": 18.38, "grad_norm": 0.724087119102478, "learning_rate": 0.0014055555555555555, "loss": 0.2976, "step": 1470 }, { "epoch": 18.5, "grad_norm": 0.8099024891853333, "learning_rate": 0.0014, "loss": 0.3706, "step": 1480 }, { "epoch": 18.62, "grad_norm": 0.6271733641624451, "learning_rate": 0.0013944444444444445, "loss": 0.3591, "step": 1490 }, { "epoch": 18.75, "grad_norm": 0.5864254236221313, "learning_rate": 0.001388888888888889, "loss": 0.3184, "step": 1500 }, { "epoch": 18.88, "grad_norm": 0.4915286898612976, "learning_rate": 0.0013833333333333334, "loss": 0.301, "step": 1510 }, { "epoch": 19.0, "grad_norm": 0.6932692527770996, "learning_rate": 0.001377777777777778, "loss": 0.3195, "step": 1520 }, { "epoch": 19.0, "eval_accuracy": 0.876953125, "eval_loss": 0.306354820728302, "eval_runtime": 5.2563, "eval_samples_per_second": 194.815, "eval_steps_per_second": 12.176, "step": 1520 }, { "epoch": 19.12, "grad_norm": 0.5778792500495911, "learning_rate": 0.0013722222222222222, "loss": 0.3493, "step": 1530 }, { "epoch": 19.25, "grad_norm": 0.951936662197113, "learning_rate": 0.0013666666666666666, "loss": 0.3305, "step": 1540 }, { "epoch": 19.38, "grad_norm": 0.6778426170349121, "learning_rate": 0.0013611111111111111, "loss": 0.32, "step": 1550 }, { "epoch": 19.5, "grad_norm": 0.6356533765792847, "learning_rate": 0.0013555555555555556, "loss": 0.2889, "step": 1560 }, { "epoch": 19.62, "grad_norm": 0.6476128697395325, "learning_rate": 0.00135, "loss": 0.2907, "step": 1570 }, { "epoch": 19.75, "grad_norm": 0.4664938151836395, "learning_rate": 0.0013444444444444445, "loss": 0.3261, "step": 1580 }, { "epoch": 19.88, "grad_norm": 1.06290602684021, "learning_rate": 0.0013388888888888888, "loss": 0.3365, "step": 1590 }, { "epoch": 20.0, "grad_norm": 0.5365467667579651, "learning_rate": 0.0013333333333333333, "loss": 0.3652, "step": 1600 }, { "epoch": 20.0, "eval_accuracy": 0.912109375, "eval_loss": 0.25709766149520874, "eval_runtime": 4.9952, "eval_samples_per_second": 204.995, "eval_steps_per_second": 12.812, "step": 1600 }, { "epoch": 20.12, "grad_norm": 0.5051919221878052, "learning_rate": 0.0013277777777777778, "loss": 0.3147, "step": 1610 }, { "epoch": 20.25, "grad_norm": 0.5098996162414551, "learning_rate": 0.0013222222222222222, "loss": 0.3085, "step": 1620 }, { "epoch": 20.38, "grad_norm": 0.5585361123085022, "learning_rate": 0.0013166666666666667, "loss": 0.3679, "step": 1630 }, { "epoch": 20.5, "grad_norm": 0.38560378551483154, "learning_rate": 0.0013111111111111112, "loss": 0.2987, "step": 1640 }, { "epoch": 20.62, "grad_norm": 0.3209057152271271, "learning_rate": 0.0013055555555555557, "loss": 0.2792, "step": 1650 }, { "epoch": 20.75, "grad_norm": 0.6471489667892456, "learning_rate": 0.0013000000000000002, "loss": 0.2755, "step": 1660 }, { "epoch": 20.88, "grad_norm": 0.8814804553985596, "learning_rate": 0.0012944444444444446, "loss": 0.2993, "step": 1670 }, { "epoch": 21.0, "grad_norm": 0.5392754673957825, "learning_rate": 0.001288888888888889, "loss": 0.2794, "step": 1680 }, { "epoch": 21.0, "eval_accuracy": 0.9150390625, "eval_loss": 0.24504294991493225, "eval_runtime": 4.8909, "eval_samples_per_second": 209.37, "eval_steps_per_second": 13.086, "step": 1680 }, { "epoch": 21.12, "grad_norm": 0.6234158873558044, "learning_rate": 0.0012833333333333334, "loss": 0.2926, "step": 1690 }, { "epoch": 21.25, "grad_norm": 0.4284802973270416, "learning_rate": 0.0012777777777777776, "loss": 0.2803, "step": 1700 }, { "epoch": 21.38, "grad_norm": 0.688140869140625, "learning_rate": 0.0012722222222222221, "loss": 0.2799, "step": 1710 }, { "epoch": 21.5, "grad_norm": 0.8576880097389221, "learning_rate": 0.0012666666666666666, "loss": 0.2868, "step": 1720 }, { "epoch": 21.62, "grad_norm": 0.6299762725830078, "learning_rate": 0.001261111111111111, "loss": 0.2971, "step": 1730 }, { "epoch": 21.75, "grad_norm": 0.7093678116798401, "learning_rate": 0.0012555555555555555, "loss": 0.2905, "step": 1740 }, { "epoch": 21.88, "grad_norm": 0.4271737039089203, "learning_rate": 0.00125, "loss": 0.3336, "step": 1750 }, { "epoch": 22.0, "grad_norm": 0.6771571040153503, "learning_rate": 0.0012444444444444445, "loss": 0.2704, "step": 1760 }, { "epoch": 22.0, "eval_accuracy": 0.9033203125, "eval_loss": 0.23907524347305298, "eval_runtime": 5.3054, "eval_samples_per_second": 193.012, "eval_steps_per_second": 12.063, "step": 1760 }, { "epoch": 22.12, "grad_norm": 0.44859397411346436, "learning_rate": 0.001238888888888889, "loss": 0.28, "step": 1770 }, { "epoch": 22.25, "grad_norm": 0.5617765784263611, "learning_rate": 0.0012333333333333335, "loss": 0.3093, "step": 1780 }, { "epoch": 22.38, "grad_norm": 0.6634913682937622, "learning_rate": 0.001227777777777778, "loss": 0.2417, "step": 1790 }, { "epoch": 22.5, "grad_norm": 0.670782744884491, "learning_rate": 0.0012222222222222224, "loss": 0.2932, "step": 1800 }, { "epoch": 22.62, "grad_norm": 0.6564796566963196, "learning_rate": 0.0012166666666666667, "loss": 0.3042, "step": 1810 }, { "epoch": 22.75, "grad_norm": 0.34089842438697815, "learning_rate": 0.0012111111111111112, "loss": 0.2925, "step": 1820 }, { "epoch": 22.88, "grad_norm": 0.5612368583679199, "learning_rate": 0.0012055555555555554, "loss": 0.2559, "step": 1830 }, { "epoch": 23.0, "grad_norm": 0.624458909034729, "learning_rate": 0.0012, "loss": 0.2612, "step": 1840 }, { "epoch": 23.0, "eval_accuracy": 0.927734375, "eval_loss": 0.23524078726768494, "eval_runtime": 4.902, "eval_samples_per_second": 208.893, "eval_steps_per_second": 13.056, "step": 1840 }, { "epoch": 23.12, "grad_norm": 0.6820557117462158, "learning_rate": 0.0011944444444444444, "loss": 0.2282, "step": 1850 }, { "epoch": 23.25, "grad_norm": 0.5979276895523071, "learning_rate": 0.0011888888888888889, "loss": 0.2569, "step": 1860 }, { "epoch": 23.38, "grad_norm": 0.5427021384239197, "learning_rate": 0.0011833333333333333, "loss": 0.2724, "step": 1870 }, { "epoch": 23.5, "grad_norm": 0.4382477104663849, "learning_rate": 0.0011777777777777778, "loss": 0.2616, "step": 1880 }, { "epoch": 23.62, "grad_norm": 0.6240445375442505, "learning_rate": 0.0011722222222222223, "loss": 0.2636, "step": 1890 }, { "epoch": 23.75, "grad_norm": 0.7440346479415894, "learning_rate": 0.0011666666666666668, "loss": 0.2913, "step": 1900 }, { "epoch": 23.88, "grad_norm": 0.4682701826095581, "learning_rate": 0.0011611111111111112, "loss": 0.2499, "step": 1910 }, { "epoch": 24.0, "grad_norm": 0.5112751722335815, "learning_rate": 0.0011555555555555555, "loss": 0.2425, "step": 1920 }, { "epoch": 24.0, "eval_accuracy": 0.828125, "eval_loss": 0.4720377027988434, "eval_runtime": 5.3156, "eval_samples_per_second": 192.639, "eval_steps_per_second": 12.04, "step": 1920 }, { "epoch": 24.12, "grad_norm": 0.765444278717041, "learning_rate": 0.00115, "loss": 0.2736, "step": 1930 }, { "epoch": 24.25, "grad_norm": 0.380066841840744, "learning_rate": 0.0011444444444444445, "loss": 0.2357, "step": 1940 }, { "epoch": 24.38, "grad_norm": 0.43320003151893616, "learning_rate": 0.001138888888888889, "loss": 0.2518, "step": 1950 }, { "epoch": 24.5, "grad_norm": 0.5003307461738586, "learning_rate": 0.0011333333333333334, "loss": 0.2898, "step": 1960 }, { "epoch": 24.62, "grad_norm": 0.41153478622436523, "learning_rate": 0.001127777777777778, "loss": 0.2209, "step": 1970 }, { "epoch": 24.75, "grad_norm": 0.41805940866470337, "learning_rate": 0.0011222222222222222, "loss": 0.235, "step": 1980 }, { "epoch": 24.88, "grad_norm": 0.5226410627365112, "learning_rate": 0.0011166666666666666, "loss": 0.2349, "step": 1990 }, { "epoch": 25.0, "grad_norm": 0.3767559826374054, "learning_rate": 0.0011111111111111111, "loss": 0.2567, "step": 2000 }, { "epoch": 25.0, "eval_accuracy": 0.9130859375, "eval_loss": 0.22960150241851807, "eval_runtime": 4.887, "eval_samples_per_second": 209.535, "eval_steps_per_second": 13.096, "step": 2000 }, { "epoch": 25.12, "grad_norm": 0.6860052943229675, "learning_rate": 0.0011055555555555556, "loss": 0.2426, "step": 2010 }, { "epoch": 25.25, "grad_norm": 0.3876688778400421, "learning_rate": 0.0011, "loss": 0.2243, "step": 2020 }, { "epoch": 25.38, "grad_norm": 0.3251183032989502, "learning_rate": 0.0010944444444444445, "loss": 0.234, "step": 2030 }, { "epoch": 25.5, "grad_norm": 0.5538493990898132, "learning_rate": 0.0010888888888888888, "loss": 0.2547, "step": 2040 }, { "epoch": 25.62, "grad_norm": 0.6539644598960876, "learning_rate": 0.0010833333333333333, "loss": 0.2382, "step": 2050 }, { "epoch": 25.75, "grad_norm": 0.6687932014465332, "learning_rate": 0.0010777777777777778, "loss": 0.2254, "step": 2060 }, { "epoch": 25.88, "grad_norm": 0.6210919618606567, "learning_rate": 0.0010722222222222222, "loss": 0.2356, "step": 2070 }, { "epoch": 26.0, "grad_norm": 0.5525135397911072, "learning_rate": 0.0010666666666666667, "loss": 0.2302, "step": 2080 }, { "epoch": 26.0, "eval_accuracy": 0.89453125, "eval_loss": 0.30673664808273315, "eval_runtime": 4.9576, "eval_samples_per_second": 206.552, "eval_steps_per_second": 12.909, "step": 2080 }, { "epoch": 26.12, "grad_norm": 0.5014208555221558, "learning_rate": 0.0010611111111111112, "loss": 0.2403, "step": 2090 }, { "epoch": 26.25, "grad_norm": 0.6093131303787231, "learning_rate": 0.0010555555555555557, "loss": 0.2356, "step": 2100 }, { "epoch": 26.38, "grad_norm": 0.3627248704433441, "learning_rate": 0.0010500000000000002, "loss": 0.2509, "step": 2110 }, { "epoch": 26.5, "grad_norm": 0.4119124114513397, "learning_rate": 0.0010444444444444446, "loss": 0.1915, "step": 2120 }, { "epoch": 26.62, "grad_norm": 0.5565811395645142, "learning_rate": 0.0010388888888888889, "loss": 0.191, "step": 2130 }, { "epoch": 26.75, "grad_norm": 0.44097578525543213, "learning_rate": 0.0010333333333333334, "loss": 0.2353, "step": 2140 }, { "epoch": 26.88, "grad_norm": 0.4542636275291443, "learning_rate": 0.0010277777777777776, "loss": 0.2144, "step": 2150 }, { "epoch": 27.0, "grad_norm": 0.4763772785663605, "learning_rate": 0.0010222222222222221, "loss": 0.2358, "step": 2160 }, { "epoch": 27.0, "eval_accuracy": 0.9375, "eval_loss": 0.17758239805698395, "eval_runtime": 5.3185, "eval_samples_per_second": 192.534, "eval_steps_per_second": 12.033, "step": 2160 }, { "epoch": 27.12, "grad_norm": 0.7219308614730835, "learning_rate": 0.0010166666666666666, "loss": 0.238, "step": 2170 }, { "epoch": 27.25, "grad_norm": 0.7707520127296448, "learning_rate": 0.001011111111111111, "loss": 0.1863, "step": 2180 }, { "epoch": 27.38, "grad_norm": 0.6878935098648071, "learning_rate": 0.0010055555555555555, "loss": 0.2493, "step": 2190 }, { "epoch": 27.5, "grad_norm": 0.5451861619949341, "learning_rate": 0.001, "loss": 0.2374, "step": 2200 }, { "epoch": 27.62, "grad_norm": 0.39642319083213806, "learning_rate": 0.0009944444444444445, "loss": 0.2382, "step": 2210 }, { "epoch": 27.75, "grad_norm": 0.4122956097126007, "learning_rate": 0.000988888888888889, "loss": 0.2176, "step": 2220 }, { "epoch": 27.88, "grad_norm": 0.6155421733856201, "learning_rate": 0.0009833333333333332, "loss": 0.2128, "step": 2230 }, { "epoch": 28.0, "grad_norm": 0.7283052206039429, "learning_rate": 0.0009777777777777777, "loss": 0.2173, "step": 2240 }, { "epoch": 28.0, "eval_accuracy": 0.94921875, "eval_loss": 0.15962785482406616, "eval_runtime": 4.9115, "eval_samples_per_second": 208.489, "eval_steps_per_second": 13.031, "step": 2240 }, { "epoch": 28.12, "grad_norm": 0.39027243852615356, "learning_rate": 0.0009722222222222222, "loss": 0.1979, "step": 2250 }, { "epoch": 28.25, "grad_norm": 0.5258718729019165, "learning_rate": 0.0009666666666666667, "loss": 0.1447, "step": 2260 }, { "epoch": 28.38, "grad_norm": 0.6615960001945496, "learning_rate": 0.0009611111111111112, "loss": 0.2403, "step": 2270 }, { "epoch": 28.5, "grad_norm": 0.4044310748577118, "learning_rate": 0.0009555555555555556, "loss": 0.1981, "step": 2280 }, { "epoch": 28.62, "grad_norm": 0.2666930556297302, "learning_rate": 0.00095, "loss": 0.2108, "step": 2290 }, { "epoch": 28.75, "grad_norm": 0.5612334609031677, "learning_rate": 0.0009444444444444445, "loss": 0.1783, "step": 2300 }, { "epoch": 28.88, "grad_norm": 0.48420026898384094, "learning_rate": 0.000938888888888889, "loss": 0.1848, "step": 2310 }, { "epoch": 29.0, "grad_norm": 0.5850337743759155, "learning_rate": 0.0009333333333333333, "loss": 0.1798, "step": 2320 }, { "epoch": 29.0, "eval_accuracy": 0.94140625, "eval_loss": 0.1548241674900055, "eval_runtime": 5.32, "eval_samples_per_second": 192.483, "eval_steps_per_second": 12.03, "step": 2320 }, { "epoch": 29.12, "grad_norm": 0.5059901475906372, "learning_rate": 0.0009277777777777778, "loss": 0.1954, "step": 2330 }, { "epoch": 29.25, "grad_norm": 0.22623513638973236, "learning_rate": 0.0009222222222222223, "loss": 0.1604, "step": 2340 }, { "epoch": 29.38, "grad_norm": 0.2330830693244934, "learning_rate": 0.0009166666666666666, "loss": 0.2125, "step": 2350 }, { "epoch": 29.5, "grad_norm": 0.4784901440143585, "learning_rate": 0.0009111111111111111, "loss": 0.1823, "step": 2360 }, { "epoch": 29.62, "grad_norm": 0.6156973242759705, "learning_rate": 0.0009055555555555556, "loss": 0.2289, "step": 2370 }, { "epoch": 29.75, "grad_norm": 0.4373360872268677, "learning_rate": 0.0009000000000000001, "loss": 0.2127, "step": 2380 }, { "epoch": 29.88, "grad_norm": 0.501115083694458, "learning_rate": 0.0008944444444444445, "loss": 0.2359, "step": 2390 }, { "epoch": 30.0, "grad_norm": 0.411662757396698, "learning_rate": 0.0008888888888888888, "loss": 0.197, "step": 2400 }, { "epoch": 30.0, "eval_accuracy": 0.95703125, "eval_loss": 0.17402663826942444, "eval_runtime": 4.919, "eval_samples_per_second": 208.172, "eval_steps_per_second": 13.011, "step": 2400 }, { "epoch": 30.12, "grad_norm": 0.45976510643959045, "learning_rate": 0.0008833333333333333, "loss": 0.1608, "step": 2410 }, { "epoch": 30.25, "grad_norm": 0.3243074417114258, "learning_rate": 0.0008777777777777778, "loss": 0.1742, "step": 2420 }, { "epoch": 30.38, "grad_norm": 0.5205725431442261, "learning_rate": 0.0008722222222222223, "loss": 0.1718, "step": 2430 }, { "epoch": 30.5, "grad_norm": 0.3976719081401825, "learning_rate": 0.0008666666666666667, "loss": 0.2247, "step": 2440 }, { "epoch": 30.62, "grad_norm": 0.2859196662902832, "learning_rate": 0.0008611111111111112, "loss": 0.1884, "step": 2450 }, { "epoch": 30.75, "grad_norm": 0.5310297012329102, "learning_rate": 0.0008555555555555556, "loss": 0.1672, "step": 2460 }, { "epoch": 30.88, "grad_norm": 0.5172590613365173, "learning_rate": 0.00085, "loss": 0.1828, "step": 2470 }, { "epoch": 31.0, "grad_norm": 0.6098745465278625, "learning_rate": 0.0008444444444444444, "loss": 0.1654, "step": 2480 }, { "epoch": 31.0, "eval_accuracy": 0.966796875, "eval_loss": 0.12167137861251831, "eval_runtime": 4.9956, "eval_samples_per_second": 204.98, "eval_steps_per_second": 12.811, "step": 2480 }, { "epoch": 31.12, "grad_norm": 0.3343498706817627, "learning_rate": 0.0008388888888888889, "loss": 0.1784, "step": 2490 }, { "epoch": 31.25, "grad_norm": 0.3938640058040619, "learning_rate": 0.0008333333333333334, "loss": 0.1697, "step": 2500 }, { "epoch": 31.38, "grad_norm": 0.41868484020233154, "learning_rate": 0.0008277777777777778, "loss": 0.2263, "step": 2510 }, { "epoch": 31.5, "grad_norm": 0.4363801181316376, "learning_rate": 0.0008222222222222222, "loss": 0.1762, "step": 2520 }, { "epoch": 31.62, "grad_norm": 0.5088948607444763, "learning_rate": 0.0008166666666666667, "loss": 0.1711, "step": 2530 }, { "epoch": 31.75, "grad_norm": 0.5423977375030518, "learning_rate": 0.0008111111111111111, "loss": 0.1675, "step": 2540 }, { "epoch": 31.88, "grad_norm": 0.431382954120636, "learning_rate": 0.0008055555555555556, "loss": 0.2216, "step": 2550 }, { "epoch": 32.0, "grad_norm": 0.4037337303161621, "learning_rate": 0.0008, "loss": 0.1896, "step": 2560 }, { "epoch": 32.0, "eval_accuracy": 0.92578125, "eval_loss": 0.2552070617675781, "eval_runtime": 5.2019, "eval_samples_per_second": 196.853, "eval_steps_per_second": 12.303, "step": 2560 }, { "epoch": 32.12, "grad_norm": 0.6025939583778381, "learning_rate": 0.0007944444444444444, "loss": 0.1926, "step": 2570 }, { "epoch": 32.25, "grad_norm": 0.7205588221549988, "learning_rate": 0.0007888888888888889, "loss": 0.1755, "step": 2580 }, { "epoch": 32.38, "grad_norm": 0.3841509222984314, "learning_rate": 0.0007833333333333334, "loss": 0.1696, "step": 2590 }, { "epoch": 32.5, "grad_norm": 0.5659075975418091, "learning_rate": 0.0007777777777777778, "loss": 0.133, "step": 2600 }, { "epoch": 32.62, "grad_norm": 0.7011501789093018, "learning_rate": 0.0007722222222222223, "loss": 0.2069, "step": 2610 }, { "epoch": 32.75, "grad_norm": 0.5933576822280884, "learning_rate": 0.0007666666666666667, "loss": 0.1799, "step": 2620 }, { "epoch": 32.88, "grad_norm": 0.636463463306427, "learning_rate": 0.0007611111111111111, "loss": 0.1884, "step": 2630 }, { "epoch": 33.0, "grad_norm": 0.36000609397888184, "learning_rate": 0.0007555555555555555, "loss": 0.1705, "step": 2640 }, { "epoch": 33.0, "eval_accuracy": 0.97265625, "eval_loss": 0.10305143892765045, "eval_runtime": 4.8746, "eval_samples_per_second": 210.07, "eval_steps_per_second": 13.129, "step": 2640 }, { "epoch": 33.12, "grad_norm": 0.25941601395606995, "learning_rate": 0.00075, "loss": 0.143, "step": 2650 }, { "epoch": 33.25, "grad_norm": 0.6486319899559021, "learning_rate": 0.0007444444444444445, "loss": 0.1819, "step": 2660 }, { "epoch": 33.38, "grad_norm": 0.34492290019989014, "learning_rate": 0.000738888888888889, "loss": 0.1877, "step": 2670 }, { "epoch": 33.5, "grad_norm": 0.5475990176200867, "learning_rate": 0.0007333333333333333, "loss": 0.1586, "step": 2680 }, { "epoch": 33.62, "grad_norm": 0.231631800532341, "learning_rate": 0.0007277777777777777, "loss": 0.145, "step": 2690 }, { "epoch": 33.75, "grad_norm": 0.6208530068397522, "learning_rate": 0.0007222222222222222, "loss": 0.2015, "step": 2700 }, { "epoch": 33.88, "grad_norm": 0.7229673862457275, "learning_rate": 0.0007166666666666667, "loss": 0.1814, "step": 2710 }, { "epoch": 34.0, "grad_norm": 0.38056522607803345, "learning_rate": 0.0007111111111111111, "loss": 0.1689, "step": 2720 }, { "epoch": 34.0, "eval_accuracy": 0.96875, "eval_loss": 0.10111749172210693, "eval_runtime": 5.2922, "eval_samples_per_second": 193.491, "eval_steps_per_second": 12.093, "step": 2720 }, { "epoch": 34.12, "grad_norm": 0.5405479669570923, "learning_rate": 0.0007055555555555556, "loss": 0.16, "step": 2730 }, { "epoch": 34.25, "grad_norm": 0.5781314373016357, "learning_rate": 0.0007, "loss": 0.1598, "step": 2740 }, { "epoch": 34.38, "grad_norm": 0.33385559916496277, "learning_rate": 0.0006944444444444445, "loss": 0.1747, "step": 2750 }, { "epoch": 34.5, "grad_norm": 0.36587977409362793, "learning_rate": 0.000688888888888889, "loss": 0.1376, "step": 2760 }, { "epoch": 34.62, "grad_norm": 0.3459375202655792, "learning_rate": 0.0006833333333333333, "loss": 0.1297, "step": 2770 }, { "epoch": 34.75, "grad_norm": 0.5182803273200989, "learning_rate": 0.0006777777777777778, "loss": 0.1747, "step": 2780 }, { "epoch": 34.88, "grad_norm": 0.39014366269111633, "learning_rate": 0.0006722222222222223, "loss": 0.169, "step": 2790 }, { "epoch": 35.0, "grad_norm": 0.4516375660896301, "learning_rate": 0.0006666666666666666, "loss": 0.1439, "step": 2800 }, { "epoch": 35.0, "eval_accuracy": 0.96484375, "eval_loss": 0.11748197674751282, "eval_runtime": 4.909, "eval_samples_per_second": 208.595, "eval_steps_per_second": 13.037, "step": 2800 }, { "epoch": 35.12, "grad_norm": 0.47782474756240845, "learning_rate": 0.0006611111111111111, "loss": 0.1417, "step": 2810 }, { "epoch": 35.25, "grad_norm": 0.11640643328428268, "learning_rate": 0.0006555555555555556, "loss": 0.1226, "step": 2820 }, { "epoch": 35.38, "grad_norm": 0.4363173544406891, "learning_rate": 0.0006500000000000001, "loss": 0.14, "step": 2830 }, { "epoch": 35.5, "grad_norm": 0.6676026582717896, "learning_rate": 0.0006444444444444444, "loss": 0.1548, "step": 2840 }, { "epoch": 35.62, "grad_norm": 0.4940982162952423, "learning_rate": 0.0006388888888888888, "loss": 0.1554, "step": 2850 }, { "epoch": 35.75, "grad_norm": 0.6478282809257507, "learning_rate": 0.0006333333333333333, "loss": 0.1641, "step": 2860 }, { "epoch": 35.88, "grad_norm": 0.6007707715034485, "learning_rate": 0.0006277777777777778, "loss": 0.1484, "step": 2870 }, { "epoch": 36.0, "grad_norm": 0.4945576786994934, "learning_rate": 0.0006222222222222223, "loss": 0.1606, "step": 2880 }, { "epoch": 36.0, "eval_accuracy": 0.9443359375, "eval_loss": 0.18046385049819946, "eval_runtime": 5.0626, "eval_samples_per_second": 202.266, "eval_steps_per_second": 12.642, "step": 2880 }, { "epoch": 36.12, "grad_norm": 0.4033058285713196, "learning_rate": 0.0006166666666666667, "loss": 0.1372, "step": 2890 }, { "epoch": 36.25, "grad_norm": 0.30507412552833557, "learning_rate": 0.0006111111111111112, "loss": 0.1549, "step": 2900 }, { "epoch": 36.38, "grad_norm": 0.3899296820163727, "learning_rate": 0.0006055555555555556, "loss": 0.1667, "step": 2910 }, { "epoch": 36.5, "grad_norm": 0.44058963656425476, "learning_rate": 0.0006, "loss": 0.1712, "step": 2920 }, { "epoch": 36.62, "grad_norm": 0.4805178642272949, "learning_rate": 0.0005944444444444444, "loss": 0.1805, "step": 2930 }, { "epoch": 36.75, "grad_norm": 0.37880581617355347, "learning_rate": 0.0005888888888888889, "loss": 0.1411, "step": 2940 }, { "epoch": 36.88, "grad_norm": 0.4263412654399872, "learning_rate": 0.0005833333333333334, "loss": 0.1714, "step": 2950 }, { "epoch": 37.0, "grad_norm": 0.2723836898803711, "learning_rate": 0.0005777777777777778, "loss": 0.1281, "step": 2960 }, { "epoch": 37.0, "eval_accuracy": 0.9677734375, "eval_loss": 0.1253870278596878, "eval_runtime": 5.1275, "eval_samples_per_second": 199.709, "eval_steps_per_second": 12.482, "step": 2960 }, { "epoch": 37.12, "grad_norm": 0.4946765601634979, "learning_rate": 0.0005722222222222222, "loss": 0.1059, "step": 2970 }, { "epoch": 37.25, "grad_norm": 0.4709372818470001, "learning_rate": 0.0005666666666666667, "loss": 0.1321, "step": 2980 }, { "epoch": 37.38, "grad_norm": 0.36459285020828247, "learning_rate": 0.0005611111111111111, "loss": 0.1351, "step": 2990 }, { "epoch": 37.5, "grad_norm": 0.4145031273365021, "learning_rate": 0.0005555555555555556, "loss": 0.1545, "step": 3000 }, { "epoch": 37.62, "grad_norm": 0.5457221865653992, "learning_rate": 0.00055, "loss": 0.1424, "step": 3010 }, { "epoch": 37.75, "grad_norm": 0.5123695731163025, "learning_rate": 0.0005444444444444444, "loss": 0.1508, "step": 3020 }, { "epoch": 37.88, "grad_norm": 0.29368171095848083, "learning_rate": 0.0005388888888888889, "loss": 0.1438, "step": 3030 }, { "epoch": 38.0, "grad_norm": 0.6859858632087708, "learning_rate": 0.0005333333333333334, "loss": 0.1518, "step": 3040 }, { "epoch": 38.0, "eval_accuracy": 0.96484375, "eval_loss": 0.11837992072105408, "eval_runtime": 4.9042, "eval_samples_per_second": 208.8, "eval_steps_per_second": 13.05, "step": 3040 }, { "epoch": 38.12, "grad_norm": 0.3859548270702362, "learning_rate": 0.0005277777777777778, "loss": 0.1455, "step": 3050 }, { "epoch": 38.25, "grad_norm": 0.21001270413398743, "learning_rate": 0.0005222222222222223, "loss": 0.13, "step": 3060 }, { "epoch": 38.38, "grad_norm": 0.4814240038394928, "learning_rate": 0.0005166666666666667, "loss": 0.1291, "step": 3070 }, { "epoch": 38.5, "grad_norm": 0.4478558301925659, "learning_rate": 0.0005111111111111111, "loss": 0.1293, "step": 3080 }, { "epoch": 38.62, "grad_norm": 0.4811321496963501, "learning_rate": 0.0005055555555555555, "loss": 0.1231, "step": 3090 }, { "epoch": 38.75, "grad_norm": 0.2841961085796356, "learning_rate": 0.0005, "loss": 0.166, "step": 3100 }, { "epoch": 38.88, "grad_norm": 0.5479158759117126, "learning_rate": 0.0004944444444444445, "loss": 0.1044, "step": 3110 }, { "epoch": 39.0, "grad_norm": 0.37449321150779724, "learning_rate": 0.0004888888888888889, "loss": 0.1531, "step": 3120 }, { "epoch": 39.0, "eval_accuracy": 0.9736328125, "eval_loss": 0.09921471774578094, "eval_runtime": 5.3451, "eval_samples_per_second": 191.577, "eval_steps_per_second": 11.974, "step": 3120 }, { "epoch": 39.12, "grad_norm": 0.5961503386497498, "learning_rate": 0.00048333333333333334, "loss": 0.1321, "step": 3130 }, { "epoch": 39.25, "grad_norm": 0.3140615224838257, "learning_rate": 0.0004777777777777778, "loss": 0.1192, "step": 3140 }, { "epoch": 39.38, "grad_norm": 0.8949409127235413, "learning_rate": 0.00047222222222222224, "loss": 0.122, "step": 3150 }, { "epoch": 39.5, "grad_norm": 0.21187840402126312, "learning_rate": 0.00046666666666666666, "loss": 0.1341, "step": 3160 }, { "epoch": 39.62, "grad_norm": 0.6364386081695557, "learning_rate": 0.00046111111111111114, "loss": 0.1327, "step": 3170 }, { "epoch": 39.75, "grad_norm": 0.2257820963859558, "learning_rate": 0.00045555555555555556, "loss": 0.1101, "step": 3180 }, { "epoch": 39.88, "grad_norm": 0.373692125082016, "learning_rate": 0.00045000000000000004, "loss": 0.1293, "step": 3190 }, { "epoch": 40.0, "grad_norm": 0.48990318179130554, "learning_rate": 0.0004444444444444444, "loss": 0.132, "step": 3200 }, { "epoch": 40.0, "eval_accuracy": 0.9775390625, "eval_loss": 0.09202806651592255, "eval_runtime": 4.9155, "eval_samples_per_second": 208.319, "eval_steps_per_second": 13.02, "step": 3200 }, { "epoch": 40.12, "grad_norm": 0.571524441242218, "learning_rate": 0.0004388888888888889, "loss": 0.1167, "step": 3210 }, { "epoch": 40.25, "grad_norm": 0.5896998643875122, "learning_rate": 0.00043333333333333337, "loss": 0.151, "step": 3220 }, { "epoch": 40.38, "grad_norm": 0.44366732239723206, "learning_rate": 0.0004277777777777778, "loss": 0.1422, "step": 3230 }, { "epoch": 40.5, "grad_norm": 0.314609169960022, "learning_rate": 0.0004222222222222222, "loss": 0.1253, "step": 3240 }, { "epoch": 40.62, "grad_norm": 0.3513747453689575, "learning_rate": 0.0004166666666666667, "loss": 0.119, "step": 3250 }, { "epoch": 40.75, "grad_norm": 0.3717803359031677, "learning_rate": 0.0004111111111111111, "loss": 0.1156, "step": 3260 }, { "epoch": 40.88, "grad_norm": 0.22342754900455475, "learning_rate": 0.00040555555555555554, "loss": 0.1372, "step": 3270 }, { "epoch": 41.0, "grad_norm": 0.41738444566726685, "learning_rate": 0.0004, "loss": 0.134, "step": 3280 }, { "epoch": 41.0, "eval_accuracy": 0.9638671875, "eval_loss": 0.13908132910728455, "eval_runtime": 5.2165, "eval_samples_per_second": 196.299, "eval_steps_per_second": 12.269, "step": 3280 }, { "epoch": 41.12, "grad_norm": 0.41814348101615906, "learning_rate": 0.00039444444444444444, "loss": 0.1279, "step": 3290 }, { "epoch": 41.25, "grad_norm": 0.9678131937980652, "learning_rate": 0.0003888888888888889, "loss": 0.1398, "step": 3300 }, { "epoch": 41.38, "grad_norm": 0.6725767850875854, "learning_rate": 0.00038333333333333334, "loss": 0.1492, "step": 3310 }, { "epoch": 41.5, "grad_norm": 0.31534790992736816, "learning_rate": 0.00037777777777777777, "loss": 0.1119, "step": 3320 }, { "epoch": 41.62, "grad_norm": 0.632583737373352, "learning_rate": 0.00037222222222222225, "loss": 0.1131, "step": 3330 }, { "epoch": 41.75, "grad_norm": 0.6746741533279419, "learning_rate": 0.00036666666666666667, "loss": 0.1351, "step": 3340 }, { "epoch": 41.88, "grad_norm": 0.3400849997997284, "learning_rate": 0.0003611111111111111, "loss": 0.0815, "step": 3350 }, { "epoch": 42.0, "grad_norm": 0.5605281591415405, "learning_rate": 0.00035555555555555557, "loss": 0.1413, "step": 3360 }, { "epoch": 42.0, "eval_accuracy": 0.9716796875, "eval_loss": 0.11220287531614304, "eval_runtime": 5.0927, "eval_samples_per_second": 201.072, "eval_steps_per_second": 12.567, "step": 3360 }, { "epoch": 42.12, "grad_norm": 0.5148097276687622, "learning_rate": 0.00035, "loss": 0.125, "step": 3370 }, { "epoch": 42.25, "grad_norm": 0.38650012016296387, "learning_rate": 0.0003444444444444445, "loss": 0.1209, "step": 3380 }, { "epoch": 42.38, "grad_norm": 0.3292187750339508, "learning_rate": 0.0003388888888888889, "loss": 0.1236, "step": 3390 }, { "epoch": 42.5, "grad_norm": 0.20681746304035187, "learning_rate": 0.0003333333333333333, "loss": 0.0973, "step": 3400 }, { "epoch": 42.62, "grad_norm": 0.33743348717689514, "learning_rate": 0.0003277777777777778, "loss": 0.1208, "step": 3410 }, { "epoch": 42.75, "grad_norm": 0.34158453345298767, "learning_rate": 0.0003222222222222222, "loss": 0.11, "step": 3420 }, { "epoch": 42.88, "grad_norm": 0.5730062127113342, "learning_rate": 0.00031666666666666665, "loss": 0.1292, "step": 3430 }, { "epoch": 43.0, "grad_norm": 0.44954267144203186, "learning_rate": 0.0003111111111111111, "loss": 0.1097, "step": 3440 }, { "epoch": 43.0, "eval_accuracy": 0.9677734375, "eval_loss": 0.11706902086734772, "eval_runtime": 4.9202, "eval_samples_per_second": 208.12, "eval_steps_per_second": 13.007, "step": 3440 }, { "epoch": 43.12, "grad_norm": 0.25731635093688965, "learning_rate": 0.0003055555555555556, "loss": 0.1161, "step": 3450 }, { "epoch": 43.25, "grad_norm": 0.5329569578170776, "learning_rate": 0.0003, "loss": 0.1507, "step": 3460 }, { "epoch": 43.38, "grad_norm": 0.3034692704677582, "learning_rate": 0.00029444444444444445, "loss": 0.1447, "step": 3470 }, { "epoch": 43.5, "grad_norm": 0.5483482480049133, "learning_rate": 0.0002888888888888889, "loss": 0.1323, "step": 3480 }, { "epoch": 43.62, "grad_norm": 0.279697984457016, "learning_rate": 0.00028333333333333335, "loss": 0.1, "step": 3490 }, { "epoch": 43.75, "grad_norm": 0.5593113303184509, "learning_rate": 0.0002777777777777778, "loss": 0.1169, "step": 3500 }, { "epoch": 43.88, "grad_norm": 0.621919572353363, "learning_rate": 0.0002722222222222222, "loss": 0.1119, "step": 3510 }, { "epoch": 44.0, "grad_norm": 0.37898024916648865, "learning_rate": 0.0002666666666666667, "loss": 0.1167, "step": 3520 }, { "epoch": 44.0, "eval_accuracy": 0.9765625, "eval_loss": 0.10542036592960358, "eval_runtime": 5.3473, "eval_samples_per_second": 191.5, "eval_steps_per_second": 11.969, "step": 3520 }, { "epoch": 44.12, "grad_norm": 0.40025296807289124, "learning_rate": 0.00026111111111111116, "loss": 0.1107, "step": 3530 }, { "epoch": 44.25, "grad_norm": 0.19010861217975616, "learning_rate": 0.00025555555555555553, "loss": 0.1008, "step": 3540 }, { "epoch": 44.38, "grad_norm": 0.33224934339523315, "learning_rate": 0.00025, "loss": 0.1355, "step": 3550 }, { "epoch": 44.5, "grad_norm": 0.4298325181007385, "learning_rate": 0.00024444444444444443, "loss": 0.106, "step": 3560 }, { "epoch": 44.62, "grad_norm": 0.4320330023765564, "learning_rate": 0.0002388888888888889, "loss": 0.1053, "step": 3570 }, { "epoch": 44.75, "grad_norm": 0.1121302917599678, "learning_rate": 0.00023333333333333333, "loss": 0.0845, "step": 3580 }, { "epoch": 44.88, "grad_norm": 0.3021819293498993, "learning_rate": 0.00022777777777777778, "loss": 0.1222, "step": 3590 }, { "epoch": 45.0, "grad_norm": 0.7353653311729431, "learning_rate": 0.0002222222222222222, "loss": 0.1388, "step": 3600 }, { "epoch": 45.0, "eval_accuracy": 0.9794921875, "eval_loss": 0.09323666244745255, "eval_runtime": 4.9368, "eval_samples_per_second": 207.422, "eval_steps_per_second": 12.964, "step": 3600 }, { "epoch": 45.12, "grad_norm": 0.5964930057525635, "learning_rate": 0.00021666666666666668, "loss": 0.1201, "step": 3610 }, { "epoch": 45.25, "grad_norm": 0.17329342663288116, "learning_rate": 0.0002111111111111111, "loss": 0.0905, "step": 3620 }, { "epoch": 45.38, "grad_norm": 0.5378609299659729, "learning_rate": 0.00020555555555555556, "loss": 0.0981, "step": 3630 }, { "epoch": 45.5, "grad_norm": 0.3457593619823456, "learning_rate": 0.0002, "loss": 0.1116, "step": 3640 }, { "epoch": 45.62, "grad_norm": 0.5954685211181641, "learning_rate": 0.00019444444444444446, "loss": 0.1037, "step": 3650 }, { "epoch": 45.75, "grad_norm": 0.1786712259054184, "learning_rate": 0.00018888888888888888, "loss": 0.0978, "step": 3660 }, { "epoch": 45.88, "grad_norm": 0.25224894285202026, "learning_rate": 0.00018333333333333334, "loss": 0.1089, "step": 3670 }, { "epoch": 46.0, "grad_norm": 0.33607247471809387, "learning_rate": 0.00017777777777777779, "loss": 0.1221, "step": 3680 }, { "epoch": 46.0, "eval_accuracy": 0.9765625, "eval_loss": 0.09462323784828186, "eval_runtime": 5.2287, "eval_samples_per_second": 195.844, "eval_steps_per_second": 12.24, "step": 3680 }, { "epoch": 46.12, "grad_norm": 0.34634700417518616, "learning_rate": 0.00017222222222222224, "loss": 0.1243, "step": 3690 }, { "epoch": 46.25, "grad_norm": 0.5061681866645813, "learning_rate": 0.00016666666666666666, "loss": 0.1115, "step": 3700 }, { "epoch": 46.38, "grad_norm": 0.2837713658809662, "learning_rate": 0.0001611111111111111, "loss": 0.1008, "step": 3710 }, { "epoch": 46.5, "grad_norm": 0.2688066363334656, "learning_rate": 0.00015555555555555556, "loss": 0.1058, "step": 3720 }, { "epoch": 46.62, "grad_norm": 0.32675421237945557, "learning_rate": 0.00015, "loss": 0.0897, "step": 3730 }, { "epoch": 46.75, "grad_norm": 0.6959260702133179, "learning_rate": 0.00014444444444444444, "loss": 0.1182, "step": 3740 }, { "epoch": 46.88, "grad_norm": 0.3018099069595337, "learning_rate": 0.0001388888888888889, "loss": 0.1013, "step": 3750 }, { "epoch": 47.0, "grad_norm": 0.6018778085708618, "learning_rate": 0.00013333333333333334, "loss": 0.1099, "step": 3760 }, { "epoch": 47.0, "eval_accuracy": 0.9755859375, "eval_loss": 0.1115545928478241, "eval_runtime": 5.0799, "eval_samples_per_second": 201.581, "eval_steps_per_second": 12.599, "step": 3760 }, { "epoch": 47.12, "grad_norm": 0.42199546098709106, "learning_rate": 0.00012777777777777776, "loss": 0.1073, "step": 3770 }, { "epoch": 47.25, "grad_norm": 0.6451756358146667, "learning_rate": 0.00012222222222222221, "loss": 0.099, "step": 3780 }, { "epoch": 47.38, "grad_norm": 0.4935210943222046, "learning_rate": 0.00011666666666666667, "loss": 0.1077, "step": 3790 }, { "epoch": 47.5, "grad_norm": 0.2563684582710266, "learning_rate": 0.0001111111111111111, "loss": 0.0907, "step": 3800 }, { "epoch": 47.62, "grad_norm": 0.3351310193538666, "learning_rate": 0.00010555555555555555, "loss": 0.1059, "step": 3810 }, { "epoch": 47.75, "grad_norm": 0.39526107907295227, "learning_rate": 0.0001, "loss": 0.0868, "step": 3820 }, { "epoch": 47.88, "grad_norm": 0.4634101390838623, "learning_rate": 9.444444444444444e-05, "loss": 0.1098, "step": 3830 }, { "epoch": 48.0, "grad_norm": 0.5983624458312988, "learning_rate": 8.888888888888889e-05, "loss": 0.1041, "step": 3840 }, { "epoch": 48.0, "eval_accuracy": 0.974609375, "eval_loss": 0.11264081299304962, "eval_runtime": 4.9279, "eval_samples_per_second": 207.795, "eval_steps_per_second": 12.987, "step": 3840 }, { "epoch": 48.12, "grad_norm": 0.4093017578125, "learning_rate": 8.333333333333333e-05, "loss": 0.1134, "step": 3850 }, { "epoch": 48.25, "grad_norm": 0.6668171286582947, "learning_rate": 7.777777777777778e-05, "loss": 0.0948, "step": 3860 }, { "epoch": 48.38, "grad_norm": 0.24066688120365143, "learning_rate": 7.222222222222222e-05, "loss": 0.0958, "step": 3870 }, { "epoch": 48.5, "grad_norm": 0.2770562469959259, "learning_rate": 6.666666666666667e-05, "loss": 0.1021, "step": 3880 }, { "epoch": 48.62, "grad_norm": 0.45978790521621704, "learning_rate": 6.111111111111111e-05, "loss": 0.1084, "step": 3890 }, { "epoch": 48.75, "grad_norm": 0.594672441482544, "learning_rate": 5.555555555555555e-05, "loss": 0.1373, "step": 3900 }, { "epoch": 48.88, "grad_norm": 0.8167428374290466, "learning_rate": 5e-05, "loss": 0.1038, "step": 3910 }, { "epoch": 49.0, "grad_norm": 0.2987329661846161, "learning_rate": 4.4444444444444447e-05, "loss": 0.1025, "step": 3920 }, { "epoch": 49.0, "eval_accuracy": 0.9755859375, "eval_loss": 0.11138872057199478, "eval_runtime": 5.3184, "eval_samples_per_second": 192.54, "eval_steps_per_second": 12.034, "step": 3920 }, { "epoch": 49.12, "grad_norm": 0.3884102404117584, "learning_rate": 3.888888888888889e-05, "loss": 0.1018, "step": 3930 }, { "epoch": 49.25, "grad_norm": 0.2661769688129425, "learning_rate": 3.3333333333333335e-05, "loss": 0.1011, "step": 3940 }, { "epoch": 49.38, "grad_norm": 0.40820014476776123, "learning_rate": 2.7777777777777776e-05, "loss": 0.1488, "step": 3950 }, { "epoch": 49.5, "grad_norm": 0.46163231134414673, "learning_rate": 2.2222222222222223e-05, "loss": 0.1258, "step": 3960 }, { "epoch": 49.62, "grad_norm": 0.4315054416656494, "learning_rate": 1.6666666666666667e-05, "loss": 0.1018, "step": 3970 }, { "epoch": 49.75, "grad_norm": 0.2365369200706482, "learning_rate": 1.1111111111111112e-05, "loss": 0.0977, "step": 3980 }, { "epoch": 49.88, "grad_norm": 0.4910149574279785, "learning_rate": 5.555555555555556e-06, "loss": 0.1122, "step": 3990 }, { "epoch": 50.0, "grad_norm": 0.2623092234134674, "learning_rate": 0.0, "loss": 0.0887, "step": 4000 }, { "epoch": 50.0, "eval_accuracy": 0.9755859375, "eval_loss": 0.10555899888277054, "eval_runtime": 4.9229, "eval_samples_per_second": 208.005, "eval_steps_per_second": 13.0, "step": 4000 }, { "epoch": 50.0, "step": 4000, "total_flos": 5.437210780237824e+18, "train_loss": 0.3629864407479763, "train_runtime": 3465.6999, "train_samples_per_second": 73.867, "train_steps_per_second": 1.154 } ], "logging_steps": 10, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 5.437210780237824e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }