AbstractPhil commited on
Commit
7d7e3f5
·
verified ·
1 Parent(s): 13f901d

Update best_model_acc69.48_metadata.json - Run 20251012_194945

Browse files
weights/David-partial_shared-hierarchical_tree/20251012_194945/best_model_acc69.48_metadata.json ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(2503.)",
7
+ "exp_avg": "tensor([[ 2.8298e-06, 1.8787e-04, -2.8385e-05, ..., 7.2895e-05,\n 3.2653e-05, -6.6496e-06],\n [ 4.8442e-05, -8.8554e-05, 4.6923e-05, ..., 5.4057e-05,\n -5.5230e-05, 1.2185e-04],\n [-3.9534e-06, -2.0580e-05, 1.0778e-05, ..., -2.3252e-05,\n -4.0105e-06, -1.9547e-05],\n ...,\n [ 2.1644e-05, 4.9039e-05, 1.7297e-05, ..., -1.4619e-05,\n -1.4412e-05, -3.4463e-05],\n [-4.3041e-06, -1.9197e-05, 3.1406e-05, ..., 1.9094e-05,\n 1.1356e-05, -4.0538e-05],\n [ 1.4430e-04, -1.4801e-04, 5.5967e-05, ..., 1.3172e-05,\n 1.4775e-04, -1.2904e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[2.2818e-08, 1.5325e-07, 6.1313e-08, ..., 1.5545e-08, 9.1150e-09,\n 1.6037e-08],\n [5.3959e-08, 1.3734e-07, 6.7516e-08, ..., 2.7350e-08, 2.0359e-08,\n 2.2704e-08],\n [1.7581e-08, 1.3578e-07, 4.3695e-08, ..., 1.0795e-08, 7.6705e-09,\n 9.7655e-09],\n ...,\n [8.6092e-08, 1.0437e-07, 1.4548e-08, ..., 1.1526e-08, 1.9641e-08,\n 1.1937e-08],\n [4.1376e-08, 1.3273e-07, 6.1529e-08, ..., 1.7174e-08, 1.6102e-08,\n 1.5924e-08],\n [1.5480e-07, 1.2023e-07, 2.7463e-08, ..., 2.9722e-08, 4.1152e-08,\n 2.3835e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(2503.)",
12
+ "exp_avg": "tensor([-4.7878e-04, 2.7505e-03, -8.3401e-04, -7.6316e-04, 5.1963e-04,\n -4.6441e-04, -1.1975e-07, -8.5879e-04, -3.5662e-03, -1.9690e-03,\n 2.7726e-03, -1.7377e-03, 5.2589e-04, -3.4086e-04, -6.2771e-04,\n 7.0397e-04, -6.4735e-05, 3.2965e-03, 2.0110e-04, 1.9562e-04,\n 9.5414e-04, -1.6184e-03, -1.5696e-03, -1.1043e-04, 8.7867e-04,\n -1.3555e-04, 2.3696e-03, -3.4293e-04, -8.1708e-04, -1.4749e-03,\n -2.2112e-03, -5.2301e-04, -1.4799e-03, 2.3503e-04, -6.4464e-04,\n 9.6774e-04, -7.2112e-04, 2.1260e-03, -2.6665e-03, 3.9842e-04,\n 9.8644e-04, 1.3929e-03, -2.8522e-03, 2.4945e-03, 3.7981e-06,\n 2.3324e-03, 1.1060e-03, -8.9254e-04, -2.0195e-03, 1.2044e-03,\n 1.5276e-03, 7.4738e-04, 1.4945e-03, -3.3023e-04, 1.7056e-03,\n -1.2024e-03, 4.8215e-04, 7.7149e-04, 1.4045e-03, -8.1999e-04,\n -1.2856e-03, -1.6814e-03, -6.9604e-04, -7.1983e-04, -1.2814e-03,\n 3.5106e-04, 5.4767e-04, -1.5136e-03, 1.4487e-03, 1.7253e-03,\n -5.8637e-04, 5.4499e-04, 1.8072e-03, -6.8887e-04, -1.6182e-03,\n 3.2804e-04, 1.8605e-03, -2.1718e-03, 1.1305e-03, -1.2769e-03,\n -4.3356e-04, -1.4034e-03, -1.0092e-03, 1.0139e-04, 2.1294e-03,\n 9.3070e-04, -1.5541e-03, -5.9492e-04, 2.3853e-04, 2.6404e-04,\n 3.0342e-04, 1.4923e-03, 2.3200e-03, 1.5058e-03, -5.4435e-04,\n -4.3508e-04, 2.8180e-04, 3.3099e-03, -1.0144e-03, 4.2141e-05,\n 4.7777e-04, 6.0388e-05, -8.0865e-04, -5.6062e-04, -4.7274e-04,\n 1.1257e-03, -1.9000e-03, -1.6768e-35, -4.5091e-05, -5.6052e-45,\n -2.1951e-03, 2.4211e-03, -4.1313e-03, 1.2307e-05, -1.1934e-03,\n -1.0189e-04, -1.6572e-03, -6.6679e-04, -3.6069e-03, -5.8354e-04,\n 2.2525e-03, 6.9349e-04, 3.5848e-04, 1.3750e-03, 3.0958e-04,\n 6.9709e-04, 1.3665e-03, 2.4549e-04, 8.3871e-04, -2.4556e-03,\n -1.4010e-03, 5.6186e-04, -1.7510e-04, 1.1813e-03, 3.3609e-04,\n 2.3907e-04, -1.9117e-03, -7.4973e-04, 2.0313e-03, 6.9247e-04,\n -1.4640e-04, -1.2221e-03, -8.1442e-04, -1.9721e-03, -5.0974e-04,\n 4.2607e-04, 7.0398e-04, 8.4776e-05, 4.5227e-04, 2.2835e-03,\n 1.1007e-03, -1.2926e-03, 5.4793e-06, 2.1693e-04, 1.3709e-03,\n 6.7317e-04, 1.7360e-03, -1.3947e-03, -2.0297e-03, 1.5721e-03,\n -4.6667e-04, 7.8313e-05, -4.8968e-04, -2.8471e-04, -1.1692e-04,\n -1.7648e-03, -1.9378e-03, 4.8763e-04, 3.0769e-03, 1.4595e-03,\n -2.1037e-03, -2.1075e-03, 2.4689e-03, -8.3636e-05, -2.0465e-03,\n -1.8343e-04, -1.9552e-03, -3.7526e-04, 5.7658e-04, -1.0390e-03,\n 6.5061e-04, 2.8000e-03, 2.6691e-04, 8.8474e-05, 2.5352e-05,\n 1.7872e-03, -2.2591e-04, -2.7815e-04, 6.8922e-04, 9.2781e-06,\n 7.2551e-04, 1.3777e-03, 1.1229e-04, 8.7446e-04, 5.6899e-05,\n -4.4957e-03, 6.0579e-04, 1.0545e-03, -1.5225e-03, 1.7886e-03,\n -1.2493e-03, 1.8226e-03, -6.6268e-04, -4.3284e-04, 4.3781e-04,\n 4.5641e-04, -1.6842e-04, 4.0179e-04, -9.3928e-05, 1.3320e-03,\n -1.3247e-03, 5.4601e-04, 9.3160e-04, 3.2841e-04, 1.0632e-04,\n -1.0398e-03, -7.1883e-04, 5.7128e-04, 9.2444e-04, -2.6862e-04,\n -4.2083e-04, 3.0075e-03, -1.5966e-04, 1.0769e-04, 5.9123e-04,\n 2.1374e-04, -9.9015e-04, -5.9951e-04, 6.4529e-04, 1.7562e-03,\n 1.7531e-03, 2.5392e-42, -7.5053e-04, -8.2771e-04, -1.6823e-03,\n -3.2252e-04, 1.6466e-03, 9.9949e-04, -1.1185e-03, -1.0782e-03,\n -9.9841e-04, 4.0072e-04, 5.4631e-04, 1.0090e-04, 5.9922e-04,\n -4.8533e-05, 1.4231e-04, -6.2746e-04, 2.0799e-03, -6.7849e-05,\n -7.2535e-04, 2.0635e-05, -4.6825e-06, 1.6843e-03, 1.4584e-03,\n 2.4756e-03, -1.7425e-03, 7.1544e-04, -3.7798e-04, -7.8222e-04,\n -2.5289e-03, -2.1260e-04, 2.9861e-03, 2.3492e-03, 2.3282e-03,\n -1.5363e-03, -1.6558e-04, -1.3080e-03, -9.2431e-04, 3.1722e-03,\n 9.9442e-04, -9.7938e-04, 3.3470e-04, -6.9813e-04, -1.8585e-03,\n -4.4106e-04, 1.8700e-05, 2.5553e-04, -3.6686e-04, 1.2647e-03,\n -2.1711e-03, 1.6956e-03, 8.9856e-04, 4.0939e-04, -9.1483e-04,\n -3.3018e-04, 5.4060e-04, 1.3240e-03, 8.2998e-04, 2.6368e-07,\n -1.2591e-03, 2.0095e-04, -4.9395e-04, 5.6052e-45, 8.7798e-04,\n -7.8407e-04, -1.3377e-03, -2.0391e-04, 2.2027e-03, 3.3116e-05,\n 1.6231e-03, -5.2823e-04, 5.5957e-04, -9.2160e-04, -1.4140e-04,\n -8.6876e-04, -2.0909e-05, -5.4952e-04, -2.4336e-05, -4.4016e-04,\n 5.0172e-04, 1.6874e-03, 8.6907e-04, -4.1331e-04, 1.0461e-03,\n 1.9937e-05, -9.4130e-05, -6.6319e-04, 1.5194e-03, 9.1988e-04,\n 3.8607e-04, -3.8144e-03, -3.8698e-05, -5.5817e-04, -5.9849e-05,\n 1.9832e-03, -2.0490e-03, -3.3524e-03, 1.3431e-03, 3.6200e-04,\n -8.4014e-04, -5.2207e-04, 3.2982e-04, 3.0022e-03, -8.4298e-04,\n 1.9266e-03, -2.9880e-03, -4.9754e-04, -1.8442e-03, -4.5619e-05,\n -5.2859e-04, 9.1412e-04, -1.4226e-03, 1.2713e-04, -2.4005e-03,\n 4.9008e-04, -2.3247e-04, 5.8904e-04, 2.8899e-04, 1.6799e-04,\n 7.3889e-04, 1.6395e-03, -9.1746e-04, 7.2150e-04, 2.4278e-04,\n 6.6197e-04, -9.3113e-04, -3.8974e-04, 1.0247e-03, -1.5146e-04,\n 4.9565e-04, -3.8982e-03, 3.1475e-11, -4.5771e-04, -2.7602e-04,\n -4.1986e-05, 1.5704e-03, 1.9680e-04, 2.1890e-03, -8.3435e-04,\n 1.4206e-03, 3.5844e-04, -1.0210e-04, 7.1876e-04, 1.1406e-03,\n -2.2808e-04, 1.1615e-03, -1.6992e-03, -6.7787e-04, -1.8034e-04,\n -3.7702e-04, 2.9217e-06, 3.9447e-03, 1.0731e-03, 7.8403e-05,\n 3.6876e-04, 4.8517e-04, -1.1180e-04, 2.3083e-04, -7.2684e-04,\n -9.8256e-04, 1.3317e-03, -1.1865e-03, -8.6652e-05, -9.0489e-04,\n 5.9744e-04, -1.6981e-04, -9.2766e-04, 8.2474e-04, 1.9489e-03,\n 3.5591e-04, -5.4302e-04, -4.1369e-04, 1.5668e-03, 1.7137e-03,\n -3.8937e-03, 1.1612e-03, 2.2109e-04, -8.4162e-04, 5.5868e-04,\n -5.2446e-04, -7.3794e-04, 3.1027e-04, 2.0330e-04, 2.2742e-04,\n 8.4912e-04, 2.9959e-04, -8.6896e-04, -2.5599e-03, -5.4761e-04,\n -1.3135e-03, -2.8135e-03, -1.6622e-20, 1.4025e-03, -4.4139e-04,\n -1.1373e-03, 4.8823e-05, 1.1549e-03, 1.8711e-03, -9.1450e-04,\n -1.2245e-03, 8.7384e-04, 2.0960e-04, 1.9823e-03, -1.9255e-05,\n 5.9549e-05, -1.1138e-03, 3.3880e-04, -1.5361e-05, 1.0887e-03,\n -6.4048e-04, -4.5587e-04, 2.8333e-04, -4.1866e-03, 9.3652e-06,\n 7.4739e-04, 1.9322e-03, 1.5960e-03, -1.9341e-04, 8.0459e-04,\n 4.6702e-05, 7.9165e-05, -2.5206e-03, 1.8000e-03, -1.4832e-03,\n 7.0291e-06, -7.5074e-04, -8.4018e-04, 2.9027e-05, -1.0372e-03,\n -7.5999e-04, 2.0816e-03, -5.3162e-04, 5.3399e-04, 8.1738e-05,\n -3.6019e-04, 1.0943e-03, -6.3185e-04, -3.5506e-04, -1.2681e-03,\n -4.9229e-04, -1.2340e-03, -1.0637e-03, 1.3568e-04, 1.4845e-05,\n -5.0710e-04, -2.8397e-09, -1.7498e-05, 5.6052e-45, 1.0012e-03,\n 9.0953e-05, -3.4486e-04, 2.8081e-05, 7.6525e-05, 4.9628e-05,\n -2.8036e-03, 1.6571e-05, 6.2420e-04, 7.6346e-04, 8.8552e-04,\n -8.5684e-04, 6.7389e-04, -1.3292e-03, -1.0304e-03, -3.2030e-04,\n -1.4510e-03, -1.5435e-05, -3.5633e-03, 1.3255e-03, -2.0763e-03,\n -4.7063e-04, 1.8726e-03, 4.4552e-04, 2.7816e-04, 1.6035e-03,\n 5.5139e-04, -1.6511e-03, -1.3361e-03, 9.0719e-04, 6.5243e-04,\n -1.1509e-03, -3.5767e-04, 5.0841e-05, -1.1604e-06, -2.4052e-04,\n 1.7421e-03, -1.4266e-03, 2.7452e-03, 8.8694e-04, 2.0843e-04,\n -2.1598e-03, 7.2962e-04, 1.8600e-03, -1.2327e-03, 5.6052e-45,\n -2.5441e-03, -3.4998e-04, 2.2451e-03, -1.5794e-03, 7.6174e-04,\n 8.8806e-05, -3.3050e-04, -6.2293e-04, 2.7755e-04, 8.4485e-04,\n 3.5043e-03, -1.5868e-04, 1.1906e-03, 2.6681e-03, -1.6871e-04,\n -1.3633e-03, -7.9976e-04, 2.1903e-03, 3.4264e-04, -8.1461e-06,\n -1.6377e-03, -1.4508e-03, -2.0284e-04, 7.1445e-04, -4.4358e-04,\n -3.0418e-04, -2.3190e-03, 1.2670e-03, 4.3852e-04, -1.5022e-03,\n 3.6319e-04, -6.8926e-04, -2.4865e-03, -6.7059e-04, -9.4019e-04,\n 7.4632e-05, -3.5859e-04, -1.2726e-03, -2.9172e-03, 1.2600e-03,\n 9.8134e-05, 2.9102e-03, 1.3916e-03, -2.6224e-04, -5.3877e-04,\n 5.2696e-04, 3.0497e-04, 6.0341e-05, 1.6537e-03, -2.2716e-03,\n -9.3348e-04, 1.3633e-03, 3.0963e-04, -1.3034e-05, 2.5184e-03,\n -1.8401e-03, -3.4917e-04, 3.6892e-04, -1.3496e-03, -1.4872e-04,\n 1.3423e-04, -1.7240e-03, -1.7217e-03, -8.2987e-05, 6.7807e-04,\n -4.1102e-04, 1.2910e-03, -2.0917e-03, -4.7894e-04, -6.0450e-04,\n 5.4681e-04, 6.3599e-04, -1.3093e-03, 5.1682e-04, -4.7978e-04,\n 1.5174e-04, -2.9677e-05, 5.0851e-04, -1.4295e-03, 1.5548e-03,\n 5.8636e-06, -2.9397e-05, 3.0025e-04, 7.9710e-04, 1.3079e-14,\n -5.8638e-04, 7.3063e-05, 1.2996e-03, -1.1460e-03, 1.3937e-03,\n 2.4762e-04, -2.0330e-04, -1.0644e-03, 1.8842e-04, -4.9638e-05,\n -1.0794e-03, 3.4665e-04, -2.5178e-04, 4.7481e-03, 2.5549e-03,\n 2.3799e-03, -8.6023e-04, 3.8976e-04, -9.2954e-04, -1.2888e-03,\n 4.5264e-03, -1.9301e-03, 2.8715e-03, -5.7645e-04, 6.1328e-05,\n 1.1745e-03, -1.6498e-03, 1.1266e-03, 1.3938e-03, -1.4655e-03,\n 3.0397e-03, 2.0163e-03, -5.7742e-04, -1.4894e-03, 1.1213e-03,\n 2.0368e-03, -1.1335e-03, -7.2292e-04, -1.9471e-04, -5.3855e-04,\n 3.6650e-04, -1.8060e-03, 8.2389e-04, -2.4075e-03, 9.9837e-04,\n 7.4902e-04, 6.2822e-05, -2.8187e-03, 4.4623e-04, 1.0113e-03,\n 4.6351e-04, 1.0410e-03, -5.3044e-04, -3.4277e-10, 2.2078e-04,\n 1.2097e-03, -9.5797e-04, 6.0104e-04, 1.6995e-04, -3.2528e-04,\n -8.9603e-04, 3.1922e-04, 2.9517e-03, -7.1716e-04, 6.9754e-04,\n -1.0974e-03, -2.1894e-03, 2.0934e-03, -8.3646e-04, 1.3067e-03,\n -4.8173e-04, -2.3316e-03, 1.9933e-03, -2.0856e-04, 1.0824e-03,\n -2.2123e-03, -4.4072e-04, -7.8874e-04, -4.0379e-04, -7.7426e-04,\n 2.0019e-03, -6.6992e-04, 1.7214e-03, 1.4797e-04, 7.5064e-04,\n -5.0533e-04, 4.6545e-04, -2.6775e-04, 1.1230e-03, 1.7474e-03,\n -2.6213e-03, 1.6124e-05, 2.2407e-03, -1.4919e-03, -9.8461e-04,\n 1.7563e-03, 2.9522e-03, 8.3071e-04, 4.3796e-03, 1.3157e-03,\n -7.9034e-05, 3.1339e-04, 6.0496e-04, 1.8387e-03, -4.8503e-03,\n 5.8089e-04, -6.3306e-06, 1.0225e-04, 3.3726e-04, 5.1730e-05,\n -1.5245e-03, -2.0729e-04, 2.9848e-03, -8.7228e-04, 1.9052e-03,\n 2.8777e-03, 1.4390e-04, 4.0587e-04, -5.5948e-04, 2.3189e-04,\n 6.0060e-04, -4.1934e-04, 1.0869e-03, 1.1133e-03, 1.3791e-03,\n -9.6762e-04, -5.8915e-04, -1.7776e-03, -2.9896e-04, 4.9518e-05,\n -5.3570e-04, -7.8484e-04, -1.9030e-03, 1.1499e-03, 1.8997e-03,\n 2.0270e-03, 3.8706e-03, -6.3760e-04, -2.6387e-03, -4.4649e-04,\n -2.4244e-03, 2.8597e-03, -5.7884e-04, 3.4174e-04, -3.1087e-04,\n 1.5789e-03, 3.9103e-04, -1.6507e-04, 7.0471e-04, 1.7335e-03,\n 4.0497e-04, -6.7239e-04, -3.4950e-03, -8.4331e-04, 5.9205e-04,\n -5.1938e-04, -6.8211e-04, 1.8542e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([1.7168e-05, 3.2508e-05, 1.2290e-05, 3.2189e-05, 3.8602e-05, 1.5212e-05,\n 5.2977e-08, 3.1177e-06, 4.0986e-05, 1.6909e-05, 5.1997e-05, 2.5271e-05,\n 1.3518e-05, 2.3995e-05, 3.3923e-05, 8.8626e-06, 2.6264e-05, 5.6945e-05,\n 1.2860e-05, 2.6817e-05, 2.2610e-05, 4.3695e-05, 2.4228e-05, 2.0547e-05,\n 2.5586e-05, 4.2572e-05, 5.3989e-05, 2.2704e-05, 4.0709e-05, 1.9432e-05,\n 3.4330e-05, 3.6873e-05, 3.0453e-05, 3.9846e-05, 1.7212e-05, 1.1116e-05,\n 2.8722e-05, 1.7365e-05, 3.0875e-05, 1.4077e-05, 6.2178e-06, 1.0095e-05,\n 1.7996e-05, 2.1527e-05, 3.7804e-06, 5.0988e-05, 3.8201e-05, 1.1739e-05,\n 2.2429e-05, 3.7877e-05, 1.3018e-05, 1.8538e-05, 2.2934e-05, 4.3744e-05,\n 3.9004e-05, 3.3718e-05, 2.0236e-05, 1.0880e-05, 1.2744e-05, 3.1797e-05,\n 3.4489e-05, 2.7476e-05, 1.4489e-05, 4.0452e-05, 1.4329e-05, 2.0512e-05,\n 7.0494e-06, 2.3064e-05, 2.7916e-05, 3.8075e-05, 1.6989e-05, 8.6340e-05,\n 2.7718e-05, 3.2942e-05, 3.1701e-05, 9.8551e-06, 1.5383e-05, 3.8146e-05,\n 2.1177e-05, 8.2590e-06, 2.3459e-06, 3.0288e-05, 2.0453e-05, 2.9187e-05,\n 7.0635e-05, 2.1376e-05, 2.9021e-05, 8.4427e-06, 2.5851e-05, 1.3104e-05,\n 2.8596e-05, 3.4559e-05, 2.4807e-05, 4.8371e-05, 3.3862e-05, 1.7037e-05,\n 6.4136e-06, 4.2932e-05, 8.0643e-06, 2.0443e-05, 2.7443e-05, 1.0641e-05,\n 1.1735e-05, 3.9110e-05, 2.3766e-05, 3.0938e-05, 2.5493e-05, 3.1079e-09,\n 6.3971e-06, 1.6879e-09, 4.3830e-05, 1.3862e-05, 6.3836e-05, 2.5072e-05,\n 1.5364e-05, 2.9161e-05, 2.9606e-05, 3.0426e-05, 3.2461e-05, 2.1579e-05,\n 4.1485e-05, 1.6558e-05, 5.6987e-05, 2.1654e-05, 1.6001e-05, 2.4970e-05,\n 2.9253e-05, 2.3178e-06, 1.3157e-05, 4.7144e-05, 1.1451e-05, 6.6849e-06,\n 3.5443e-05, 4.8088e-05, 2.0253e-05, 2.2815e-05, 1.2405e-05, 3.6721e-05,\n 2.7475e-05, 1.8808e-05, 2.8155e-05, 2.6391e-05, 4.7246e-05, 4.5646e-05,\n 2.1089e-06, 3.4347e-05, 1.7244e-05, 2.1564e-05, 2.2049e-05, 3.4465e-05,\n 5.0075e-05, 4.6756e-05, 2.8247e-05, 4.7682e-05, 3.1128e-05, 3.5455e-05,\n 2.1654e-05, 6.6747e-05, 3.2201e-05, 1.8653e-05, 1.5183e-05, 1.6524e-05,\n 4.7058e-05, 1.0165e-05, 8.6590e-06, 3.5599e-05, 3.5426e-05, 3.0977e-05,\n 3.9861e-05, 9.8085e-06, 4.9059e-05, 3.7099e-05, 4.4703e-05, 1.8214e-05,\n 2.4965e-05, 1.6717e-05, 2.7058e-05, 3.6096e-05, 3.3018e-05, 1.0249e-05,\n 3.8998e-05, 4.0957e-05, 2.2361e-05, 1.9477e-05, 3.5889e-05, 2.8853e-05,\n 2.6762e-05, 3.0373e-05, 2.3150e-05, 1.8927e-05, 1.0858e-05, 1.8305e-05,\n 2.8325e-05, 1.1494e-05, 1.2107e-05, 4.7516e-05, 3.0221e-05, 3.3530e-05,\n 2.1149e-05, 1.0487e-05, 3.3224e-05, 2.2671e-05, 2.5273e-05, 5.4973e-05,\n 1.9910e-05, 3.9508e-05, 4.1307e-05, 1.5577e-05, 5.4719e-06, 1.0298e-05,\n 2.3933e-05, 4.0737e-05, 3.1831e-05, 5.6329e-05, 2.2668e-05, 1.5883e-05,\n 2.7542e-05, 3.2544e-05, 4.3499e-05, 1.3929e-05, 1.5597e-05, 3.7345e-05,\n 3.7631e-05, 6.9480e-05, 9.1021e-06, 1.8241e-05, 2.5132e-05, 2.6702e-05,\n 9.5732e-06, 1.0045e-05, 3.2200e-05, 3.4546e-09, 5.3289e-05, 2.7730e-05,\n 1.1991e-05, 3.3759e-05, 1.4324e-05, 2.6130e-05, 4.4499e-05, 5.6411e-06,\n 1.4321e-05, 1.2902e-05, 2.5310e-05, 1.2202e-05, 1.4202e-05, 2.1897e-05,\n 3.0974e-05, 3.4694e-05, 3.5310e-05, 2.2077e-07, 5.5452e-05, 2.2759e-05,\n 6.0048e-05, 1.9438e-05, 2.4761e-05, 3.2973e-05, 4.1277e-05, 3.6795e-05,\n 2.3242e-05, 3.6647e-06, 8.7382e-06, 1.0144e-05, 3.4747e-05, 5.2133e-05,\n 3.7366e-05, 1.6515e-05, 1.9500e-05, 1.6791e-05, 2.2511e-05, 4.4658e-05,\n 2.7505e-05, 2.1688e-05, 2.7087e-05, 6.7114e-05, 1.9673e-05, 2.2487e-05,\n 2.0424e-05, 1.6462e-05, 8.7052e-06, 8.3914e-06, 3.2597e-05, 2.5721e-05,\n 1.5868e-05, 3.8724e-05, 1.5263e-05, 7.8669e-06, 1.7636e-05, 2.2101e-05,\n 2.4280e-05, 7.9506e-09, 3.2886e-05, 2.3974e-05, 1.3132e-05, 1.0705e-08,\n 4.8154e-05, 3.2921e-05, 3.0114e-05, 1.4960e-05, 1.3026e-05, 1.1855e-05,\n 2.0281e-05, 1.9861e-05, 3.3720e-05, 2.1062e-05, 1.9112e-05, 5.4138e-05,\n 1.3812e-05, 3.6310e-05, 1.7115e-05, 5.6255e-05, 4.4596e-05, 2.4685e-05,\n 1.3419e-05, 1.8074e-05, 3.2543e-05, 7.4376e-07, 1.1565e-05, 1.6647e-05,\n 5.4387e-05, 5.4794e-05, 8.2419e-06, 4.1467e-05, 1.6749e-05, 1.6847e-05,\n 1.6468e-05, 2.9111e-05, 4.8142e-05, 6.0340e-05, 1.3086e-05, 4.3989e-05,\n 1.5730e-05, 1.6609e-05, 9.0626e-06, 5.8148e-05, 3.2124e-05, 2.5107e-05,\n 2.6218e-05, 2.7096e-05, 4.8451e-05, 6.2841e-07, 2.0874e-05, 3.6566e-05,\n 2.4120e-05, 1.3950e-05, 1.9796e-05, 2.6017e-05, 2.9495e-05, 2.2360e-05,\n 2.4751e-05, 1.2402e-05, 1.6376e-05, 3.1141e-05, 3.0391e-05, 4.9347e-05,\n 1.0195e-05, 1.3071e-05, 3.3267e-05, 1.4090e-05, 2.7884e-05, 2.0997e-05,\n 7.8332e-06, 3.4159e-05, 5.3972e-08, 3.0956e-05, 2.0365e-05, 2.1794e-05,\n 2.6122e-05, 3.6531e-05, 1.7074e-05, 2.6157e-05, 4.3033e-05, 2.1978e-05,\n 3.6326e-05, 4.0205e-05, 1.2954e-05, 2.5047e-05, 3.4293e-05, 3.2290e-05,\n 1.4927e-05, 2.3086e-05, 1.6620e-05, 1.3168e-06, 3.5307e-05, 2.0713e-05,\n 3.9893e-06, 2.1870e-05, 5.4702e-06, 5.9076e-06, 3.8495e-05, 2.6629e-05,\n 3.9436e-05, 2.4228e-05, 3.9033e-05, 4.0680e-05, 1.3255e-05, 1.9905e-05,\n 2.3711e-05, 3.0396e-05, 2.3938e-05, 2.3623e-05, 6.9610e-05, 1.3701e-05,\n 6.4852e-06, 4.0968e-05, 2.5200e-05, 5.3264e-05, 5.4852e-05, 8.1890e-06,\n 1.1225e-05, 3.5231e-05, 1.2529e-05, 1.8419e-05, 1.2316e-05, 2.7071e-05,\n 4.9051e-05, 3.7614e-06, 4.3106e-05, 4.5730e-05, 2.9988e-05, 6.7054e-06,\n 3.7822e-05, 4.6785e-05, 4.5020e-07, 2.2583e-05, 1.3942e-05, 2.4817e-05,\n 7.2642e-06, 2.6175e-05, 5.2898e-05, 7.7132e-06, 2.0409e-05, 2.3583e-05,\n 1.4439e-05, 2.9539e-05, 1.7806e-05, 6.7402e-05, 2.7921e-05, 1.3901e-05,\n 1.8913e-05, 2.1186e-05, 4.1605e-05, 3.8490e-05, 9.5452e-06, 2.5507e-05,\n 5.0814e-05, 3.2766e-05, 5.3606e-05, 6.4329e-05, 2.4011e-05, 4.2192e-05,\n 3.3828e-05, 4.8624e-05, 4.3250e-05, 3.2282e-05, 2.1121e-05, 1.1858e-05,\n 3.9394e-05, 1.2829e-05, 3.5213e-05, 8.3042e-06, 1.7900e-05, 2.2017e-05,\n 1.6869e-05, 1.3916e-05, 1.6524e-05, 2.8377e-05, 2.5186e-05, 1.7728e-06,\n 3.9522e-06, 1.1531e-05, 1.6316e-05, 2.0686e-05, 1.8525e-05, 1.4828e-05,\n 1.2671e-05, 3.6829e-05, 2.4499e-08, 1.3107e-05, 9.1399e-10, 1.1370e-05,\n 5.7837e-06, 2.1458e-05, 4.8105e-05, 1.3776e-05, 7.4035e-06, 4.0204e-05,\n 3.5324e-05, 1.5988e-05, 2.6083e-05, 3.5000e-05, 2.9813e-05, 7.3808e-06,\n 1.0739e-05, 3.3824e-05, 3.1262e-06, 1.7834e-05, 6.3900e-07, 6.1972e-05,\n 8.9276e-05, 5.3858e-05, 2.3090e-05, 4.6215e-05, 8.0593e-06, 4.1026e-05,\n 4.0424e-05, 1.2745e-05, 2.4098e-05, 1.6540e-05, 5.3914e-05, 1.1206e-05,\n 2.1195e-05, 2.3073e-05, 3.6024e-05, 5.1064e-08, 3.6282e-05, 2.3672e-05,\n 3.0368e-05, 1.9684e-05, 2.0442e-05, 5.0693e-05, 3.8373e-05, 1.1645e-05,\n 3.2110e-05, 3.8715e-05, 8.8194e-10, 3.2857e-05, 4.8557e-06, 1.8494e-05,\n 3.8929e-05, 5.2920e-05, 1.6417e-05, 2.8654e-05, 7.8899e-06, 2.5941e-05,\n 3.3188e-05, 3.4019e-05, 2.0496e-05, 1.6782e-05, 2.1732e-05, 7.6205e-06,\n 4.0595e-05, 1.3624e-05, 1.4291e-05, 2.0649e-05, 4.2576e-05, 2.5485e-05,\n 2.4973e-05, 5.3849e-05, 2.0791e-05, 3.0690e-05, 3.5196e-05, 3.2115e-05,\n 3.7929e-05, 3.3782e-05, 4.0218e-05, 1.4254e-05, 3.6311e-05, 2.2565e-05,\n 4.3019e-05, 2.9650e-05, 1.3277e-05, 2.0024e-05, 2.5611e-05, 3.7780e-05,\n 4.6563e-05, 2.6400e-05, 3.4451e-05, 1.5926e-05, 2.2859e-05, 5.3881e-05,\n 1.5515e-05, 2.5394e-05, 3.4321e-05, 4.2089e-05, 4.2440e-05, 5.5553e-05,\n 2.1725e-05, 1.5476e-05, 2.3741e-05, 3.9560e-05, 1.5738e-05, 1.8560e-05,\n 4.0061e-05, 4.9510e-05, 4.3471e-06, 2.1873e-05, 5.0652e-05, 4.2065e-05,\n 7.7529e-06, 2.5879e-05, 2.9935e-05, 2.9392e-05, 3.8780e-05, 5.7735e-05,\n 2.2019e-05, 3.3127e-05, 2.3394e-05, 5.2872e-05, 1.7404e-05, 2.0012e-05,\n 1.1925e-05, 3.3288e-05, 4.4242e-05, 2.1604e-05, 3.3523e-05, 2.3752e-05,\n 4.2624e-06, 1.7424e-05, 3.4672e-05, 1.1934e-08, 2.9639e-05, 2.7518e-05,\n 2.1007e-05, 2.5172e-05, 1.3818e-05, 2.9477e-05, 2.7101e-05, 1.5671e-05,\n 1.4008e-05, 9.4571e-06, 3.3986e-05, 2.6402e-05, 1.9690e-05, 2.9234e-05,\n 2.3118e-05, 2.4614e-05, 4.7561e-05, 1.4519e-05, 2.2216e-05, 2.6391e-05,\n 4.4590e-05, 3.0181e-05, 2.8309e-05, 1.1621e-05, 2.6191e-05, 2.4139e-05,\n 1.7625e-05, 5.4578e-05, 1.6681e-05, 6.6060e-05, 3.1046e-05, 2.5014e-05,\n 2.8027e-05, 7.8004e-06, 1.8215e-05, 2.3373e-05, 1.6715e-05, 2.4969e-05,\n 2.6097e-05, 2.9917e-05, 2.2414e-05, 2.5616e-05, 3.5329e-05, 2.0291e-05,\n 2.9219e-05, 1.4287e-05, 2.5009e-05, 2.2502e-05, 9.5391e-06, 1.2896e-05,\n 1.1971e-05, 3.2976e-05, 1.0546e-05, 6.1504e-08, 1.4800e-05, 5.3948e-05,\n 3.5142e-05, 2.2360e-05, 1.8578e-05, 1.1044e-05, 2.8156e-05, 2.5939e-05,\n 1.7268e-05, 1.3109e-05, 2.0849e-05, 4.2375e-05, 2.8534e-05, 6.3085e-06,\n 3.1469e-05, 2.7477e-05, 4.2921e-05, 2.3880e-05, 3.2130e-05, 2.6474e-05,\n 2.6882e-05, 4.5295e-05, 1.9932e-05, 2.7628e-05, 1.2394e-05, 1.2880e-05,\n 3.2254e-05, 1.6864e-05, 4.0682e-05, 2.3182e-05, 1.5208e-05, 1.1667e-05,\n 2.7250e-05, 5.4161e-05, 3.0544e-05, 2.4399e-05, 5.8279e-05, 2.8253e-06,\n 2.8182e-05, 3.1331e-05, 1.7580e-05, 1.7276e-05, 4.2509e-05, 3.3349e-05,\n 3.9967e-05, 1.9673e-05, 2.2029e-06, 2.8447e-05, 3.3131e-05, 2.7786e-05,\n 4.0519e-05, 2.0754e-05, 8.5653e-07, 1.6446e-05, 1.1594e-05, 5.9451e-05,\n 1.4219e-05, 3.6766e-05, 5.0904e-05, 2.3947e-05, 9.5497e-06, 3.2336e-05,\n 1.1103e-05, 2.3545e-05, 2.8643e-05, 2.6412e-06, 1.3137e-05, 3.6504e-05,\n 2.8056e-05, 3.2644e-05, 4.7434e-05, 1.4699e-05, 2.4897e-05, 2.2173e-05,\n 1.8505e-05, 6.0227e-05, 1.7172e-05, 6.2031e-05, 5.0747e-05, 3.9224e-05,\n 1.9625e-05, 2.2206e-05, 2.6271e-05, 2.2914e-05, 4.1368e-05, 8.2348e-06,\n 1.5487e-05, 1.6050e-05, 8.3334e-06, 4.4848e-05, 3.4208e-05, 3.6507e-05,\n 3.8886e-05, 1.7724e-05, 1.3892e-05, 2.8791e-05, 2.5772e-05, 4.8166e-05,\n 4.7997e-05, 2.6005e-05, 1.6294e-05, 2.0909e-05, 1.8395e-05, 4.4008e-05],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(2503.)",
17
+ "exp_avg": "tensor([[ 2.6763e-05, 1.0239e-04, -4.8783e-06, ..., 2.7676e-06,\n 8.0765e-05, 2.4209e-05],\n [-7.2761e-05, -3.9099e-06, -4.2357e-06, ..., 3.9698e-05,\n 4.2493e-07, 1.8724e-05],\n [-2.1981e-05, -9.1788e-06, -2.6184e-05, ..., 1.1513e-05,\n -3.8291e-06, 5.2883e-06],\n ...,\n [ 4.2810e-05, 1.3957e-05, -8.1873e-06, ..., 2.9071e-05,\n -6.5630e-06, 2.5189e-05],\n [-1.8403e-05, 5.7988e-05, -2.7174e-05, ..., -3.7992e-06,\n 5.2036e-06, 3.1358e-05],\n [-3.4617e-21, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.0201e-20, 5.6052e-45]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[1.1045e-08, 1.7452e-08, 2.6108e-09, ..., 2.5911e-09, 1.5586e-07,\n 1.8706e-08],\n [1.2919e-08, 9.5561e-09, 1.0580e-08, ..., 7.3328e-09, 5.4165e-10,\n 2.2392e-08],\n [2.7859e-08, 2.8375e-08, 8.5890e-09, ..., 1.6505e-09, 4.9131e-10,\n 2.1507e-09],\n ...,\n [3.4370e-08, 1.7422e-09, 1.2969e-08, ..., 2.2430e-08, 1.8956e-09,\n 4.5947e-08],\n [3.2240e-08, 2.0328e-08, 2.7356e-08, ..., 1.9354e-08, 4.9517e-09,\n 2.5292e-08],\n [2.6401e-11, 1.0182e-11, 5.1535e-11, ..., 1.5518e-11, 8.2701e-16,\n 3.1993e-12]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(2503.)",
22
+ "exp_avg": "tensor([ 2.3981e-04, -1.5002e-04, 1.8043e-04, 3.2351e-04, -9.2736e-04,\n 7.9368e-28, 3.3714e-05, 5.0539e-04, 5.6052e-45, 5.3607e-04,\n 2.1635e-04, 1.7626e-04, 8.5873e-04, -5.7073e-04, 4.8032e-04,\n -7.8082e-05, -2.5890e-04, 6.3953e-05, -6.4924e-04, -3.2229e-04,\n -1.3507e-04, -1.1823e-03, -1.9087e-04, 1.5061e-04, -1.3226e-04,\n -9.1581e-04, 5.6052e-45, -3.4905e-04, 5.6052e-45, -1.4731e-04,\n -3.3002e-04, 8.0723e-05, -3.4182e-05, -1.1136e-03, 5.6052e-45,\n 2.6545e-39, 4.1726e-04, 1.1789e-04, -2.7212e-04, 4.7871e-04,\n -8.2874e-05, 7.4139e-05, -6.0944e-04, -2.5244e-04, 3.0601e-25,\n -2.5638e-04, -8.5335e-04, -5.8678e-04, -1.1548e-03, -8.8364e-04,\n -1.0420e-04, -4.9308e-04, 4.7900e-04, 1.0664e-25, 3.5679e-04,\n -5.5178e-04, -3.6595e-04, 1.9630e-10, 4.1988e-06, -2.4209e-06,\n 2.9989e-04, 1.4668e-04, 8.8944e-04, -2.6335e-05, 3.3243e-04,\n 1.8389e-05, 5.6052e-45, -2.4608e-04, 1.7602e-05, 3.4668e-17,\n -2.5092e-04, -4.2004e-04, -6.3395e-04, 5.7403e-20, -4.3165e-04,\n 3.7055e-04, -2.9327e-04, -1.8453e-04, 4.1569e-05, 3.2624e-04,\n -3.2116e-04, -1.8898e-04, 1.7590e-04, 1.5074e-39, 5.7853e-04,\n -1.4734e-04, -2.9829e-04, 3.5960e-04, -7.8082e-05, 1.4796e-04,\n 6.7324e-04, -3.9643e-04, 3.3187e-04, -4.3384e-08, -1.2676e-05,\n 1.9812e-04, 2.6705e-04, -2.3695e-19, 4.0961e-04, 4.2757e-04,\n 4.2023e-05, 2.5163e-05, 1.0002e-04, 2.7865e-06, -5.5398e-05,\n -6.8052e-04, -4.7907e-04, 2.4690e-04, 7.0518e-04, 2.4011e-04,\n -6.1916e-04, 4.9192e-04, 1.1762e-04, -1.4411e-04, 5.6052e-45,\n -9.9909e-04, -4.9971e-04, -4.0691e-04, 3.6261e-04, -6.6486e-04,\n 5.6052e-45, -3.1207e-05, 4.9940e-05, 1.5604e-04, -8.5947e-05,\n -2.4058e-04, -1.2628e-04, 5.6052e-45, -6.5321e-04, -1.0327e-04,\n -3.8152e-05, -1.7254e-04, 1.4751e-04, 1.6508e-04, 2.3395e-04,\n -2.7863e-04, 5.6052e-45, -3.1332e-05, -1.4057e-04, 2.6402e-04,\n -1.7646e-04, 9.3277e-05, 4.3967e-04, 1.9664e-05, 2.3543e-04,\n 5.6052e-45, 3.5412e-04, 5.6052e-45, 4.3967e-04, -3.2764e-05,\n 3.0509e-05, 5.6052e-45, 1.9376e-04, 1.8835e-10, 1.4667e-04,\n -1.6766e-04, 1.8651e-04, 2.3709e-04, 5.6052e-45, 1.6413e-04,\n -7.4577e-05, -5.3730e-04, -7.9270e-08, 1.5334e-04, 5.6052e-45,\n -2.2907e-05, -5.1043e-04, 7.0636e-05, -2.7156e-04, 5.6052e-45,\n 5.6052e-45, 7.4507e-05, 2.9434e-04, 2.4318e-04, 3.5720e-05,\n 3.4644e-04, -6.1248e-05, 4.2633e-04, 5.6052e-45, 5.6052e-45,\n 7.9258e-04, -2.9025e-06, 3.0862e-04, 5.6052e-45, 5.6052e-45,\n 1.0698e-04, 5.6052e-45, 7.5520e-18, 4.2158e-04, 8.0305e-05,\n -5.5470e-04, 3.3363e-04, 4.2453e-04, 5.5212e-11, -8.6500e-05,\n 4.2000e-04, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -9.7551e-05, 1.0388e-05, 9.7610e-13, 5.8407e-04, -7.9813e-05,\n 2.0503e-16, 2.5204e-04, 1.5700e-04, -4.4656e-05, 7.4600e-06,\n -3.2327e-04, 5.7375e-04, 2.5679e-04, -8.5337e-04, -4.1094e-04,\n -9.4613e-05, -6.3246e-05, -2.2004e-04, 3.8436e-21, 3.3708e-04,\n -7.9121e-04, 3.3301e-05, 5.6052e-45, -7.1090e-04, -9.6558e-04,\n -4.7761e-05, 5.6052e-45, -1.2014e-04, -3.5427e-05, 1.0793e-21,\n -7.4213e-05, 3.5747e-04, -1.1580e-04, 5.6052e-45, 2.6565e-06,\n -1.3437e-04, 2.4378e-04, 3.3249e-19, -1.4081e-04, -1.8831e-04,\n 8.4372e-05, -1.4678e-05, 9.1456e-05, -1.2214e-04, -1.1525e-08,\n 5.6052e-45, -1.6015e-06, 5.6052e-45, 4.5944e-04, 5.6052e-45,\n 3.8065e-04, -4.3998e-05, 8.2885e-08, 5.6052e-45, -2.5886e-04,\n -1.1340e-04, -4.6224e-04, 2.3922e-06, 5.6052e-45, -9.0729e-05,\n 1.9653e-04, 8.2537e-06, 1.9273e-04, 5.6052e-45, 2.8122e-32,\n -3.8285e-04, 2.1834e-04, 5.6052e-45, 5.5489e-04, -8.9472e-06,\n -4.7019e-04, -2.9347e-04, 5.6052e-45, 7.4170e-05, 6.2867e-39,\n 1.2751e-05, -1.8307e-05, 9.7018e-05, -8.2474e-14, -4.1457e-04,\n 1.0180e-04, 3.0438e-04, 5.3994e-05, 3.2232e-04, 5.6052e-45,\n -1.8755e-04, 9.4039e-05, 5.0029e-24, 1.2767e-07, 3.1715e-04,\n 5.6052e-45, -2.6466e-05, -5.2640e-04, -4.1678e-05, 1.3940e-04,\n -6.4768e-05, -1.4895e-04, 3.0345e-04, 9.6379e-09, -3.6969e-04,\n 4.6952e-04, 1.0976e-05, 6.1969e-04, -8.9827e-05, 5.6052e-45,\n 7.1654e-04, 1.3013e-33, 1.0131e-04, -9.0633e-05, -2.2682e-04,\n -1.1418e-04, -1.3405e-04, 1.3029e-04, 5.5923e-04, 2.4226e-04,\n -4.5531e-04, 5.6052e-45, -2.3231e-04, 1.1325e-05, -2.7799e-06,\n 1.0196e-04, -2.7412e-04, 7.5057e-05, -3.4045e-05, 1.7355e-04,\n 5.9467e-05, 5.6052e-45, 4.9146e-04, -1.2073e-04, 3.4790e-28,\n 3.1008e-04, 2.1815e-24, -1.8478e-04, 5.2131e-05, -3.2405e-04,\n -1.6461e-04, -4.9877e-04, 2.3796e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -1.0886e-04, 4.5523e-05, -2.0711e-05, -3.0268e-04,\n -1.4192e-04, -6.9323e-05, 1.7331e-04, -3.2780e-04, 7.0608e-04,\n 2.9884e-04, -4.6936e-05, -3.1821e-04, 3.3024e-04, 5.6052e-45,\n -6.7764e-05, 5.6052e-45, -1.7562e-04, 5.6052e-45, 5.3031e-04,\n -2.1098e-05, 1.7144e-04, -3.2911e-05, 5.6052e-45, -1.7024e-04,\n 5.6052e-45, -1.7897e-04, 1.6348e-04, 1.8645e-04, -7.5732e-06,\n 5.6052e-45, 3.7820e-05, -2.9393e-04, -5.6083e-04, 8.2319e-05,\n -8.0722e-05, 6.7529e-05, 7.3573e-04, -3.9883e-05, -5.7117e-05,\n -8.9818e-04, 2.0292e-04, 3.5101e-04, 5.6522e-04, 6.3429e-24,\n 2.4532e-04, 4.0638e-44, -3.5586e-04, -2.7189e-05, 9.6598e-05,\n 6.0104e-04, -3.1714e-04, -7.4214e-04, 5.6052e-45, 5.7609e-04,\n 1.8008e-04, 5.6052e-45, 3.4872e-04, 5.7455e-05, 5.6052e-45,\n 1.2483e-04, 2.5625e-04, -7.7056e-05, 4.2459e-17, 5.6052e-45,\n 7.1291e-05, 1.9195e-04, 8.8623e-05, 3.2632e-04, 5.7179e-04,\n 1.4421e-36, 2.2577e-04, 5.6052e-45, -1.4540e-04, 1.0232e-04,\n -5.9842e-04, 2.2437e-04, 2.0138e-04, 5.6052e-45, -5.6896e-04,\n 1.9142e-05, 3.3909e-04, -1.7746e-04, -9.3793e-05, 5.6052e-45,\n -2.1542e-04, -7.4152e-05, 4.8533e-04, 1.1746e-35, 1.0299e-04,\n -2.4749e-07, -1.6343e-04, 2.9930e-04, 3.0663e-05, -1.5357e-04,\n 5.6052e-45, -6.0311e-04, 3.0591e-04, -4.3182e-04, 2.8622e-04,\n 1.4071e-04, 2.0759e-04, -4.5078e-04, 5.6052e-45, 2.6972e-04,\n -4.3455e-04, 5.6052e-45, -4.1985e-04, -3.4111e-04, -4.1653e-05,\n 1.3030e-04, 8.6061e-26, 6.4208e-37, -3.3072e-04, 3.9679e-05,\n -1.7891e-04, -6.5360e-04, 5.1090e-04, 5.7658e-06, -1.8020e-04,\n 3.3829e-04, 4.5271e-04, 2.4707e-04, -5.8133e-04, 9.9416e-05,\n 1.4698e-04, 3.8668e-04, -6.4772e-04, 3.7766e-10, 4.9994e-05,\n 3.4454e-04, 5.8866e-14, 5.6052e-45, 9.1848e-05, 5.6052e-45,\n -1.0124e-04, 2.3994e-04, -5.7281e-04, 7.4312e-04, 2.4088e-04,\n -3.8281e-04, 5.6052e-45, 2.6747e-04, -2.1009e-05, -6.8958e-05,\n -1.5362e-04, 2.3017e-04, -1.0281e-04, 1.7963e-04, 5.6052e-45,\n 3.5007e-04, 5.8326e-04, 1.7046e-04, -2.9815e-05, 6.5295e-04,\n -4.6619e-05, -2.1979e-05, -2.9047e-04, -1.3772e-05, 9.8082e-05,\n -5.7692e-04, -1.5735e-04, 3.1285e-04, -4.3402e-04, 2.6573e-04,\n 7.0799e-04, -2.9030e-05, 9.6078e-05, 3.8743e-04, 2.7148e-05,\n -3.5125e-05, 2.1006e-19], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([8.9023e-07, 1.7744e-06, 1.2960e-06, 7.2493e-06, 5.0839e-06, 2.4165e-06,\n 7.4304e-06, 5.9680e-06, 1.1181e-12, 6.1198e-06, 5.5501e-06, 8.3243e-06,\n 8.9331e-06, 4.9830e-06, 4.5383e-06, 7.3767e-06, 2.7825e-06, 4.7370e-06,\n 3.8371e-06, 5.9588e-06, 2.9622e-06, 7.1341e-06, 1.0010e-06, 5.4839e-06,\n 5.3015e-06, 2.4382e-06, 4.2292e-09, 3.2208e-06, 2.3082e-06, 1.1013e-05,\n 4.2668e-06, 2.4283e-06, 6.4181e-07, 6.4108e-06, 2.0149e-14, 3.0294e-06,\n 7.7789e-06, 5.7114e-06, 5.6801e-06, 2.2114e-06, 3.5531e-06, 3.1264e-06,\n 4.6371e-06, 7.7897e-06, 9.7668e-06, 1.7106e-06, 6.3277e-06, 4.8186e-06,\n 4.0222e-06, 6.1400e-06, 5.4792e-07, 4.5259e-06, 6.1674e-06, 9.5229e-07,\n 4.9660e-06, 3.9133e-06, 8.6725e-06, 1.0583e-06, 3.9756e-06, 6.5894e-06,\n 3.2071e-06, 6.4747e-06, 6.0777e-06, 2.5184e-06, 4.9809e-06, 1.6602e-05,\n 5.4597e-06, 3.0500e-06, 7.6316e-06, 8.0577e-06, 2.5482e-06, 3.5121e-06,\n 4.8365e-06, 7.1303e-08, 6.0312e-06, 3.2864e-06, 6.4080e-06, 7.7393e-07,\n 5.5724e-06, 5.4720e-06, 3.1241e-06, 4.3158e-06, 5.0975e-06, 2.1363e-07,\n 6.8405e-06, 8.8328e-06, 3.8569e-06, 5.3672e-06, 8.6771e-06, 3.5014e-06,\n 3.7373e-06, 4.3713e-06, 7.7432e-06, 1.7413e-06, 8.1497e-06, 6.3754e-06,\n 5.3745e-06, 2.7133e-06, 4.0149e-06, 2.8179e-06, 1.5936e-05, 1.3582e-06,\n 1.2504e-07, 1.8523e-06, 3.9739e-08, 7.6875e-06, 4.6750e-06, 5.4799e-07,\n 5.8461e-06, 1.7397e-06, 5.5119e-06, 4.0606e-06, 8.1141e-06, 1.1751e-06,\n 4.4141e-07, 5.9128e-06, 6.8008e-06, 3.5177e-06, 8.0178e-07, 6.4784e-06,\n 1.1050e-05, 4.9123e-06, 3.6863e-06, 6.3224e-06, 2.0362e-06, 5.5276e-06,\n 4.1222e-06, 5.6049e-06, 2.0286e-06, 7.9588e-06, 9.6242e-06, 2.1540e-06,\n 7.6653e-06, 1.2921e-06, 1.9229e-06, 1.0512e-05, 2.0391e-06, 7.0871e-06,\n 6.4242e-07, 4.4822e-06, 5.4775e-06, 3.4068e-06, 6.3590e-06, 5.9508e-06,\n 8.4894e-08, 5.9511e-09, 7.1839e-06, 9.9676e-07, 4.7857e-06, 9.2076e-07,\n 9.4083e-06, 1.4803e-09, 5.2512e-06, 2.0288e-06, 5.8334e-06, 2.2019e-06,\n 7.9528e-06, 5.5705e-06, 4.5680e-05, 1.4312e-06, 4.3952e-07, 3.9656e-06,\n 9.9997e-06, 1.7293e-06, 1.6759e-11, 5.5455e-06, 7.3221e-07, 1.8852e-06,\n 6.0381e-06, 2.4956e-08, 8.9588e-06, 2.9779e-06, 4.1668e-06, 2.1675e-06,\n 2.0048e-06, 4.5429e-06, 3.1498e-07, 2.3504e-06, 5.4727e-06, 3.3527e-13,\n 7.0390e-06, 2.4748e-08, 1.3717e-06, 9.2157e-06, 1.5739e-05, 4.8316e-06,\n 6.4120e-06, 1.8457e-06, 6.0902e-06, 7.4739e-06, 3.8622e-06, 5.2332e-06,\n 9.1813e-07, 2.0357e-07, 5.1930e-06, 2.5958e-06, 1.4208e-07, 2.9009e-10,\n 2.2286e-05, 2.7799e-06, 1.8696e-06, 1.0766e-05, 8.1377e-07, 5.8213e-06,\n 2.2368e-06, 2.8171e-06, 3.8818e-06, 2.0846e-06, 3.8266e-07, 2.1574e-06,\n 1.7559e-06, 1.0547e-05, 2.9040e-06, 4.8402e-06, 5.7714e-06, 2.1936e-06,\n 6.6899e-06, 3.0071e-06, 2.8016e-06, 2.1439e-06, 4.0406e-06, 7.2137e-06,\n 1.2745e-05, 6.2609e-06, 1.0729e-05, 8.1147e-06, 1.2265e-09, 7.5431e-06,\n 6.4678e-06, 2.4194e-06, 3.7165e-06, 5.2013e-06, 3.7558e-06, 3.1389e-07,\n 6.4879e-06, 1.6612e-06, 8.1212e-06, 1.8168e-06, 5.3885e-06, 2.6168e-05,\n 2.7212e-06, 7.9492e-06, 1.1455e-05, 2.1811e-06, 1.2117e-06, 1.9574e-05,\n 4.5299e-06, 1.0967e-05, 4.5330e-06, 2.0672e-11, 4.0822e-06, 4.4846e-06,\n 1.5841e-06, 2.8470e-10, 6.9469e-06, 4.8936e-07, 1.9306e-05, 1.2768e-06,\n 2.8919e-10, 2.5264e-07, 2.9356e-06, 4.7812e-06, 3.0002e-06, 2.8989e-06,\n 2.1878e-07, 1.9116e-06, 3.2765e-06, 2.7240e-06, 1.5631e-06, 1.6376e-06,\n 6.6212e-06, 3.9688e-07, 7.6230e-07, 2.5426e-06, 2.9815e-08, 1.8511e-06,\n 1.9974e-06, 1.0220e-05, 4.5147e-06, 6.1275e-06, 7.6419e-06, 2.0572e-06,\n 9.2683e-06, 9.0386e-06, 1.5036e-05, 4.7007e-06, 3.3621e-06, 3.5397e-07,\n 2.5231e-06, 1.0193e-05, 1.5159e-07, 5.6831e-06, 7.2432e-06, 4.2850e-07,\n 5.8999e-06, 2.2487e-06, 6.3736e-06, 2.3371e-06, 3.5362e-06, 1.6833e-06,\n 5.9077e-06, 5.1473e-06, 1.1540e-06, 3.5269e-06, 1.9043e-07, 4.3656e-06,\n 3.7544e-08, 6.2391e-06, 5.1527e-06, 2.9763e-06, 3.8553e-06, 8.3613e-06,\n 4.1302e-06, 3.0145e-06, 2.7841e-06, 6.5372e-06, 2.2019e-07, 5.8798e-06,\n 3.4843e-06, 1.4470e-05, 2.4837e-06, 3.2812e-06, 4.1956e-06, 6.8628e-06,\n 1.2142e-06, 6.1501e-06, 5.0907e-06, 5.6595e-06, 1.7040e-06, 1.7810e-06,\n 1.0690e-05, 4.9245e-07, 5.4348e-06, 2.7280e-07, 3.4715e-06, 2.4947e-05,\n 8.4912e-06, 7.8957e-06, 9.6027e-06, 5.4388e-15, 3.3345e-06, 6.0050e-06,\n 2.6890e-06, 4.0446e-06, 1.0098e-06, 1.1110e-05, 1.9738e-07, 2.1695e-06,\n 4.3991e-06, 6.2903e-06, 2.3251e-06, 1.2541e-06, 5.5113e-06, 7.9959e-06,\n 7.7002e-07, 1.6966e-06, 5.0587e-08, 4.5671e-06, 2.9671e-07, 2.4032e-05,\n 2.4916e-06, 2.8553e-06, 5.4506e-06, 1.6785e-06, 1.6212e-06, 5.1221e-08,\n 3.2998e-07, 6.1316e-06, 5.0617e-06, 4.7415e-06, 8.1555e-08, 3.3660e-06,\n 1.1409e-05, 4.5502e-06, 9.7607e-06, 1.7623e-06, 1.8284e-06, 1.9515e-06,\n 1.3793e-05, 8.2632e-06, 3.1754e-06, 2.3984e-06, 7.3874e-06, 3.4615e-06,\n 6.7901e-07, 8.5073e-06, 1.1066e-05, 3.9302e-06, 9.4402e-07, 4.7207e-06,\n 4.3610e-06, 4.5242e-06, 2.8473e-05, 6.3536e-07, 3.6787e-06, 5.8249e-06,\n 8.5817e-12, 8.9754e-06, 6.7104e-06, 1.2857e-06, 3.5687e-07, 4.8175e-06,\n 1.9365e-06, 2.4468e-06, 2.8710e-06, 4.9325e-06, 3.5051e-06, 1.8912e-06,\n 5.2613e-06, 6.2098e-06, 5.7345e-06, 2.9848e-06, 6.9064e-07, 1.8785e-06,\n 4.3891e-06, 4.7897e-06, 2.9269e-06, 5.5747e-06, 1.3157e-06, 2.6070e-06,\n 2.3302e-06, 3.7719e-06, 2.4770e-06, 6.3317e-06, 2.0779e-07, 3.9937e-06,\n 1.5516e-06, 1.6392e-06, 1.5451e-09, 4.9567e-06, 1.7723e-06, 6.1309e-06,\n 9.4310e-07, 8.7264e-06, 2.8117e-06, 2.1711e-06, 4.1254e-06, 2.3581e-06,\n 6.7453e-06, 7.1788e-06, 3.6638e-06, 7.0778e-07, 6.0260e-06, 2.0834e-06,\n 9.9543e-06, 3.0504e-06, 2.3729e-07, 1.2125e-05, 1.0354e-06, 4.3904e-06,\n 2.2013e-06, 5.2626e-07, 5.8561e-08, 7.0373e-06, 5.6097e-06, 3.3669e-06,\n 6.2529e-06, 3.6302e-06, 2.5334e-06, 4.0281e-06, 6.2250e-06, 2.2482e-06,\n 2.3486e-06, 3.1123e-06, 7.6519e-08, 4.7055e-07, 1.2895e-06, 4.3423e-06,\n 2.3225e-06, 1.1240e-05, 2.5603e-06, 1.8036e-06, 1.2078e-08, 4.7071e-06,\n 7.7726e-07, 6.5455e-07, 3.9216e-06, 9.7394e-06, 4.9638e-06, 6.6243e-06,\n 1.1323e-05, 1.6370e-06, 1.2484e-06, 5.5804e-06, 1.2004e-05, 6.6402e-07,\n 7.0839e-06, 1.4691e-06, 8.6176e-06, 1.0684e-16, 5.4302e-06, 3.1971e-06,\n 1.0176e-05, 5.3643e-06, 7.5796e-06, 5.2048e-06, 4.8349e-06, 4.3105e-06,\n 1.2163e-06, 1.3037e-07, 4.1997e-06, 5.2643e-06, 5.9049e-06, 3.0942e-06,\n 9.1483e-06, 3.5463e-06, 1.8780e-06, 5.0557e-06, 3.4150e-06, 9.6373e-06,\n 1.8265e-06, 1.7677e-08], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(2503.)",
27
+ "exp_avg": "tensor([[ 1.8758e-05, 1.0331e-04, 8.8835e-06, ..., 9.0118e-06,\n 4.9884e-05, 7.5172e-21],\n [ 7.5948e-06, 2.2908e-05, -8.6651e-06, ..., -8.8837e-05,\n -9.2989e-06, 4.6385e-20],\n [-4.2487e-05, 2.6887e-06, -3.6413e-06, ..., 2.2012e-05,\n 7.5281e-05, 3.0762e-21],\n ...,\n [-2.4396e-06, 6.4449e-07, 1.1507e-05, ..., 6.8297e-05,\n 5.3909e-05, -1.2225e-20],\n [-4.8099e-06, -2.0328e-05, -4.2798e-05, ..., 9.3006e-05,\n -8.0560e-05, 1.1873e-20],\n [-1.2371e-06, -7.0142e-05, 1.2929e-05, ..., 8.3806e-05,\n -4.7206e-05, 2.2624e-20]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[1.3399e-09, 3.9327e-08, 1.5430e-08, ..., 1.1100e-07, 1.2568e-08,\n 1.0126e-09],\n [3.2487e-09, 2.4316e-08, 2.1584e-08, ..., 1.9732e-07, 4.8595e-08,\n 2.7782e-13],\n [2.9556e-09, 2.6906e-08, 5.4845e-08, ..., 1.0103e-07, 6.3486e-08,\n 2.0727e-12],\n ...,\n [5.1040e-09, 2.2566e-08, 2.1483e-08, ..., 2.3727e-07, 2.8943e-08,\n 2.2159e-10],\n [3.0814e-09, 2.9157e-08, 2.2160e-08, ..., 2.0633e-07, 2.9696e-08,\n 1.0328e-12],\n [2.6909e-09, 4.8927e-08, 3.4780e-08, ..., 1.6507e-07, 2.5191e-08,\n 5.6595e-11]], device='cuda:0')"
29
+ }
30
+ },
31
+ "param_groups": [
32
+ {
33
+ "lr": 0.00975530705321762,
34
+ "name": "shared",
35
+ "betas": [
36
+ 0.9,
37
+ 0.999
38
+ ],
39
+ "eps": 1e-08,
40
+ "weight_decay": 1e-05,
41
+ "amsgrad": false,
42
+ "maximize": false,
43
+ "foreach": null,
44
+ "capturable": false,
45
+ "differentiable": false,
46
+ "fused": null,
47
+ "decoupled_weight_decay": true,
48
+ "initial_lr": 0.01,
49
+ "params": [
50
+ 0,
51
+ 1
52
+ ]
53
+ },
54
+ {
55
+ "lr": 0.00975530705321762,
56
+ "name": "scale_256",
57
+ "betas": [
58
+ 0.9,
59
+ 0.999
60
+ ],
61
+ "eps": 1e-08,
62
+ "weight_decay": 1e-05,
63
+ "amsgrad": false,
64
+ "maximize": false,
65
+ "foreach": null,
66
+ "capturable": false,
67
+ "differentiable": false,
68
+ "fused": null,
69
+ "decoupled_weight_decay": true,
70
+ "initial_lr": 0.01,
71
+ "params": [
72
+ 2,
73
+ 3,
74
+ 4
75
+ ]
76
+ },
77
+ {
78
+ "lr": 0.00975530705321762,
79
+ "name": "scale_512",
80
+ "betas": [
81
+ 0.9,
82
+ 0.999
83
+ ],
84
+ "eps": 1e-08,
85
+ "weight_decay": 1e-05,
86
+ "amsgrad": false,
87
+ "maximize": false,
88
+ "foreach": null,
89
+ "capturable": false,
90
+ "differentiable": false,
91
+ "fused": null,
92
+ "decoupled_weight_decay": true,
93
+ "initial_lr": 0.01,
94
+ "params": [
95
+ 5,
96
+ 6,
97
+ 7
98
+ ]
99
+ },
100
+ {
101
+ "lr": 0.00975530705321762,
102
+ "name": "scale_768",
103
+ "betas": [
104
+ 0.9,
105
+ 0.999
106
+ ],
107
+ "eps": 1e-08,
108
+ "weight_decay": 1e-05,
109
+ "amsgrad": false,
110
+ "maximize": false,
111
+ "foreach": null,
112
+ "capturable": false,
113
+ "differentiable": false,
114
+ "fused": null,
115
+ "decoupled_weight_decay": true,
116
+ "initial_lr": 0.01,
117
+ "params": [
118
+ 8,
119
+ 9,
120
+ 10
121
+ ]
122
+ },
123
+ {
124
+ "lr": 0.00975530705321762,
125
+ "name": "scale_1024",
126
+ "betas": [
127
+ 0.9,
128
+ 0.999
129
+ ],
130
+ "eps": 1e-08,
131
+ "weight_decay": 1e-05,
132
+ "amsgrad": false,
133
+ "maximize": false,
134
+ "foreach": null,
135
+ "capturable": false,
136
+ "differentiable": false,
137
+ "fused": null,
138
+ "decoupled_weight_decay": true,
139
+ "initial_lr": 0.01,
140
+ "params": [
141
+ 11,
142
+ 12,
143
+ 13
144
+ ]
145
+ },
146
+ {
147
+ "lr": 0.004877665762479736,
148
+ "name": "fusion",
149
+ "betas": [
150
+ 0.9,
151
+ 0.999
152
+ ],
153
+ "eps": 1e-08,
154
+ "weight_decay": 1e-05,
155
+ "amsgrad": false,
156
+ "maximize": false,
157
+ "foreach": null,
158
+ "capturable": false,
159
+ "differentiable": false,
160
+ "fused": null,
161
+ "decoupled_weight_decay": true,
162
+ "initial_lr": 0.005,
163
+ "params": [
164
+ 14,
165
+ 15,
166
+ 16,
167
+ 17,
168
+ 18,
169
+ 19,
170
+ 20,
171
+ 21,
172
+ 22,
173
+ 23,
174
+ 24,
175
+ 25,
176
+ 26,
177
+ 27,
178
+ 28,
179
+ 29,
180
+ 30,
181
+ 31,
182
+ 32,
183
+ 33,
184
+ 34,
185
+ 35,
186
+ 36,
187
+ 37,
188
+ 38,
189
+ 39,
190
+ 40,
191
+ 41,
192
+ 42,
193
+ 43,
194
+ 44,
195
+ 45,
196
+ 46,
197
+ 47,
198
+ 48,
199
+ 49,
200
+ 50,
201
+ 51,
202
+ 52,
203
+ 53,
204
+ 54,
205
+ 55,
206
+ 56,
207
+ 57,
208
+ 58,
209
+ 59,
210
+ 60,
211
+ 61,
212
+ 62,
213
+ 63,
214
+ 64
215
+ ]
216
+ }
217
+ ]
218
+ },
219
+ "scheduler_state_dict": {
220
+ "T_0": 10,
221
+ "T_i": 10,
222
+ "T_mult": 2,
223
+ "eta_min": 1e-06,
224
+ "T_cur": 1,
225
+ "base_lrs": [
226
+ 0.01,
227
+ 0.01,
228
+ 0.01,
229
+ 0.01,
230
+ 0.01,
231
+ 0.005
232
+ ],
233
+ "last_epoch": 1,
234
+ "_step_count": 0,
235
+ "_is_initial": false,
236
+ "_get_lr_called_within_step": false,
237
+ "_last_lr": [
238
+ 0.00975530705321762,
239
+ 0.00975530705321762,
240
+ 0.00975530705321762,
241
+ 0.00975530705321762,
242
+ 0.00975530705321762,
243
+ 0.004877665762479736
244
+ ]
245
+ },
246
+ "metrics": {
247
+ "best_val_acc": 69.481,
248
+ "best_epoch": 0,
249
+ "scale_accuracies": {
250
+ "256": 69.481
251
+ },
252
+ "training_history": {
253
+ "epochs": [
254
+ 1
255
+ ],
256
+ "train_loss": [
257
+ 3.078377773987499
258
+ ],
259
+ "train_acc": [
260
+ 58.631622575355124
261
+ ],
262
+ "val_acc": [
263
+ 69.481
264
+ ],
265
+ "scale_accs": {
266
+ "256": [
267
+ 69.481
268
+ ]
269
+ },
270
+ "lr": [
271
+ 0.00975530705321762
272
+ ]
273
+ }
274
+ },
275
+ "train_config": {
276
+ "name": "david_training",
277
+ "run_id": "20251012_194945",
278
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
279
+ "model_variant": [
280
+ "clip_vit_b16",
281
+ "clip_vit_laion_b32"
282
+ ],
283
+ "num_classes": 1000,
284
+ "preset": "balanced",
285
+ "custom_config_path": null,
286
+ "num_classes_override": null,
287
+ "use_belly_override": null,
288
+ "belly_expand_override": null,
289
+ "progressive_training_override": true,
290
+ "scale_warmup_epochs_override": {
291
+ "256": 0,
292
+ "512": 2,
293
+ "768": 5,
294
+ "1024": 8
295
+ },
296
+ "num_epochs": 10,
297
+ "batch_size": 1024,
298
+ "learning_rate": 0.01,
299
+ "weight_decay": 1e-05,
300
+ "warmup_epochs": 3,
301
+ "use_rose_loss": true,
302
+ "rose_initial_weight": 0.2,
303
+ "rose_max_weight": 0.8,
304
+ "rose_weight_schedule": "adaptive",
305
+ "use_cayley_loss": true,
306
+ "cayley_weight": 0.01,
307
+ "scale_loss_balance": null,
308
+ "use_mixed_precision": false,
309
+ "gradient_clip": 10.0,
310
+ "scheduler_type": "cosine_restarts",
311
+ "min_lr": 1e-06,
312
+ "freeze_strategy": "never",
313
+ "freeze_threshold": 90.0,
314
+ "unfreeze_on_plateau": true,
315
+ "patience": 10,
316
+ "track_gradients": true,
317
+ "gradient_scale_threshold": 1e-05,
318
+ "gradient_scale_multiplier": 10.0,
319
+ "log_interval": 50,
320
+ "val_interval": 1,
321
+ "save_interval": 5,
322
+ "log_fusion_weights": true,
323
+ "log_loss_components": true,
324
+ "save_format": "safetensors",
325
+ "hf_repo": "AbstractPhil/david-shared-space",
326
+ "upload_to_hub": true,
327
+ "base_dir": "./david_training",
328
+ "num_workers": 10,
329
+ "pin_memory": true,
330
+ "prefetch_factor": 4,
331
+ "persistent_workers": true
332
+ }
333
+ }