Update best_model_acc62.98_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc62.98_metadata.json
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(7508.)",
|
| 7 |
+
"exp_avg": "tensor([[-1.0006e-05, 8.9611e-05, 4.8096e-05, ..., 3.1640e-05,\n 6.7809e-05, -8.1573e-05],\n [ 6.3683e-06, -1.4064e-04, 9.6412e-05, ..., -2.4463e-05,\n -2.8972e-05, -9.8153e-06],\n [ 1.5403e-05, -5.2554e-05, -4.5801e-06, ..., 5.5249e-05,\n -1.2115e-05, 6.3212e-05],\n ...,\n [-4.0520e-05, 6.8393e-05, 3.0781e-05, ..., 8.7892e-05,\n -5.6296e-05, -4.3151e-07],\n [ 3.2869e-05, -2.2452e-05, -7.5354e-05, ..., -5.8524e-05,\n -6.5552e-06, 3.6017e-05],\n [ 3.8461e-05, 7.5431e-07, 2.7953e-05, ..., 3.5851e-05,\n -1.7146e-05, 7.7061e-07]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.2621e-07, 1.5833e-07, 6.3068e-08, ..., 5.5812e-08, 3.6062e-08,\n 2.9481e-08],\n [6.3832e-08, 2.4962e-07, 1.0529e-07, ..., 8.7392e-08, 3.4949e-08,\n 3.6791e-08],\n [3.1242e-08, 3.9715e-08, 2.9970e-08, ..., 9.2356e-08, 1.7077e-08,\n 2.2179e-08],\n ...,\n [5.0213e-08, 4.6903e-07, 6.3792e-08, ..., 1.0590e-07, 2.8996e-08,\n 4.4126e-08],\n [1.0481e-07, 2.0465e-07, 8.1408e-08, ..., 8.6755e-08, 4.0137e-08,\n 4.2237e-08],\n [1.3251e-08, 2.8169e-08, 1.0168e-08, ..., 1.1771e-08, 4.6745e-09,\n 5.5606e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(7508.)",
|
| 12 |
+
"exp_avg": "tensor([-3.2021e-03, -6.4976e-04, 1.0493e-03, 2.3825e-03, -2.8791e-03,\n 1.2193e-03, 5.1972e-04, 3.0670e-04, -3.4991e-03, -2.0894e-03,\n 3.2911e-04, -8.8797e-04, 1.6974e-03, -1.5534e-04, -1.3867e-03,\n -2.7693e-03, 8.9628e-05, 5.5643e-04, -3.8548e-04, 6.4130e-04,\n -1.1150e-03, 9.6633e-05, -4.8025e-04, 1.1814e-03, 9.6854e-04,\n -9.4230e-04, -1.6375e-05, -1.4206e-03, 1.2956e-03, 9.9537e-04,\n 2.2244e-03, -3.0252e-04, 1.6909e-03, -3.2585e-04, 9.7452e-04,\n -8.6069e-04, -6.0499e-04, 1.6785e-03, 4.7130e-04, -1.7485e-03,\n 4.2364e-03, 7.5361e-04, 1.9728e-05, 1.3732e-03, -1.5047e-03,\n -1.0235e-03, -1.3809e-03, 1.9162e-03, 9.6526e-04, -4.9323e-05,\n -2.5610e-03, -1.6251e-04, 8.8856e-04, 1.3270e-03, -3.8498e-03,\n 1.1418e-03, -4.5333e-04, -2.8104e-03, -6.9024e-04, 2.4530e-04,\n 4.7525e-03, -8.2446e-04, -3.0926e-03, 2.0921e-03, -2.5863e-03,\n -1.1181e-03, 3.4032e-04, 1.5976e-03, -6.0620e-04, -1.2781e-03,\n -4.3723e-04, 3.0581e-06, 1.1717e-03, -3.4415e-03, 3.8958e-03,\n 7.0204e-04, -2.5877e-03, -1.4142e-03, -7.8346e-05, -9.5042e-04,\n 7.7724e-04, -4.6990e-04, -1.1006e-04, 7.5659e-04, 4.7302e-04,\n 3.9989e-03, 1.8566e-03, -3.6844e-04, 1.4904e-05, 6.6841e-04,\n -1.6572e-03, -1.1109e-03, 1.0772e-03, 1.6274e-03, -6.8824e-04,\n -6.1859e-03, 2.0439e-03, -3.3062e-03, 6.3555e-04, -1.7712e-04,\n 8.6384e-04, 7.7223e-04, 6.6687e-03, -1.5896e-03, 8.4096e-04,\n 1.2566e-03, 1.3374e-03, -9.6808e-04, 2.5106e-04, 7.4423e-05,\n 9.6189e-04, 1.7431e-03, -2.1478e-04, -1.2516e-04, 2.1294e-03,\n -1.2232e-04, -9.4323e-04, -3.0133e-03, -6.5187e-04, 1.1982e-03,\n 1.6417e-04, -2.8845e-03, -5.2973e-04, -8.2798e-04, 1.0164e-03,\n -1.5531e-03, -1.8591e-05, -8.7308e-04, -2.2278e-03, -1.5658e-03,\n -2.7460e-03, 8.9038e-04, -1.5434e-04, -2.0610e-03, 1.3119e-04,\n -1.3839e-03, -2.8051e-04, 9.0406e-04, -4.4783e-03, -1.3383e-03,\n -5.3517e-04, 3.3850e-04, -1.9414e-03, -4.5883e-04, 1.4230e-03,\n -3.2012e-04, -3.9494e-04, -1.7062e-03, 2.1340e-03, 1.6004e-04,\n 1.1146e-03, -1.2455e-03, -2.0138e-03, -1.1474e-04, -1.4772e-03,\n 1.7095e-04, -1.5975e-03, 2.1529e-03, -9.1436e-04, 6.1176e-04,\n 4.5240e-04, -1.8007e-03, 7.8985e-04, 3.7707e-03, 3.4202e-04,\n -2.3898e-04, 1.5390e-03, -1.2966e-03, -4.8905e-04, 1.9970e-03,\n 6.5958e-04, 4.3355e-04, -6.9873e-04, 1.8902e-03, 2.8620e-03,\n 1.7698e-04, 5.0718e-04, 8.1350e-04, -3.0638e-04, -9.2497e-04,\n -1.7003e-03, 1.4184e-04, 7.3455e-04, -2.6103e-03, 4.1745e-05,\n -9.8781e-04, -9.5756e-05, 4.0090e-04, -1.3550e-03, -1.1241e-03,\n -2.5678e-04, -1.9970e-03, 1.4497e-03, -4.2563e-05, -1.4613e-03,\n -1.4367e-03, 9.0490e-04, 1.9912e-03, -7.5462e-04, 2.8085e-04,\n -1.7173e-03, -3.4478e-03, -2.2326e-04, -9.6230e-04, 2.4987e-03,\n -7.4879e-04, -2.8854e-05, -2.4345e-04, -2.0918e-04, -6.0909e-04,\n 3.1178e-05, -2.1487e-03, 3.2769e-04, -1.7756e-03, -2.9608e-03,\n -2.1021e-04, -7.4160e-05, 5.9304e-04, 1.8386e-03, -8.7311e-04,\n -1.2014e-03, 7.7804e-04, -3.9087e-03, 1.3389e-04, 1.7609e-03,\n -2.8564e-04, -4.8993e-04, -1.4059e-03, -1.6563e-03, 2.3454e-03,\n -2.2784e-03, 1.2909e-03, -1.8988e-03, -7.5436e-04, 1.1604e-04,\n -1.3134e-03, -2.4814e-03, -1.3809e-04, 8.7792e-05, 7.1451e-03,\n -1.0955e-04, 2.0815e-03, 1.3698e-04, -3.3580e-03, -3.9014e-04,\n 1.6592e-03, 1.0028e-03, -8.3625e-05, 1.2090e-03, -6.1506e-04,\n -7.4318e-04, 1.3876e-03, -9.4815e-04, -8.9609e-04, -1.4849e-03,\n 3.0994e-03, -7.7050e-04, 2.2481e-03, -2.4495e-03, 3.7398e-04,\n 7.8029e-04, 1.3056e-03, 5.4630e-04, -1.5081e-03, -1.4507e-03,\n -2.5658e-03, -1.2374e-03, 4.5378e-03, 5.4880e-04, 5.7499e-04,\n -4.8415e-04, 1.9874e-04, -5.2294e-05, -8.6815e-04, -4.9528e-04,\n -2.4084e-03, 2.2023e-03, -4.0962e-04, -1.2425e-03, -5.0014e-04,\n 1.7764e-03, 1.8147e-03, 2.0547e-03, -2.3494e-03, 7.4240e-04,\n 2.4695e-03, -1.1766e-03, 3.0374e-03, -3.5012e-03, 2.0403e-03,\n -1.0220e-03, 8.0015e-04, 2.4235e-03, -2.1361e-03, -2.1304e-03,\n 2.1872e-04, 9.0210e-05, 3.2078e-04, -4.0266e-03, -2.2373e-03,\n 4.4218e-04, 4.2250e-03, 2.0557e-03, 6.5418e-04, -6.9207e-04,\n 6.6856e-04, -1.4619e-03, -2.3914e-03, 2.4992e-03, -1.5623e-03,\n 1.1408e-03, -2.7603e-04, 1.4194e-03, -2.8379e-05, 3.1882e-05,\n -1.0740e-03, 9.3784e-05, 4.5711e-04, 3.6066e-03, -1.3312e-04,\n -2.3436e-03, 2.4293e-03, 9.8213e-04, 5.7334e-04, -2.5797e-04,\n -3.1475e-04, 9.0939e-04, -7.6064e-04, 6.2454e-04, -5.1265e-03,\n -8.8849e-04, 3.9485e-03, -1.9649e-03, 1.4165e-03, -5.2112e-04,\n -1.7595e-05, 1.1999e-03, -1.1809e-03, 1.2798e-03, -1.9554e-03,\n 1.0911e-04, -1.5519e-03, 2.5188e-03, 2.0733e-03, 7.5208e-04,\n 6.9277e-05, -8.9273e-04, -6.3286e-04, 1.7406e-06, -6.2588e-04,\n -2.7007e-03, -2.7706e-04, 1.8008e-03, -1.6660e-03, 1.2465e-03,\n -5.1811e-04, 1.6372e-03, 1.5848e-03, 1.8184e-04, -1.4678e-03,\n 2.0519e-03, 1.8119e-03, 4.7604e-03, 8.8879e-05, 2.9857e-03,\n 1.0157e-03, 2.9937e-03, -2.8235e-03, -2.1972e-03, 4.9976e-05,\n -7.3181e-04, -1.9916e-03, -9.8019e-04, -3.8147e-03, -2.5974e-03,\n 6.3363e-05, -4.1868e-04, 6.4063e-04, -2.0769e-03, 9.1410e-04,\n 4.1216e-04, 3.2839e-03, 2.1310e-03, 2.4164e-03, 2.1787e-04,\n 6.3502e-04, -1.6205e-03, -1.8538e-04, -1.3716e-03, -1.9387e-03,\n 1.4889e-03, -1.9498e-03, -1.3928e-03, 9.2485e-04, -1.2692e-03,\n -3.8860e-03, 2.8588e-03, 2.9490e-03, 1.6057e-03, 4.0465e-03,\n 2.6720e-03, 1.4981e-03, -1.6063e-04, -1.4032e-03, 3.0899e-03,\n 1.5183e-03, -3.0627e-03, 2.4154e-03, -1.7805e-03, -5.7423e-04,\n -8.1053e-04, 3.5515e-03, 4.2605e-03, 1.9282e-03, -1.2253e-03,\n 9.5002e-04, 1.0979e-03, 4.9698e-04, -1.8893e-03, 4.8157e-04,\n 1.5011e-03, -2.0408e-03, -3.4266e-03, 2.1528e-03, -7.9270e-04,\n -2.2584e-03, 2.0510e-03, -1.0200e-03, -1.4577e-03, 9.2523e-04,\n 6.4176e-04, 5.1272e-04, -1.2651e-03, 1.8466e-03, -2.9200e-03,\n -2.4458e-03, 4.5169e-04, 1.2000e-03, -8.1722e-04, -3.0881e-03,\n -1.5456e-03, -3.6707e-03, 4.3510e-04, -6.1535e-03, -1.9233e-03,\n 1.4689e-04, -7.8465e-04, -1.0847e-04, 2.0920e-03, 6.8740e-04,\n -2.6611e-07, -8.1174e-04, 9.4415e-04, -3.4551e-03, 1.0240e-03,\n 2.1412e-03, -1.9274e-03, 4.7412e-04, 1.3534e-04, -2.8342e-03,\n 9.7163e-04, 3.1255e-04, 3.3384e-03, -1.1208e-03, 1.7363e-04,\n 1.4156e-03, -4.7666e-04, -1.8276e-04, 3.2631e-04, 8.5531e-04,\n 1.8560e-04, 3.6754e-03, 3.0162e-03, 1.4613e-03, -7.0093e-04,\n 2.0854e-03, -9.3949e-04, -7.7513e-04, -2.5671e-03, -6.5117e-04,\n 1.6228e-03, 5.4560e-04, 2.2830e-03, 1.1722e-03, 4.2031e-04,\n -2.5319e-03, 2.0454e-03, 1.0742e-03, 5.8247e-04, 2.8471e-03,\n 2.3883e-03, 4.4321e-04, -2.3636e-03, 1.8581e-04, 7.3306e-04,\n -2.4873e-03, 2.4002e-03, -6.0757e-04, 2.1329e-03, 1.3264e-03,\n 1.3674e-03, -5.0020e-04, -2.3408e-04, 2.4754e-03, -1.9107e-04,\n 1.1209e-03, -1.0619e-04, 1.3501e-03, 1.4379e-03, 1.0509e-03,\n 9.1948e-04, -2.4099e-05], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([4.9499e-05, 5.6864e-05, 3.9149e-05, 5.2089e-05, 5.5906e-05, 1.1017e-04,\n 4.4284e-05, 8.3175e-05, 4.9781e-05, 6.7735e-05, 7.8938e-05, 8.7722e-05,\n 1.0677e-04, 2.7441e-05, 8.2297e-05, 7.6714e-05, 5.4205e-05, 6.3467e-05,\n 5.6075e-05, 8.2122e-05, 5.0399e-05, 4.9572e-05, 4.5937e-05, 7.9910e-05,\n 1.1361e-04, 5.9238e-05, 4.4362e-05, 7.0264e-05, 6.7655e-05, 6.2966e-05,\n 2.2886e-05, 3.9275e-05, 6.2392e-05, 5.4806e-05, 2.7939e-05, 7.4218e-05,\n 4.7223e-05, 5.2100e-05, 4.5416e-05, 2.9932e-05, 4.8219e-05, 3.9230e-05,\n 7.3277e-06, 9.2108e-05, 1.4661e-04, 5.5399e-05, 5.7756e-05, 3.6996e-05,\n 6.1953e-05, 1.1358e-04, 1.1641e-04, 3.5998e-05, 5.6211e-05, 6.1173e-05,\n 7.1345e-05, 4.6153e-05, 4.8446e-05, 1.1894e-04, 4.1213e-05, 4.7725e-05,\n 1.3795e-04, 3.6363e-05, 4.9610e-05, 3.9910e-05, 4.8724e-05, 4.1015e-05,\n 4.3858e-05, 4.3911e-05, 7.5438e-05, 4.6273e-05, 6.2408e-05, 4.4092e-05,\n 3.9222e-05, 4.3645e-05, 5.2487e-05, 4.9310e-05, 4.9184e-05, 6.0103e-05,\n 8.0264e-05, 5.1608e-05, 8.3619e-05, 4.1062e-05, 5.2639e-05, 2.7849e-05,\n 5.2235e-05, 5.0461e-05, 7.2979e-05, 4.3165e-05, 3.9729e-05, 5.5630e-05,\n 4.0761e-05, 8.0412e-05, 5.7283e-05, 5.4534e-05, 4.1080e-05, 7.5420e-05,\n 7.1820e-05, 6.3242e-05, 5.5311e-05, 2.6963e-05, 6.8063e-05, 9.0814e-05,\n 8.2647e-05, 5.2127e-05, 7.0334e-05, 7.3800e-05, 6.1159e-05, 1.2478e-04,\n 4.2876e-05, 5.9578e-05, 7.0411e-05, 6.7368e-05, 5.1031e-05, 7.6304e-05,\n 6.2686e-05, 4.5927e-05, 9.6882e-05, 5.7748e-05, 4.8682e-05, 4.5707e-05,\n 2.4960e-05, 9.1879e-05, 6.3218e-05, 8.7424e-05, 4.5690e-05, 5.9908e-05,\n 5.1882e-05, 7.6737e-05, 5.5463e-05, 9.6342e-05, 6.5871e-05, 4.8623e-05,\n 5.7121e-05, 5.1503e-05, 1.4811e-04, 5.0420e-05, 3.8953e-05, 6.6302e-05,\n 5.4886e-05, 6.1432e-05, 2.2375e-05, 4.4010e-05, 6.3147e-05, 1.5070e-04,\n 6.5425e-05, 4.8529e-05, 3.5443e-05, 7.3582e-05, 6.6391e-05, 1.0030e-04,\n 2.6185e-05, 7.2805e-05, 3.1162e-05, 1.0245e-04, 3.8325e-05, 5.6393e-05,\n 8.5379e-05, 7.0225e-05, 4.1879e-05, 4.5795e-05, 5.3571e-05, 2.4298e-05,\n 4.7649e-05, 7.8332e-05, 2.2917e-05, 8.3228e-05, 5.4046e-05, 3.0458e-05,\n 2.5555e-05, 5.3448e-05, 4.7720e-05, 6.1856e-05, 6.6610e-05, 5.2697e-05,\n 5.4515e-05, 3.6465e-05, 1.4190e-04, 4.0139e-05, 3.1160e-05, 5.9755e-05,\n 3.9570e-05, 4.4065e-05, 1.1227e-04, 5.1575e-05, 4.8913e-05, 8.4664e-05,\n 1.2319e-04, 2.8473e-05, 3.3337e-05, 2.3574e-05, 5.0795e-05, 7.5773e-05,\n 1.1535e-04, 7.4148e-05, 3.1334e-05, 2.7795e-05, 4.2494e-05, 5.0241e-05,\n 2.5137e-05, 2.6229e-05, 3.5526e-05, 6.4942e-05, 4.9810e-05, 3.2047e-05,\n 5.9769e-05, 5.7301e-05, 4.5141e-05, 3.2200e-05, 8.1372e-05, 3.6289e-05,\n 4.3090e-05, 7.0014e-05, 5.8943e-05, 3.6262e-05, 9.1782e-05, 3.0746e-05,\n 3.9565e-05, 3.4297e-05, 7.6339e-05, 6.4589e-05, 3.4490e-05, 2.7904e-05,\n 5.0302e-05, 2.4226e-05, 4.0685e-05, 6.4606e-05, 3.8075e-05, 3.0485e-05,\n 2.9782e-05, 6.6466e-05, 5.0324e-05, 3.5285e-05, 6.3082e-05, 5.0674e-05,\n 4.1642e-05, 4.2598e-05, 5.5363e-05, 6.8045e-06, 5.5677e-05, 1.6772e-04,\n 4.4142e-05, 8.5701e-05, 3.7218e-05, 6.3777e-05, 1.0717e-04, 6.0274e-05,\n 6.8055e-05, 4.2455e-05, 7.5220e-05, 4.3038e-05, 7.3249e-05, 4.0047e-05,\n 4.8858e-05, 6.1415e-05, 4.4868e-05, 4.5031e-05, 3.8588e-05, 6.5826e-05,\n 8.5212e-05, 4.0369e-05, 3.8836e-05, 8.0338e-05, 4.4824e-05, 7.1503e-05,\n 4.4401e-05, 3.4756e-05, 3.4303e-05, 6.1995e-05, 3.9163e-05, 6.1249e-05,\n 4.4603e-05, 6.3318e-05, 4.6076e-05, 3.9994e-05, 3.8118e-05, 5.8255e-05,\n 3.4026e-05, 8.8942e-05, 5.2135e-05, 3.0804e-05, 3.2490e-05, 3.8241e-05,\n 6.1625e-05, 6.6864e-05, 3.1849e-05, 7.7909e-05, 4.9890e-05, 5.1610e-05,\n 8.1449e-05, 7.0343e-05, 4.0832e-05, 5.0852e-05, 9.2429e-05, 4.0952e-05,\n 6.8908e-05, 3.3439e-05, 3.9584e-05, 7.2905e-05, 1.5509e-04, 5.1897e-05,\n 3.8484e-05, 4.6425e-05, 5.0535e-05, 4.9561e-05, 9.0346e-05, 2.9088e-05,\n 5.0619e-05, 5.6162e-05, 8.6162e-05, 5.4331e-05, 6.9725e-05, 6.4454e-05,\n 3.4492e-05, 3.6877e-05, 8.0798e-05, 4.8538e-05, 4.0651e-05, 2.5131e-05,\n 4.6343e-05, 6.8888e-05, 1.1580e-04, 6.1251e-05, 5.5178e-05, 5.9142e-05,\n 8.2413e-05, 1.1740e-04, 3.7691e-05, 7.4199e-05, 5.9418e-05, 5.3108e-05,\n 4.8422e-05, 7.7975e-05, 2.8234e-05, 3.2243e-05, 4.2861e-05, 5.0660e-05,\n 7.6935e-05, 3.3014e-05, 6.6951e-05, 6.1879e-05, 4.9192e-05, 5.1309e-05,\n 5.4598e-05, 4.3479e-05, 7.2768e-05, 8.2390e-05, 5.7153e-05, 4.0295e-05,\n 7.0012e-05, 5.0610e-05, 4.1231e-05, 5.0135e-05, 4.9657e-05, 3.6900e-05,\n 5.3021e-05, 3.9124e-05, 4.2672e-05, 4.6893e-05, 3.9181e-05, 3.1519e-05,\n 3.1974e-05, 9.9703e-05, 1.5556e-04, 4.2670e-05, 6.0797e-05, 4.1593e-05,\n 5.1852e-05, 6.8518e-05, 3.4679e-05, 4.0654e-05, 2.9707e-05, 5.1856e-05,\n 3.0166e-05, 5.5346e-05, 9.7831e-05, 4.5797e-05, 2.9861e-05, 2.5837e-05,\n 4.4924e-05, 4.4117e-05, 4.6576e-05, 7.1074e-05, 5.8259e-05, 4.9922e-05,\n 5.8392e-05, 9.0633e-05, 4.4261e-05, 8.2747e-05, 5.3621e-05, 6.2198e-05,\n 6.4563e-05, 5.2017e-05, 4.3062e-05, 5.4315e-05, 6.1732e-05, 4.5052e-05,\n 3.0755e-05, 8.0364e-05, 4.6120e-05, 9.0200e-05, 5.4001e-05, 3.5938e-05,\n 7.1377e-05, 5.3592e-05, 6.0035e-05, 5.0414e-05, 1.0535e-04, 3.2161e-05,\n 3.8298e-05, 6.5551e-05, 7.0205e-05, 6.2438e-05, 6.5661e-05, 5.8308e-05,\n 4.1550e-05, 3.0968e-05, 8.5977e-05, 3.4348e-05, 8.3466e-05, 9.6566e-05,\n 5.4493e-05, 3.6316e-05, 7.5608e-05, 6.5167e-05, 4.5817e-05, 5.0416e-05,\n 9.5530e-05, 5.3349e-05, 5.1474e-05, 5.2912e-05, 3.5081e-05, 3.8215e-05,\n 7.8093e-05, 6.6236e-05, 4.4276e-05, 5.1716e-05, 8.0847e-05, 5.7600e-05,\n 4.1055e-05, 6.1466e-05, 7.9553e-05, 8.4200e-05, 3.8358e-05, 1.9135e-04,\n 5.8450e-05, 5.1064e-05, 4.8463e-05, 5.1694e-05, 4.2670e-05, 5.9912e-05,\n 5.8210e-05, 6.5787e-05, 5.3969e-05, 1.0162e-04, 4.9574e-05, 2.6957e-05,\n 1.1633e-04, 5.5525e-05, 2.1995e-05, 8.0386e-05, 6.6373e-05, 3.6863e-05,\n 4.7769e-05, 3.8934e-05, 1.0252e-04, 4.5359e-05, 4.2177e-05, 6.4791e-05,\n 8.0882e-05, 7.5445e-05, 7.1651e-05, 9.9635e-05, 7.1775e-05, 6.4708e-05,\n 6.4145e-05, 6.1792e-05, 5.7603e-05, 4.7579e-05, 5.3540e-05, 4.1023e-05,\n 3.0013e-05, 3.8210e-05, 7.5209e-05, 5.1413e-05, 4.5437e-05, 5.5891e-05,\n 9.2055e-05, 4.5288e-05, 7.4671e-05, 5.1298e-05, 6.1130e-05, 6.0716e-05,\n 7.7813e-05, 3.5422e-05, 3.0868e-05, 4.8476e-05, 5.5428e-05, 3.0677e-05,\n 4.2631e-05, 3.9798e-05, 6.0358e-05, 3.8582e-05, 5.2908e-05, 4.7937e-05,\n 5.0489e-05, 2.5334e-05, 4.9105e-05, 5.7353e-05, 4.9273e-05, 5.6089e-05,\n 5.9217e-05, 7.7409e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(7508.)",
|
| 17 |
+
"exp_avg": "tensor([-7.2981e-03, -1.1102e-03, 1.3798e-03, 3.8119e-03, -2.4130e-03,\n 1.6875e-03, 6.2445e-04, 8.3649e-04, -5.9614e-03, -5.8383e-03,\n 1.2181e-03, -1.4783e-03, 2.3702e-03, -1.0703e-03, -2.5011e-03,\n -4.9101e-03, 3.2855e-04, 2.5227e-03, -9.2501e-04, 1.0479e-03,\n -3.2224e-03, 4.7224e-04, -6.4093e-04, 1.6850e-03, 4.6915e-04,\n -1.2076e-03, 5.6023e-04, -2.6507e-03, 2.8298e-03, 8.2285e-04,\n 5.2184e-03, -1.0589e-03, 2.8021e-03, -1.1975e-04, 1.9982e-03,\n -8.0132e-04, -8.0496e-04, 2.8595e-03, 4.8401e-04, -3.0225e-03,\n 5.0952e-03, 6.1907e-04, 5.6052e-45, 1.8351e-03, -1.8466e-03,\n -1.4120e-03, -2.6515e-03, 2.0198e-03, 1.7852e-03, -5.2716e-04,\n -2.6018e-03, -5.0145e-04, 1.0129e-03, 1.1324e-03, -5.9105e-03,\n 1.7915e-03, -2.2420e-03, -2.3845e-03, -1.0235e-03, 3.2551e-04,\n 5.4538e-03, -2.1547e-03, -6.0834e-03, 3.3465e-03, -3.5082e-03,\n -2.5396e-03, -6.3244e-04, 2.0932e-03, -1.5100e-03, -1.9807e-03,\n -1.1788e-03, -4.6469e-04, 3.5072e-03, -8.9463e-03, 5.2162e-03,\n 2.3465e-03, -5.4861e-03, -9.4874e-04, -6.9190e-04, -1.6497e-03,\n 1.5750e-03, -1.4978e-03, -3.0368e-05, 1.3853e-03, 1.0475e-03,\n 1.1100e-02, 2.8609e-03, -7.3497e-04, 4.9310e-04, 1.3536e-03,\n -3.7393e-03, -1.6289e-03, 1.8420e-04, 2.8033e-03, -1.2103e-03,\n -9.3427e-03, 3.2095e-03, -5.9772e-03, 4.9265e-05, 8.9189e-04,\n 4.2504e-05, 1.5648e-03, 1.0849e-02, -2.6532e-03, 6.9988e-04,\n 1.3496e-03, 1.2376e-03, -1.7245e-03, 4.0290e-04, -8.2283e-05,\n 1.5907e-03, 2.9368e-03, -1.0508e-03, 2.6068e-04, 2.7303e-03,\n -9.7209e-04, -7.0850e-04, -4.5921e-03, -1.4051e-03, 3.2907e-03,\n 9.6546e-04, -5.1007e-03, -1.7837e-03, -6.3548e-04, 3.2353e-03,\n -3.4762e-03, -1.0179e-04, -2.0025e-03, -6.7249e-03, -3.2735e-03,\n -4.5232e-03, 1.2964e-03, -1.1098e-03, -3.7978e-03, 1.1180e-03,\n -2.0301e-03, -1.0162e-03, 9.4558e-04, -5.1926e-03, -1.1732e-03,\n -8.9613e-04, 6.5995e-04, -2.9241e-03, -3.0756e-04, 2.4450e-03,\n -1.5553e-04, -5.7315e-04, -4.3778e-03, 3.0174e-03, 1.6328e-04,\n 1.6455e-03, -3.4241e-03, -3.4895e-03, -1.1148e-03, -2.5092e-03,\n 4.3342e-04, -2.1248e-03, 4.8565e-03, -2.4742e-03, 1.3729e-03,\n 1.2284e-03, -3.8211e-03, 1.6888e-03, 3.3718e-03, 9.0749e-04,\n 3.2585e-04, 3.3720e-03, -3.2132e-03, -1.1308e-03, 3.8702e-03,\n 4.7193e-04, 8.5910e-04, -1.5840e-03, 4.5024e-03, 5.0391e-03,\n 2.9397e-04, -6.5730e-04, 1.4784e-03, -5.6420e-04, -4.5877e-04,\n -2.5514e-03, -1.5291e-05, -2.5003e-05, -4.9944e-03, -4.0037e-04,\n -8.2129e-04, -3.8897e-05, 1.2663e-03, -2.0887e-03, -1.6047e-03,\n -8.8298e-04, -3.6423e-03, 1.2393e-03, -1.4102e-04, -3.2061e-03,\n -1.7185e-03, 1.4234e-03, 2.6049e-03, -2.1016e-03, -3.2312e-04,\n -3.0161e-03, -4.7939e-03, 2.5412e-04, -1.6247e-03, 1.4853e-03,\n -1.5763e-03, 1.5622e-04, -7.6949e-04, -1.0834e-03, -1.2343e-03,\n 9.1886e-04, -4.1307e-03, 2.5437e-04, -1.8866e-03, -2.9929e-03,\n 8.9232e-05, -4.8040e-04, 3.3262e-03, 3.8625e-03, -2.0094e-03,\n -1.7877e-03, 1.9248e-03, -7.4994e-03, -1.3315e-04, 3.0579e-03,\n -1.6220e-03, -7.7123e-05, -1.8244e-03, -1.9712e-03, 3.8324e-03,\n -3.0310e-03, 1.6301e-03, -5.6287e-03, 1.9858e-04, -4.2054e-04,\n -1.5987e-03, -4.4438e-03, 5.6052e-45, 5.4937e-04, 8.9150e-03,\n -2.4007e-04, 2.9615e-03, 4.8968e-04, -4.4793e-03, 4.0415e-04,\n 3.7039e-03, 1.5256e-03, 2.0479e-04, 1.5182e-03, -1.2169e-03,\n -1.6310e-03, 2.3983e-03, -1.8787e-03, -1.6608e-03, -2.9456e-03,\n 4.8259e-03, -2.4801e-03, 2.5733e-03, -2.3781e-03, 5.8512e-04,\n 2.9813e-03, 1.8081e-03, 8.6614e-04, -1.9832e-03, -3.2228e-03,\n -6.2811e-03, -4.4372e-03, 7.8735e-03, 5.3399e-04, 8.4621e-04,\n -1.6034e-03, 1.1378e-03, 5.8355e-04, -2.1236e-03, 4.2886e-04,\n -3.3481e-03, 4.2615e-03, -1.2846e-03, -1.2530e-03, -1.5096e-03,\n 4.2580e-03, 1.5773e-03, 2.7568e-03, -2.7409e-03, 1.6349e-03,\n 4.5780e-03, -2.5193e-03, 4.1640e-03, -5.4722e-03, 3.6958e-03,\n -1.5132e-03, 7.7310e-04, 2.9769e-03, -2.1579e-03, -3.7326e-03,\n 1.5859e-04, 4.7199e-04, 3.1558e-04, -5.0433e-03, -5.4083e-03,\n 9.3035e-04, 5.9616e-03, 4.5397e-03, 9.8411e-04, -7.4862e-04,\n 1.5944e-03, -1.4019e-03, -4.4024e-03, 6.8347e-03, -3.1067e-03,\n 4.1343e-03, -3.3455e-04, 2.4358e-03, -4.8529e-04, 1.8212e-04,\n -2.9936e-03, -6.5443e-04, 2.8164e-04, 5.7821e-03, -3.9637e-05,\n -3.2446e-03, 5.5378e-03, 5.2044e-04, 1.6615e-03, -9.8868e-04,\n 3.4330e-04, 3.4387e-03, -2.3496e-03, 4.0091e-04, -8.4975e-03,\n -1.7510e-03, 5.5311e-03, -2.3912e-03, 2.0076e-03, -9.1677e-04,\n -5.8869e-04, 1.0058e-03, -1.8544e-03, 1.6078e-03, -3.5265e-03,\n 1.3138e-03, -1.7886e-03, 3.3037e-03, 5.1391e-03, 7.9423e-04,\n 1.7202e-04, -9.0568e-04, -1.9360e-04, -2.8468e-05, -1.9127e-04,\n -4.8773e-03, -1.0339e-03, 2.2365e-03, -2.4092e-03, 2.5231e-03,\n -6.8072e-04, 4.9027e-03, 2.5084e-03, 1.6065e-04, -2.9569e-03,\n 3.5310e-03, 3.9619e-03, 6.6259e-03, 4.3870e-04, 4.6235e-03,\n 1.5937e-03, 2.2502e-03, -6.0555e-03, -3.4662e-03, -1.3998e-04,\n -1.3972e-03, -3.0804e-03, -1.5196e-03, -6.2075e-03, -3.1453e-03,\n -1.8058e-04, -1.1327e-04, 5.6494e-04, -4.5539e-03, 1.5767e-03,\n 9.7985e-04, 5.0001e-03, 3.4494e-03, 4.2432e-03, 1.1254e-03,\n 9.5517e-04, -1.5909e-03, -3.1181e-04, -1.6892e-03, -2.7166e-03,\n 1.9576e-03, -1.7388e-03, -2.0744e-03, 2.4934e-03, -1.9999e-03,\n -4.9002e-03, 5.1158e-03, 7.0728e-03, 1.7056e-03, 7.4018e-03,\n 5.1410e-03, 3.6157e-03, -7.9890e-04, -2.3246e-03, 5.0773e-03,\n 2.1848e-03, -7.3652e-03, 3.1953e-03, -2.9039e-03, -2.8452e-04,\n -1.0793e-03, 2.0236e-03, 9.4128e-03, 3.5058e-03, -2.8462e-03,\n 3.9328e-03, 2.0308e-03, 1.2508e-03, -2.8963e-03, 7.1258e-04,\n 2.1189e-03, -3.7781e-03, -4.5818e-03, 2.9577e-03, -3.6179e-03,\n -5.2332e-03, 1.4769e-03, -1.5476e-03, -3.2472e-03, 1.9353e-04,\n 2.1201e-03, 5.7332e-04, -1.1874e-03, 3.7416e-03, -9.3027e-03,\n -3.7772e-03, 1.0121e-03, 2.6511e-03, -1.1040e-03, -2.7728e-03,\n -2.0410e-03, -4.6899e-03, 1.9341e-03, -1.0561e-02, -2.1854e-03,\n 1.9830e-05, -1.6842e-04, 2.6151e-04, 4.5484e-03, 5.1988e-04,\n 3.7397e-04, -9.6792e-04, 1.7775e-03, -6.7443e-03, 2.5494e-03,\n 3.8317e-03, -3.2353e-03, 8.3578e-04, 1.9354e-04, -3.7453e-03,\n 2.0407e-03, 8.3056e-04, 7.2682e-03, -1.7617e-03, 1.1688e-03,\n 1.9221e-03, -9.5882e-04, -2.2419e-04, 3.6254e-04, 4.8119e-04,\n -5.9398e-04, 5.0185e-03, 4.0869e-03, 1.9381e-03, -2.0034e-03,\n 3.8140e-03, -2.3063e-03, -9.7321e-04, -7.1540e-03, -1.1778e-03,\n 2.2942e-03, 3.1547e-03, 3.3944e-03, 1.0918e-03, 1.3793e-03,\n -1.7782e-03, 2.4860e-03, 2.0293e-03, 5.1454e-04, 3.4704e-03,\n 2.8059e-03, 3.2026e-04, -2.4680e-03, 1.5532e-03, 7.8475e-04,\n -6.8224e-03, 4.7682e-03, -1.3702e-03, 3.5019e-03, 2.8156e-03,\n 2.6630e-03, -1.8433e-03, -9.1589e-04, 4.3735e-03, 7.3313e-04,\n 2.2549e-03, -3.4056e-05, 2.1202e-03, 6.7953e-04, 1.5642e-03,\n 8.0735e-04, 5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.0813e-04, 1.0986e-04, 7.2846e-05, 9.6048e-05, 5.4658e-05, 1.7691e-04,\n 1.1087e-04, 1.3870e-04, 1.5036e-04, 1.9320e-04, 3.2070e-04, 2.2160e-04,\n 1.8507e-04, 1.1633e-04, 1.3375e-04, 1.3296e-04, 9.0565e-05, 1.6385e-04,\n 1.0640e-04, 2.5405e-04, 2.2863e-04, 8.6896e-05, 9.0459e-05, 1.4104e-04,\n 3.2191e-04, 1.2983e-04, 1.4088e-04, 2.1380e-04, 1.9502e-04, 1.2496e-04,\n 1.2284e-04, 2.4366e-04, 1.4599e-04, 1.1077e-04, 7.5393e-05, 7.2187e-05,\n 4.5717e-05, 9.5488e-05, 5.9480e-05, 1.1714e-04, 5.2793e-05, 6.6947e-05,\n 1.0591e-08, 1.4739e-04, 2.8944e-04, 1.1772e-04, 1.0790e-04, 3.1131e-05,\n 5.8072e-04, 1.4912e-04, 2.1812e-04, 4.8650e-05, 8.6989e-05, 1.5833e-04,\n 1.3008e-04, 1.0795e-04, 2.4187e-04, 7.4573e-05, 1.0850e-04, 4.3769e-05,\n 1.5330e-04, 1.2505e-04, 1.8357e-04, 8.6603e-05, 8.0309e-05, 1.1957e-04,\n 1.3460e-04, 8.0649e-05, 1.1547e-04, 6.1860e-05, 1.9087e-04, 1.1288e-04,\n 2.3706e-04, 1.6339e-04, 7.7302e-05, 1.6920e-04, 1.9208e-04, 9.4010e-05,\n 1.4437e-04, 1.5589e-04, 1.8420e-04, 1.0347e-04, 8.0173e-05, 5.4149e-05,\n 1.2300e-04, 3.3341e-04, 1.6604e-04, 1.8526e-04, 5.5282e-05, 5.9375e-04,\n 1.6425e-04, 1.2225e-04, 1.2977e-04, 1.4274e-04, 3.1894e-04, 1.4748e-04,\n 1.6742e-04, 1.9385e-04, 1.1866e-04, 4.3165e-04, 1.4632e-04, 2.0477e-04,\n 1.4403e-04, 1.1314e-04, 1.5216e-04, 2.3360e-04, 1.0608e-04, 2.9529e-04,\n 9.9264e-05, 1.8171e-04, 1.8129e-04, 1.9357e-04, 1.7219e-04, 1.0501e-04,\n 1.1690e-04, 8.1984e-05, 1.1083e-04, 1.6084e-04, 9.6094e-05, 2.1098e-04,\n 1.0613e-04, 1.9411e-04, 3.8140e-04, 2.5989e-04, 3.3346e-04, 6.0359e-04,\n 1.3042e-04, 1.3333e-04, 3.8781e-04, 3.4439e-04, 1.0451e-04, 1.3582e-04,\n 2.1752e-04, 8.6075e-05, 1.9951e-04, 1.0587e-04, 1.8554e-04, 2.1836e-04,\n 8.7244e-05, 6.2890e-05, 9.1907e-05, 1.2046e-04, 1.6134e-04, 1.2735e-04,\n 1.3224e-04, 1.1209e-04, 5.6008e-05, 2.2697e-04, 1.5213e-04, 1.4936e-04,\n 5.1069e-05, 2.5534e-04, 8.5633e-05, 1.3431e-04, 6.6777e-05, 5.3758e-05,\n 2.4554e-04, 2.4531e-04, 2.8272e-04, 1.3080e-04, 2.3080e-04, 8.0543e-05,\n 3.2030e-04, 7.6896e-05, 9.8271e-05, 2.9912e-04, 1.5873e-04, 7.6854e-05,\n 1.1669e-04, 1.0714e-04, 5.1998e-05, 1.3455e-04, 1.3853e-04, 2.0367e-04,\n 1.8587e-04, 7.6036e-05, 1.5449e-04, 8.0170e-05, 8.5134e-05, 1.1420e-04,\n 9.9588e-05, 1.5110e-04, 3.4963e-04, 1.3931e-04, 9.7413e-04, 1.3918e-04,\n 2.7817e-04, 1.3288e-04, 1.1280e-04, 5.3370e-05, 8.3421e-05, 2.0840e-04,\n 1.1163e-04, 5.6389e-05, 1.4677e-04, 7.5827e-05, 1.3826e-04, 9.3019e-05,\n 4.9297e-05, 6.6870e-05, 9.8749e-05, 9.3267e-05, 1.0593e-04, 6.2330e-05,\n 2.8890e-05, 1.0765e-04, 1.1492e-04, 1.0584e-04, 1.8092e-04, 5.3727e-05,\n 1.9354e-04, 1.3834e-04, 2.3923e-05, 6.3013e-05, 1.4680e-04, 1.8148e-04,\n 7.1312e-05, 8.2594e-04, 1.6452e-04, 3.3659e-04, 7.0954e-05, 1.2510e-04,\n 1.5345e-04, 1.1684e-04, 1.5871e-04, 2.2710e-04, 6.9915e-05, 7.4893e-05,\n 4.8833e-05, 9.0588e-05, 1.3051e-04, 7.1946e-05, 4.2722e-04, 1.9538e-04,\n 5.9996e-05, 1.0013e-04, 1.8827e-04, 5.8623e-08, 8.2939e-05, 2.7421e-04,\n 7.9108e-05, 2.2156e-04, 3.6290e-04, 1.0068e-04, 1.3527e-04, 1.8288e-04,\n 8.0927e-05, 1.0237e-04, 1.9975e-04, 9.2596e-05, 1.9641e-04, 1.3290e-04,\n 1.9098e-04, 1.4273e-04, 9.2955e-05, 1.2546e-04, 1.2523e-04, 8.8967e-05,\n 6.9920e-05, 1.3705e-04, 1.3374e-04, 1.5297e-04, 1.3130e-04, 9.5923e-05,\n 1.1512e-04, 2.8149e-04, 1.6369e-04, 1.5356e-04, 1.4116e-04, 2.2116e-04,\n 3.3077e-04, 2.5054e-04, 8.3522e-05, 1.4755e-04, 5.9653e-05, 8.5679e-05,\n 1.1337e-04, 1.3040e-04, 1.2870e-04, 1.2488e-04, 1.3627e-04, 9.1003e-05,\n 9.5510e-05, 1.0446e-04, 1.0327e-04, 1.9267e-04, 1.4187e-04, 6.0065e-05,\n 3.1613e-04, 1.9489e-04, 4.7204e-05, 4.9097e-05, 1.2369e-04, 5.4387e-05,\n 1.7664e-04, 1.1259e-04, 9.7548e-05, 9.0311e-05, 2.1097e-04, 2.0185e-04,\n 8.2597e-05, 9.6257e-05, 2.4828e-04, 1.0286e-04, 1.0102e-04, 1.5550e-04,\n 8.6026e-05, 1.1170e-04, 4.0807e-04, 2.7234e-04, 2.4589e-04, 1.5660e-04,\n 1.2984e-04, 2.0225e-04, 7.9846e-05, 2.6645e-04, 1.4388e-04, 9.1266e-05,\n 1.0745e-04, 1.3492e-04, 3.7817e-04, 2.2604e-04, 1.0356e-04, 1.8706e-04,\n 3.0692e-04, 1.6863e-04, 1.3598e-04, 2.1512e-04, 7.5975e-05, 1.7883e-04,\n 1.4647e-04, 1.4475e-04, 3.8681e-05, 9.0387e-05, 1.2271e-04, 1.0508e-04,\n 4.9974e-05, 9.6355e-05, 1.6265e-04, 1.4829e-04, 1.0670e-04, 7.6437e-05,\n 1.4650e-04, 2.3295e-04, 7.9011e-05, 1.7800e-04, 1.8062e-04, 1.0692e-04,\n 1.0895e-04, 1.3774e-04, 1.3000e-04, 1.4283e-04, 1.0966e-04, 9.7678e-05,\n 1.7825e-04, 1.1377e-04, 2.2813e-04, 7.9091e-05, 5.7010e-05, 8.1298e-05,\n 6.7630e-05, 4.1493e-04, 1.9645e-04, 3.7998e-04, 1.3153e-04, 1.3208e-04,\n 3.8152e-05, 2.5532e-04, 6.8785e-05, 7.0189e-05, 1.0352e-04, 1.4055e-04,\n 2.2133e-04, 1.0049e-04, 9.7187e-05, 1.3505e-04, 8.1745e-05, 8.4345e-05,\n 1.9290e-04, 7.7935e-05, 1.2042e-04, 1.2841e-04, 1.4140e-04, 1.1327e-04,\n 1.6274e-04, 1.5585e-04, 8.0868e-05, 9.7764e-05, 1.0332e-04, 9.8030e-05,\n 6.4789e-05, 7.2455e-05, 1.6934e-04, 4.2155e-04, 8.9076e-05, 6.8089e-05,\n 1.3714e-04, 3.1932e-04, 8.5123e-05, 2.4090e-04, 1.6684e-04, 7.7446e-05,\n 2.3211e-04, 1.1470e-04, 9.9586e-05, 7.4263e-05, 4.0244e-04, 3.6337e-05,\n 1.7495e-04, 1.4593e-04, 1.4799e-04, 3.2524e-05, 2.4915e-04, 1.3151e-04,\n 1.2853e-04, 8.5665e-04, 3.3014e-04, 2.4419e-04, 1.6917e-04, 8.4300e-05,\n 1.2056e-04, 8.7598e-05, 1.4466e-04, 1.8694e-04, 4.2058e-04, 1.8746e-04,\n 9.9835e-05, 1.4718e-04, 1.9036e-04, 1.0281e-04, 1.1000e-04, 1.0403e-04,\n 8.4758e-05, 1.5472e-04, 3.4894e-04, 1.6194e-04, 1.5376e-04, 2.7082e-04,\n 1.1297e-04, 5.2220e-05, 2.6421e-04, 1.2045e-04, 1.3990e-04, 4.2500e-04,\n 1.3233e-04, 1.6311e-04, 2.3935e-04, 9.0663e-05, 1.2105e-04, 1.1357e-04,\n 2.9009e-04, 7.2885e-05, 1.0138e-04, 2.7818e-04, 1.6954e-04, 9.0567e-05,\n 1.8270e-04, 1.1383e-04, 7.9210e-05, 1.1059e-04, 1.6164e-04, 1.1693e-04,\n 1.3381e-04, 1.2043e-04, 1.0012e-04, 1.1863e-04, 9.7584e-05, 1.7214e-04,\n 2.0404e-04, 2.3247e-04, 1.2872e-04, 1.6079e-04, 1.6229e-04, 6.5255e-05,\n 1.8730e-04, 1.5914e-04, 1.7112e-04, 1.8311e-04, 3.9262e-04, 9.7794e-05,\n 5.6062e-05, 8.0137e-04, 1.0452e-04, 5.5168e-05, 1.2154e-04, 4.9907e-05,\n 7.0021e-05, 1.3175e-04, 2.3647e-04, 1.0576e-04, 8.8938e-05, 8.1922e-05,\n 2.7737e-04, 1.0638e-04, 1.0015e-04, 2.7418e-04, 1.1503e-04, 1.7649e-04,\n 9.6538e-05, 1.4929e-04, 1.4130e-04, 1.2061e-04, 8.5443e-05, 9.9968e-05,\n 5.3439e-05, 1.4234e-04, 6.0830e-05, 1.0852e-04, 5.4023e-05, 7.1934e-05,\n 7.8998e-05, 1.3465e-08], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(7508.)",
|
| 22 |
+
"exp_avg": "tensor([-3.7232e-03, -8.6014e-04, 1.0767e-03, 2.2015e-03, -2.0931e-03,\n 1.1363e-03, 4.5342e-04, 2.4032e-04, -3.6283e-03, -2.8467e-03,\n -1.2350e-04, -1.1135e-03, 1.9576e-03, -2.9779e-04, -1.5075e-03,\n -2.2913e-03, 2.0491e-04, 9.4604e-04, -3.4466e-04, 5.3852e-04,\n -1.5505e-03, 3.5147e-04, -3.1406e-04, 9.9496e-04, 8.8302e-04,\n -9.1804e-04, 3.1148e-04, -1.3738e-03, 1.7153e-03, 9.9993e-04,\n 2.4813e-03, -3.7034e-04, 1.8836e-03, -5.6164e-05, 1.1686e-03,\n -1.1057e-03, -4.2881e-04, 1.4218e-03, 2.2688e-04, -1.6992e-03,\n 3.2437e-03, 7.9418e-04, 5.6052e-45, 1.5690e-03, -8.2755e-04,\n -7.2555e-04, -1.6208e-03, 1.2756e-03, 1.1031e-03, 8.1418e-05,\n -2.0748e-03, -1.1192e-04, 5.2192e-04, 1.2172e-03, -4.2026e-03,\n 1.1224e-03, -8.1230e-04, -1.7691e-03, -8.0782e-04, 3.0400e-04,\n 4.0139e-03, -1.0296e-03, -3.9123e-03, 2.2490e-03, -2.5493e-03,\n -1.0883e-03, 3.7688e-04, 1.4979e-03, -1.0791e-03, -1.0840e-03,\n -4.1655e-04, -3.6816e-05, 1.6918e-03, -3.8570e-03, 3.4870e-03,\n 7.2535e-04, -2.1351e-03, -1.0752e-03, -4.0453e-04, -8.3108e-04,\n 1.1070e-03, -3.6738e-04, -1.1520e-05, 8.4358e-04, 4.7564e-04,\n 4.8239e-03, 1.4842e-03, -6.0799e-04, 1.2682e-04, 7.2669e-04,\n -1.6732e-03, -8.8960e-04, 8.5096e-04, 1.6576e-03, -8.9821e-04,\n -5.1546e-03, 2.0989e-03, -3.7646e-03, 6.2901e-04, 1.3738e-05,\n 7.2472e-04, 9.8774e-04, 6.2117e-03, -1.5467e-03, 6.3216e-04,\n 1.2499e-03, 7.4141e-04, -1.0352e-03, 2.3773e-04, 3.6655e-05,\n 1.0808e-03, 1.8826e-03, -6.0220e-05, 2.8652e-05, 2.3660e-03,\n -2.0645e-04, -5.4275e-04, -3.0494e-03, -7.2429e-04, 1.5990e-03,\n 3.4226e-04, -2.8007e-03, -4.6153e-04, -8.2876e-04, 1.3737e-03,\n -2.1040e-03, 3.8742e-06, -7.6994e-04, -2.9697e-03, -1.7069e-03,\n -2.5186e-03, 7.4824e-04, -1.7372e-04, -1.8184e-03, 1.1741e-03,\n -1.3004e-03, -3.5776e-04, 1.0674e-03, -3.3506e-03, -7.3526e-04,\n -6.8487e-04, 4.9481e-04, -1.6727e-03, -4.8932e-04, 1.2908e-03,\n -2.2692e-05, -5.2351e-04, -2.0216e-03, 2.0419e-03, 4.7442e-04,\n 9.1293e-04, -1.7158e-03, -2.1366e-03, 1.0116e-04, -1.4611e-03,\n -1.2066e-05, -1.5530e-03, 2.4082e-03, -9.5579e-04, 8.8568e-04,\n 6.0480e-04, -1.7234e-03, 1.0511e-03, 2.8426e-03, 5.1042e-04,\n 1.5191e-04, 1.7673e-03, -1.3889e-03, -4.8832e-04, 2.3465e-03,\n 5.7239e-04, 6.7635e-04, -6.2796e-04, 2.5620e-03, 2.8774e-03,\n 1.8977e-04, -6.2909e-05, 1.0684e-03, -4.1487e-04, -8.0088e-04,\n -1.3524e-03, 1.0070e-04, 7.8027e-04, -2.7759e-03, 1.4684e-04,\n -4.9947e-04, -8.6212e-05, 1.0211e-03, -1.6026e-03, -9.7278e-04,\n -4.0864e-04, -1.9669e-03, 1.3042e-03, 8.0865e-05, -1.5273e-03,\n -1.2868e-03, 1.1094e-03, 1.8931e-03, -8.9123e-04, 2.0843e-04,\n -1.8693e-03, -2.8531e-03, -1.5921e-04, -1.2032e-03, 1.5425e-03,\n -5.7681e-04, 1.2207e-04, -2.0524e-04, -6.4202e-04, -6.1758e-04,\n 8.1655e-05, -2.0862e-03, 2.7023e-04, -1.7151e-03, -1.5730e-03,\n 1.9448e-04, -1.1165e-04, 1.1400e-03, 1.7921e-03, -7.5510e-04,\n -7.7229e-04, 9.9682e-04, -4.1602e-03, 2.3675e-04, 2.3460e-03,\n -7.1472e-04, -1.0712e-04, -1.1838e-03, -1.3038e-03, 1.9572e-03,\n -1.7740e-03, 1.3395e-03, -2.5000e-03, -4.9848e-04, 2.4646e-04,\n -1.2717e-03, -2.6915e-03, 5.6052e-45, 2.1327e-04, 6.4510e-03,\n -1.6931e-04, 1.9312e-03, 3.3441e-04, -2.7944e-03, -1.3202e-04,\n 1.9259e-03, 9.5661e-04, 9.3637e-05, 8.4079e-04, -3.9494e-04,\n -9.0802e-04, 1.4717e-03, -9.6508e-04, -8.7561e-04, -1.2290e-03,\n 3.3525e-03, -9.9964e-04, 2.0566e-03, -1.8510e-03, 4.9353e-04,\n 1.2626e-03, 1.2110e-03, 2.5610e-04, -1.5661e-03, -1.8056e-03,\n -3.3655e-03, -1.5978e-03, 5.0901e-03, 5.2409e-04, 9.2655e-04,\n -4.2989e-04, 4.5649e-04, -5.3780e-05, -9.4942e-04, -2.2868e-04,\n -1.7417e-03, 3.0391e-03, -7.1679e-04, -1.0020e-03, -6.2785e-04,\n 2.2885e-03, 1.6560e-03, 1.7328e-03, -1.9965e-03, 9.3695e-04,\n 2.5713e-03, -1.3394e-03, 2.2875e-03, -3.3365e-03, 1.9438e-03,\n -8.9516e-04, 5.3209e-04, 2.0504e-03, -1.5133e-03, -1.8807e-03,\n 3.7921e-04, 2.4409e-04, 2.1610e-04, -4.0281e-03, -2.7121e-03,\n 6.6286e-04, 3.6866e-03, 2.0966e-03, 6.8153e-04, -4.6161e-04,\n 7.3856e-04, -1.1301e-03, -2.4575e-03, 3.1103e-03, -1.3410e-03,\n 1.6809e-03, -1.8023e-04, 1.2801e-03, -1.0963e-04, 5.8250e-05,\n -1.1180e-03, -9.9016e-05, 4.3343e-04, 3.3291e-03, -1.9301e-04,\n -2.3891e-03, 3.1106e-03, 7.5410e-04, 4.6571e-04, -1.9323e-04,\n -4.2785e-04, 1.4032e-03, -9.7546e-04, 4.6821e-04, -5.2695e-03,\n -1.0957e-03, 3.2194e-03, -1.6726e-03, 1.4266e-03, -7.0467e-04,\n -9.8435e-05, 9.6675e-04, -9.4010e-04, 1.2745e-03, -1.7908e-03,\n 7.0472e-04, -1.0528e-03, 2.3223e-03, 3.3196e-03, 8.6421e-04,\n 3.2952e-05, -6.4284e-04, -3.2465e-04, 1.7361e-04, -5.2519e-04,\n -2.7988e-03, -1.8102e-04, 1.4812e-03, -1.3197e-03, 1.5273e-03,\n -3.5198e-04, 2.5210e-03, 1.4128e-03, 2.4262e-04, -1.1407e-03,\n 2.0850e-03, 2.4880e-03, 4.5081e-03, 8.8878e-05, 3.0652e-03,\n 8.2514e-04, 1.8674e-03, -2.7320e-03, -1.9466e-03, -1.5535e-05,\n -5.1250e-04, -1.7802e-03, -9.4653e-04, -3.5765e-03, -2.0358e-03,\n 1.0396e-04, -3.9008e-04, 7.6792e-04, -2.0206e-03, 7.2214e-04,\n 6.2985e-04, 3.0148e-03, 2.0630e-03, 2.2111e-03, 5.6542e-04,\n 5.3339e-04, -1.2537e-03, -6.3218e-05, -9.9724e-04, -1.7650e-03,\n 9.8097e-04, -1.2221e-03, -1.4259e-03, 1.1608e-03, -1.3482e-03,\n -2.9864e-03, 2.9808e-03, 3.2895e-03, 1.7603e-03, 4.4686e-03,\n 2.8478e-03, 1.6191e-03, 1.2610e-04, -1.2513e-03, 2.7720e-03,\n 1.5526e-03, -3.5833e-03, 1.9597e-03, -2.3555e-03, -5.0555e-05,\n -5.3231e-04, 2.2241e-03, 5.4641e-03, 2.2120e-03, -1.1836e-03,\n 1.1898e-03, 1.1599e-03, 9.1491e-04, -1.9586e-03, 3.7562e-04,\n 1.2232e-03, -2.0051e-03, -3.0283e-03, 2.0309e-03, -1.0561e-03,\n -2.5883e-03, 1.6125e-03, -9.5281e-04, -1.9499e-03, 8.3750e-04,\n 7.7084e-04, 4.5067e-04, -9.7718e-04, 1.8282e-03, -3.5412e-03,\n -2.4793e-03, 3.6547e-04, 1.5575e-03, -8.9901e-04, -2.2345e-03,\n -1.2818e-03, -2.7992e-03, 9.6216e-04, -6.4289e-03, -1.1587e-03,\n 1.3150e-04, -3.9458e-04, -2.7095e-05, 3.0025e-03, 8.3347e-04,\n 2.3435e-04, -3.2505e-04, 1.3374e-03, -3.9064e-03, 1.2713e-03,\n 2.2117e-03, -1.9871e-03, 4.5017e-04, 5.9877e-05, -2.3596e-03,\n 1.4291e-03, 4.2149e-04, 3.6893e-03, -6.5881e-04, 3.7164e-04,\n 1.3290e-03, -5.7661e-04, 4.4414e-05, 1.9580e-04, 4.2886e-04,\n 2.1068e-04, 3.8972e-03, 2.6467e-03, 1.0898e-03, -1.0002e-03,\n 2.2358e-03, -1.0920e-03, -7.8132e-04, -2.9818e-03, -5.9443e-04,\n 1.6672e-03, 1.1935e-03, 2.0919e-03, 7.2457e-04, 6.3237e-04,\n -2.5508e-03, 1.6498e-03, 1.1375e-03, 9.8574e-04, 2.3671e-03,\n 2.1181e-03, 2.9979e-04, -1.8550e-03, 3.6183e-04, 6.6911e-04,\n -2.9178e-03, 2.4498e-03, -7.3566e-04, 2.2511e-03, 1.6525e-03,\n 1.5039e-03, -5.2307e-04, -2.6507e-04, 2.4098e-03, -1.5293e-04,\n 1.5160e-03, -2.8713e-05, 1.2278e-03, 1.0764e-03, 7.8633e-04,\n 8.3716e-04, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.0519e-05, 4.0453e-05, 3.3421e-05, 3.9581e-05, 2.6795e-05, 7.9315e-05,\n 3.8429e-05, 7.5298e-05, 5.4008e-05, 6.8275e-05, 9.5044e-05, 7.9142e-05,\n 9.0760e-05, 3.6364e-05, 6.5653e-05, 5.6777e-05, 3.5526e-05, 6.5814e-05,\n 4.1433e-05, 7.6470e-05, 7.2636e-05, 3.7199e-05, 3.6267e-05, 5.9765e-05,\n 1.2799e-04, 6.2869e-05, 4.6171e-05, 7.7088e-05, 6.5644e-05, 5.6449e-05,\n 3.1831e-05, 4.5050e-05, 6.0493e-05, 4.3061e-05, 2.8609e-05, 4.5523e-05,\n 2.5895e-05, 3.6626e-05, 2.8871e-05, 3.2991e-05, 2.6129e-05, 3.8367e-05,\n 6.7154e-10, 8.1268e-05, 1.2557e-04, 3.8329e-05, 5.9503e-05, 1.4626e-05,\n 9.3506e-05, 1.1436e-04, 1.1414e-04, 2.3215e-05, 3.5385e-05, 5.1226e-05,\n 7.6962e-05, 3.3168e-05, 5.6447e-05, 4.9075e-05, 4.4514e-05, 2.1398e-05,\n 9.5031e-05, 3.7267e-05, 5.8041e-05, 3.5837e-05, 3.1865e-05, 3.9326e-05,\n 4.2334e-05, 3.0755e-05, 4.9329e-05, 3.1941e-05, 6.0881e-05, 5.4159e-05,\n 4.4804e-05, 4.3302e-05, 3.7515e-05, 4.3770e-05, 4.2183e-05, 4.0471e-05,\n 5.8655e-05, 7.1969e-05, 6.8368e-05, 4.5265e-05, 4.0666e-05, 2.1349e-05,\n 5.5383e-05, 7.2179e-05, 5.7021e-05, 5.6849e-05, 2.5159e-05, 9.1536e-05,\n 4.7199e-05, 5.0787e-05, 6.6201e-05, 4.9172e-05, 7.0395e-05, 5.0299e-05,\n 6.8893e-05, 6.7362e-05, 5.3472e-05, 5.8722e-05, 4.6682e-05, 6.9164e-05,\n 6.3318e-05, 4.6387e-05, 5.7884e-05, 9.3226e-05, 4.6484e-05, 1.3955e-04,\n 3.4193e-05, 4.4454e-05, 6.8894e-05, 6.8222e-05, 5.5379e-05, 4.7134e-05,\n 5.8438e-05, 3.6904e-05, 6.0372e-05, 6.8418e-05, 3.4596e-05, 6.0200e-05,\n 3.2733e-05, 7.4385e-05, 8.2674e-05, 1.1818e-04, 8.2620e-05, 9.9621e-05,\n 4.6444e-05, 6.1035e-05, 9.6446e-05, 1.3955e-04, 5.4046e-05, 3.7187e-05,\n 7.1747e-05, 3.4643e-05, 1.4061e-04, 3.9431e-05, 8.3114e-05, 6.3309e-05,\n 4.0028e-05, 2.8753e-05, 2.9226e-05, 4.3946e-05, 5.4332e-05, 8.9085e-05,\n 4.6629e-05, 4.5188e-05, 2.4783e-05, 7.8428e-05, 5.1470e-05, 8.7600e-05,\n 2.0529e-05, 8.5065e-05, 3.0972e-05, 7.3413e-05, 3.0173e-05, 3.7456e-05,\n 1.0168e-04, 7.1616e-05, 5.0771e-05, 5.0999e-05, 6.0605e-05, 2.2465e-05,\n 7.8561e-05, 3.8659e-05, 2.6053e-05, 9.8842e-05, 5.1143e-05, 2.7273e-05,\n 2.6982e-05, 4.5917e-05, 2.8772e-05, 5.8144e-05, 4.7039e-05, 7.7167e-05,\n 5.9495e-05, 2.5075e-05, 1.0558e-04, 3.6275e-05, 2.7867e-05, 6.2975e-05,\n 4.1491e-05, 4.8995e-05, 1.3107e-04, 5.2690e-05, 1.3834e-04, 6.5138e-05,\n 1.3232e-04, 4.9674e-05, 4.6378e-05, 1.8300e-05, 3.7934e-05, 5.9323e-05,\n 7.3661e-05, 3.5153e-05, 3.4449e-05, 2.3475e-05, 5.7399e-05, 4.6195e-05,\n 1.5501e-05, 2.2837e-05, 4.0029e-05, 4.6188e-05, 4.3976e-05, 2.4987e-05,\n 2.1242e-05, 4.0400e-05, 3.4671e-05, 3.0387e-05, 6.8646e-05, 2.6392e-05,\n 5.4791e-05, 6.0078e-05, 2.2330e-05, 2.9598e-05, 5.9543e-05, 4.1866e-05,\n 3.0818e-05, 8.2460e-05, 4.9534e-05, 7.7131e-05, 2.8346e-05, 4.7689e-05,\n 4.6711e-05, 3.0161e-05, 4.6944e-05, 7.8578e-05, 2.7320e-05, 2.8277e-05,\n 2.0855e-05, 3.7647e-05, 3.3545e-05, 3.1691e-05, 9.9361e-05, 6.2731e-05,\n 2.7992e-05, 3.9230e-05, 5.2793e-05, 2.0527e-09, 3.8441e-05, 1.3725e-04,\n 3.5605e-05, 8.8012e-05, 6.4693e-05, 5.1730e-05, 8.1903e-05, 6.5335e-05,\n 4.7508e-05, 4.0032e-05, 6.7098e-05, 3.5856e-05, 8.7394e-05, 3.7670e-05,\n 5.9139e-05, 6.0807e-05, 3.2047e-05, 4.7432e-05, 5.2489e-05, 4.8681e-05,\n 4.1388e-05, 5.0842e-05, 3.7829e-05, 6.0288e-05, 4.3245e-05, 5.4649e-05,\n 4.2966e-05, 6.4823e-05, 3.7697e-05, 6.2121e-05, 4.4672e-05, 6.5180e-05,\n 7.9499e-05, 7.2028e-05, 3.6030e-05, 5.5822e-05, 2.9479e-05, 3.3114e-05,\n 4.8973e-05, 6.8259e-05, 4.5368e-05, 3.9630e-05, 4.5156e-05, 3.3158e-05,\n 3.7870e-05, 5.5513e-05, 3.5918e-05, 6.8757e-05, 4.7518e-05, 2.5812e-05,\n 8.8536e-05, 5.9693e-05, 2.2948e-05, 2.4843e-05, 5.8920e-05, 2.7867e-05,\n 5.4171e-05, 3.2485e-05, 4.3047e-05, 5.3443e-05, 1.4493e-04, 5.8936e-05,\n 3.2528e-05, 3.5412e-05, 5.4183e-05, 3.9718e-05, 5.9119e-05, 3.8383e-05,\n 3.3181e-05, 4.9705e-05, 1.1581e-04, 6.5600e-05, 7.6755e-05, 5.1038e-05,\n 3.5175e-05, 3.8851e-05, 4.6168e-05, 5.7596e-05, 4.8298e-05, 3.2299e-05,\n 3.5701e-05, 6.5479e-05, 1.4927e-04, 8.7454e-05, 4.6949e-05, 6.4337e-05,\n 9.7395e-05, 9.2900e-05, 6.2601e-05, 8.9937e-05, 4.2104e-05, 5.1357e-05,\n 6.6300e-05, 5.6071e-05, 1.7324e-05, 3.7672e-05, 4.3888e-05, 4.3265e-05,\n 2.9065e-05, 2.5185e-05, 6.3489e-05, 4.8041e-05, 4.6252e-05, 3.2557e-05,\n 4.8873e-05, 8.4891e-05, 5.0382e-05, 9.2832e-05, 7.9617e-05, 4.4191e-05,\n 5.5406e-05, 6.2636e-05, 4.7504e-05, 4.4886e-05, 5.2061e-05, 2.7404e-05,\n 5.3634e-05, 4.1180e-05, 6.5773e-05, 2.8337e-05, 2.9174e-05, 2.3117e-05,\n 2.5583e-05, 1.2557e-04, 9.9739e-05, 7.7685e-05, 5.1871e-05, 5.3271e-05,\n 2.0392e-05, 7.4435e-05, 2.6198e-05, 2.5407e-05, 3.3510e-05, 5.3142e-05,\n 5.3824e-05, 4.8809e-05, 5.4503e-05, 3.8484e-05, 3.5575e-05, 2.7932e-05,\n 5.4554e-05, 3.2787e-05, 4.0853e-05, 5.6860e-05, 5.4416e-05, 3.5260e-05,\n 6.1030e-05, 9.2072e-05, 3.8842e-05, 5.1100e-05, 4.0214e-05, 4.4603e-05,\n 3.8317e-05, 3.2070e-05, 6.1222e-05, 9.3504e-05, 4.2959e-05, 2.7469e-05,\n 3.4666e-05, 8.7372e-05, 4.1634e-05, 9.4878e-05, 5.2274e-05, 3.0315e-05,\n 8.2885e-05, 3.9062e-05, 4.3449e-05, 3.4514e-05, 1.0946e-04, 1.8146e-05,\n 5.6665e-05, 6.8492e-05, 5.2962e-05, 2.6389e-05, 1.0433e-04, 6.3673e-05,\n 3.7136e-05, 6.8683e-05, 1.0416e-04, 8.5012e-05, 7.0288e-05, 4.7195e-05,\n 4.7534e-05, 3.2434e-05, 6.2268e-05, 6.0329e-05, 7.9424e-05, 4.9092e-05,\n 5.7668e-05, 4.7754e-05, 6.0030e-05, 4.1174e-05, 3.2350e-05, 4.1075e-05,\n 4.5867e-05, 5.7507e-05, 6.6493e-05, 5.5440e-05, 6.1791e-05, 7.1921e-05,\n 3.8136e-05, 2.8032e-05, 8.9254e-05, 5.3755e-05, 4.7641e-05, 2.2526e-04,\n 6.0858e-05, 4.9396e-05, 7.6845e-05, 4.5940e-05, 5.0263e-05, 4.6486e-05,\n 7.4183e-05, 3.7527e-05, 4.5445e-05, 1.3429e-04, 6.2385e-05, 2.6467e-05,\n 9.7979e-05, 4.2812e-05, 2.1814e-05, 5.2340e-05, 7.1938e-05, 4.5420e-05,\n 4.1795e-05, 4.6619e-05, 4.7876e-05, 3.5685e-05, 3.4433e-05, 6.1706e-05,\n 6.2399e-05, 7.1386e-05, 5.4164e-05, 1.1764e-04, 6.3176e-05, 4.8276e-05,\n 8.6660e-05, 5.2711e-05, 6.3247e-05, 5.4824e-05, 7.9563e-05, 3.2908e-05,\n 2.7924e-05, 9.6797e-05, 5.0935e-05, 2.5059e-05, 4.1660e-05, 4.2101e-05,\n 4.4612e-05, 4.4213e-05, 7.3876e-05, 4.6343e-05, 4.2784e-05, 3.6202e-05,\n 7.2430e-05, 4.8798e-05, 3.4127e-05, 6.5459e-05, 4.7297e-05, 5.5066e-05,\n 3.9205e-05, 4.3738e-05, 4.5762e-05, 3.8962e-05, 3.4701e-05, 3.8423e-05,\n 2.7565e-05, 4.2046e-05, 3.1800e-05, 4.6602e-05, 3.7761e-05, 4.1980e-05,\n 3.4989e-05, 3.6159e-10], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(7508.)",
|
| 27 |
+
"exp_avg": "tensor([[-2.1593e-06, 2.9795e-05, -7.2643e-06, ..., 8.5620e-06,\n 5.9513e-08, 5.6052e-45],\n [-5.5597e-06, 3.6261e-05, 3.5267e-05, ..., -2.7700e-05,\n -3.9419e-05, 5.6052e-45],\n [-6.0594e-06, -2.1937e-05, -1.6172e-05, ..., 4.7446e-05,\n -9.1511e-06, 5.6052e-45],\n ...,\n [ 2.1611e-07, -1.6425e-05, -3.9018e-06, ..., 3.6738e-05,\n 5.2305e-06, -5.6052e-45],\n [ 4.4870e-05, 1.0251e-05, -2.1434e-05, ..., 5.1251e-05,\n 2.8268e-05, -5.6052e-45],\n [ 2.1307e-05, -1.1087e-05, 4.6062e-05, ..., 2.6723e-05,\n -1.0669e-05, -5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[5.9263e-09, 1.5650e-08, 5.7719e-09, ..., 1.0811e-08, 3.5560e-09,\n 1.4131e-10],\n [1.0431e-08, 2.2757e-08, 1.4397e-08, ..., 3.2659e-08, 1.8762e-08,\n 4.6158e-11],\n [1.0056e-08, 1.4346e-08, 1.7718e-08, ..., 2.7154e-08, 6.3785e-09,\n 4.5150e-11],\n ...,\n [1.1415e-08, 3.2217e-08, 1.3014e-08, ..., 3.5865e-08, 8.6576e-09,\n 1.0454e-10],\n [1.1186e-08, 4.3631e-08, 1.1834e-08, ..., 2.3150e-08, 1.7769e-08,\n 1.3946e-10],\n [1.4907e-08, 1.9561e-08, 1.0652e-08, ..., 2.8717e-08, 1.3492e-08,\n 7.2357e-11]], device='cuda:0')"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"param_groups": [
|
| 32 |
+
{
|
| 33 |
+
"lr": 0.00904518046337755,
|
| 34 |
+
"name": "shared",
|
| 35 |
+
"betas": [
|
| 36 |
+
0.9,
|
| 37 |
+
0.999
|
| 38 |
+
],
|
| 39 |
+
"eps": 1e-08,
|
| 40 |
+
"weight_decay": 1e-05,
|
| 41 |
+
"amsgrad": false,
|
| 42 |
+
"maximize": false,
|
| 43 |
+
"foreach": null,
|
| 44 |
+
"capturable": false,
|
| 45 |
+
"differentiable": false,
|
| 46 |
+
"fused": null,
|
| 47 |
+
"decoupled_weight_decay": true,
|
| 48 |
+
"initial_lr": 0.01,
|
| 49 |
+
"params": [
|
| 50 |
+
0,
|
| 51 |
+
1,
|
| 52 |
+
2,
|
| 53 |
+
3
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"lr": 0.00904518046337755,
|
| 58 |
+
"name": "scale_256",
|
| 59 |
+
"betas": [
|
| 60 |
+
0.9,
|
| 61 |
+
0.999
|
| 62 |
+
],
|
| 63 |
+
"eps": 1e-08,
|
| 64 |
+
"weight_decay": 1e-05,
|
| 65 |
+
"amsgrad": false,
|
| 66 |
+
"maximize": false,
|
| 67 |
+
"foreach": null,
|
| 68 |
+
"capturable": false,
|
| 69 |
+
"differentiable": false,
|
| 70 |
+
"fused": null,
|
| 71 |
+
"decoupled_weight_decay": true,
|
| 72 |
+
"initial_lr": 0.01,
|
| 73 |
+
"params": [
|
| 74 |
+
4
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"lr": 0.00904518046337755,
|
| 79 |
+
"name": "scale_512",
|
| 80 |
+
"betas": [
|
| 81 |
+
0.9,
|
| 82 |
+
0.999
|
| 83 |
+
],
|
| 84 |
+
"eps": 1e-08,
|
| 85 |
+
"weight_decay": 1e-05,
|
| 86 |
+
"amsgrad": false,
|
| 87 |
+
"maximize": false,
|
| 88 |
+
"foreach": null,
|
| 89 |
+
"capturable": false,
|
| 90 |
+
"differentiable": false,
|
| 91 |
+
"fused": null,
|
| 92 |
+
"decoupled_weight_decay": true,
|
| 93 |
+
"initial_lr": 0.01,
|
| 94 |
+
"params": [
|
| 95 |
+
5
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"lr": 0.004522637977440181,
|
| 100 |
+
"name": "fusion",
|
| 101 |
+
"betas": [
|
| 102 |
+
0.9,
|
| 103 |
+
0.999
|
| 104 |
+
],
|
| 105 |
+
"eps": 1e-08,
|
| 106 |
+
"weight_decay": 1e-05,
|
| 107 |
+
"amsgrad": false,
|
| 108 |
+
"maximize": false,
|
| 109 |
+
"foreach": null,
|
| 110 |
+
"capturable": false,
|
| 111 |
+
"differentiable": false,
|
| 112 |
+
"fused": null,
|
| 113 |
+
"decoupled_weight_decay": true,
|
| 114 |
+
"initial_lr": 0.005,
|
| 115 |
+
"params": [
|
| 116 |
+
6
|
| 117 |
+
]
|
| 118 |
+
}
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
"scheduler_state_dict": {
|
| 122 |
+
"T_0": 10,
|
| 123 |
+
"T_i": 10,
|
| 124 |
+
"T_mult": 2,
|
| 125 |
+
"eta_min": 1e-06,
|
| 126 |
+
"T_cur": 2,
|
| 127 |
+
"base_lrs": [
|
| 128 |
+
0.01,
|
| 129 |
+
0.01,
|
| 130 |
+
0.01,
|
| 131 |
+
0.005
|
| 132 |
+
],
|
| 133 |
+
"last_epoch": 2,
|
| 134 |
+
"_step_count": 0,
|
| 135 |
+
"_is_initial": false,
|
| 136 |
+
"_get_lr_called_within_step": false,
|
| 137 |
+
"_last_lr": [
|
| 138 |
+
0.00904518046337755,
|
| 139 |
+
0.00904518046337755,
|
| 140 |
+
0.00904518046337755,
|
| 141 |
+
0.004522637977440181
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
"metrics": {
|
| 145 |
+
"best_val_acc": 62.978,
|
| 146 |
+
"best_epoch": 1,
|
| 147 |
+
"scale_accuracies": {
|
| 148 |
+
"256": 62.978
|
| 149 |
+
},
|
| 150 |
+
"training_history": {
|
| 151 |
+
"epochs": [
|
| 152 |
+
1,
|
| 153 |
+
2
|
| 154 |
+
],
|
| 155 |
+
"train_loss": [
|
| 156 |
+
3.9435249049420933,
|
| 157 |
+
3.3040703793567867
|
| 158 |
+
],
|
| 159 |
+
"train_acc": [
|
| 160 |
+
54.38726307083047,
|
| 161 |
+
59.31631083223343
|
| 162 |
+
],
|
| 163 |
+
"val_acc": [
|
| 164 |
+
61.635333333333335,
|
| 165 |
+
62.978
|
| 166 |
+
],
|
| 167 |
+
"scale_accs": {
|
| 168 |
+
"256": [
|
| 169 |
+
61.635333333333335,
|
| 170 |
+
62.978
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
"lr": [
|
| 174 |
+
0.00975530705321762,
|
| 175 |
+
0.00904518046337755
|
| 176 |
+
]
|
| 177 |
+
}
|
| 178 |
+
},
|
| 179 |
+
"train_config": {
|
| 180 |
+
"name": "david_training",
|
| 181 |
+
"run_id": "20251012_235237",
|
| 182 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 183 |
+
"model_variant": [
|
| 184 |
+
"clip_vit_b16",
|
| 185 |
+
"clip_vit_laion_b32",
|
| 186 |
+
"clip_vit_b32"
|
| 187 |
+
],
|
| 188 |
+
"num_classes": 1000,
|
| 189 |
+
"preset": "small_fast",
|
| 190 |
+
"custom_config_path": null,
|
| 191 |
+
"num_classes_override": null,
|
| 192 |
+
"use_belly_override": null,
|
| 193 |
+
"belly_expand_override": null,
|
| 194 |
+
"progressive_training_override": true,
|
| 195 |
+
"scale_warmup_epochs_override": {
|
| 196 |
+
"256": 0,
|
| 197 |
+
"512": 2
|
| 198 |
+
},
|
| 199 |
+
"num_epochs": 10,
|
| 200 |
+
"batch_size": 1024,
|
| 201 |
+
"learning_rate": 0.01,
|
| 202 |
+
"weight_decay": 1e-05,
|
| 203 |
+
"warmup_epochs": 3,
|
| 204 |
+
"use_rose_loss": true,
|
| 205 |
+
"rose_initial_weight": 0.1,
|
| 206 |
+
"rose_max_weight": 0.8,
|
| 207 |
+
"rose_weight_schedule": "adaptive",
|
| 208 |
+
"use_cayley_loss": false,
|
| 209 |
+
"cayley_weight": 0.01,
|
| 210 |
+
"scale_loss_balance": null,
|
| 211 |
+
"use_mixed_precision": false,
|
| 212 |
+
"gradient_clip": 15.0,
|
| 213 |
+
"scheduler_type": "cosine_restarts",
|
| 214 |
+
"min_lr": 1e-06,
|
| 215 |
+
"freeze_strategy": "never",
|
| 216 |
+
"freeze_threshold": 90.0,
|
| 217 |
+
"unfreeze_on_plateau": true,
|
| 218 |
+
"patience": 10,
|
| 219 |
+
"track_gradients": true,
|
| 220 |
+
"gradient_scale_threshold": 1e-05,
|
| 221 |
+
"gradient_scale_multiplier": 10.0,
|
| 222 |
+
"log_interval": 50,
|
| 223 |
+
"val_interval": 1,
|
| 224 |
+
"save_interval": 5,
|
| 225 |
+
"log_fusion_weights": true,
|
| 226 |
+
"log_loss_components": true,
|
| 227 |
+
"save_format": "safetensors",
|
| 228 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 229 |
+
"upload_to_hub": true,
|
| 230 |
+
"base_dir": "./david_training",
|
| 231 |
+
"num_workers": 10,
|
| 232 |
+
"pin_memory": true,
|
| 233 |
+
"prefetch_factor": 4,
|
| 234 |
+
"persistent_workers": true
|
| 235 |
+
}
|
| 236 |
+
}
|