Update best_model_acc61.64_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc61.64_metadata.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(3754.)",
|
| 7 |
+
"exp_avg": "tensor([[-6.6889e-05, 1.3265e-04, -6.6153e-05, ..., 4.2558e-05,\n 4.8405e-05, 8.1500e-06],\n [ 6.8543e-06, 4.3199e-05, 3.6650e-05, ..., -7.1323e-05,\n -1.3142e-05, -9.6672e-06],\n [ 6.8508e-05, 1.0143e-04, 5.8639e-05, ..., 9.3924e-05,\n -1.1528e-05, 4.6736e-05],\n ...,\n [ 2.9199e-05, -1.1982e-04, 1.2026e-05, ..., -1.2766e-06,\n -6.6592e-05, -3.0264e-05],\n [ 6.8503e-05, -1.6572e-05, 1.0229e-04, ..., 1.5418e-05,\n 1.2989e-04, -6.8448e-05],\n [-5.2403e-07, -1.0003e-04, 1.9755e-05, ..., 7.3766e-05,\n -3.3841e-05, 3.8571e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.3427e-07, 3.3637e-07, 1.0494e-07, ..., 6.8386e-08, 5.2532e-08,\n 4.8599e-08],\n [1.7465e-07, 4.9178e-07, 2.1782e-07, ..., 2.2039e-07, 7.9658e-08,\n 7.8578e-08],\n [6.6014e-08, 8.2426e-08, 6.3620e-08, ..., 2.1457e-07, 3.1822e-08,\n 4.4420e-08],\n ...,\n [1.0630e-07, 8.1123e-07, 1.3177e-07, ..., 2.1173e-07, 5.7564e-08,\n 8.1481e-08],\n [2.1016e-07, 4.5119e-07, 1.5122e-07, ..., 1.5311e-07, 7.0747e-08,\n 8.5301e-08],\n [2.4040e-08, 4.9302e-08, 1.7626e-08, ..., 2.1475e-08, 8.4640e-09,\n 9.2510e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(3754.)",
|
| 12 |
+
"exp_avg": "tensor([ 3.9632e-04, 1.5579e-03, -2.3064e-03, 4.2013e-03, -1.8920e-03,\n 1.4397e-03, 1.7355e-03, -1.0686e-05, 3.2616e-03, -1.8670e-03,\n 5.7083e-04, -8.6037e-04, 3.9029e-03, -4.2929e-03, 7.3415e-03,\n -4.1131e-03, 1.9186e-04, -2.1669e-03, -2.3859e-03, 2.0722e-03,\n -8.2817e-04, 2.2056e-03, -4.7095e-04, -4.9333e-03, -3.0566e-04,\n 1.3702e-03, -1.9688e-03, 2.7054e-03, -7.8853e-04, -2.0112e-03,\n -6.4850e-04, -1.3115e-04, -9.9241e-04, 3.7861e-03, -7.9307e-04,\n 2.5908e-03, 2.9013e-03, -5.6548e-04, -2.0166e-03, -6.4295e-04,\n -5.4217e-03, 8.5288e-04, 7.5059e-05, 2.6962e-03, -2.6484e-03,\n 1.4010e-03, 1.6857e-03, -1.2576e-03, -3.2343e-03, 3.9164e-03,\n -3.3385e-03, -1.7644e-03, -6.8960e-04, 2.5261e-03, 6.4738e-04,\n 4.4648e-04, -2.1311e-03, -4.3048e-04, -3.9405e-03, -1.1403e-03,\n 2.4017e-04, -1.1647e-03, -9.2583e-04, 2.4262e-04, 2.6964e-03,\n -3.4944e-03, -1.6066e-04, -2.1718e-03, -1.7267e-03, 3.1921e-03,\n 9.0207e-04, -2.1186e-04, -4.3842e-04, 7.3341e-04, -2.9407e-03,\n -5.5586e-03, 6.9940e-04, -7.6842e-04, 8.9427e-05, 1.2611e-03,\n 1.8575e-03, -9.0895e-04, -6.4300e-04, 1.7665e-03, 8.5442e-04,\n -9.4755e-04, -4.5031e-04, 2.7175e-03, 1.9476e-03, -2.2881e-04,\n 4.5214e-04, 2.1650e-03, 2.5794e-05, 9.4567e-04, -2.2747e-04,\n 1.6468e-03, -1.1633e-03, 6.0094e-04, 3.9879e-03, -2.0470e-04,\n 2.8222e-03, 1.1161e-03, -1.3226e-03, -5.7010e-04, 1.1251e-03,\n -3.1648e-03, -3.9171e-04, 1.4511e-03, 1.1957e-03, -3.4792e-03,\n -2.8942e-03, -9.2336e-04, 1.7821e-03, -2.1244e-03, -2.5487e-03,\n 3.1175e-03, 1.8229e-03, 1.6481e-03, 2.5278e-04, -1.6219e-03,\n 1.4465e-03, 7.2343e-04, 8.3077e-04, -1.7808e-03, -4.0967e-04,\n -2.5327e-03, -2.2786e-03, 1.6785e-03, 1.6867e-03, 1.1900e-03,\n -9.3284e-04, -1.8004e-03, 1.9822e-03, 4.0956e-03, -1.3118e-03,\n 5.4526e-04, 1.0040e-03, -1.6715e-03, 1.3096e-03, -3.6358e-04,\n -1.8875e-03, -1.5143e-03, -3.2096e-03, -2.6495e-03, 1.5289e-03,\n 2.0100e-03, -2.0407e-04, -2.2955e-03, 9.5162e-04, -1.2930e-03,\n 1.0049e-03, 1.5433e-03, -1.4396e-03, 1.5131e-03, -2.9122e-03,\n 1.3550e-03, -5.3400e-04, 3.4764e-04, -5.8936e-04, -3.2118e-03,\n 2.3675e-04, 2.7342e-04, -6.1232e-05, -1.3427e-04, 1.5372e-03,\n 7.1235e-04, 4.7519e-03, 4.8855e-04, 8.3590e-04, 9.2336e-04,\n 2.5104e-03, 3.1840e-03, 1.4341e-04, -1.7657e-03, -3.3038e-03,\n 2.3797e-03, 1.4051e-03, -1.4510e-03, 3.2832e-04, -8.3705e-04,\n -7.3537e-04, 2.2821e-03, -4.5379e-03, 7.5325e-04, 1.5737e-03,\n 2.1232e-05, -2.8519e-03, -5.4544e-04, 1.9478e-03, 2.4000e-03,\n -2.9966e-03, -5.1053e-04, 1.2901e-03, 1.0498e-03, -1.3450e-03,\n 1.2425e-03, 2.3924e-03, 7.9848e-05, -8.1174e-04, -2.7321e-04,\n -1.0957e-03, 7.2351e-04, 3.6585e-03, 8.1827e-04, 1.9689e-03,\n -1.9321e-03, 8.0373e-04, -1.3578e-03, 6.6965e-04, -9.8506e-04,\n 4.4013e-04, -2.7125e-03, -3.8241e-04, -1.5546e-03, 5.4418e-03,\n 1.7665e-03, -4.3283e-05, 3.2125e-04, -2.4556e-04, -1.7274e-03,\n 1.2949e-03, 1.1819e-03, 2.6126e-04, -5.0736e-05, -3.3761e-03,\n -1.1504e-03, 2.3023e-03, 1.8309e-03, -8.3331e-03, -1.8770e-03,\n -1.8978e-03, -2.9059e-03, 4.5032e-04, 2.0814e-03, 2.2115e-03,\n -2.5175e-03, -2.2324e-03, 4.8063e-06, -1.3694e-03, 3.1704e-04,\n 2.2946e-03, -9.1567e-04, -1.3407e-03, -7.1966e-04, -2.9953e-03,\n -2.4460e-05, -1.1299e-03, -1.4750e-03, -9.4826e-04, -5.8136e-04,\n -3.7371e-03, -4.3449e-04, 6.2479e-04, -2.8807e-03, -1.8250e-03,\n -1.0205e-03, 3.5198e-03, 5.4455e-05, -1.4524e-03, 2.3656e-03,\n 1.3798e-03, 1.9982e-03, -1.4604e-03, -5.8275e-04, -4.4731e-03,\n 2.6025e-04, 2.0045e-03, -2.3544e-03, 2.6284e-03, -2.2476e-03,\n 4.3185e-04, 2.7079e-03, 4.5127e-03, -2.1422e-03, 8.3068e-04,\n -1.4831e-03, 2.2784e-04, -1.0853e-03, 3.3525e-04, -9.7730e-04,\n -1.5774e-03, -6.0608e-04, -1.3034e-03, 4.6828e-04, -7.3785e-04,\n -2.3136e-03, -1.7278e-04, -7.3045e-04, -5.1176e-04, -5.5110e-04,\n 5.3868e-04, 3.2391e-03, -4.6112e-03, 2.7993e-03, -5.3685e-04,\n -1.5767e-03, 1.2643e-03, -1.3289e-03, 4.6217e-03, 1.0089e-04,\n 1.0454e-03, -2.9142e-03, -6.6415e-03, 1.3585e-03, -2.5548e-03,\n 4.9184e-04, -6.1580e-03, -5.8222e-04, 1.1929e-03, 3.2182e-03,\n 2.7901e-03, 1.0416e-03, -1.9157e-03, 6.2199e-04, -1.2743e-03,\n 4.3123e-03, 2.8654e-03, 3.3194e-03, -2.5885e-03, -2.5000e-03,\n 1.8384e-03, 2.6530e-05, 6.0174e-04, 1.0170e-03, 3.0633e-03,\n 8.5593e-05, -1.7482e-03, -2.5823e-03, -2.5581e-04, 1.6970e-04,\n -6.6404e-04, 4.6979e-04, 3.7871e-03, 1.4306e-03, 4.5567e-03,\n -1.9911e-03, 3.8779e-03, 1.1811e-03, 4.9324e-05, -1.5073e-03,\n -1.4473e-03, 6.1911e-03, 6.3455e-04, 1.1103e-03, 4.4917e-03,\n -2.1614e-03, -1.6370e-03, -3.3256e-04, -4.1339e-03, 1.3441e-03,\n -1.2895e-03, -3.9316e-04, -9.4054e-05, -4.9425e-03, 4.2355e-03,\n -3.1536e-04, -5.1264e-04, 1.0058e-03, -3.4481e-04, -6.5958e-04,\n 5.9948e-04, 2.1236e-03, 4.3396e-03, 3.8643e-03, -5.0297e-03,\n 9.4408e-04, -1.0430e-04, -9.1392e-04, 1.1708e-03, 1.0898e-03,\n 5.0076e-04, -2.2531e-03, -3.7913e-04, -7.1798e-04, -1.3433e-03,\n -2.4837e-03, 2.3020e-03, -1.5468e-03, 6.7616e-04, -4.2825e-03,\n 3.5186e-03, -6.1230e-04, 1.8735e-03, 7.4973e-04, -2.7843e-03,\n 6.7976e-04, 4.9042e-04, 9.6869e-04, 4.0131e-04, -3.4258e-03,\n -6.1538e-04, -1.0370e-03, 1.7344e-03, 8.4898e-04, -2.3979e-03,\n 9.0075e-05, 9.6845e-04, -5.8001e-04, -9.6770e-04, 5.5850e-04,\n -3.0697e-03, 5.1806e-04, -9.3040e-05, -2.3227e-04, 3.2812e-03,\n 1.5281e-04, -2.5771e-03, -1.1427e-04, -1.4628e-03, -1.3649e-03,\n 1.6298e-03, -5.4455e-04, 2.1545e-03, -6.6279e-03, 1.8171e-03,\n 2.6769e-03, 7.7456e-04, 2.1885e-03, -2.9084e-03, 8.1732e-04,\n 1.9121e-03, -7.6861e-04, -9.1411e-04, 2.6676e-03, 2.2214e-03,\n -4.0538e-04, 7.3897e-03, -2.1827e-03, -1.3684e-03, 6.0480e-05,\n 5.2873e-04, -2.5772e-03, -3.2169e-03, 4.0062e-03, 5.3568e-04,\n 1.1066e-03, 2.9987e-04, 4.0535e-04, 2.8738e-03, -1.1457e-03,\n 5.1895e-03, 2.5198e-03, 2.7023e-03, -6.2563e-03, 5.4962e-04,\n -2.1011e-03, -3.9244e-03, 1.9395e-03, -4.9924e-03, -2.1870e-03,\n -6.7101e-04, -4.6129e-03, 1.4311e-03, 4.9469e-03, -1.1069e-03,\n 3.1267e-03, 1.3465e-03, -5.8433e-03, 3.5845e-04, -7.2975e-04,\n -2.9422e-03, -3.4296e-04, 2.7776e-03, 4.3762e-04, 3.9966e-03,\n -4.0793e-03, -6.9382e-04, 5.2140e-03, -2.1836e-03, 2.0646e-03,\n -2.4762e-03, 6.9855e-03, -1.5884e-03, 3.3833e-03, 1.6681e-03,\n -4.1923e-03, 3.0822e-03, -8.5816e-04, 2.7240e-03, 6.7617e-04,\n 2.9253e-03, -1.0307e-03, 9.0876e-04, -1.4525e-03, 1.1637e-03,\n -3.1277e-03, 4.4515e-04, 1.7156e-03, -1.1739e-03, -3.1055e-03,\n -1.0718e-04, 2.8353e-03, -1.0143e-03, 2.4220e-05, -7.3523e-04,\n 4.3188e-04, -3.4472e-04, 4.4539e-05, 1.2219e-03, 1.1653e-03,\n -1.6275e-04, 3.4436e-03, -1.8704e-03, 8.4748e-04, 2.8388e-03,\n -5.1770e-04, -2.0403e-03, -2.5884e-03, -1.6251e-03, 2.8823e-03,\n 7.2765e-04, 4.7929e-05], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([7.0074e-05, 1.4504e-04, 8.3383e-05, 1.1859e-04, 1.2920e-04, 1.9679e-04,\n 9.7126e-05, 2.1184e-04, 9.3077e-05, 1.3276e-04, 1.4439e-04, 1.7057e-04,\n 2.0528e-04, 5.0621e-05, 1.7115e-04, 1.4660e-04, 1.0723e-04, 1.0426e-04,\n 1.1966e-04, 1.5028e-04, 8.6547e-05, 1.0092e-04, 1.0460e-04, 1.7145e-04,\n 1.9520e-04, 1.2349e-04, 8.2096e-05, 1.1067e-04, 1.0495e-04, 1.3111e-04,\n 5.5104e-05, 7.7950e-05, 1.2478e-04, 1.0484e-04, 5.9182e-05, 1.6093e-04,\n 1.1783e-04, 9.4021e-05, 1.0210e-04, 6.1226e-05, 1.0742e-04, 8.8795e-05,\n 1.1545e-05, 3.8505e-04, 2.1320e-04, 8.9392e-05, 1.3369e-04, 9.2201e-05,\n 9.8845e-05, 2.4337e-04, 2.4291e-04, 8.4368e-05, 1.2219e-04, 1.3131e-04,\n 1.3309e-04, 7.9517e-05, 8.0080e-05, 2.0302e-04, 8.8592e-05, 1.0627e-04,\n 1.9657e-04, 7.2292e-05, 9.2231e-05, 7.9888e-05, 8.8906e-05, 8.0885e-05,\n 9.7430e-05, 8.8938e-05, 1.5968e-04, 1.1595e-04, 1.3217e-04, 1.0121e-04,\n 8.5084e-05, 1.0503e-04, 1.0652e-04, 8.9354e-05, 8.6435e-05, 1.0776e-04,\n 1.6549e-04, 1.2330e-04, 1.3901e-04, 8.9946e-05, 1.2272e-04, 5.1294e-05,\n 1.2073e-04, 8.6143e-05, 1.5607e-04, 7.8945e-05, 9.0697e-05, 7.9056e-05,\n 6.3406e-05, 1.3889e-04, 1.2547e-04, 1.1861e-04, 7.2026e-05, 1.3414e-04,\n 1.2940e-04, 1.5352e-04, 1.3253e-04, 6.1857e-05, 1.4387e-04, 1.4750e-04,\n 1.3748e-04, 1.2310e-04, 1.3260e-04, 1.6634e-04, 1.0714e-04, 2.6819e-04,\n 9.4639e-05, 1.2394e-04, 1.3573e-04, 1.5061e-04, 9.3981e-05, 1.5180e-04,\n 1.5079e-04, 7.0736e-05, 2.2836e-04, 1.1512e-04, 1.1376e-04, 8.1023e-05,\n 5.3079e-05, 1.7069e-04, 1.1929e-04, 1.6461e-04, 9.8384e-05, 7.8548e-05,\n 9.6869e-05, 1.5541e-04, 9.6130e-05, 1.7729e-04, 1.5989e-04, 8.8842e-05,\n 1.1358e-04, 1.0334e-04, 4.2443e-04, 8.7976e-05, 8.5958e-05, 1.2674e-04,\n 1.0585e-04, 1.2337e-04, 5.2615e-05, 9.3051e-05, 1.2193e-04, 3.0646e-04,\n 1.1842e-04, 1.0207e-04, 6.4831e-05, 1.4399e-04, 1.1192e-04, 2.2061e-04,\n 5.4530e-05, 1.4093e-04, 6.0766e-05, 2.1402e-04, 8.3344e-05, 1.2187e-04,\n 1.7187e-04, 1.3182e-04, 7.8284e-05, 1.0660e-04, 9.9909e-05, 5.0552e-05,\n 9.0536e-05, 1.1438e-04, 4.3105e-05, 1.5305e-04, 1.0640e-04, 6.1099e-05,\n 5.6723e-05, 9.0948e-05, 1.1566e-04, 1.2095e-04, 1.2001e-04, 1.1204e-04,\n 9.2650e-05, 8.1555e-05, 2.8590e-04, 9.1444e-05, 8.5898e-05, 1.4127e-04,\n 8.0728e-05, 8.6164e-05, 2.2110e-04, 1.1632e-04, 7.4638e-05, 1.2750e-04,\n 1.9868e-04, 5.6811e-05, 6.7145e-05, 6.1868e-05, 1.1567e-04, 1.4038e-04,\n 1.5722e-04, 1.2821e-04, 5.2427e-05, 4.8649e-05, 9.3024e-05, 1.4060e-04,\n 5.5707e-05, 5.2044e-05, 7.2048e-05, 1.5155e-04, 1.0763e-04, 6.7629e-05,\n 1.2562e-04, 1.1655e-04, 8.2433e-05, 6.0719e-05, 1.5768e-04, 7.6569e-05,\n 9.1103e-05, 1.3213e-04, 1.2172e-04, 9.4287e-05, 1.7442e-04, 5.8874e-05,\n 6.9990e-05, 5.6588e-05, 1.2041e-04, 1.0766e-04, 8.5789e-05, 6.2434e-05,\n 8.8617e-05, 5.0722e-05, 8.5241e-05, 1.0927e-04, 7.5563e-05, 6.8706e-05,\n 7.2967e-05, 1.3784e-04, 9.4495e-05, 6.5752e-05, 1.2094e-04, 9.3760e-05,\n 7.8932e-05, 8.3923e-05, 1.1143e-04, 9.9328e-06, 1.1026e-04, 3.1497e-04,\n 7.9266e-05, 1.6774e-04, 6.6584e-05, 1.2091e-04, 1.7644e-04, 1.2119e-04,\n 1.4287e-04, 7.0661e-05, 1.1560e-04, 7.6846e-05, 1.5888e-04, 7.5450e-05,\n 8.6594e-05, 1.4948e-04, 7.5536e-05, 8.8307e-05, 8.9062e-05, 1.1152e-04,\n 1.6335e-04, 8.2807e-05, 7.5441e-05, 1.3346e-04, 7.9091e-05, 1.3173e-04,\n 9.0572e-05, 5.9291e-05, 8.0745e-05, 1.1641e-04, 7.2718e-05, 1.2887e-04,\n 7.1999e-05, 1.0641e-04, 8.4208e-05, 8.4596e-05, 7.8250e-05, 8.6757e-05,\n 7.1574e-05, 1.6190e-04, 1.0638e-04, 7.9575e-05, 5.2369e-05, 7.7651e-05,\n 1.3080e-04, 1.3696e-04, 7.1464e-05, 1.3653e-04, 1.0137e-04, 1.0925e-04,\n 1.3910e-04, 1.2030e-04, 8.4626e-05, 9.9873e-05, 1.9634e-04, 7.7393e-05,\n 1.1715e-04, 6.7815e-05, 8.9233e-05, 1.6631e-04, 2.9779e-04, 1.1029e-04,\n 7.8269e-05, 8.9467e-05, 9.1954e-05, 9.3584e-05, 1.8218e-04, 5.3709e-05,\n 1.0845e-04, 1.0745e-04, 1.4827e-04, 1.1470e-04, 1.3671e-04, 1.2386e-04,\n 7.1886e-05, 6.7513e-05, 1.6065e-04, 9.0145e-05, 7.2257e-05, 6.1600e-05,\n 1.0623e-04, 1.4183e-04, 1.8646e-04, 1.2946e-04, 1.4374e-04, 1.2223e-04,\n 1.4368e-04, 2.4883e-04, 7.7722e-05, 1.6657e-04, 1.2976e-04, 1.2019e-04,\n 1.0395e-04, 1.6830e-04, 6.1886e-05, 7.3770e-05, 8.9919e-05, 1.1120e-04,\n 1.6449e-04, 6.0027e-05, 1.1665e-04, 1.2559e-04, 1.1855e-04, 1.0969e-04,\n 1.0153e-04, 8.8797e-05, 1.4891e-04, 1.8122e-04, 1.0569e-04, 8.9262e-05,\n 1.4533e-04, 1.0812e-04, 9.4332e-05, 9.9239e-05, 9.7699e-05, 8.0185e-05,\n 8.9492e-05, 8.7160e-05, 7.9531e-05, 9.7728e-05, 9.6746e-05, 6.0868e-05,\n 7.7299e-05, 1.8308e-04, 2.9931e-04, 7.1092e-05, 1.2848e-04, 8.2840e-05,\n 1.1665e-04, 1.1871e-04, 7.5354e-05, 9.3289e-05, 6.7168e-05, 1.0131e-04,\n 6.7999e-05, 1.2069e-04, 1.9095e-04, 8.4180e-05, 6.1250e-05, 6.2107e-05,\n 8.1529e-05, 9.3112e-05, 1.0182e-04, 1.2420e-04, 1.3021e-04, 9.4198e-05,\n 9.6339e-05, 1.3663e-04, 9.7814e-05, 1.7058e-04, 1.1370e-04, 1.2631e-04,\n 1.4003e-04, 8.5124e-05, 8.3957e-05, 9.8114e-05, 1.1317e-04, 9.6346e-05,\n 6.1299e-05, 1.2445e-04, 9.8999e-05, 1.7122e-04, 9.2840e-05, 7.0647e-05,\n 1.4140e-04, 9.3290e-05, 1.4372e-04, 1.0508e-04, 1.8823e-04, 7.2410e-05,\n 7.4380e-05, 1.4254e-04, 1.5408e-04, 1.2404e-04, 1.4913e-04, 1.0519e-04,\n 9.3122e-05, 5.5062e-05, 1.4983e-04, 6.6913e-05, 1.5380e-04, 1.9506e-04,\n 1.2074e-04, 7.8695e-05, 1.5530e-04, 1.1012e-04, 7.9530e-05, 8.4576e-05,\n 1.9803e-04, 1.0413e-04, 1.1369e-04, 1.0903e-04, 7.2321e-05, 7.8903e-05,\n 1.6028e-04, 1.5721e-04, 7.2938e-05, 1.1015e-04, 1.5247e-04, 9.4005e-05,\n 8.2716e-05, 1.2860e-04, 1.4278e-04, 1.2906e-04, 7.2575e-05, 3.3634e-04,\n 1.0359e-04, 1.1160e-04, 1.0005e-04, 1.2482e-04, 1.0124e-04, 1.3274e-04,\n 1.1458e-04, 1.5478e-04, 1.1226e-04, 2.1033e-04, 1.1482e-04, 6.9128e-05,\n 2.1986e-04, 1.1649e-04, 4.4193e-05, 1.5110e-04, 1.4598e-04, 6.3275e-05,\n 1.1023e-04, 8.4838e-05, 1.7669e-04, 8.7451e-05, 7.9065e-05, 1.3793e-04,\n 1.4937e-04, 1.3802e-04, 1.4302e-04, 2.4577e-04, 1.2199e-04, 1.6285e-04,\n 1.1368e-04, 1.3706e-04, 1.0941e-04, 8.7440e-05, 9.0840e-05, 7.6277e-05,\n 7.0350e-05, 6.5271e-05, 1.6748e-04, 1.1528e-04, 8.6007e-05, 1.1907e-04,\n 2.1413e-04, 9.6597e-05, 1.2586e-04, 1.0899e-04, 1.2783e-04, 1.1932e-04,\n 1.2731e-04, 8.3435e-05, 6.6158e-05, 8.9805e-05, 1.0796e-04, 6.2422e-05,\n 8.2399e-05, 8.2950e-05, 1.2189e-04, 7.9435e-05, 9.1512e-05, 1.0954e-04,\n 1.1297e-04, 5.8982e-05, 9.4424e-05, 1.2342e-04, 1.0090e-04, 1.1591e-04,\n 1.1623e-04, 1.4188e-05], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(3754.)",
|
| 17 |
+
"exp_avg": "tensor([ 7.7160e-04, 1.3040e-03, -2.3662e-03, 4.8894e-03, -2.0242e-03,\n 3.3004e-03, 2.3860e-03, 5.3317e-04, 4.3415e-03, -2.7706e-03,\n -1.2517e-05, -1.9277e-03, 3.9264e-03, -6.3004e-03, 7.8640e-03,\n -4.7496e-03, 1.4265e-03, -6.8923e-04, -3.2062e-03, 3.7283e-03,\n -7.2621e-04, 1.7896e-03, -5.3612e-04, -4.0039e-03, -4.5820e-05,\n 1.2867e-03, -2.9164e-03, 4.0551e-03, -1.3593e-03, -2.3412e-03,\n -1.9883e-03, 4.1118e-04, -2.1317e-03, 4.8038e-03, -6.8032e-04,\n 1.7480e-03, 2.1499e-03, -8.1815e-04, -1.9919e-03, -2.2117e-03,\n -4.2597e-03, 9.7371e-04, 5.6052e-45, 3.3223e-03, -2.9060e-03,\n 1.6294e-03, 1.4835e-03, -1.6589e-03, -7.4579e-03, 2.8788e-03,\n -4.2853e-03, -1.4587e-03, -1.0177e-03, 3.6057e-03, 8.7861e-04,\n 6.1890e-04, -3.3443e-03, -1.2706e-04, -5.4236e-03, -1.1410e-03,\n 4.1578e-05, -1.1166e-03, -1.7279e-03, 1.7546e-04, 2.4905e-03,\n -6.1887e-03, -2.1546e-04, -2.1125e-03, -1.2963e-03, 3.0656e-03,\n 9.3782e-04, 2.9983e-04, -1.2674e-03, 1.4855e-03, -2.8995e-03,\n -9.8855e-03, 9.4102e-04, -6.7174e-04, -1.3620e-04, 2.4594e-03,\n 2.4183e-03, -1.1513e-03, -1.1123e-03, 1.5302e-03, 7.1841e-04,\n -2.8581e-03, -1.2320e-03, 4.4659e-03, 2.1032e-03, -1.9214e-04,\n 8.8644e-04, 2.0765e-03, 3.3214e-04, 9.1590e-04, -5.6989e-04,\n 2.1972e-03, -2.1546e-03, 5.5269e-04, 4.3300e-03, 5.6197e-04,\n 3.4505e-03, 9.7141e-04, -2.1541e-03, -6.3728e-04, 1.8042e-03,\n -3.2110e-03, -6.5707e-04, 1.6689e-03, 4.7072e-04, -5.2896e-03,\n -2.0619e-03, -2.4624e-04, 3.1752e-03, -2.4248e-03, -1.2695e-03,\n 3.5667e-03, 4.5619e-04, 2.1444e-03, 8.5300e-04, -3.7685e-03,\n 2.6372e-03, 2.0647e-03, 1.1467e-03, -3.2503e-03, -1.2159e-03,\n -9.6088e-03, -1.7202e-03, 1.2697e-03, 2.8953e-03, 9.3350e-04,\n -1.3275e-03, -1.7075e-03, 1.2581e-03, 3.8685e-03, -2.1452e-03,\n -4.2850e-05, 1.6822e-03, -1.8715e-03, 1.8022e-03, -6.8795e-04,\n -2.8473e-03, -2.0198e-03, -5.5300e-03, -1.8803e-03, 2.3764e-03,\n 2.6969e-03, -1.1176e-04, -4.1607e-03, 5.9384e-04, -1.7872e-03,\n 6.2244e-04, 1.1622e-03, -2.4560e-03, 2.9107e-03, -3.6956e-03,\n -3.9120e-05, -3.6278e-04, 5.5398e-04, -2.9926e-04, -4.3042e-03,\n -6.4569e-04, 7.0913e-04, -7.8174e-04, -4.8079e-04, 2.5437e-03,\n 5.7473e-04, 7.0643e-03, -3.0579e-04, 1.0467e-03, 8.0812e-04,\n 1.2740e-03, 3.5489e-03, -2.9815e-05, -3.9310e-03, -6.3623e-03,\n 1.1979e-03, 1.1794e-03, -1.6646e-03, 8.1257e-05, -1.6782e-03,\n -1.2053e-03, 3.4163e-03, -7.4550e-03, 2.1566e-03, 6.8230e-03,\n -5.4330e-05, -3.4117e-03, -4.5155e-04, 3.0909e-03, 2.6941e-03,\n -1.3828e-03, 2.5272e-04, 7.0689e-04, 4.2052e-04, -2.3230e-03,\n 1.5551e-03, 3.0518e-03, -1.8512e-04, -9.7569e-04, -6.0463e-04,\n -1.2644e-03, 3.0430e-04, 4.2218e-03, 1.5996e-04, 1.1148e-03,\n -1.7439e-03, 1.6269e-03, -1.7967e-03, 8.1291e-04, -8.4499e-04,\n 2.4241e-04, -3.0643e-03, 5.6690e-04, -1.2533e-03, 6.0960e-03,\n 4.8880e-03, 5.1305e-04, 5.9021e-04, -9.4489e-04, -2.5730e-03,\n 1.3347e-03, 1.8405e-03, 4.0430e-06, 2.4464e-05, -3.4420e-03,\n -2.6913e-03, 2.3731e-03, 2.5068e-03, -7.3211e-03, -1.6372e-03,\n -2.1418e-03, -3.2732e-03, 1.9180e-03, 3.3576e-03, 2.1460e-03,\n -3.3660e-03, -3.9859e-03, 5.6052e-45, -1.0781e-03, 9.9942e-04,\n 2.2700e-03, -1.8333e-03, -3.0155e-03, -1.0689e-03, -2.7424e-03,\n -4.9141e-04, -2.2528e-04, -1.6977e-03, -2.3815e-03, -9.0089e-04,\n -5.3909e-03, 6.5262e-05, 1.2336e-03, -4.4031e-03, -2.1473e-03,\n -7.2800e-04, 6.0230e-03, 6.5287e-05, -1.2790e-03, 4.3949e-03,\n 2.8618e-03, 2.5246e-03, -1.4632e-03, -6.5062e-04, -5.3939e-03,\n 9.4718e-05, 3.9017e-03, -2.2273e-03, 3.8537e-03, -3.6578e-03,\n 1.5070e-03, 4.4956e-03, 5.2696e-03, -2.8577e-03, 2.3715e-04,\n -9.5477e-04, -1.4858e-04, -1.3947e-03, -1.1276e-04, -1.0575e-03,\n -3.6974e-03, -1.7996e-03, -1.6219e-03, -6.4789e-05, -8.1922e-04,\n -3.7269e-03, -5.2884e-04, -6.9015e-04, -7.4773e-04, -1.1460e-03,\n 1.9437e-04, 2.3410e-03, -4.2450e-03, 3.7280e-03, -8.1881e-04,\n -1.8650e-03, 2.0795e-03, -2.5840e-03, 4.2707e-03, -6.2016e-04,\n 8.4056e-04, -3.8160e-03, -1.3612e-02, 8.9288e-04, -1.9357e-03,\n 1.1047e-03, -4.7005e-03, -1.5485e-03, 3.7860e-03, 6.6370e-03,\n 3.5014e-03, 2.6056e-03, -2.2407e-03, 1.5339e-03, -4.6704e-04,\n 9.8946e-03, 5.3981e-03, 4.6724e-03, -3.8002e-03, -3.8323e-03,\n 3.5750e-03, 4.2881e-04, 7.0762e-04, 1.3055e-03, 4.2602e-03,\n -1.8431e-04, -2.2276e-03, -2.4152e-03, -3.0788e-04, 4.8192e-04,\n -8.7383e-04, 1.8175e-05, 2.9389e-03, 1.9357e-03, 6.9415e-03,\n -6.1472e-04, 2.6517e-03, 1.5553e-03, 3.6107e-04, -2.0616e-03,\n -2.4208e-03, 5.9621e-03, -2.1774e-04, 1.8586e-03, 2.5153e-03,\n -2.3252e-03, -2.7408e-03, -6.6591e-04, -4.1029e-03, 2.4775e-03,\n -1.2865e-03, 4.9711e-04, 7.6055e-04, -6.1015e-03, 8.1187e-03,\n -1.5345e-03, -1.3157e-03, 1.0473e-03, -1.9037e-04, -3.0085e-04,\n 8.0487e-05, 4.7397e-03, 4.4051e-03, 8.9298e-03, -4.7659e-03,\n 9.5638e-04, -4.0211e-04, -5.0086e-04, 1.1021e-03, 7.4128e-04,\n 1.8604e-04, -3.7800e-03, -7.0255e-04, -9.6674e-04, -1.1067e-03,\n -3.2719e-03, 2.5501e-03, -3.5795e-03, 1.4396e-03, -4.1656e-03,\n 3.5963e-03, -4.4376e-04, 2.4227e-03, 4.3935e-04, -4.5502e-03,\n 4.0885e-04, 8.4902e-04, 9.4789e-04, 3.8941e-05, -3.6858e-03,\n -5.7405e-04, -1.4543e-03, 2.0796e-03, 1.4574e-03, -2.6208e-03,\n -3.5136e-04, 2.2179e-03, -1.7194e-03, -5.6474e-04, 6.7238e-04,\n -4.7846e-03, 4.1308e-04, 1.5819e-05, 1.7257e-04, 3.3212e-03,\n -5.9581e-04, -2.8856e-03, -3.1061e-05, -3.0991e-03, -1.4999e-03,\n 9.5361e-04, -5.9528e-05, 1.3885e-03, -9.4598e-03, 2.1523e-03,\n 1.6102e-02, 3.0946e-04, 4.9617e-03, -3.6807e-03, 1.0827e-03,\n 2.2677e-03, -4.6617e-04, -8.5905e-04, 3.0527e-03, 6.5455e-03,\n -1.3125e-03, 6.6433e-03, -4.3019e-03, -2.4292e-03, -1.7503e-04,\n 7.4715e-04, -2.6936e-03, -2.8560e-03, 4.8660e-03, 8.3386e-04,\n 1.6460e-03, -5.1924e-04, 6.7852e-04, 3.1081e-03, -1.5572e-04,\n 8.4553e-03, 2.2007e-03, 3.0590e-03, -1.0524e-02, 1.4188e-03,\n -3.2325e-03, -5.0267e-03, 1.5046e-03, -8.0182e-03, -1.5211e-04,\n -1.3177e-03, -4.5805e-03, 1.5397e-03, 5.4909e-03, -1.7596e-03,\n 5.5242e-03, 1.9978e-03, -5.6773e-03, 4.1372e-04, -7.0135e-04,\n -4.0911e-03, -8.0235e-04, 3.1670e-03, 2.0539e-03, 3.5799e-03,\n -4.9701e-03, -5.2961e-04, 4.7394e-03, -4.9134e-03, 1.9910e-03,\n -3.1292e-03, 6.8154e-03, -2.1778e-03, 2.6729e-03, 1.8616e-03,\n -5.2655e-03, 4.5440e-03, -5.2852e-05, 5.8635e-03, 1.0561e-03,\n 3.1252e-03, -2.9914e-03, 1.0923e-03, 1.1463e-04, 1.2250e-03,\n -3.1164e-03, 1.0014e-03, 2.3965e-03, -1.7255e-03, -2.6551e-03,\n 1.4551e-04, 2.7656e-03, -3.1878e-03, 3.4229e-04, -9.6082e-04,\n 1.1574e-03, -2.8291e-04, 3.4666e-04, 1.6246e-03, 1.3716e-03,\n -2.5401e-04, 4.4476e-03, -1.2049e-03, 1.6319e-03, 2.3714e-03,\n -1.0798e-04, -8.6871e-04, -2.9355e-03, -1.3730e-03, 2.6343e-03,\n 1.3303e-03, 5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([1.5008e-04, 1.1212e-04, 7.8531e-05, 8.6351e-05, 4.9547e-05, 1.4643e-04,\n 1.0496e-04, 1.2192e-04, 1.5315e-04, 1.6832e-04, 4.3243e-04, 2.2076e-04,\n 1.7011e-04, 9.9410e-05, 1.5039e-04, 1.0115e-04, 8.0357e-05, 1.3843e-04,\n 9.9023e-05, 2.6802e-04, 2.2868e-04, 7.9044e-05, 8.0235e-05, 1.3376e-04,\n 3.3128e-04, 1.2183e-04, 1.4694e-04, 2.0388e-04, 2.0614e-04, 9.8352e-05,\n 1.4881e-04, 3.4606e-04, 1.5541e-04, 1.0240e-04, 5.7991e-05, 6.7269e-05,\n 3.5823e-05, 5.7921e-05, 4.6627e-05, 1.2137e-04, 4.4832e-05, 1.0121e-04,\n 4.5298e-07, 2.6096e-04, 2.4684e-04, 1.1921e-04, 9.5044e-05, 2.8794e-05,\n 7.6100e-04, 1.5641e-04, 2.3569e-04, 4.3312e-05, 7.7452e-05, 1.5300e-04,\n 1.1889e-04, 8.2667e-05, 2.5563e-04, 7.4895e-05, 9.5065e-05, 3.9052e-05,\n 1.2988e-04, 9.2172e-05, 1.8237e-04, 8.2074e-05, 8.2065e-05, 1.2653e-04,\n 1.2091e-04, 5.9101e-05, 1.1426e-04, 6.2092e-05, 2.0839e-04, 1.1987e-04,\n 3.0820e-04, 1.9356e-04, 5.7778e-05, 1.8082e-04, 2.3508e-04, 7.1567e-05,\n 1.6710e-04, 1.5812e-04, 2.1694e-04, 1.0033e-04, 7.6199e-05, 3.9233e-05,\n 1.3128e-04, 4.3462e-04, 1.4630e-04, 1.9800e-04, 4.7745e-05, 6.3726e-04,\n 1.4929e-04, 1.2823e-04, 1.0497e-04, 1.2605e-04, 3.6839e-04, 1.2646e-04,\n 1.5586e-04, 3.2080e-04, 1.0816e-04, 5.4009e-04, 1.2864e-04, 2.4625e-04,\n 1.4530e-04, 8.1422e-05, 1.6853e-04, 2.2431e-04, 9.2123e-05, 3.9484e-04,\n 9.9311e-05, 2.0833e-04, 2.1080e-04, 2.1556e-04, 1.4203e-04, 1.0466e-04,\n 1.0585e-04, 6.7601e-05, 1.1548e-04, 1.4310e-04, 1.1487e-04, 2.3364e-04,\n 9.0761e-05, 1.5257e-04, 4.2863e-04, 2.0046e-04, 3.8556e-04, 8.6986e-04,\n 1.1415e-04, 1.0945e-04, 4.6482e-04, 3.2417e-04, 9.2523e-05, 1.3077e-04,\n 1.9936e-04, 6.6740e-05, 2.7450e-04, 1.0049e-04, 1.9506e-04, 2.5612e-04,\n 5.6748e-05, 5.7631e-05, 7.6348e-05, 1.0554e-04, 1.5588e-04, 1.2657e-04,\n 1.3067e-04, 1.1113e-04, 4.3638e-05, 2.0711e-04, 1.3609e-04, 1.4579e-04,\n 4.2147e-05, 2.7833e-04, 6.9329e-05, 1.1215e-04, 6.2416e-05, 4.6229e-05,\n 1.9809e-04, 2.4944e-04, 3.7230e-04, 1.3771e-04, 2.3319e-04, 6.0815e-05,\n 3.8034e-04, 6.8416e-05, 1.1893e-04, 2.9795e-04, 1.6945e-04, 8.3181e-05,\n 1.6395e-04, 7.1261e-05, 4.3902e-05, 1.4040e-04, 1.1279e-04, 1.7588e-04,\n 1.8230e-04, 8.1033e-05, 1.4489e-04, 8.0818e-05, 8.4282e-05, 9.7696e-05,\n 8.1208e-05, 1.5788e-04, 4.2313e-04, 1.3775e-04, 1.2576e-03, 1.1744e-04,\n 2.1754e-04, 1.4490e-04, 8.9758e-05, 4.6918e-05, 8.4593e-05, 1.9804e-04,\n 7.6860e-05, 6.0739e-05, 1.8100e-04, 6.6628e-05, 1.1244e-04, 1.0090e-04,\n 4.0548e-05, 4.8707e-05, 1.1392e-04, 9.3344e-05, 1.0674e-04, 6.0229e-05,\n 2.6446e-05, 9.1269e-05, 1.2590e-04, 9.6631e-05, 1.6662e-04, 3.3997e-05,\n 2.3992e-04, 1.3311e-04, 2.4519e-05, 6.8327e-05, 1.4230e-04, 1.8427e-04,\n 4.6950e-05, 1.3501e-03, 1.3380e-04, 3.3874e-04, 6.4147e-05, 1.5429e-04,\n 1.5352e-04, 1.1460e-04, 1.5247e-04, 2.4766e-04, 7.0937e-05, 6.3753e-05,\n 5.4612e-05, 9.6471e-05, 1.4077e-04, 6.6186e-05, 3.9512e-04, 1.9015e-04,\n 4.6394e-05, 9.9259e-05, 2.1096e-04, 2.5073e-06, 7.2423e-05, 3.3032e-04,\n 6.9476e-05, 2.0797e-04, 4.2680e-04, 6.8730e-05, 1.3454e-04, 1.8126e-04,\n 6.8026e-05, 1.0305e-04, 1.6341e-04, 9.0773e-05, 2.0162e-04, 1.8000e-04,\n 1.5513e-04, 1.5976e-04, 9.2760e-05, 1.1210e-04, 1.2171e-04, 6.9636e-05,\n 5.8416e-05, 1.4147e-04, 1.1298e-04, 1.1833e-04, 1.1803e-04, 7.3742e-05,\n 8.0329e-05, 2.9704e-04, 2.0493e-04, 1.1816e-04, 1.1418e-04, 2.5201e-04,\n 3.7326e-04, 3.2598e-04, 8.5968e-05, 1.2914e-04, 4.5160e-05, 6.2092e-05,\n 1.1592e-04, 1.2628e-04, 1.2193e-04, 1.2614e-04, 1.4536e-04, 1.0140e-04,\n 8.6098e-05, 1.1063e-04, 1.1872e-04, 2.4010e-04, 1.2460e-04, 5.1434e-05,\n 3.2875e-04, 2.3649e-04, 4.1104e-05, 4.1985e-05, 1.4060e-04, 4.3212e-05,\n 1.7074e-04, 1.0808e-04, 8.2399e-05, 1.1587e-04, 1.9003e-04, 2.1891e-04,\n 7.5238e-05, 1.0612e-04, 2.9415e-04, 9.5023e-05, 7.7975e-05, 1.7007e-04,\n 7.4613e-05, 9.4954e-05, 4.6645e-04, 3.0864e-04, 2.3072e-04, 1.3523e-04,\n 1.3747e-04, 2.0375e-04, 6.0313e-05, 3.1479e-04, 1.6169e-04, 9.6352e-05,\n 1.0183e-04, 1.1618e-04, 4.6787e-04, 1.8917e-04, 1.0362e-04, 1.7963e-04,\n 2.7918e-04, 1.9039e-04, 1.2476e-04, 1.9409e-04, 6.6259e-05, 1.7853e-04,\n 1.2223e-04, 1.0495e-04, 2.8405e-05, 8.8380e-05, 1.1422e-04, 8.4255e-05,\n 5.4135e-05, 8.1645e-05, 1.5993e-04, 1.3128e-04, 1.0155e-04, 8.4138e-05,\n 1.6192e-04, 2.0471e-04, 6.8476e-05, 1.7862e-04, 1.6721e-04, 1.1095e-04,\n 9.8829e-05, 1.5187e-04, 1.1730e-04, 1.3743e-04, 9.4432e-05, 1.0163e-04,\n 1.5403e-04, 1.1195e-04, 2.4317e-04, 7.0917e-05, 4.6811e-05, 7.1126e-05,\n 5.7911e-05, 5.2582e-04, 2.2461e-04, 4.1487e-04, 1.2134e-04, 1.4771e-04,\n 3.8427e-05, 2.8929e-04, 5.6262e-05, 5.8667e-05, 1.2499e-04, 1.3180e-04,\n 1.8688e-04, 1.0047e-04, 9.5555e-05, 1.3876e-04, 6.7879e-05, 6.9723e-05,\n 1.5484e-04, 8.1825e-05, 1.2806e-04, 1.1531e-04, 1.5564e-04, 9.1513e-05,\n 1.5632e-04, 1.0964e-04, 7.1628e-05, 1.0964e-04, 8.7683e-05, 8.4945e-05,\n 5.6843e-05, 6.7362e-05, 1.8456e-04, 4.0194e-04, 8.4105e-05, 6.3249e-05,\n 1.5043e-04, 2.8577e-04, 6.9188e-05, 3.0829e-04, 1.9449e-04, 7.0444e-05,\n 1.8698e-04, 9.9994e-05, 1.0379e-04, 7.7764e-05, 5.8453e-04, 4.0962e-05,\n 1.5510e-04, 1.4791e-04, 1.3857e-04, 3.3306e-05, 2.1477e-04, 1.1236e-04,\n 1.3217e-04, 1.4266e-03, 3.1382e-04, 1.9449e-04, 1.3420e-04, 9.6479e-05,\n 1.1880e-04, 7.9835e-05, 1.3712e-04, 1.8092e-04, 4.5249e-04, 2.0178e-04,\n 9.7528e-05, 1.9076e-04, 2.0103e-04, 7.5591e-05, 8.0928e-05, 8.5031e-05,\n 7.5159e-05, 1.7973e-04, 4.2073e-04, 1.5141e-04, 1.2799e-04, 2.2278e-04,\n 9.2718e-05, 4.4179e-05, 2.7707e-04, 1.1651e-04, 1.3489e-04, 4.2370e-04,\n 1.3392e-04, 1.8274e-04, 2.0440e-04, 9.1714e-05, 1.4262e-04, 9.7039e-05,\n 3.5768e-04, 8.2991e-05, 8.8835e-05, 2.6280e-04, 1.5163e-04, 9.7609e-05,\n 1.6682e-04, 1.0741e-04, 7.1708e-05, 9.4124e-05, 1.5912e-04, 1.2243e-04,\n 1.4972e-04, 1.2197e-04, 9.1337e-05, 1.3235e-04, 1.0268e-04, 1.5701e-04,\n 2.0153e-04, 2.4704e-04, 1.1837e-04, 1.4911e-04, 1.8082e-04, 6.0772e-05,\n 1.8118e-04, 1.9609e-04, 1.8680e-04, 2.0598e-04, 3.8925e-04, 9.0047e-05,\n 4.7032e-05, 1.0833e-03, 8.4632e-05, 4.1683e-05, 1.0458e-04, 4.3125e-05,\n 6.8320e-05, 1.2270e-04, 2.3315e-04, 1.2384e-04, 7.0609e-05, 8.4629e-05,\n 3.4362e-04, 7.9066e-05, 9.1709e-05, 2.9072e-04, 1.2121e-04, 1.8641e-04,\n 8.9132e-05, 1.5969e-04, 1.6069e-04, 1.2428e-04, 6.1748e-05, 7.0983e-05,\n 4.3194e-05, 1.1903e-04, 5.0146e-05, 1.0443e-04, 5.2720e-05, 7.6237e-05,\n 8.2963e-05, 5.7589e-07], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(3754.)",
|
| 22 |
+
"exp_avg": "tensor([ 3.1340e-04, 1.5078e-03, -1.5311e-03, 3.3885e-03, -1.2167e-03,\n 1.8104e-03, 1.7608e-03, 2.8365e-04, 3.0480e-03, -1.8589e-03,\n 2.7273e-04, -9.1347e-04, 3.3173e-03, -3.9714e-03, 6.0959e-03,\n -3.1750e-03, 5.5329e-04, -1.1568e-03, -2.0648e-03, 2.0536e-03,\n -6.7329e-04, 1.9627e-03, -3.0343e-04, -3.3205e-03, -6.9707e-04,\n 1.4002e-03, -1.8569e-03, 2.7697e-03, -6.7034e-04, -1.4778e-03,\n -8.2542e-04, 6.8930e-06, -1.1938e-03, 2.9936e-03, -5.2233e-04,\n 1.7714e-03, 1.9536e-03, -3.6594e-04, -1.3797e-03, -6.6086e-04,\n -3.8710e-03, 1.0413e-03, 5.6052e-45, 2.7171e-03, -2.0063e-03,\n 1.1213e-03, 1.9027e-03, -6.1864e-04, -3.3788e-03, 3.2297e-03,\n -2.6606e-03, -1.1606e-03, -5.4530e-04, 2.1993e-03, 4.7747e-04,\n 3.8167e-04, -1.5517e-03, -3.7696e-04, -3.7963e-03, -7.2683e-04,\n 2.6763e-04, -7.7014e-04, -8.5503e-04, 1.9647e-04, 2.0301e-03,\n -2.9644e-03, 4.8538e-05, -1.5710e-03, -1.1697e-03, 2.4692e-03,\n 7.1881e-04, -2.7946e-04, -5.0915e-04, 7.9675e-04, -2.2495e-03,\n -4.6994e-03, 5.3181e-04, -3.4663e-04, 1.2100e-04, 1.4586e-03,\n 1.6341e-03, -7.9583e-04, -9.0918e-04, 1.1837e-03, 6.5911e-04,\n -1.1513e-03, -6.5866e-04, 2.7545e-03, 1.3142e-03, -4.6094e-04,\n 7.0011e-04, 1.5516e-03, 1.2527e-04, 8.3808e-04, 2.5505e-05,\n 1.4103e-03, -1.2191e-03, 7.6654e-04, 3.5095e-03, 5.3562e-05,\n 1.8547e-03, 1.1160e-03, -1.3677e-03, -4.4275e-04, 1.3274e-03,\n -2.3247e-03, -5.0454e-04, 1.9675e-03, 1.0173e-03, -2.5539e-03,\n -2.5292e-03, -7.3418e-04, 2.0791e-03, -1.3743e-03, -1.6409e-03,\n 2.8246e-03, 7.4639e-04, 1.4879e-03, 4.0380e-04, -1.6992e-03,\n 1.8407e-03, 7.0447e-04, 7.0044e-04, -1.9926e-03, -3.6507e-04,\n -3.5151e-03, -2.1012e-03, 1.3068e-03, 1.9629e-03, 6.8924e-04,\n -6.6260e-04, -1.1318e-03, 2.2180e-03, 3.0311e-03, -1.0950e-03,\n 3.7329e-04, 1.1395e-03, -8.1361e-04, 1.2257e-03, -2.7498e-04,\n -1.6332e-03, -1.5663e-03, -2.8554e-03, -1.5686e-03, 1.4305e-03,\n 2.0382e-03, -1.5393e-04, -2.2198e-03, 8.3668e-04, -1.1719e-03,\n 8.1216e-04, 8.9781e-04, -1.3816e-03, 1.5340e-03, -2.5028e-03,\n 7.2589e-04, -5.8839e-04, 1.5652e-04, -4.2265e-04, -3.0473e-03,\n 3.7380e-04, 2.5915e-04, -1.5265e-04, 4.7286e-05, 1.9769e-03,\n 8.2143e-04, 4.4262e-03, 3.4631e-04, 9.1331e-04, 7.7782e-04,\n 1.8175e-03, 2.8886e-03, 9.1487e-05, -2.2841e-03, -2.8497e-03,\n 1.3829e-03, 5.2163e-04, -1.0206e-03, 4.4921e-04, -9.7988e-04,\n -5.4127e-04, 2.2529e-03, -4.7562e-03, 1.1163e-03, 2.5087e-03,\n 1.5737e-04, -2.6210e-03, -4.7379e-04, 2.2997e-03, 1.9262e-03,\n -2.1037e-03, -3.0257e-04, 1.1032e-03, 5.9517e-04, -1.0937e-03,\n 1.1747e-03, 2.3809e-03, 1.7038e-04, -4.0480e-04, -2.2219e-04,\n -6.4624e-04, 6.0702e-04, 3.0526e-03, 4.8754e-04, 9.3667e-04,\n -1.0924e-03, 9.0613e-04, -1.0389e-03, 4.9409e-04, -8.5187e-04,\n 4.8351e-04, -2.2227e-03, 7.5815e-05, -9.8750e-04, 4.2210e-03,\n 2.2675e-03, 9.2186e-05, 5.3745e-04, -2.6832e-04, -1.3157e-03,\n 8.7075e-04, 1.5035e-03, 3.2614e-04, 1.0224e-04, -3.2340e-03,\n -1.3415e-03, 1.4832e-03, 1.4200e-03, -5.8264e-03, -1.1446e-03,\n -1.2193e-03, -2.5077e-03, 6.2689e-04, 2.1236e-03, 1.6122e-03,\n -1.9573e-03, -2.0171e-03, 5.6052e-45, -8.0794e-04, 3.7351e-04,\n 2.2024e-03, -5.5900e-04, -1.3808e-03, -5.7059e-04, -2.4716e-03,\n -4.1422e-05, -5.2759e-04, -1.3827e-03, -1.2282e-03, -3.0077e-04,\n -3.6422e-03, -2.2744e-04, 4.9555e-04, -3.1790e-03, -1.2193e-03,\n -7.1939e-04, 3.8426e-03, 2.8469e-04, -1.0128e-03, 2.9484e-03,\n 1.3805e-03, 1.1725e-03, -9.6891e-04, -1.2614e-04, -3.6874e-03,\n 3.4926e-04, 1.9740e-03, -1.6461e-03, 2.6902e-03, -2.0543e-03,\n 9.0310e-04, 3.1688e-03, 4.0188e-03, -2.1498e-03, 4.2593e-04,\n -1.2021e-03, 3.5675e-04, -6.1591e-04, -2.2420e-05, -1.0240e-03,\n -1.7504e-03, -2.8807e-04, -7.4166e-04, 1.6541e-04, -6.7121e-04,\n -2.1316e-03, -1.0904e-04, -4.4245e-04, -5.0557e-04, -8.0854e-05,\n 3.9512e-04, 2.2559e-03, -2.9807e-03, 2.5036e-03, -1.4486e-04,\n -1.1094e-03, 1.2188e-03, -1.2359e-03, 4.2653e-03, 6.1421e-05,\n 9.4967e-04, -2.3160e-03, -6.3827e-03, 7.5960e-04, -1.5103e-03,\n 5.5231e-04, -4.2421e-03, -4.5140e-04, 1.6701e-03, 3.2534e-03,\n 2.2249e-03, 9.5161e-04, -1.5469e-03, 7.4200e-04, -6.6196e-04,\n 4.3263e-03, 3.4745e-03, 3.7738e-03, -2.3658e-03, -2.6128e-03,\n 2.5118e-03, 2.2939e-04, 4.8026e-04, 7.7495e-04, 3.1758e-03,\n -1.3887e-04, -1.7748e-03, -1.4363e-03, -8.1634e-05, 5.6094e-04,\n -7.0101e-04, 3.2539e-04, 2.6257e-03, 1.6092e-03, 4.1078e-03,\n -8.7386e-04, 2.2655e-03, 1.0350e-03, 6.9178e-04, -1.1001e-03,\n -1.4457e-03, 4.1984e-03, 3.4724e-04, 1.3709e-03, 3.3049e-03,\n -2.0050e-03, -2.0017e-03, -4.1468e-04, -3.3816e-03, 1.7064e-03,\n -1.1192e-03, 5.4087e-05, 1.4356e-04, -3.8651e-03, 4.0134e-03,\n 1.2850e-05, -1.5805e-04, 6.3360e-04, -7.1627e-05, -3.9647e-04,\n 3.1872e-04, 2.1834e-03, 3.4697e-03, 4.4842e-03, -3.6719e-03,\n 1.2976e-03, -2.6140e-04, -4.8661e-04, 1.0079e-03, 7.6085e-04,\n 5.4187e-04, -2.3166e-03, -5.7891e-04, -5.9186e-04, -9.0399e-04,\n -2.3343e-03, 2.1573e-03, -1.8247e-03, 8.0797e-04, -3.0468e-03,\n 3.0873e-03, -4.8563e-05, 1.3962e-03, 4.8474e-04, -3.0042e-03,\n 5.7274e-04, 7.4856e-04, 4.9690e-04, 2.7544e-04, -2.2524e-03,\n -5.4601e-04, -8.3045e-04, 1.9940e-03, 9.5628e-04, -1.8061e-03,\n -3.3951e-05, 1.1408e-03, -8.5350e-04, -6.2222e-04, 8.9245e-04,\n -2.8325e-03, 4.5162e-04, -2.3351e-04, 9.8039e-05, 2.3589e-03,\n 6.3249e-04, -2.2450e-03, 3.6130e-05, -1.7863e-03, -1.4932e-03,\n 1.0980e-03, -2.0726e-04, 1.9900e-03, -6.8766e-03, 1.1978e-03,\n 4.4160e-03, 9.2694e-04, 3.6603e-03, -2.5570e-03, 6.4829e-04,\n 2.1436e-03, -5.6722e-04, -4.6069e-04, 1.9338e-03, 2.9650e-03,\n -6.8450e-04, 5.0524e-03, -2.2677e-03, -1.2260e-03, 1.0954e-04,\n 3.6933e-04, -2.1539e-03, -2.1214e-03, 3.2411e-03, 7.6650e-04,\n 1.2741e-03, 6.1480e-05, 2.9511e-04, 2.4374e-03, -5.3448e-04,\n 5.5773e-03, 2.0896e-03, 2.6139e-03, -7.0862e-03, 8.8690e-04,\n -1.9266e-03, -3.6813e-03, 1.7478e-03, -5.6308e-03, -1.1697e-03,\n -2.5246e-04, -3.0015e-03, 1.2168e-03, 5.3133e-03, -9.8434e-04,\n 2.9913e-03, 5.7846e-04, -3.9288e-03, 3.5435e-04, -6.0750e-04,\n -2.7148e-03, -1.9133e-04, 2.5937e-03, 9.7592e-04, 2.3911e-03,\n -3.1085e-03, -4.6123e-04, 4.1392e-03, -2.2295e-03, 2.2290e-03,\n -1.6726e-03, 6.7735e-03, -1.1915e-03, 2.7187e-03, 1.5680e-03,\n -3.0552e-03, 2.9305e-03, -2.4060e-04, 2.8701e-03, 6.7574e-04,\n 2.7222e-03, -1.1957e-03, 8.8080e-04, -7.2015e-04, 9.8515e-04,\n -3.0090e-03, 6.2000e-04, 1.3742e-03, -1.2307e-03, -2.5617e-03,\n 3.3954e-05, 2.1705e-03, -1.3264e-03, -1.0963e-04, -5.0142e-04,\n 5.0188e-04, -5.2228e-04, 1.1891e-04, 8.0096e-04, 9.3244e-04,\n -5.7166e-05, 3.1716e-03, -1.6352e-03, 7.8608e-04, 1.7404e-03,\n -3.2925e-04, -1.3053e-03, -2.1276e-03, -9.8902e-04, 2.2755e-03,\n 5.9436e-04, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([5.7775e-05, 5.9874e-05, 4.8117e-05, 5.2359e-05, 3.8917e-05, 8.9920e-05,\n 5.1191e-05, 1.0419e-04, 8.0808e-05, 7.9185e-05, 1.3717e-04, 1.0651e-04,\n 1.1119e-04, 4.4843e-05, 9.6851e-05, 6.2945e-05, 4.3311e-05, 7.0788e-05,\n 5.7149e-05, 9.4629e-05, 1.0014e-04, 4.7240e-05, 4.6569e-05, 7.9334e-05,\n 1.6506e-04, 8.1834e-05, 6.5864e-05, 8.9627e-05, 8.2663e-05, 7.0700e-05,\n 5.1779e-05, 7.1097e-05, 8.4501e-05, 5.1453e-05, 3.4875e-05, 6.0735e-05,\n 3.3072e-05, 3.6465e-05, 3.5467e-05, 4.5981e-05, 3.3229e-05, 7.2411e-05,\n 2.8722e-08, 2.1343e-04, 1.3329e-04, 4.8522e-05, 7.8888e-05, 2.2890e-05,\n 1.2252e-04, 1.6927e-04, 1.5021e-04, 3.3062e-05, 4.5198e-05, 6.7798e-05,\n 9.5175e-05, 3.5527e-05, 7.5702e-05, 6.1683e-05, 5.5832e-05, 2.9225e-05,\n 9.7505e-05, 4.1261e-05, 6.6530e-05, 4.7636e-05, 3.9579e-05, 5.4403e-05,\n 5.4750e-05, 3.6884e-05, 6.7715e-05, 4.8072e-05, 8.2722e-05, 7.4256e-05,\n 7.3702e-05, 7.1169e-05, 4.1910e-05, 5.3171e-05, 6.0016e-05, 4.3193e-05,\n 8.4910e-05, 1.0449e-04, 1.0019e-04, 5.8106e-05, 5.5678e-05, 2.2881e-05,\n 8.7020e-05, 9.8484e-05, 7.0018e-05, 8.3585e-05, 3.2474e-05, 1.0770e-04,\n 5.6623e-05, 6.4238e-05, 8.2756e-05, 6.0562e-05, 9.5516e-05, 5.8306e-05,\n 8.2546e-05, 1.6380e-04, 7.1499e-05, 9.1250e-05, 6.2368e-05, 1.0171e-04,\n 7.7311e-05, 5.5167e-05, 7.4493e-05, 1.2881e-04, 5.8689e-05, 2.2202e-04,\n 4.4922e-05, 6.3746e-05, 9.1191e-05, 1.0153e-04, 6.6953e-05, 5.8312e-05,\n 7.9287e-05, 4.2503e-05, 8.6490e-05, 8.3136e-05, 5.3677e-05, 8.7738e-05,\n 4.1772e-05, 8.7564e-05, 1.1891e-04, 1.3117e-04, 1.2199e-04, 1.2630e-04,\n 5.5056e-05, 7.1395e-05, 1.3256e-04, 1.6801e-04, 6.9904e-05, 5.0273e-05,\n 8.8885e-05, 4.1732e-05, 2.8390e-04, 4.5917e-05, 1.1025e-04, 8.9996e-05,\n 4.2339e-05, 4.0211e-05, 3.7263e-05, 5.4131e-05, 7.4514e-05, 1.1841e-04,\n 5.9152e-05, 5.8646e-05, 2.7544e-05, 9.6109e-05, 5.8853e-05, 1.2295e-04,\n 2.6297e-05, 1.1049e-04, 3.7672e-05, 9.0311e-05, 4.0717e-05, 4.7833e-05,\n 1.1438e-04, 8.9505e-05, 7.4469e-05, 7.8302e-05, 7.7510e-05, 2.6127e-05,\n 1.0591e-04, 4.4119e-05, 4.5328e-05, 1.2874e-04, 7.4008e-05, 3.7101e-05,\n 4.2985e-05, 4.5484e-05, 3.8495e-05, 8.0181e-05, 5.1170e-05, 9.3157e-05,\n 7.1344e-05, 4.0644e-05, 1.3256e-04, 5.0695e-05, 4.4945e-05, 8.4517e-05,\n 5.8934e-05, 6.7184e-05, 1.8621e-04, 7.7935e-05, 1.6822e-04, 7.0330e-05,\n 1.4832e-04, 7.0167e-05, 5.3744e-05, 2.9100e-05, 5.6101e-05, 6.9499e-05,\n 6.0772e-05, 5.3497e-05, 4.6090e-05, 2.7011e-05, 6.8546e-05, 7.4126e-05,\n 1.8717e-05, 2.4321e-05, 5.9902e-05, 6.3421e-05, 6.0144e-05, 3.3968e-05,\n 2.6122e-05, 4.9254e-05, 4.9786e-05, 3.6443e-05, 8.1198e-05, 2.8272e-05,\n 8.2894e-05, 7.7029e-05, 2.7587e-05, 4.6796e-05, 7.3945e-05, 6.1647e-05,\n 3.2974e-05, 1.2608e-04, 5.1593e-05, 9.8143e-05, 4.0294e-05, 7.7343e-05,\n 5.9515e-05, 4.1987e-05, 5.8544e-05, 9.7737e-05, 3.7165e-05, 3.7168e-05,\n 3.3105e-05, 5.5165e-05, 4.4698e-05, 4.0011e-05, 1.1847e-04, 7.8282e-05,\n 3.0229e-05, 4.9250e-05, 7.4317e-05, 8.7794e-08, 5.0170e-05, 1.9341e-04,\n 4.5542e-05, 1.1526e-04, 8.7580e-05, 5.4919e-05, 1.0013e-04, 9.6204e-05,\n 5.7385e-05, 5.1779e-05, 7.2822e-05, 4.9767e-05, 1.2016e-04, 6.3748e-05,\n 6.5375e-05, 1.0338e-04, 4.0576e-05, 6.5198e-05, 7.0453e-05, 5.3098e-05,\n 4.9110e-05, 6.8141e-05, 4.8591e-05, 6.3868e-05, 4.9997e-05, 5.9345e-05,\n 4.8554e-05, 7.9813e-05, 6.1031e-05, 7.1874e-05, 4.9870e-05, 8.8742e-05,\n 1.0329e-04, 1.0788e-04, 4.9046e-05, 7.5020e-05, 3.2601e-05, 3.3645e-05,\n 6.7210e-05, 8.5507e-05, 6.3273e-05, 5.9405e-05, 5.7393e-05, 4.7760e-05,\n 4.9751e-05, 7.8574e-05, 5.5272e-05, 9.8844e-05, 5.8540e-05, 3.1554e-05,\n 1.0872e-04, 7.9654e-05, 2.8351e-05, 2.8681e-05, 7.8624e-05, 3.2579e-05,\n 7.6935e-05, 4.1567e-05, 5.3146e-05, 9.1068e-05, 1.7118e-04, 8.0117e-05,\n 4.0731e-05, 5.1622e-05, 6.9485e-05, 4.8427e-05, 6.9690e-05, 4.9266e-05,\n 4.2339e-05, 5.8445e-05, 1.5315e-04, 9.8939e-05, 9.4759e-05, 6.0179e-05,\n 5.0252e-05, 4.6996e-05, 5.0974e-05, 7.3627e-05, 6.7056e-05, 5.1020e-05,\n 4.9920e-05, 8.2858e-05, 2.0566e-04, 1.0595e-04, 7.1900e-05, 8.4446e-05,\n 1.2197e-04, 1.4606e-04, 7.7174e-05, 1.1527e-04, 5.4717e-05, 6.9641e-05,\n 8.1101e-05, 6.5048e-05, 2.0627e-05, 5.9023e-05, 5.5875e-05, 5.4138e-05,\n 4.2584e-05, 2.9370e-05, 7.9525e-05, 6.0018e-05, 6.6697e-05, 4.4612e-05,\n 6.9975e-05, 1.0226e-04, 6.1955e-05, 1.2338e-04, 9.1619e-05, 6.5370e-05,\n 7.1661e-05, 9.2120e-05, 7.0379e-05, 5.7387e-05, 6.0877e-05, 3.9749e-05,\n 5.7846e-05, 5.7072e-05, 9.2430e-05, 3.6716e-05, 3.5497e-05, 2.8143e-05,\n 3.7162e-05, 1.7782e-04, 1.4098e-04, 9.9311e-05, 6.2984e-05, 7.0965e-05,\n 3.1909e-05, 1.0323e-04, 3.2148e-05, 3.4467e-05, 5.4845e-05, 7.0417e-05,\n 6.4225e-05, 6.8271e-05, 7.3519e-05, 5.1445e-05, 4.5691e-05, 3.7813e-05,\n 6.2548e-05, 4.7319e-05, 5.7006e-05, 7.0050e-05, 7.5697e-05, 4.1106e-05,\n 7.2771e-05, 9.2490e-05, 5.8657e-05, 7.1899e-05, 5.2656e-05, 5.0626e-05,\n 5.1042e-05, 3.8791e-05, 8.5801e-05, 1.1665e-04, 5.4388e-05, 3.9468e-05,\n 4.7276e-05, 1.0528e-04, 5.1830e-05, 1.4936e-04, 7.7743e-05, 3.9952e-05,\n 9.7639e-05, 5.0267e-05, 6.4817e-05, 4.6773e-05, 1.6118e-04, 3.3859e-05,\n 6.9132e-05, 9.2100e-05, 7.3235e-05, 3.5615e-05, 1.3092e-04, 7.4340e-05,\n 5.2094e-05, 1.0607e-04, 1.2731e-04, 9.2919e-05, 8.3195e-05, 7.1780e-05,\n 6.6972e-05, 4.7259e-05, 8.2972e-05, 7.0127e-05, 1.0310e-04, 6.0314e-05,\n 7.7104e-05, 7.4912e-05, 8.8689e-05, 4.8173e-05, 3.6205e-05, 4.9865e-05,\n 5.8406e-05, 8.7656e-05, 8.9869e-05, 7.2037e-05, 7.8681e-05, 7.5608e-05,\n 4.4982e-05, 3.4190e-05, 1.1809e-04, 7.1277e-05, 6.1148e-05, 2.8026e-04,\n 7.8150e-05, 7.3643e-05, 8.9949e-05, 6.8989e-05, 7.7406e-05, 6.1029e-05,\n 1.0113e-04, 5.8124e-05, 5.7790e-05, 1.7492e-04, 8.1495e-05, 4.5550e-05,\n 1.2395e-04, 5.4408e-05, 2.8579e-05, 6.0011e-05, 1.0140e-04, 5.8585e-05,\n 6.7968e-05, 6.7238e-05, 5.6452e-05, 4.6688e-05, 4.6518e-05, 7.1716e-05,\n 8.1502e-05, 9.1541e-05, 6.9948e-05, 1.6485e-04, 8.5379e-05, 6.7466e-05,\n 1.1545e-04, 8.5187e-05, 8.0927e-05, 7.0673e-05, 9.5128e-05, 3.7612e-05,\n 3.7216e-05, 1.2844e-04, 6.5492e-05, 3.1144e-05, 4.7219e-05, 5.7755e-05,\n 6.3314e-05, 5.8740e-05, 9.3723e-05, 6.7376e-05, 5.0737e-05, 4.6063e-05,\n 1.0468e-04, 5.5377e-05, 4.5400e-05, 8.4308e-05, 6.6907e-05, 7.2448e-05,\n 4.8104e-05, 5.9138e-05, 6.5183e-05, 5.1948e-05, 3.6081e-05, 4.5641e-05,\n 3.4692e-05, 5.1606e-05, 3.9775e-05, 6.4267e-05, 4.8518e-05, 5.5005e-05,\n 4.9105e-05, 1.5465e-08], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(3754.)",
|
| 27 |
+
"exp_avg": "tensor([[-1.9617e-05, 2.8756e-05, -9.8689e-06, ..., -5.6574e-06,\n -2.1713e-05, 5.6052e-45],\n [ 2.9087e-05, -3.2945e-05, -1.7236e-05, ..., -8.4623e-05,\n -1.7703e-05, 5.6052e-45],\n [ 1.1267e-05, -4.3230e-05, 2.2005e-05, ..., -1.0261e-05,\n 8.0665e-06, 5.6052e-45],\n ...,\n [ 2.0696e-05, -6.1361e-06, -4.2285e-06, ..., 1.2770e-05,\n -2.9851e-05, -5.6052e-45],\n [-4.3188e-05, 1.1244e-04, 2.9648e-05, ..., -1.6626e-05,\n -1.9183e-05, -5.6052e-45],\n [-3.9667e-05, 3.0284e-05, 2.5185e-05, ..., -4.1005e-05,\n 1.3469e-05, -5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[1.6226e-08, 4.3109e-08, 1.7259e-08, ..., 3.3404e-08, 1.3368e-08,\n 6.0439e-09],\n [3.0269e-08, 5.8670e-08, 4.5318e-08, ..., 8.0789e-08, 4.8954e-08,\n 1.9742e-09],\n [2.9695e-08, 4.3224e-08, 5.2375e-08, ..., 6.2671e-08, 2.0862e-08,\n 1.9311e-09],\n ...,\n [3.3943e-08, 8.2636e-08, 3.8875e-08, ..., 9.2814e-08, 2.7578e-08,\n 4.4711e-09],\n [3.1284e-08, 1.2045e-07, 3.5332e-08, ..., 7.2981e-08, 4.5597e-08,\n 5.9649e-09],\n [4.4203e-08, 5.8177e-08, 3.2561e-08, ..., 7.7554e-08, 3.9316e-08,\n 3.0947e-09]], device='cuda:0')"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"param_groups": [
|
| 32 |
+
{
|
| 33 |
+
"lr": 0.00975530705321762,
|
| 34 |
+
"name": "shared",
|
| 35 |
+
"betas": [
|
| 36 |
+
0.9,
|
| 37 |
+
0.999
|
| 38 |
+
],
|
| 39 |
+
"eps": 1e-08,
|
| 40 |
+
"weight_decay": 1e-05,
|
| 41 |
+
"amsgrad": false,
|
| 42 |
+
"maximize": false,
|
| 43 |
+
"foreach": null,
|
| 44 |
+
"capturable": false,
|
| 45 |
+
"differentiable": false,
|
| 46 |
+
"fused": null,
|
| 47 |
+
"decoupled_weight_decay": true,
|
| 48 |
+
"initial_lr": 0.01,
|
| 49 |
+
"params": [
|
| 50 |
+
0,
|
| 51 |
+
1,
|
| 52 |
+
2,
|
| 53 |
+
3
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"lr": 0.00975530705321762,
|
| 58 |
+
"name": "scale_256",
|
| 59 |
+
"betas": [
|
| 60 |
+
0.9,
|
| 61 |
+
0.999
|
| 62 |
+
],
|
| 63 |
+
"eps": 1e-08,
|
| 64 |
+
"weight_decay": 1e-05,
|
| 65 |
+
"amsgrad": false,
|
| 66 |
+
"maximize": false,
|
| 67 |
+
"foreach": null,
|
| 68 |
+
"capturable": false,
|
| 69 |
+
"differentiable": false,
|
| 70 |
+
"fused": null,
|
| 71 |
+
"decoupled_weight_decay": true,
|
| 72 |
+
"initial_lr": 0.01,
|
| 73 |
+
"params": [
|
| 74 |
+
4
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"lr": 0.00975530705321762,
|
| 79 |
+
"name": "scale_512",
|
| 80 |
+
"betas": [
|
| 81 |
+
0.9,
|
| 82 |
+
0.999
|
| 83 |
+
],
|
| 84 |
+
"eps": 1e-08,
|
| 85 |
+
"weight_decay": 1e-05,
|
| 86 |
+
"amsgrad": false,
|
| 87 |
+
"maximize": false,
|
| 88 |
+
"foreach": null,
|
| 89 |
+
"capturable": false,
|
| 90 |
+
"differentiable": false,
|
| 91 |
+
"fused": null,
|
| 92 |
+
"decoupled_weight_decay": true,
|
| 93 |
+
"initial_lr": 0.01,
|
| 94 |
+
"params": [
|
| 95 |
+
5
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"lr": 0.004877665762479736,
|
| 100 |
+
"name": "fusion",
|
| 101 |
+
"betas": [
|
| 102 |
+
0.9,
|
| 103 |
+
0.999
|
| 104 |
+
],
|
| 105 |
+
"eps": 1e-08,
|
| 106 |
+
"weight_decay": 1e-05,
|
| 107 |
+
"amsgrad": false,
|
| 108 |
+
"maximize": false,
|
| 109 |
+
"foreach": null,
|
| 110 |
+
"capturable": false,
|
| 111 |
+
"differentiable": false,
|
| 112 |
+
"fused": null,
|
| 113 |
+
"decoupled_weight_decay": true,
|
| 114 |
+
"initial_lr": 0.005,
|
| 115 |
+
"params": [
|
| 116 |
+
6
|
| 117 |
+
]
|
| 118 |
+
}
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
"scheduler_state_dict": {
|
| 122 |
+
"T_0": 10,
|
| 123 |
+
"T_i": 10,
|
| 124 |
+
"T_mult": 2,
|
| 125 |
+
"eta_min": 1e-06,
|
| 126 |
+
"T_cur": 1,
|
| 127 |
+
"base_lrs": [
|
| 128 |
+
0.01,
|
| 129 |
+
0.01,
|
| 130 |
+
0.01,
|
| 131 |
+
0.005
|
| 132 |
+
],
|
| 133 |
+
"last_epoch": 1,
|
| 134 |
+
"_step_count": 0,
|
| 135 |
+
"_is_initial": false,
|
| 136 |
+
"_get_lr_called_within_step": false,
|
| 137 |
+
"_last_lr": [
|
| 138 |
+
0.00975530705321762,
|
| 139 |
+
0.00975530705321762,
|
| 140 |
+
0.00975530705321762,
|
| 141 |
+
0.004877665762479736
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
"metrics": {
|
| 145 |
+
"best_val_acc": 61.635333333333335,
|
| 146 |
+
"best_epoch": 0,
|
| 147 |
+
"scale_accuracies": {
|
| 148 |
+
"256": 61.635333333333335
|
| 149 |
+
},
|
| 150 |
+
"training_history": {
|
| 151 |
+
"epochs": [
|
| 152 |
+
1
|
| 153 |
+
],
|
| 154 |
+
"train_loss": [
|
| 155 |
+
3.9435249049420933
|
| 156 |
+
],
|
| 157 |
+
"train_acc": [
|
| 158 |
+
54.38726307083047
|
| 159 |
+
],
|
| 160 |
+
"val_acc": [
|
| 161 |
+
61.635333333333335
|
| 162 |
+
],
|
| 163 |
+
"scale_accs": {
|
| 164 |
+
"256": [
|
| 165 |
+
61.635333333333335
|
| 166 |
+
]
|
| 167 |
+
},
|
| 168 |
+
"lr": [
|
| 169 |
+
0.00975530705321762
|
| 170 |
+
]
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
"train_config": {
|
| 174 |
+
"name": "david_training",
|
| 175 |
+
"run_id": "20251012_235237",
|
| 176 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 177 |
+
"model_variant": [
|
| 178 |
+
"clip_vit_b16",
|
| 179 |
+
"clip_vit_laion_b32",
|
| 180 |
+
"clip_vit_b32"
|
| 181 |
+
],
|
| 182 |
+
"num_classes": 1000,
|
| 183 |
+
"preset": "small_fast",
|
| 184 |
+
"custom_config_path": null,
|
| 185 |
+
"num_classes_override": null,
|
| 186 |
+
"use_belly_override": null,
|
| 187 |
+
"belly_expand_override": null,
|
| 188 |
+
"progressive_training_override": true,
|
| 189 |
+
"scale_warmup_epochs_override": {
|
| 190 |
+
"256": 0,
|
| 191 |
+
"512": 2
|
| 192 |
+
},
|
| 193 |
+
"num_epochs": 10,
|
| 194 |
+
"batch_size": 1024,
|
| 195 |
+
"learning_rate": 0.01,
|
| 196 |
+
"weight_decay": 1e-05,
|
| 197 |
+
"warmup_epochs": 3,
|
| 198 |
+
"use_rose_loss": true,
|
| 199 |
+
"rose_initial_weight": 0.1,
|
| 200 |
+
"rose_max_weight": 0.8,
|
| 201 |
+
"rose_weight_schedule": "adaptive",
|
| 202 |
+
"use_cayley_loss": false,
|
| 203 |
+
"cayley_weight": 0.01,
|
| 204 |
+
"scale_loss_balance": null,
|
| 205 |
+
"use_mixed_precision": false,
|
| 206 |
+
"gradient_clip": 15.0,
|
| 207 |
+
"scheduler_type": "cosine_restarts",
|
| 208 |
+
"min_lr": 1e-06,
|
| 209 |
+
"freeze_strategy": "never",
|
| 210 |
+
"freeze_threshold": 90.0,
|
| 211 |
+
"unfreeze_on_plateau": true,
|
| 212 |
+
"patience": 10,
|
| 213 |
+
"track_gradients": true,
|
| 214 |
+
"gradient_scale_threshold": 1e-05,
|
| 215 |
+
"gradient_scale_multiplier": 10.0,
|
| 216 |
+
"log_interval": 50,
|
| 217 |
+
"val_interval": 1,
|
| 218 |
+
"save_interval": 5,
|
| 219 |
+
"log_fusion_weights": true,
|
| 220 |
+
"log_loss_components": true,
|
| 221 |
+
"save_format": "safetensors",
|
| 222 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 223 |
+
"upload_to_hub": true,
|
| 224 |
+
"base_dir": "./david_training",
|
| 225 |
+
"num_workers": 10,
|
| 226 |
+
"pin_memory": true,
|
| 227 |
+
"prefetch_factor": 4,
|
| 228 |
+
"persistent_workers": true
|
| 229 |
+
}
|
| 230 |
+
}
|