{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:31.883900Z", "iopub.status.busy": "2024-05-01T20:02:31.883672Z", "iopub.status.idle": "2024-05-01T20:02:31.887224Z", "shell.execute_reply": "2024-05-01T20:02:31.886473Z", "shell.execute_reply.started": "2024-05-01T20:02:31.883877Z" }, "id": "i9FKaBPLQEqo", "tags": [] }, "outputs": [], "source": [ "# !pip install transformers==4.40.1\n", "# !pip install pymorphy2\n", "# !pip install evaluate\n", "# !pip install wordcloud" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-01T20:02:31.888519Z", "iopub.status.busy": "2024-05-01T20:02:31.888272Z", "iopub.status.idle": "2024-05-01T20:02:37.159457Z", "shell.execute_reply": "2024-05-01T20:02:37.158362Z", "shell.execute_reply.started": "2024-05-01T20:02:31.888495Z" }, "id": "YIm8hJ6Pg4Mi", "outputId": "1d7505d8-393e-484f-db12-120ca6e38a44", "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to\n", "[nltk_data] /home/appuser/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] } ], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from wordcloud import WordCloud\n", "\n", "import numpy as np\n", "import pickle\n", "from tqdm.notebook import tqdm\n", "tqdm.pandas()\n", "\n", "import pymorphy2\n", "import string\n", "import re\n", "import nltk\n", "nltk.download('stopwords')\n", "from nltk.corpus import stopwords\n", "\n", "import evaluate\n", "from torch.utils.data import DataLoader, TensorDataset, Dataset\n", "from sklearn.model_selection import train_test_split\n", "from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, TrainingArguments, Trainer\n", "import torch\n", "import torch.nn as nn" ] }, { 
"cell_type": "markdown", "source": [ "# Подготовка данных" ], "metadata": { "id": "TTMcDFwneX03" } }, { "cell_type": "markdown", "source": [ "## Подготовка текста" ], "metadata": { "id": "DiMWNBAzecLr" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "840WjRyCqwKs", "tags": [] }, "outputs": [], "source": [ "posts = pd.concat([pd.read_csv('posts_0-30000.csv'),\n", " pd.read_csv('posts_0-30000 (1).csv'),\n", " pd.read_csv('posts_0-30000 (2).csv'),\n", " pd.read_csv('posts_0-30000 (3).csv'),\n", " pd.read_csv('posts_0-30000 (4).csv')])\\\n", " .drop('Unnamed: 0', axis = 1).drop_duplicates().dropna()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "PKtDmuFir0Fp", "tags": [] }, "outputs": [], "source": [ "posts.head(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ZBNNvtlR3DlN", "tags": [] }, "outputs": [], "source": [ "rus_stopwords = stopwords.words('russian')\n", "morph = pymorphy2.MorphAnalyzer(probability_estimator_cls=None)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "42rkxqpb3DWZ", "tags": [] }, "outputs": [], "source": [ "def remove_stopwords(txt):\n", "    \"\"\"Normalize a post: keep Cyrillic words, lemmatize them, drop stopwords.\n", "\n", "    Punctuation and every non-Cyrillic word character (Latin letters, digits, '_')\n", "    are blanked out, the text is lower-cased, each remaining token is replaced by\n", "    the normal form of its first pymorphy2 parse, and lemmas found in\n", "    `rus_stopwords` or in the extra set below are discarded.\n", "\n", "    Returns the surviving lemmas joined by single spaces ('' if none survive).\n", "    \"\"\"\n", "    # Frequent lemmas filtered in addition to `rus_stopwords`.\n", "    extra_stopwords = {'также', 'весь', 'это', 'который', 'иза', 'еще', 'ещё', 'ее', 'её', 'свой'}\n", "    txt = txt.strip()\n", "    # Pad every punctuation mark with spaces so it cannot glue two words together.\n", "    txt = txt.translate(str.maketrans({key: \" {0} \".format(key) for key in string.punctuation}))\n", "    txt = re.sub(r'[^\\w\\s]|\\n', ' ', txt)\n", "    txt = txt.lower()\n", "    # Raw string on purpose: in a plain literal '\\W' is an invalid escape sequence\n", "    # that recent Python versions warn about.\n", "    txt = re.sub(r'[^а-яА-ЯёЁ*\\W]', ' ', txt)\n", "    lemmas = []\n", "    for word in txt.split():\n", "        lemma = morph.parse(word)[0].normal_form\n", "        if lemma not in rus_stopwords and lemma not in extra_stopwords:\n", "            lemmas.append(lemma)\n", "    # join() yields the same string as the old `s + word + ' '` loop followed by s[:-1].\n", "    return ' '.join(lemmas)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2xg8k_Ec2_Vf", "tags": [] }, "outputs": [], "source": [ "posts['text_norm'] = [remove_stopwords(i) for i in tqdm(posts['text'])]" ] }, { "cell_type": "markdown", "source": [ "## Подготовка целевой переменной" ], "metadata": 
{ "id": "kWA_ZTx5enUW" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 501 }, "execution": { "iopub.execute_input": "2024-05-01T20:02:40.727856Z", "iopub.status.busy": "2024-05-01T20:02:40.726941Z", "iopub.status.idle": "2024-05-01T20:02:40.799382Z", "shell.execute_reply": "2024-05-01T20:02:40.798725Z", "shell.execute_reply.started": "2024-05-01T20:02:40.727810Z" }, "outputId": "1c16a1c6-ac5a-4226-c6e6-b25be91c7ec4", "tags": [], "id": "MO82YrWBcuqO" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idowner_iddateviewslikesrepoststexttext_normконверсияlen_text
2225143638732-403167051653559978350561197923819В сети завирусилась речь британского епископа ...сеть завируситься речь британский епископ рича...0.07824658
1860445144079-403167051663849380199370131482384Толпы добровольцев в Чечне идут к военкоматам ...толпа доброволец чечня идти военкомат объявить...0.08986310
2279743389527-403167051652094360255504211611354В Норильске, несмотря на метель, жители вышли ...норильск несмотря метель житель выйти отпраздн...0.09341920
2178843811107-403167051654853750334553245453757В небе над подмосковным Серпуховом самолётами ...небо подмосковный серпухов самолёт написать ро...0.09582623
375764788459-2628406416701381281971752404Более 4 тыс. световых декоративных конструкций...тыс светов декоративный конструкция украсить с...0.26616631
\n", "
" ], "text/plain": [ " id owner_id date views likes reposts \\\n", "22251 43638732 -40316705 1653559978 350561 19792 3819 \n", "18604 45144079 -40316705 1663849380 199370 13148 2384 \n", "22797 43389527 -40316705 1652094360 255504 21161 1354 \n", "21788 43811107 -40316705 1654853750 334553 24545 3757 \n", "37576 4788459 -26284064 1670138128 19717 5240 4 \n", "\n", " text \\\n", "22251 В сети завирусилась речь британского епископа ... \n", "18604 Толпы добровольцев в Чечне идут к военкоматам ... \n", "22797 В Норильске, несмотря на метель, жители вышли ... \n", "21788 В небе над подмосковным Серпуховом самолётами ... \n", "37576 Более 4 тыс. световых декоративных конструкций... \n", "\n", " text_norm конверсия len_text \n", "22251 сеть завируситься речь британский епископ рича... 0.078246 58 \n", "18604 толпа доброволец чечня идти военкомат объявить... 0.089863 10 \n", "22797 норильск несмотря метель житель выйти отпраздн... 0.093419 20 \n", "21788 небо подмосковный серпухов самолёт написать ро... 0.095826 23 \n", "37576 тыс светов декоративный конструкция украсить с... 
0.266166 31 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "posts['конверсия'] = (2*(posts.reposts.astype(float)) + posts.likes.astype(float)) / posts.views.astype(float)\n", "posts['конверсия'] = posts['конверсия'].fillna(0)\n", "posts.sort_values('конверсия').tail(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:40.807099Z", "iopub.status.busy": "2024-05-01T20:02:40.806672Z", "iopub.status.idle": "2024-05-01T20:02:41.241809Z", "shell.execute_reply": "2024-05-01T20:02:41.240678Z", "shell.execute_reply.started": "2024-05-01T20:02:40.807076Z" }, "id": "sc8yhzL8ecVf", "tags": [] }, "outputs": [], "source": [ "posts['len_text'] = [len(i.split()) for i in posts['text_norm']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:41.244031Z", "iopub.status.busy": "2024-05-01T20:02:41.243189Z", "iopub.status.idle": "2024-05-01T20:02:41.291067Z", "shell.execute_reply": "2024-05-01T20:02:41.290053Z", "shell.execute_reply.started": "2024-05-01T20:02:41.244000Z" }, "id": "vZZaDQq_Uc9U", "tags": [] }, "outputs": [], "source": [ "posts = posts[posts['len_text'] >= 5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-01T20:02:41.297591Z", "iopub.status.busy": "2024-05-01T20:02:41.297336Z", "iopub.status.idle": "2024-05-01T20:02:41.317651Z", "shell.execute_reply": "2024-05-01T20:02:41.316914Z", "shell.execute_reply.started": "2024-05-01T20:02:41.297568Z" }, "id": "GhLwJtjzecQE", "outputId": "83b446ea-1c84-465f-84e8-f95d6f9787f6", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "count 137722.000000\n", "mean 29.740906\n", "std 24.654668\n", "min 5.000000\n", "10% 9.000000\n", "25% 15.000000\n", "35% 18.000000\n", "50% 22.000000\n", "60% 26.000000\n", "70% 
32.000000\n", "75% 37.000000\n", "80% 42.000000\n", "90% 59.000000\n", "95% 76.000000\n", "99% 122.000000\n", "max 815.000000\n", "Name: len_text, dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "posts['len_text'].describe([.1,.25, .35, .5, .6, 0.7, .75, .8, .9, .95, .99])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-01T20:02:41.339036Z", "iopub.status.busy": "2024-05-01T20:02:41.338518Z", "iopub.status.idle": "2024-05-01T20:02:41.354420Z", "shell.execute_reply": "2024-05-01T20:02:41.353683Z", "shell.execute_reply.started": "2024-05-01T20:02:41.339011Z" }, "id": "dz_Xru12eb-s", "outputId": "b958b987-9b5d-4017-8352-e48f8a28a929", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "count 137722.000000\n", "mean 0.005406\n", "std 0.005915\n", "min 0.000000\n", "25% 0.001929\n", "50% 0.003531\n", "75% 0.006577\n", "max 0.266166\n", "Name: конверсия, dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "posts['конверсия'].describe()" ] }, { "cell_type": "markdown", "source": [ "# Загрузка модели" ], "metadata": { "id": "eq7x1M_yeIHI" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-01T20:02:41.356005Z", "iopub.status.busy": "2024-05-01T20:02:41.355355Z", "iopub.status.idle": "2024-05-01T20:02:41.389901Z", "shell.execute_reply": "2024-05-01T20:02:41.389093Z", "shell.execute_reply.started": "2024-05-01T20:02:41.355981Z" }, "id": "xyczUzaNPqcd", "outputId": "697738dd-ad4a-4288-8a5c-03d639f49668", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "device(type='cuda', index=0)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "device = torch.device(\"cuda:0\" if 
torch.cuda.is_available() else \"cpu\")\n", "device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-01T20:02:41.391576Z", "iopub.status.busy": "2024-05-01T20:02:41.391029Z", "iopub.status.idle": "2024-05-01T20:02:44.597511Z", "shell.execute_reply": "2024-05-01T20:02:44.596308Z", "shell.execute_reply.started": "2024-05-01T20:02:41.391548Z" }, "id": "lqe7kH0nKnxB", "outputId": "96222ae2-f823-48ba-f768-bb7e6544cb59", "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "tokenizer = AutoTokenizer.from_pretrained('distilbert/distilbert-base-multilingual-cased')\n", "model = AutoModelForSequenceClassification.from_pretrained('distilbert/distilbert-base-multilingual-cased', num_labels = 1)\n", "model = model.to(device)" ] }, { "cell_type": "markdown", "source": [ "# Разбиение данных" ], "metadata": { "id": "B-fhkjXOeCJ7" } }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:44.599767Z", "iopub.status.busy": "2024-05-01T20:02:44.599029Z", "iopub.status.idle": "2024-05-01T20:02:44.850977Z", "shell.execute_reply": "2024-05-01T20:02:44.849916Z", "shell.execute_reply.started": "2024-05-01T20:02:44.599735Z" }, "id": "1B1KCZuTdviE", "tags": [] }, "outputs": [], "source": [ "posts_train, posts_test = train_test_split(posts, test_size = 0.3, random_state=21)\n", "posts_test, posts_eval = train_test_split(posts_test, test_size = 0.5, random_state=21)" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:44.869804Z", "iopub.status.busy": "2024-05-01T20:02:44.869257Z", "iopub.status.idle": "2024-05-01T20:02:44.876424Z", "shell.execute_reply": "2024-05-01T20:02:44.875657Z", "shell.execute_reply.started": "2024-05-01T20:02:44.869778Z" }, "tags": [], "id": "PPYLAZU6cuqa" }, "outputs": [], "source": [ "class BERTDataset(Dataset):\n", "    \"\"\"Dataset that tokenizes `text_norm` and pairs it with the \"конверсия\" regression target.\n", "\n", "    Each item is a dict with `input_ids` and `attention_mask` (padded/truncated to\n", "    `max_len` tokens) and a float `labels` tensor, all placed on the notebook-level `device`.\n", "    \"\"\"\n", "\n", "    def __init__(self, df, tokenizer, max_len):\n", "        \"\"\"Keep the `text_norm` and \"конверсия\" columns of `df` together with the tokenizer settings.\"\"\"\n", "        self.max_len = max_len\n", "        self.text = df.text_norm\n", "        self.tokenizer = tokenizer\n", "        self.targets = df[\"конверсия\"]\n", "\n", "    def __len__(self):\n", "        \"\"\"Return the number of rows available.\"\"\"\n", "        return len(self.text)\n", "\n", "    def __getitem__(self, index):\n", "        \"\"\"Tokenize the row at position `index` and return model-ready tensors.\"\"\"\n", "        # Positional access: a DataLoader sampler yields positions 0..len-1, while\n", "        # label-based `series[index]` fails (or picks another row) whenever `df`\n", "        # arrives with a non-default index, e.g. without reset_index(drop=True).\n", "        text = self.text.iloc[index]\n", "        label = self.targets.iloc[index]\n", "        # Calling the tokenizer directly replaces the deprecated `encode_plus`.\n", "        encoding = self.tokenizer(\n", "            text,\n", "            truncation=True,\n", "            add_special_tokens=True,\n", "            max_length=self.max_len,\n", "            padding='max_length',\n", "            return_attention_mask=True,\n", "            return_tensors='pt'\n", "        )\n", "        # NOTE(review): tensors are moved to `device` per item; the training loop\n", "        # presumably relies on this - confirm before moving the transfer there.\n", "        return {\n", "            'input_ids': encoding['input_ids'].flatten().to(device),\n", "            'attention_mask': encoding['attention_mask'].flatten().to(device),\n", "            'labels': torch.tensor(label, dtype=torch.float).to(device)\n", "        }" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:44.885935Z", "iopub.status.busy": "2024-05-01T20:02:44.885695Z", "iopub.status.idle": "2024-05-01T20:02:44.952808Z", "shell.execute_reply": "2024-05-01T20:02:44.951919Z", "shell.execute_reply.started": "2024-05-01T20:02:44.885914Z" }, "id": "zvgJt7sZj310", "tags": [] }, "outputs": [], "source": [ "train_dataset = BERTDataset(posts_train.reset_index(drop=True), tokenizer, 512)\n", "test_dataset = BERTDataset(posts_test.reset_index(drop=True), tokenizer, 512)\n", "eval_dataset = BERTDataset(posts_eval.reset_index(drop=True), tokenizer, 512)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { 
"iopub.execute_input": "2024-05-01T20:02:44.954755Z", "iopub.status.busy": "2024-05-01T20:02:44.954024Z", "iopub.status.idle": "2024-05-01T20:02:44.959257Z", "shell.execute_reply": "2024-05-01T20:02:44.958498Z", "shell.execute_reply.started": "2024-05-01T20:02:44.954726Z" }, "tags": [], "id": "LrXnB1NXcuqd" }, "outputs": [], "source": [ "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n", "test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)\n", "eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)" ] }, { "cell_type": "markdown", "source": [ "# Обучение" ], "metadata": { "id": "6z-st5ZWd51h" } }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:44.973342Z", "iopub.status.busy": "2024-05-01T20:02:44.973106Z", "iopub.status.idle": "2024-05-01T20:02:44.984566Z", "shell.execute_reply": "2024-05-01T20:02:44.983765Z", "shell.execute_reply.started": "2024-05-01T20:02:44.973321Z" }, "tags": [], "id": "EaC99Ezdcuqg", "outputId": "edd1f623-ac21-4827-dcce-f3adb9516fb5" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/appuser/.conda/envs/pytorch/lib/python3.9/site-packages/transformers/optimization.py:521: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. 
Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n" ] } ], "source": [ "from transformers import AdamW\n", "loss_fn = torch.nn.MSELoss()\n", "optimizer = AdamW(model.parameters(), lr=5e-6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T20:02:44.985856Z", "iopub.status.busy": "2024-05-01T20:02:44.985609Z", "iopub.status.idle": "2024-05-01T21:27:07.431869Z", "shell.execute_reply": "2024-05-01T21:27:07.428328Z", "shell.execute_reply.started": "2024-05-01T20:02:44.985835Z" }, "scrolled": true, "tags": [], "colab": { "referenced_widgets": [ "ee2ce2e76dab43dfa37c3f296f4b403c", "728f4167498349359722d24e19667844" ] }, "id": "AShT1n5acuqh", "outputId": "124480f2-23fd-421f-9469-306961ff1c1b" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ee2ce2e76dab43dfa37c3f296f4b403c", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00)\n", "test: tensor(0.0014)\n", "train: tensor(0.0026, device='cuda:0', grad_fn=)\n", "train: tensor(0.0027, device='cuda:0', grad_fn=)\n", "train: tensor(0.0028, device='cuda:0', grad_fn=)\n", "train: tensor(0.0024, device='cuda:0', grad_fn=)\n", "train: tensor(0.0027, device='cuda:0', grad_fn=)\n", "train: tensor(0.0025, device='cuda:0', grad_fn=)\n", "train: tensor(0.0026, device='cuda:0', grad_fn=)\n", "train: tensor(0.0040, device='cuda:0', grad_fn=)\n", "train: tensor(0.0027, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(0.0025, device='cuda:0', grad_fn=)\n", "train: tensor(0.0016, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0026, device='cuda:0', grad_fn=)\n", "train: tensor(0.0022, device='cuda:0', grad_fn=)\n", "train: tensor(0.0018, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0019, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(0.0025, device='cuda:0', grad_fn=)\n", "train: tensor(0.0013, device='cuda:0', grad_fn=)\n", "train: tensor(0.0024, device='cuda:0', grad_fn=)\n", "train: tensor(0.0023, device='cuda:0', grad_fn=)\n", "train: tensor(0.0026, device='cuda:0', grad_fn=)\n", "train: tensor(0.0017, device='cuda:0', grad_fn=)\n", "train: tensor(0.0016, device='cuda:0', grad_fn=)\n", "train: tensor(0.0020, device='cuda:0', grad_fn=)\n", "train: tensor(0.0018, device='cuda:0', grad_fn=)\n", "train: tensor(0.0015, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0022, device='cuda:0', grad_fn=)\n", "train: tensor(0.0014, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0017, device='cuda:0', grad_fn=)\n", "train: tensor(0.0013, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0014, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0014, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0018, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0012, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "test: tensor(0.0002)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0011, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0010, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0007, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: 
tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0009, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0008, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: 
tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0006, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "test: tensor(0.0001)\n", "train: tensor(0.0004, 
device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0005, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, 
device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0004, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, 
device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "test: tensor(7.8905e-05)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.4409e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, 
device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(9.4359e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.7676e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(8.3881e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.6760e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.4582e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", 
"train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.6448e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "test: tensor(5.3786e-05)\n", "train: tensor(7.4675e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.7017e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.8196e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.8554e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.6274e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.7471e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.0350e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9891e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.1354e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7669e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.7048e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.2780e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0231e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6163e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: 
tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.9517e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.0929e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7995e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.8445e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(9.4104e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5888e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6651e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0672e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.0847e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.2222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9095e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0146e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7133e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.5806e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.2566e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.6575e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.0541e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(9.9842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.8013e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "test: tensor(5.1566e-05)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.5574e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.1961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(6.4618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.7549e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8057e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4596e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.0027e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4110e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.5554e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4931e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2056e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2443e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4111e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.0040e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7416e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.5823e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5478e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9565e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.6807e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7872e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.1503e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8676e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.8496e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9736e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.8649e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6236e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9806e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.0285e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.8920e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.5397e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5377e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.8289e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9718e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9024e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.3119e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: 
tensor(6.9139e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8021e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9351e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3665e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.0382e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2868e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6852e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.2155e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(8.9188e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9865e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0113e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5456e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.3941e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.8320e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6869e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1786e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0021, device='cuda:0', grad_fn=)\n", "train: tensor(7.3605e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0725e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7435e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8506e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.0756e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8769e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1069e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1770e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3872e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: 
tensor(7.5722e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1323e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6678e-05, device='cuda:0', grad_fn=)\n", "test: tensor(4.1904e-05)\n", "train: tensor(2.9071e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5017e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4795e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6720e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7756e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7288e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1131e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8580e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6127e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5097e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1088e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5676e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3836e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(5.7486e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.3487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.4836e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0521e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2144e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4397e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0621e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.4675e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.7451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4953e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.7619e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(5.4272e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6843e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.2092e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7565e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.4892e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5798e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7979e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.1348e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.6536e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4681e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4963e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3640e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3329e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8929e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5802e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8608e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.5504e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4836e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8480e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.6039e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6766e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.7171e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1894e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.9281e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.5253e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2216e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1958e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5018e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7665e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7931e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.7747e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8756e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2389e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.2070e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(5.1033e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9589e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3638e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2775e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3821e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6432e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5112e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9810e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.1670e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0760e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1493e-05, device='cuda:0', grad_fn=)\n", "test: tensor(4.0964e-05)\n", "train: 
tensor(5.0631e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4017e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6071e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0956e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4850e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6999e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9657e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9490e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6954e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1836e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7515e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2049e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8329e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8432e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8407e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(8.1116e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5260e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9017e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2920e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.7640e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8568e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(7.9242e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5815e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2347e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3157e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0606e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6770e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2374e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0291e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8360e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.0247e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(8.6047e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2358e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8858e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2810e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1327e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1814e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5520e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8958e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1322e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5254e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4620e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9860e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2204e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3684e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3370e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3954e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2583e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.3505e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6102e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0040e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5160e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7989e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7348e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4795e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2703e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5971e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.6907e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2522e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8059e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9218e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8305e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(4.2118e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.4338e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.7972e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(7.1497e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8425e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0302e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2086e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5450e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2621e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.4734e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0260e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8747e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.4572e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0507e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1627e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.1004e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5150e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6642e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5830e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.0780e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6347e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4438e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.8512e-05)\n", "train: tensor(7.9670e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1261e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4952e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8831e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.7043e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0210e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5623e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9050e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3336e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.2842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3139e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2533e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(6.7775e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9090e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0128e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0874e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1856e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5149e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7547e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.7102e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.5826e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5980e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1507e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6144e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1751e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3725e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5703e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.3700e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5840e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8793e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1650e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(7.4477e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3423e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.0631e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1002e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.4290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5268e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9973e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9775e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0709e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3097e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7938e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1256e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5302e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2708e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6543e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9631e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1727e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8637e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1331e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0885e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0843e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9923e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1766e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0407e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1319e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2372e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5708e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8027e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4969e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3491e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2599e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8186e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(7.4109e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3461e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9050e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3758e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5122e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1692e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3164e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7111e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0639e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8414e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7909e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1101e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7127e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4100e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5298e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.7390e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3825e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5372e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8470e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0869e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2729e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1025e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.0671e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2914e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.5815e-05)\n", "train: tensor(3.4416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1623e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1518e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6720e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8000e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1607e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2928e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(4.2827e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6676e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5837e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6693e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8644e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7410e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8387e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9856e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9526e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1557e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6733e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8675e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7971e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7616e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1972e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1733e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6010e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7914e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8932e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9938e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4504e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1752e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0807e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6241e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2680e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8379e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5473e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3993e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9424e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(5.3586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8360e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4159e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8956e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0695e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1690e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1862e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3453e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6561e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1268e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4722e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8480e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8211e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9058e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7889e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0689e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9736e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7868e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1703e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2796e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1112e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6048e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8303e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1014e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4404e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9803e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7526e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4348e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1812e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0252e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0677e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(7.1532e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4933e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3420e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1014e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6681e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4824e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7236e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2049e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4983e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5507e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8595e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9377e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0774e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3877e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6334e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3764e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9080e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1129e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5964e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2238e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.0746e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6632e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8902e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.5584e-05)\n", "train: tensor(9.4180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2385e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3800e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6429e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6419e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3783e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2584e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7874e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0023e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.7379e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7308e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5088e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2138e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0424e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9187e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8957e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6274e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4869e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2898e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.2880e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7922e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1511e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8671e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6411e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5210e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5620e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8797e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4998e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7984e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1427e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.3117e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2396e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6590e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1617e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.8760e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0003, device='cuda:0', grad_fn=)\n", "train: tensor(3.4573e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9380e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.3081e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0656e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6349e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5472e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8790e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0410e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9533e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8221e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4029e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7495e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.6336e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.8538e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5908e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1883e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.1259e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4122e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1696e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6489e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0141e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2919e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9782e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3892e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1448e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9399e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7150e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6767e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0719e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8924e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5338e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1945e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7345e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(5.1471e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9555e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9940e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0567e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7333e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2228e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0582e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2308e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4602e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5513e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.4752e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3012e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1463e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7076e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8164e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6498e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2094e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5507e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.1223e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6522e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.3786e-05)\n", "train: tensor(9.6648e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2643e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6171e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7334e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8393e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6687e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6311e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4467e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6045e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5858e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2340e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(2.2820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0245e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2369e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3450e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8312e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.8096e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0596e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5759e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8834e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7744e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8273e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5210e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.3040e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(6.3758e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7072e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2227e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3473e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6154e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1193e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7420e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0691e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1440e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3333e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3319e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5813e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.8467e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(4.8704e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6187e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.5181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3359e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8086e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6171e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3152e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3274e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5926e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8094e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6175e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9777e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0771e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8008e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3717e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3172e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4562e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3660e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1941e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8488e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1643e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2740e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5688e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2730e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8054e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.5129e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7701e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8380e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1367e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5885e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8709e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(2.9194e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9970e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6201e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2042e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3697e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6129e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8138e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4753e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6921e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8574e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0980e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6858e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1329e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4678e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9625e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6778e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1678e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5404e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.3799e-05)\n", "train: tensor(4.2473e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2183e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6013e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3258e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3092e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3054e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4941e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9818e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7751e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3891e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7008e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.3954e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0100e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1512e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9593e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8090e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9372e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9201e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4774e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9982e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9398e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8934e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4330e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.3921e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4112e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.3390e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6003e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8754e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8234e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4480e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7477e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6374e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6478e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4079e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4232e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.5242e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0927e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3138e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4889e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9735e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1483e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3064e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(3.6171e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3072e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9516e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8989e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1369e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1528e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3543e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9031e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1665e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1905e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0097e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8272e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4447e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5681e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9723e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7725e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0751e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1755e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3649e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5140e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4399e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8885e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5249e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.6481e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3075e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3486e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.3165e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9464e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.0378e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1985e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5048e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0374e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9470e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8403e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7640e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.3565e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0442e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9796e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1950e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3496e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6516e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2139e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.2738e-05)\n", "train: tensor(3.1419e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2959e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9965e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5809e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4854e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6336e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7400e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9277e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6357e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1683e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9745e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5333e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0145e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8094e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0988e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7941e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.6884e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1851e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4240e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6977e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9934e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6318e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5685e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8962e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4251e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5486e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7470e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2511e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2323e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3898e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5927e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9818e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4892e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5569e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1020e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.5890e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2719e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5497e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1803e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0881e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1228e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1363e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0062e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1576e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0971e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1208e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(3.0895e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8743e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1036e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9675e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8043e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2723e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3557e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8579e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1855e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9418e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5685e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2011e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0526e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1687e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9732e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9433e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3217e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.0515e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6067e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3185e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1816e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5124e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9177e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6037e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9343e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8592e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1224e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9471e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1433e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0249e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.1463e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(4.4893e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1217e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5956e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8958e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7424e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7854e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3551e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7981e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4544e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2972e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9559e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3281e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2801e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.2320e-05)\n", "train: tensor(3.2793e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4617e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2325e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9477e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7704e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.7512e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3566e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9609e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3095e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.5843e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0742e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7078e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6513e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4156e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8773e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9773e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8319e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0785e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(1.9947e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9110e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3065e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7304e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2616e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0158e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4876e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8944e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7630e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7355e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0316e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9083e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9854e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0406e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9462e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4301e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6574e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9651e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2791e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5697e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7596e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9880e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6124e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5576e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(8.2119e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9174e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6962e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9911e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9757e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7859e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3095e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3908e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.6393e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1841e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8317e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6237e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2076e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0612e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3335e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2385e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8135e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1626e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0047e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1573e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0278e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.8634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3949e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2909e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7741e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5170e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6169e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3669e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1439e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6498e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.8614e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5994e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1847e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8887e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3782e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6695e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7837e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1554e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4273e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(4.0310e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8646e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7541e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9624e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7988e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5364e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2014e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5131e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4104e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8429e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.1711e-05)\n", "train: tensor(2.3289e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5593e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4650e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5375e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7990e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3023e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0547e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7351e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6559e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6936e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3372e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4345e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0184e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2398e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7761e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6754e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5355e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8343e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1651e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5275e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.6592e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9762e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7266e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.9502e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9811e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9096e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4015e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4051e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6548e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1477e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3639e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4903e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1145e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1611e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5235e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8266e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1566e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4740e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7110e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6904e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2195e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0350e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6749e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4952e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8088e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4534e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4248e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2250e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4226e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5082e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.8210e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8905e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1311e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5027e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1703e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1046e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(6.1455e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6192e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6876e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2860e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(5.3685e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3956e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3696e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7799e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5514e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9865e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3644e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8845e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2622e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4703e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1461e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0595e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2877e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6571e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1879e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5144e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4490e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2600e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0054e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7635e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4759e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6733e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5748e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7632e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8696e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(3.2130e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4071e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7863e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6628e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0916e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1434e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1090e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.1508e-05)\n", "train: tensor(7.9012e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3689e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2423e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3653e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4813e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8104e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.8924e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6548e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2318e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4695e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9974e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8025e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5743e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5494e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0314e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3608e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8306e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3737e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5197e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1055e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6617e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1328e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1670e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3161e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(4.4639e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0538e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8425e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4769e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2200e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9405e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0604e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6860e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9235e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0736e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5238e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6704e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1555e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4650e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4729e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3204e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0511e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7531e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9063e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9157e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0839e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9183e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2049e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2213e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0280e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5177e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0440e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7780e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2790e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1687e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1504e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2362e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(1.5951e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3513e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.8240e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4888e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9914e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3003e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3270e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.7225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4718e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7248e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1252e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2553e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8782e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2083e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0930e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5281e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7618e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7577e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2457e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9902e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1924e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0473e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2887e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5730e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8958e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2488e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7970e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4848e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7092e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8500e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.4521e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6642e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9373e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1699e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.1651e-05)\n", "train: tensor(4.9284e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5215e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0550e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9292e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9730e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4776e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1333e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4456e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6771e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6140e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4505e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7375e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5907e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3529e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7087e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2795e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7849e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3902e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6967e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1911e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9023e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9309e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3881e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9090e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8526e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8978e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4337e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(6.0404e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9593e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6824e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8838e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8334e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4993e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1759e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2065e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8250e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2140e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3407e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1350e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0780e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9427e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7755e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.4109e-06, device='cuda:0', grad_fn=)\n", "train: tensor(8.4233e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6149e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2891e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9528e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4812e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1014e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8751e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0980e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7728e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1543e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0047e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2661e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.0222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1330e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9944e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9531e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0168e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6608e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9283e-06, device='cuda:0', grad_fn=)\n", "train: tensor(5.5198e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1805e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4134e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9546e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5383e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4248e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1350e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1395e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.2585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5775e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5783e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2301e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2514e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5730e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7692e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(9.8598e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1429e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1559e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9654e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7018e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4987e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7882e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7337e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.5622e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(4.4847e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.3112e-05)\n", "train: tensor(6.6087e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9580e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8633e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3529e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1932e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0354e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2187e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7808e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7412e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2556e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8548e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4177e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9809e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9370e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9666e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1015e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7207e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2013e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1269e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3969e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7308e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9877e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0870e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0177e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0786e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9989e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1011e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3752e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4771e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.3845e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9230e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2313e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1220e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3693e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5926e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8488e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5053e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.1912e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0519e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8442e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8915e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3704e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0836e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8522e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8854e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2985e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2888e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7153e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3616e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3147e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4282e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5812e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5640e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1633e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0803e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6228e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0911e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0845e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4220e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9644e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(5.1710e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1925e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8925e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9211e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8110e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6666e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3583e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8119e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9220e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1826e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4659e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0365e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.0242e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4931e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0263e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5992e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3337e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7853e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6538e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3287e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9652e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7988e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9117e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2124e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(2.4779e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3727e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4686e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4913e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9132e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0843e-05)\n", "train: tensor(1.8766e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(1.8035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1212e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2274e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4015e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7756e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7861e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0311e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7632e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.2667e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5522e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8830e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1798e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.5661e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3403e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8645e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5466e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7216e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0256e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3016e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2343e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5615e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9744e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4426e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.0479e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3866e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6496e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9690e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6282e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.2738e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9443e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(2.5479e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8030e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3392e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3943e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5388e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7016e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8471e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6189e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(8.5041e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0711e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2915e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1576e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4359e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7239e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4108e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6675e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9622e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5300e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3101e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3464e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2646e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9613e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6332e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2327e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1681e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0501e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2569e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1738e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9547e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.5980e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6892e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0169e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5270e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8784e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0401e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2406e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1988e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5828e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3580e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7832e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4253e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7745e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1580e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7787e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7257e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4524e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3771e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.7856e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5302e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8074e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2578e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6822e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3855e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6872e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6557e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0109e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1518e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8930e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0766e-05)\n", "train: tensor(1.9274e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1913e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9242e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5198e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(4.8985e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7253e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9113e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8988e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2625e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8298e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5941e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0261e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5976e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6932e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.9281e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2583e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6605e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1195e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5237e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5403e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0359e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8371e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3641e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4882e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1847e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5394e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1076e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7549e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9403e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(4.0841e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9105e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6077e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9252e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5360e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(3.8800e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0578e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9431e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8374e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6202e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4809e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0099e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7005e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8457e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5793e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3743e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7031e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2193e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3103e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.6680e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2453e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0173e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8644e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7254e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5964e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1135e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1154e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3572e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7292e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0027e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7039e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3195e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3738e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2335e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1226e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4288e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5756e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1547e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6152e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(1.4848e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8473e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.1951e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8224e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7065e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9851e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3232e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0004e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.4289e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9126e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9773e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6643e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5852e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(1.9820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6073e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9189e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3471e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4447e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3499e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6266e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8322e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0322e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0881e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0389e-05)\n", "train: tensor(1.7872e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.6292e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4064e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8637e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5605e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4225e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1951e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.3150e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4093e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8953e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6425e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5614e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5745e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1128e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2387e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5700e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1275e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6311e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.7682e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0142e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3431e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8558e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0914e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4995e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5219e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0873e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9420e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2178e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1337e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.3291e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9840e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8265e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2384e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5489e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9356e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1034e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(3.7943e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4462e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1816e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0192e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1458e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3412e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7461e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5036e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7808e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6678e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4481e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0822e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1362e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7839e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8166e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2638e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0196e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5329e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1749e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1323e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8001e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1681e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0193e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(6.9246e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0004e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5289e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4853e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6045e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0887e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9910e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1554e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8621e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2602e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(4.2222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5880e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4418e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8577e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2571e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0906e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4592e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9040e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4390e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0977e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0875e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6030e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0331e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2938e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6717e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3091e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6307e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(4.7808e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6668e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4148e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3449e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0973e-05)\n", "train: tensor(1.4898e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4607e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2441e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8413e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1253e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2139e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9108e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9638e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1613e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7022e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(5.4597e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8753e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6444e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8317e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7115e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2915e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0474e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.3888e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4887e-06, device='cuda:0', grad_fn=)\n", "train: tensor(4.9357e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8632e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8981e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6359e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4146e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5234e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9064e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0086e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1634e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1469e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2239e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4161e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7765e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0757e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9916e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3975e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6362e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8588e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3626e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2054e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2367e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8937e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5669e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(1.8922e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6044e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4208e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3825e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6407e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4439e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4142e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7672e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.6290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7126e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6430e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9832e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.9462e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5715e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.6287e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2203e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8033e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5151e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.6182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0306e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0293e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7196e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7791e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5916e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6686e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7487e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3518e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0972e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1358e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(2.1699e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1814e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8018e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4396e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1207e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7929e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2301e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9476e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1432e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4285e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9022e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1265e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7999e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9943e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4623e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4874e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7150e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6137e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2497e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3615e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8164e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0265e-05)\n", "train: tensor(9.7484e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6342e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6173e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8905e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5657e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1245e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9813e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1440e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.0262e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8975e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4613e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8581e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1793e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(1.4772e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8641e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2127e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4462e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4100e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0693e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1120e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0146e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5170e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1260e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0517e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5458e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2840e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9375e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0803e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5311e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4858e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9424e-06, device='cuda:0', grad_fn=)\n", "train: tensor(3.1560e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8682e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(1.8623e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4577e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3789e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2362e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2346e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.0845e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.8058e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1105e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7095e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1270e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(1.9867e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4312e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8133e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1994e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.0615e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9509e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7273e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3424e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9841e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7355e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0158e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1902e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8611e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9382e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8781e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3074e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2232e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7031e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9931e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6166e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1826e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8689e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8209e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7656e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2442e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0409e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4761e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5721e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1438e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0403e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1177e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5525e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(7.1609e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7878e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6397e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9809e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9042e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6623e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3925e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2841e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4501e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3344e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2961e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8016e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6723e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7165e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2213e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6526e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6579e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2030e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0329e-05)\n", "train: tensor(1.4255e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6235e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9387e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.1806e-06, device='cuda:0', grad_fn=)\n", "train: tensor(3.5457e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9419e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6218e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8567e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3050e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5582e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1472e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2930e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6622e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3418e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(2.5650e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.4612e-06, device='cuda:0', grad_fn=)\n", "train: tensor(5.0188e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4419e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7497e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5193e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.6088e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5531e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0782e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3521e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.6666e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4197e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9665e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.8432e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7498e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7293e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1938e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1620e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0030e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3094e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1397e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2293e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7984e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3508e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2315e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1983e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7955e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5186e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0266e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3335e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3700e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5484e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6288e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(4.4492e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.0002e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5389e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1182e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9223e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4826e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2120e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1532e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8598e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8654e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8037e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6159e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5065e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0750e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2518e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4484e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8309e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.4143e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5198e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3969e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0831e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0374e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1708e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5532e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1592e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2944e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9795e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4221e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6108e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2709e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6178e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6973e-06, device='cuda:0', grad_fn=)\n", 
"train: tensor(5.0134e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5510e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8344e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0327e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6024e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1328e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8582e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7792e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7835e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8532e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5747e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6589e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3132e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0190e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0318e-05)\n", "train: tensor(2.9431e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(2.0035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4617e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0290e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.4103e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2966e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2523e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.2150e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7972e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.1003e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5702e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1291e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3096e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4066e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7628e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(1.8334e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8753e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1122e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4986e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7449e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1176e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.8259e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5057e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0664e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4102e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4351e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5863e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3976e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5751e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8286e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0300e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.1196e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4459e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1223e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0514e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0538e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3394e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3771e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5118e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7173e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8912e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2659e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2810e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2454e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4566e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5748e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.7613e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3904e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4540e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3336e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2665e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5120e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0381e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8640e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2233e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1688e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8062e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4324e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3886e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3990e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9782e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6524e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2492e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2561e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7964e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2438e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0056e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7859e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(7.0367e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7673e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5358e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6977e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0545e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4995e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3057e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1937e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6431e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.5983e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5883e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.5608e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8064e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0953e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1410e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7643e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2214e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2779e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4391e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9587e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6309e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4458e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.2045e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.1677e-05)\n", "train: tensor(2.1293e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1662e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8861e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2814e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6495e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6370e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3794e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8396e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.7246e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.6365e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3957e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5812e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4744e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8523e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2525e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4127e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6063e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0930e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2934e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.8559e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8513e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.6067e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5034e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3330e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4849e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4742e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2373e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7655e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7090e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3797e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.4529e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3733e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4846e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2194e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7042e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0209e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6140e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8591e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7321e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6039e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2842e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5633e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2404e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9573e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9003e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4743e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2299e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5653e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1171e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0477e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5250e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1416e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0678e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8804e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4704e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(3.2778e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1714e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4279e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1248e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4194e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3577e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.4713e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7879e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3740e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1890e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5501e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1488e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4830e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5209e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7859e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.8786e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.9679e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.1864e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0656e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9572e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5798e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1524e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3229e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8122e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(5.8168e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9897e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5015e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1460e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2436e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5559e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3128e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5838e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9467e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(4.0336e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2202e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2125e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3179e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4301e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6759e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6308e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3467e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8342e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.1245e-05, device='cuda:0', grad_fn=)\n", "test: tensor(2.9980e-05)\n", "train: tensor(2.8035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5377e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3536e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4321e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8931e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4880e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2801e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8263e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1057e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4638e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1106e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7551e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9233e-06, device='cuda:0', grad_fn=)\n", "train: tensor(3.2026e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5211e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5108e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2957e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5439e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2219e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1844e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6562e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0229e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.8885e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8629e-05, device='cuda:0', 
grad_fn=)\n", "train: tensor(3.5213e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6762e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5342e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6474e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8226e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5755e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.5040e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7233e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8760e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5585e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8514e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3817e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7422e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2764e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4007e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8113e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3116e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4206e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4035e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7100e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6153e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3969e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8957e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1495e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8435e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7829e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9007e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1194e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.1859e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4169e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3784e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5135e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(2.6323e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.0796e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9863e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9796e-06, device='cuda:0', grad_fn=)\n", "train: tensor(2.9774e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.7314e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3766e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1497e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2219e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0007e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7308e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6780e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0794e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9907e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8978e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5816e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9781e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.2127e-06, device='cuda:0', grad_fn=)\n", "train: tensor(2.4628e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6191e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2683e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5146e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8976e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9454e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6987e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1084e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9173e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9766e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3985e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.0223e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.9472e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3976e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7403e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(1.4820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5949e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1192e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3447e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8831e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1627e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.8106e-05, device='cuda:0', grad_fn=)\n", "test: tensor(3.0509e-05)\n", "train: tensor(1.1255e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5945e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1191e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3060e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7054e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.2909e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3381e-06, device='cuda:0', grad_fn=)\n", "train: tensor(1.0199e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0626e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0255e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2153e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3534e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9288e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8229e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5790e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4949e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.1340e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.7985e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4631e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0302e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3066e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3228e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4053e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.4301e-05, device='cuda:0', grad_fn=)\n", "train: 
tensor(1.3586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6927e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.4524e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3589e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6741e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.6043e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6926e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9509e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8147e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7208e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8058e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1220e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2228e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3366e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5747e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7339e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(6.6581e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8530e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0421e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.8562e-06, device='cuda:0', grad_fn=)\n", "train: tensor(2.2275e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1048e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5708e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6105e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3966e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.7237e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3015e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.1231e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.5468e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5563e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2973e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.2586e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3036e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(2.2830e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8728e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3786e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8283e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1571e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1400e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6189e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.5850e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3834e-06, device='cuda:0', grad_fn=)\n", "train: tensor(9.2795e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5023e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4419e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3340e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0776e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3453e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.7451e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6140e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.2493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5064e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3506e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.3170e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.3962e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.3549e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2831e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9077e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7216e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2366e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2296e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7512e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3026e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.9964e-06, device='cuda:0', grad_fn=)\n", "train: tensor(4.4375e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(2.8607e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.2493e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1279e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7080e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "test: tensor(2.9956e-05)\n", "train: tensor(7.7181e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3732e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5527e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.0866e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3155e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7125e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0002, device='cuda:0', grad_fn=)\n", "train: tensor(3.3919e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.5866e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2607e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.9541e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.9833e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9338e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4224e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7112e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6289e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5436e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2270e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0525e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4664e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.0540e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3890e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4321e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.1750e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2475e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1967e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8103e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8668e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9230e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(3.1222e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.6407e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5847e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0483e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.1735e-06, device='cuda:0', grad_fn=)\n", "train: tensor(5.3589e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4103e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8157e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2042e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5951e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4085e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7974e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.5614e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2215e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6154e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2941e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.9484e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3636e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1463e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.7386e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2652e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3980e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4791e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2888e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.3567e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.1510e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.7732e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3384e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.9572e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6109e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4865e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5856e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.5077e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5698e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2103e-05, 
device='cuda:0', grad_fn=)\n", "train: tensor(8.0441e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.0320e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.8532e-05, device='cuda:0', grad_fn=)\n", "train: tensor(7.3655e-06, device='cuda:0', grad_fn=)\n", "train: tensor(4.1838e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2713e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.5938e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1761e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2475e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.9180e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8333e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.8434e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.3952e-06, device='cuda:0', grad_fn=)\n", "train: tensor(2.7131e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.8537e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.2385e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0820e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.6128e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0309e-05, device='cuda:0', grad_fn=)\n", "train: tensor(0.0001, device='cuda:0', grad_fn=)\n", "train: tensor(3.4775e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.0370e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.7248e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3472e-05, device='cuda:0', grad_fn=)\n", "train: tensor(4.6342e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8577e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1209e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4466e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.6301e-05, device='cuda:0', grad_fn=)\n", "train: tensor(8.6910e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2327e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.7055e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.5923e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5306e-05, device='cuda:0', grad_fn=)\n", 
"train: tensor(3.6231e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.4272e-05, device='cuda:0', grad_fn=)\n", "test: tensor(2.9841e-05)\n", "train: tensor(1.2275e-05, device='cuda:0', grad_fn=)\n", "train: tensor(6.4046e-05, device='cuda:0', grad_fn=)\n", "train: tensor(3.2155e-05, device='cuda:0', grad_fn=)\n", "train: tensor(9.5544e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.0215e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.2732e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.1160e-05, device='cuda:0', grad_fn=)\n", "train: tensor(5.3837e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.5510e-05, device='cuda:0', grad_fn=)\n", "train: tensor(2.4084e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.3247e-05, device='cuda:0', grad_fn=)\n", "train: tensor(1.8455e-05, device='cuda:0', grad_fn=)\n" ] } ], "source": [
 "# Fine-tuning loop: one optimizer step per training batch, plus a full pass over\n",
 "# test_loader every 100 steps. Per-step train losses go to loses_list_train and the\n",
 "# mean test losses go to loses_list_test (both as detached CPU tensors).\n",
 "loses_list_train = []\n",
 "loses_list_test = []\n",
 "\n",
 "num_epochs = 1\n",
 "num_steps = (len(posts_train)//32)*num_epochs\n",
 "counter_of_step = 0\n",
 "\n",
 "for epoch in tqdm(range(num_epochs)):\n",
 "    progress = tqdm(train_loader)\n",
 "    for batch in progress:\n",
 "        # The evaluation branch below switches the model to eval mode,\n",
 "        # so training mode is re-enabled at the start of every step.\n",
 "        model.train()\n",
 "        input_ids = batch['input_ids']\n",
 "        attention_mask = batch['attention_mask']\n",
 "        labels = batch['labels']\n",
 "        optimizer.zero_grad()\n",
 "        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)\n",
 "        loss = outputs.loss\n",
 "        # Store a detached CPU copy: keeping the live loss tensor would retain\n",
 "        # the autograd graph of every step for the whole run.\n",
 "        loses_list_train.append(loss.detach().cpu())\n",
 "        loss.backward()\n",
 "        optimizer.step()\n",
 "        # Show the current loss on the progress bar instead of printing one line per step.\n",
 "        progress.set_postfix(train_loss=loses_list_train[-1].item())\n",
 "\n",
 "        # Periodic evaluation; the loss plot assumes the same 100-step interval.\n",
 "        if counter_of_step%100 == 0:\n",
 "            model.eval()\n",
 "            with torch.no_grad():\n",
 "                loss_test_batches = []\n",
 "                for test_batch in test_loader:\n",
 "                    outputs = model(input_ids=test_batch['input_ids'],\n",
 "                                    attention_mask=test_batch['attention_mask'],\n",
 "                                    labels=test_batch['labels'])\n",
 "                    loss_test_batches.append(outputs.loss)\n",
 "                loss_test = torch.mean(torch.tensor(loss_test_batches))\n",
 "            loses_list_test.append(loss_test)\n",
 "            print('test: ', loss_test)\n",
 "\n",
 "        counter_of_step+=1"
 ] }, { "cell_type": "markdown", "source": [ "# Сохранение и анализ" ], "metadata": { "id": "Kn9MXB7odqz6" } }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T21:29:51.036202Z", "iopub.status.busy": "2024-05-01T21:29:51.034800Z", "iopub.status.idle": "2024-05-01T21:29:52.718947Z", "shell.execute_reply": "2024-05-01T21:29:52.717739Z", "shell.execute_reply.started": "2024-05-01T21:29:51.036157Z" }, "tags": [], "id": "crzzhjDPcuqj" }, "outputs": [], "source": [ "model.save_pretrained('distilbert-base-multilingual-cased-checkpoint-02052024')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-01T21:49:26.688787Z", "iopub.status.busy": "2024-05-01T21:49:26.687422Z", "iopub.status.idle": "2024-05-01T21:49:26.906354Z", "shell.execute_reply": "2024-05-01T21:49:26.905403Z", "shell.execute_reply.started": "2024-05-01T21:49:26.688744Z" }, "tags": [], "id": "uJm73tQccuq4", "outputId": "25276642-9352-448c-855f-d9a1f90b88e7" }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Turn the recorded loss tensors into plain Python floats for plotting.\n",
 "y_train = [step_loss.detach().cpu().item() for step_loss in loses_list_train]\n",
 "y_test = [eval_loss.detach().cpu().item() for eval_loss in loses_list_test]\n",
 "# The test loss is recorded once every 100 training steps, so it is placed on the step axis accordingly.\n",
 "x_test = [100 * k for k in range(len(y_test))]\n",
 "\n",
 "fig, ax = plt.subplots()\n",
 "ax.plot(range(len(y_train)), y_train)\n",
 "ax.plot(x_test, y_test, color='red')\n",
 "ax.set_title('MSE loss')\n",
 "ax.legend(labels=['Loss train', 'Loss test'])\n",
 "ax.set_xlabel('Step')\n",
 "ax.set_ylabel('MSE')\n",
 "plt.show()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "U5fm7yuGcuq6" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "TPU", "colab": { "provenance": [] }, "kernelspec": { "display_name": "Pytorch", "language": "python", "name": "pytorch_python" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" }, "widgets": { "application/vnd.jupyter.widget-state+json": {} } }, "nbformat": 4, "nbformat_minor": 0 }