{ "cells": [ { "cell_type": "markdown", "id": "22fce411", "metadata": { "papermill": { "duration": 0.010561, "end_time": "2023-07-20T14:28:01.754512", "exception": false, "start_time": "2023-07-20T14:28:01.743951", "status": "completed" }, "tags": [] }, "source": [ "# Setup" ] }, { "cell_type": "code", "execution_count": 1, "id": "9731dbdc", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:01.776525Z", "iopub.status.busy": "2023-07-20T14:28:01.775654Z", "iopub.status.idle": "2023-07-20T14:28:01.788643Z", "shell.execute_reply": "2023-07-20T14:28:01.787784Z" }, "papermill": { "duration": 0.026316, "end_time": "2023-07-20T14:28:01.790737", "exception": false, "start_time": "2023-07-20T14:28:01.764421", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "id": "78a4d10e", "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "execution": { "iopub.execute_input": "2023-07-20T14:28:01.812708Z", "iopub.status.busy": "2023-07-20T14:28:01.811212Z", "iopub.status.idle": "2023-07-20T14:28:06.771346Z", "shell.execute_reply": "2023-07-20T14:28:06.770400Z" }, "papermill": { "duration": 4.973439, "end_time": "2023-07-20T14:28:06.773932", "exception": false, "start_time": "2023-07-20T14:28:01.800493", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n", " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" ] } ], "source": [ "import random\n", "import csv\n", "from pathlib import Path\n", "from kaggle_secrets import UserSecretsClient\n", "import copy\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from tqdm.auto import tqdm\n", "from sklearn.model_selection import train_test_split\n", "import wandb\n", "import torch\n", "from torch import nn\n", "from torch.utils.data import Dataset, DataLoader\n", "\n", "dataset_path = Path('/kaggle/input/myanimelist-dataset-animes-profiles-reviews')\n", "output_path = Path('/kaggle/working')" ] }, { "cell_type": "code", "execution_count": 3, "id": "5f19c3c7", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:06.796003Z", "iopub.status.busy": "2023-07-20T14:28:06.795459Z", "iopub.status.idle": "2023-07-20T14:28:06.805410Z", "shell.execute_reply": "2023-07-20T14:28:06.804554Z" }, "papermill": { "duration": 0.023374, "end_time": "2023-07-20T14:28:06.807410", "exception": false, "start_time": "2023-07-20T14:28:06.784036", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Ensure deterministic behavior\n", "torch.backends.cudnn.deterministic = True\n", "random.seed(hash('setting random seeds') % 2**32 - 1)\n", "np.random.seed(hash('improves reproducibility') % 2**32 - 1)\n", "torch.manual_seed(hash('by removing stochasticity') % 2**32 - 1)\n", "torch.cuda.manual_seed_all(hash('so runs are repeatable') % 2**32 - 1)" ] }, { "cell_type": "code", "execution_count": 4, "id": "01dc078c", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:06.827990Z", "iopub.status.busy": "2023-07-20T14:28:06.827480Z", "iopub.status.idle": "2023-07-20T14:28:06.898655Z", "shell.execute_reply": "2023-07-20T14:28:06.897736Z" }, "papermill": { "duration": 0.084035, "end_time": "2023-07-20T14:28:06.901165", "exception": false, "start_time": "2023-07-20T14:28:06.817130", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using cuda device\n" ] } ], "source": [ "# Get appropriate device for training\n", "device = (\n", " 'cuda'\n", " if torch.cuda.is_available()\n", " else 'mps'\n", " if torch.backends.mps.is_available()\n", " else 'cpu'\n", ")\n", "print(f'Using {device} device')" ] }, { "cell_type": "code", "execution_count": 5, "id": "379a3fdc", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:06.922221Z", "iopub.status.busy": "2023-07-20T14:28:06.921351Z", "iopub.status.idle": "2023-07-20T14:28:08.961625Z", "shell.execute_reply": "2023-07-20T14:28:08.960641Z" }, "papermill": { "duration": 2.052923, "end_time": "2023-07-20T14:28:08.963787", "exception": false, "start_time": "2023-07-20T14:28:06.910864", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Log in to Weights and Biases for model metric tracking\n", "user_secrets = UserSecretsClient()\n", "wandb_api_key = user_secrets.get_secret('WANDB_API_KEY')\n", "wandb.login(key=wandb_api_key)" ] }, { "cell_type": "code", "execution_count": 6, "id": "9453fe4f", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:08.986015Z", "iopub.status.busy": "2023-07-20T14:28:08.985233Z", "iopub.status.idle": "2023-07-20T14:28:20.697286Z", "shell.execute_reply": "2023-07-20T14:28:20.696399Z" }, "papermill": { "duration": 11.7251, "end_time": "2023-07-20T14:28:20.699412", "exception": false, "start_time": "2023-07-20T14:28:08.974312", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
profileanime_uidscore
0DesolatePsyche340968
1baekbeans3459910
2skrn288917
3edgewalker0029049
4aManOfCulture99418110
\n", "
" ], "text/plain": [ " profile anime_uid score\n", "0 DesolatePsyche 34096 8\n", "1 baekbeans 34599 10\n", "2 skrn 28891 7\n", "3 edgewalker00 2904 9\n", "4 aManOfCulture99 4181 10" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews = pd.read_csv(dataset_path/'reviews.csv', usecols=['profile', 'anime_uid', 'score'])\n", "reviews.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "c6d3fcd4", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:20.721906Z", "iopub.status.busy": "2023-07-20T14:28:20.721620Z", "iopub.status.idle": "2023-07-20T14:28:20.725602Z", "shell.execute_reply": "2023-07-20T14:28:20.724617Z" }, "papermill": { "duration": 0.017475, "end_time": "2023-07-20T14:28:20.727938", "exception": false, "start_time": "2023-07-20T14:28:20.710463", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Use a smaller subset of dataset for quicker iteration\n", "# reviews = reviews.sample(100)" ] }, { "cell_type": "code", "execution_count": 8, "id": "caf09e3b", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:20.749397Z", "iopub.status.busy": "2023-07-20T14:28:20.749148Z", "iopub.status.idle": "2023-07-20T14:28:21.008571Z", "shell.execute_reply": "2023-07-20T14:28:21.007628Z" }, "papermill": { "duration": 0.272956, "end_time": "2023-07-20T14:28:21.011045", "exception": false, "start_time": "2023-07-20T14:28:20.738089", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidtitle
028891Haikyuu!! Second Season
123273Shigatsu wa Kimi no Uso
234599Made in Abyss
35114Fullmetal Alchemist: Brotherhood
431758Kizumonogatari III: Reiketsu-hen
\n", "
" ], "text/plain": [ " uid title\n", "0 28891 Haikyuu!! Second Season\n", "1 23273 Shigatsu wa Kimi no Uso\n", "2 34599 Made in Abyss\n", "3 5114 Fullmetal Alchemist: Brotherhood\n", "4 31758 Kizumonogatari III: Reiketsu-hen" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "animes = pd.read_csv(dataset_path/'animes.csv', usecols=['uid', 'title'])\n", "animes.head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "f5d8f61f", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:21.033908Z", "iopub.status.busy": "2023-07-20T14:28:21.033619Z", "iopub.status.idle": "2023-07-20T14:28:21.103374Z", "shell.execute_reply": "2023-07-20T14:28:21.102455Z" }, "papermill": { "duration": 0.08356, "end_time": "2023-07-20T14:28:21.105486", "exception": false, "start_time": "2023-07-20T14:28:21.021926", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
profileanime_uidscoreuidtitle
0DesolatePsyche34096834096Gintama.
1DesolatePsyche34096834096Gintama.
2claudinou34096834096Gintama.
3claudinou34096834096Gintama.
4PeterFromRussia34096834096Gintama.
..................
317474Kuromizue975199751Strike Witches Movie
317475ryanxwonbin975189751Strike Witches Movie
317476AobaSuzukaze9751109751Strike Witches Movie
3174777jaws7975199751Strike Witches Movie
317478arsonal975189751Strike Witches Movie
\n", "

317479 rows Γ— 5 columns

\n", "
" ], "text/plain": [ " profile anime_uid score uid title\n", "0 DesolatePsyche 34096 8 34096 Gintama.\n", "1 DesolatePsyche 34096 8 34096 Gintama.\n", "2 claudinou 34096 8 34096 Gintama.\n", "3 claudinou 34096 8 34096 Gintama.\n", "4 PeterFromRussia 34096 8 34096 Gintama.\n", "... ... ... ... ... ...\n", "317474 Kuromizue 9751 9 9751 Strike Witches Movie\n", "317475 ryanxwonbin 9751 8 9751 Strike Witches Movie\n", "317476 AobaSuzukaze 9751 10 9751 Strike Witches Movie\n", "317477 7jaws7 9751 9 9751 Strike Witches Movie\n", "317478 arsonal 9751 8 9751 Strike Witches Movie\n", "\n", "[317479 rows x 5 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add anime titles to dataframe\n", "reviews = pd.merge(reviews, animes, left_on='anime_uid', right_on='uid')\n", "reviews" ] }, { "cell_type": "markdown", "id": "646debf5", "metadata": { "papermill": { "duration": 0.010909, "end_time": "2023-07-20T14:28:21.128178", "exception": false, "start_time": "2023-07-20T14:28:21.117269", "status": "completed" }, "tags": [] }, "source": [ "# Data Exploration" ] }, { "cell_type": "code", "execution_count": 10, "id": "d00c0e69", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:21.152326Z", "iopub.status.busy": "2023-07-20T14:28:21.150647Z", "iopub.status.idle": "2023-07-20T14:28:30.381153Z", "shell.execute_reply": "2023-07-20T14:28:30.380101Z" }, "papermill": { "duration": 9.244604, "end_time": "2023-07-20T14:28:30.383592", "exception": false, "start_time": "2023-07-20T14:28:21.138988", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "cross_tabulation = pd.crosstab(reviews.profile, reviews.title, reviews.score, aggfunc=np.sum)" ] }, { "cell_type": "markdown", "id": "8dc9252e", "metadata": { "execution": { "iopub.execute_input": "2023-07-18T20:29:28.442128Z", "iopub.status.busy": "2023-07-18T20:29:28.441735Z", "iopub.status.idle": "2023-07-18T20:29:28.449166Z", "shell.execute_reply": "2023-07-18T20:29:28.447674Z", "shell.execute_reply.started": "2023-07-18T20:29:28.442097Z" }, "papermill": { "duration": 0.010565, "end_time": "2023-07-20T14:28:30.405702", "exception": false, "start_time": "2023-07-20T14:28:30.395137", "status": "completed" }, "tags": [] }, "source": [ "Here, we preview the cross-tabulated data with the users and animes with the most ratings." ] }, { "cell_type": "code", "execution_count": 11, "id": "e086966a", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:30.429528Z", "iopub.status.busy": "2023-07-20T14:28:30.429231Z", "iopub.status.idle": "2023-07-20T14:28:30.609691Z", "shell.execute_reply": "2023-07-20T14:28:30.608634Z" }, "papermill": { "duration": 0.195586, "end_time": "2023-07-20T14:28:30.613032", "exception": false, "start_time": "2023-07-20T14:28:30.417446", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titleDeath NoteSteins;GateKimi no Na wa.Fullmetal Alchemist: BrotherhoodClannad: After StoryToradora!Mahou Shoujo Madokaβ˜…MagicaMirai NikkiTengen Toppa Gurren LagannAno Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai.
profile
Stark700NaNNaNNaNNaNNaNNaNNaN36.0NaNNaN
Sidewinder51NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ktulu007NaN28.0NaN32.0NaN20.036.0NaN24.028.0
LegendAquaNaNNaN40.0NaNNaNNaNNaNNaNNaNNaN
ggultra2764NaNNaN24.024.0NaNNaN32.016.0NaNNaN
literaturenerd24.0NaNNaN28.0NaNNaN20.016.0NaNNaN
BanjoTheBearNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
BabyGirl0630136.032.036.040.0NaNNaNNaN28.0NaNNaN
PyraXadon28.0NaNNaNNaN40.0NaN36.0NaNNaN36.0
angelsreviewNaNNaNNaNNaNNaN24.032.0NaNNaNNaN
\n", "
" ], "text/plain": [ "title Death Note Steins;Gate Kimi no Na wa. \\\n", "profile \n", "Stark700 NaN NaN NaN \n", "Sidewinder51 NaN NaN NaN \n", "ktulu007 NaN 28.0 NaN \n", "LegendAqua NaN NaN 40.0 \n", "ggultra2764 NaN NaN 24.0 \n", "literaturenerd 24.0 NaN NaN \n", "BanjoTheBear NaN NaN NaN \n", "BabyGirl06301 36.0 32.0 36.0 \n", "PyraXadon 28.0 NaN NaN \n", "angelsreview NaN NaN NaN \n", "\n", "title Fullmetal Alchemist: Brotherhood Clannad: After Story \\\n", "profile \n", "Stark700 NaN NaN \n", "Sidewinder51 NaN NaN \n", "ktulu007 32.0 NaN \n", "LegendAqua NaN NaN \n", "ggultra2764 24.0 NaN \n", "literaturenerd 28.0 NaN \n", "BanjoTheBear NaN NaN \n", "BabyGirl06301 40.0 NaN \n", "PyraXadon NaN 40.0 \n", "angelsreview NaN NaN \n", "\n", "title Toradora! Mahou Shoujo Madokaβ˜…Magica Mirai Nikki \\\n", "profile \n", "Stark700 NaN NaN 36.0 \n", "Sidewinder51 NaN NaN NaN \n", "ktulu007 20.0 36.0 NaN \n", "LegendAqua NaN NaN NaN \n", "ggultra2764 NaN 32.0 16.0 \n", "literaturenerd NaN 20.0 16.0 \n", "BanjoTheBear NaN NaN NaN \n", "BabyGirl06301 NaN NaN 28.0 \n", "PyraXadon NaN 36.0 NaN \n", "angelsreview 24.0 32.0 NaN \n", "\n", "title Tengen Toppa Gurren Lagann \\\n", "profile \n", "Stark700 NaN \n", "Sidewinder51 NaN \n", "ktulu007 24.0 \n", "LegendAqua NaN \n", "ggultra2764 NaN \n", "literaturenerd NaN \n", "BanjoTheBear NaN \n", "BabyGirl06301 NaN \n", "PyraXadon NaN \n", "angelsreview NaN \n", "\n", "title Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. \n", "profile \n", "Stark700 NaN \n", "Sidewinder51 NaN \n", "ktulu007 28.0 \n", "LegendAqua NaN \n", "ggultra2764 NaN \n", "literaturenerd NaN \n", "BanjoTheBear NaN \n", "BabyGirl06301 NaN \n", "PyraXadon 36.0 \n", "angelsreview NaN " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_groups = reviews.groupby('profile').score.count()\n", "top_users = user_groups.sort_values(ascending=False)[:10].keys()\n", "\n", "anime_groups = reviews.groupby('title').score.count()\n", "top_animes = anime_groups.sort_values(ascending=False)[:10].keys()\n", "\n", "cross_tabulation.loc[top_users, top_animes]" ] }, { "cell_type": "markdown", "id": "0805df78", "metadata": { "papermill": { "duration": 0.011274, "end_time": "2023-07-20T14:28:30.636265", "exception": false, "start_time": "2023-07-20T14:28:30.624991", "status": "completed" }, "tags": [] }, "source": [ "It looks like the review scores show a right-skew." ] }, { "cell_type": "code", "execution_count": 12, "id": "2cce7e27", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:30.660153Z", "iopub.status.busy": "2023-07-20T14:28:30.659859Z", "iopub.status.idle": "2023-07-20T14:28:30.961525Z", "shell.execute_reply": "2023-07-20T14:28:30.960623Z" }, "papermill": { "duration": 0.315931, "end_time": "2023-07-20T14:28:30.963642", "exception": false, "start_time": "2023-07-20T14:28:30.647711", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(array([ 4870., 5164., 10229., 10909., 16495., 24766., 39624., 57463.,\n", " 70498., 77461.]),\n", " array([ 0. , 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11. ]),\n", " )" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.hist(reviews.score)" ] }, { "cell_type": "markdown", "id": "1e11dae1", "metadata": { "papermill": { "duration": 0.011356, "end_time": "2023-07-20T14:28:30.987234", "exception": false, "start_time": "2023-07-20T14:28:30.975878", "status": "completed" }, "tags": [] }, "source": [ "# Create DataLoaders" ] }, { "cell_type": "markdown", "id": "317f12b9", "metadata": { "papermill": { "duration": 0.011235, "end_time": "2023-07-20T14:28:31.010233", "exception": false, "start_time": "2023-07-20T14:28:30.998998", "status": "completed" }, "tags": [] }, "source": [ "We must create indexes for each user and anime to correspond to." ] }, { "cell_type": "code", "execution_count": 13, "id": "2cd2c452", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.035287Z", "iopub.status.busy": "2023-07-20T14:28:31.034709Z", "iopub.status.idle": "2023-07-20T14:28:31.119861Z", "shell.execute_reply": "2023-07-20T14:28:31.118932Z" }, "papermill": { "duration": 0.100153, "end_time": "2023-07-20T14:28:31.122158", "exception": false, "start_time": "2023-07-20T14:28:31.022005", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "unique_users = reviews.profile.unique()\n", "user_to_index = {}\n", "for index, user in enumerate(unique_users):\n", " user_to_index[user] = index\n", " \n", "unique_animes = reviews.title.unique()\n", "anime_to_index = {}\n", "index_to_anime = {}\n", "for index, anime in enumerate(unique_animes):\n", " anime_to_index[anime] = index\n", " index_to_anime[index] = anime" ] }, { "cell_type": "code", "execution_count": 14, "id": "baaff81e", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.147263Z", "iopub.status.busy": "2023-07-20T14:28:31.146975Z", "iopub.status.idle": "2023-07-20T14:28:31.156162Z", "shell.execute_reply": "2023-07-20T14:28:31.155345Z" }, "papermill": { "duration": 0.023843, "end_time": "2023-07-20T14:28:31.158136", "exception": false, "start_time": "2023-07-20T14:28:31.134293", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class ReviewDataset(Dataset):\n", " '''\n", " A class for a Pytorch dataset that stores users, animes, and scores.\n", " '''\n", " \n", " def __init__(self, dataframe, user_to_index, anime_to_index):\n", " # Convert users to integers\n", " user_indexes = dataframe.profile.map(user_to_index)\n", " \n", " # Convert animes to integers\n", " anime_indexes = dataframe.title.map(anime_to_index)\n", " \n", " self.X = pd.DataFrame({'user_index': user_indexes, 'anime_index': anime_indexes})\n", " self.y = dataframe.score.astype(np.intc)\n", " \n", " def __len__(self):\n", " return len(self.X)\n", " \n", " def __getitem__(self, index):\n", " X = torch.tensor(self.X.iloc[index], dtype=torch.int32).to(device)\n", " y = torch.tensor([self.y.iloc[index]], dtype=torch.float32).to(device)\n", " return X, y" ] }, { "cell_type": "code", "execution_count": 15, "id": "a6eb2143", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.183086Z", "iopub.status.busy": "2023-07-20T14:28:31.182563Z", "iopub.status.idle": "2023-07-20T14:28:31.265246Z", "shell.execute_reply": "2023-07-20T14:28:31.264245Z" }, "papermill": { "duration": 0.097842, "end_time": "2023-07-20T14:28:31.267461", "exception": false, "start_time": "2023-07-20T14:28:31.169619", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
profileanime_uidscoreuidtitle
196752samuel_sfx14741914741Chuunibyou demo Koi ga Shitai!
210305Popaglockin1205120Fruits Basket
213579Zyzoxing31859931859Hai to Gensou no Grimgar
204607azuslu7jpg10357910357Jinrui wa Suitai Shimashita
29003JyoStar40010400Seihou Bukyou Outlaw Star
..................
119879ratchet573508185081Bakemonogatari
259178Lord_Odous10793610793Guilty Crown
131932LacePendragon13601913601Psycho-Pass
146867Tozzy32932Neon Genesis Evangelion: The End of Evangelion
121958BaronBrixius389931038993Karakai Jouzu no Takagi-san 2
\n", "

253983 rows Γ— 5 columns

\n", "
" ], "text/plain": [ " profile anime_uid score uid \\\n", "196752 samuel_sfx 14741 9 14741 \n", "210305 Popaglockin 120 5 120 \n", "213579 Zyzoxing 31859 9 31859 \n", "204607 azuslu7jpg 10357 9 10357 \n", "29003 JyoStar 400 10 400 \n", "... ... ... ... ... \n", "119879 ratchet573 5081 8 5081 \n", "259178 Lord_Odous 10793 6 10793 \n", "131932 LacePendragon 13601 9 13601 \n", "146867 Tozzy 32 9 32 \n", "121958 BaronBrixius 38993 10 38993 \n", "\n", " title \n", "196752 Chuunibyou demo Koi ga Shitai! \n", "210305 Fruits Basket \n", "213579 Hai to Gensou no Grimgar \n", "204607 Jinrui wa Suitai Shimashita \n", "29003 Seihou Bukyou Outlaw Star \n", "... ... \n", "119879 Bakemonogatari \n", "259178 Guilty Crown \n", "131932 Psycho-Pass \n", "146867 Neon Genesis Evangelion: The End of Evangelion \n", "121958 Karakai Jouzu no Takagi-san 2 \n", "\n", "[253983 rows x 5 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_reviews, test_reviews = train_test_split(reviews, test_size=0.2, random_state=42)\n", "validation_reviews, test_reviews = train_test_split(test_reviews, test_size=0.5, random_state=42)\n", "train_reviews" ] }, { "cell_type": "markdown", "id": "281f132a", "metadata": { "papermill": { "duration": 0.012187, "end_time": "2023-07-20T14:28:31.292358", "exception": false, "start_time": "2023-07-20T14:28:31.280171", "status": "completed" }, "tags": [] }, "source": [ "# Create Model" ] }, { "cell_type": "code", "execution_count": 16, "id": "e94c1403", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.317800Z", "iopub.status.busy": "2023-07-20T14:28:31.317470Z", "iopub.status.idle": "2023-07-20T14:28:31.327482Z", "shell.execute_reply": "2023-07-20T14:28:31.326612Z" }, "papermill": { "duration": 0.025283, "end_time": "2023-07-20T14:28:31.329478", "exception": false, "start_time": "2023-07-20T14:28:31.304195", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class CollaborativeFilteringNeuralNetwork(nn.Module):\n", " '''\n", " Creates a neural network with embedding layers.\n", " \n", " Arguments:\n", " num_users:\n", " Number of unique users\n", " \n", " num_items:\n", " Number of unique items\n", " \n", " num_factors:\n", " Number of latent factors for each user and item\n", " \n", " hiddens:\n", " A list of integers defining the number of units in each hidden layer.\n", " \n", " embedding_dropout:\n", " Dropout rate to apply after embeddings layer\n", " \n", " dropouts:\n", " List of dropout rates to apply after each hidden layer\n", " '''\n", " \n", " def __init__(self, num_users, num_items, num_factors, hiddens, embedding_dropout, dropouts):\n", " super().__init__()\n", " \n", " def generate_layers(num_in):\n", " '''\n", " Generator that creates layers\n", " '''\n", " \n", " for num_out, dropout in zip(hiddens, dropouts):\n", " yield nn.Linear(num_in, num_out)\n", " yield nn.ReLU()\n", " if dropout > 0.:\n", " yield nn.Dropout(dropout)\n", " num_in = num_out\n", " \n", " # Output layer\n", " yield nn.Linear(num_in, 1)\n", " yield nn.Sigmoid()\n", " \n", " self.user_embeddings = nn.Embedding(num_users, num_factors)\n", " self.item_embeddings = nn.Embedding(num_items, num_factors)\n", " self.embedding_dropout = nn.Dropout(embedding_dropout)\n", " self.linear_relu_stack = nn.Sequential(*list(generate_layers(num_factors * 2)))\n", " \n", " def forward(self, x):\n", " user_embeddings = self.user_embeddings(x[:, 0])\n", " item_embeddings = self.item_embeddings(x[:, 1])\n", " nn_input = torch.cat((user_embeddings, item_embeddings), dim=1)\n", " nn_input = self.embedding_dropout(nn_input)\n", " nn_output = self.linear_relu_stack(nn_input)\n", " return nn_output" ] }, { "cell_type": "markdown", "id": "195a2871", "metadata": { "papermill": { "duration": 0.011791, "end_time": "2023-07-20T14:28:31.353325", "exception": false, "start_time": "2023-07-20T14:28:31.341534", "status": "completed" }, "tags": [] }, "source": [ "# Train Model" ] }, { "cell_type": "code", "execution_count": 17, "id": "909e1b36", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.378627Z", "iopub.status.busy": "2023-07-20T14:28:31.378334Z", "iopub.status.idle": "2023-07-20T14:28:31.389128Z", "shell.execute_reply": "2023-07-20T14:28:31.388234Z" }, "papermill": { "duration": 0.025972, "end_time": "2023-07-20T14:28:31.391045", "exception": false, "start_time": "2023-07-20T14:28:31.365073", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def model_pipeline(init_arguments):\n", " with wandb.init(**init_arguments):\n", " config = wandb.config\n", " \n", " # Make datasets\n", " train_dataset = ReviewDataset(train_reviews, user_to_index, anime_to_index)\n", " validation_dataset = ReviewDataset(validation_reviews, user_to_index, anime_to_index)\n", " test_dataset = ReviewDataset(test_reviews, user_to_index, anime_to_index)\n", "\n", " # Make dataloaders\n", " train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)\n", " validation_dataloader = DataLoader(validation_dataset, batch_size=config.batch_size, shuffle=True)\n", " test_dataloader = DataLoader(test_dataset, batch_size = config.batch_size, shuffle=True)\n", " \n", " # Make model\n", " num_users = len(unique_users)\n", " num_items = len(unique_animes)\n", " model = CollaborativeFilteringNeuralNetwork(\n", " num_users, \n", " num_items, \n", " config.latent_factors, \n", " config.hidden_layers,\n", " config.embedding_dropout,\n", " config.dropouts\n", " ).to(device)\n", " \n", " loss_function = nn.MSELoss()\n", " optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)\n", "\n", " print('πŸ’ͺ Training! ≧◑≦\\n')\n", " train(train_dataloader, validation_dataloader, model, config.epochs, loss_function, optimizer, config.patience)\n", " \n", " print('πŸ§ͺ Testing! ≧◑≦\\n')\n", " test(test_dataloader, model, loss_function, False)\n", " \n", " print('πŸ“¦ Exporting! ≧◑≦\\n')\n", " \n", " # Export model\n", " sample_users = torch.randint(len(unique_users), (config.batch_size, 1))\n", " sample_animes = torch.randint(len(unique_animes), (config.batch_size, 1))\n", " sample_input = torch.cat((sample_users, sample_animes), 1)\n", " sample_input = sample_input.to(device)\n", " export_model(model, sample_input, 'model.onnx')\n", " \n", " # Export anime indexes\n", " export_anime_indexes('anime_indexes.csv')\n", " \n", " # Export anime embeddings\n", " anime_embeddings = model.item_embeddings.weight\n", " export_anime_embeddings(anime_embeddings, 'anime_embeddings.csv')\n", "\n", " return model, anime_embeddings" ] }, { "cell_type": "code", "execution_count": 18, "id": "8cab5f27", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.417120Z", "iopub.status.busy": "2023-07-20T14:28:31.416179Z", "iopub.status.idle": "2023-07-20T14:28:31.425747Z", "shell.execute_reply": "2023-07-20T14:28:31.424918Z" }, "papermill": { "duration": 0.024826, "end_time": "2023-07-20T14:28:31.427847", "exception": false, "start_time": "2023-07-20T14:28:31.403021", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def train(dataloader, validation_dataloader, model, epochs, loss_function, optimizer, patience):\n", " '''\n", " Training loop.\n", " '''\n", " \n", " # Tell wandb to watch what the model gets up to: gradients, weights, and more!\n", " wandb.watch(model, loss_function, log='all', log_freq=10)\n", " \n", " examples_seen = 0\n", " best_validation_loss = np.inf\n", " best_weights = None\n", " # Put model in training mode. Important for batch normalization and dropout\n", " model.train()\n", " \n", " for epoch in tqdm(range(epochs)):\n", " print(f'Epoch {epoch+1}\\n-------------------------------')\n", " \n", " for batch, (X, y) in enumerate(dataloader):\n", " loss = train_batch(X, y, model, loss_function, optimizer)\n", " examples_seen += len(X)\n", "\n", " # Report metrics every couple of batches\n", " if batch % 200 == 0:\n", " train_log(loss, examples_seen, epoch)\n", " \n", " # Validate model after each epoch\n", " validation_loss = test(validation_dataloader, model, loss_function, True)\n", " \n", " # Early stopping\n", " if validation_loss < best_validation_loss:\n", " best_validation_loss = validation_loss\n", " best_weights = copy.deepcopy(model.state_dict())\n", " no_improvements_streak = 0\n", " else:\n", " no_improvements_streak = 1\n", " \n", " if no_improvements_streak >= patience:\n", " print(f'Early stopping after {epoch} epochs')\n", " break;\n", " \n", " model.load_state_dict(best_weights)" ] }, { "cell_type": "code", "execution_count": 19, "id": "46b1853c", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.452945Z", "iopub.status.busy": "2023-07-20T14:28:31.452677Z", "iopub.status.idle": "2023-07-20T14:28:31.458316Z", "shell.execute_reply": "2023-07-20T14:28:31.457368Z" }, "papermill": { "duration": 0.020376, "end_time": "2023-07-20T14:28:31.460286", "exception": false, "start_time": "2023-07-20T14:28:31.439910", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def train_batch(X, y, model, loss_function, optimizer):\n", " '''\n", " Trains a single batch.\n", " '''\n", " \n", " # Move tensors to device\n", " X, y = X.to(device), y.to(device)\n", "\n", " # Compute prediction error\n", " predictions = model(X) * 10\n", " loss = loss_function(predictions, y)\n", "\n", " # Backpropogation\n", " loss.backward()\n", " optimizer.step()\n", " optimizer.zero_grad()\n", " \n", " return loss.item()" ] }, { "cell_type": "code", "execution_count": 20, "id": "680c21b6", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.485910Z", "iopub.status.busy": "2023-07-20T14:28:31.485110Z", "iopub.status.idle": "2023-07-20T14:28:31.490372Z", "shell.execute_reply": "2023-07-20T14:28:31.489549Z" }, "papermill": { "duration": 0.020358, "end_time": "2023-07-20T14:28:31.492581", "exception": false, "start_time": "2023-07-20T14:28:31.472223", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def train_log(loss, examples_seen, epoch):\n", " '''\n", " Print progress and save metrics to Weights and Biases\n", " '''\n", " \n", " wandb.log({'epoch': epoch, 'loss': loss}, step=examples_seen)\n", " print(f'Training loss after {examples_seen:>5d} examples: {loss:>7f}')" ] }, { "cell_type": "code", "execution_count": 21, "id": "50c4f129", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.517933Z", "iopub.status.busy": "2023-07-20T14:28:31.517193Z", "iopub.status.idle": "2023-07-20T14:28:31.524564Z", "shell.execute_reply": "2023-07-20T14:28:31.523746Z" }, "papermill": { "duration": 0.022071, "end_time": "2023-07-20T14:28:31.526511", "exception": false, "start_time": "2023-07-20T14:28:31.504440", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def test(dataloader, model, loss_function, isValidation):\n", " '''\n", " Does validation/testing.\n", " '''\n", "\n", " num_batches = len(dataloader)\n", " # Set the model to evaluation mode - important for batch normalization and dropout layers\n", " model.eval()\n", " loss = 0\n", " \n", " with torch.no_grad():\n", " for X, y in dataloader:\n", " # Move tensors to device\n", " X, y = X.to(device), y.to(device)\n", " \n", " # Compute prediction error\n", " predictions = model(X) * 10\n", " \n", " # Compute loss and accuracy\n", " loss += loss_function(predictions, y).item()\n", " \n", " \n", " loss /= num_batches\n", " \n", " if isValidation:\n", " wandb.log({'validation_loss': loss})\n", " print(f'Validation Error: \\n Validation loss: {loss:>8f} \\n')\n", " else:\n", " wandb.log({'test_loss': loss})\n", " print(f'Test Error: \\n Test loss: {loss:>8f} \\n')\n", " \n", " return loss" ] }, { "cell_type": "code", "execution_count": 22, "id": "31623e52", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.552118Z", "iopub.status.busy": "2023-07-20T14:28:31.551237Z", "iopub.status.idle": "2023-07-20T14:28:31.556694Z", "shell.execute_reply": "2023-07-20T14:28:31.555859Z" }, "papermill": { "duration": 0.020071, "end_time": "2023-07-20T14:28:31.558688", "exception": false, "start_time": "2023-07-20T14:28:31.538617", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def export_model(model, sample_input, file_name):\n", " torch.onnx.export(model, sample_input, file_name)\n", " wandb.save(str(output_path/file_name))\n", " print('Model exported!')" ] }, { "cell_type": "code", "execution_count": 23, "id": "838374ed", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.584069Z", "iopub.status.busy": "2023-07-20T14:28:31.583332Z", "iopub.status.idle": "2023-07-20T14:28:31.589761Z", "shell.execute_reply": "2023-07-20T14:28:31.588910Z" }, "papermill": { "duration": 0.021682, "end_time": "2023-07-20T14:28:31.592081", "exception": false, "start_time": "2023-07-20T14:28:31.570399", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def export_anime_indexes(file_name):\n", " with open(file_name, mode='w', newline='') as file:\n", " writer = csv.writer(file)\n", "\n", " # Write header\n", " writer.writerow(['Index', 'Anime'])\n", "\n", " # Write key-value pairs\n", " for index, anime in index_to_anime.items():\n", " writer.writerow([index, anime])\n", "\n", " wandb.save(str(output_path/file_name))\n", " print('Anime indexes CSV exported!')" ] }, { "cell_type": "code", "execution_count": 24, "id": "70adc6a3", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.619126Z", "iopub.status.busy": "2023-07-20T14:28:31.618261Z", "iopub.status.idle": "2023-07-20T14:28:31.624758Z", "shell.execute_reply": "2023-07-20T14:28:31.623783Z" }, "papermill": { "duration": 0.022205, "end_time": "2023-07-20T14:28:31.626906", "exception": false, "start_time": "2023-07-20T14:28:31.604701", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def export_anime_embeddings(anime_embeddings, file_name):\n", " with open(file_name, mode='w', newline='') as file:\n", " writer = csv.writer(file)\n", "\n", " for row in anime_embeddings:\n", " writer.writerow(row.tolist())\n", "\n", " wandb.save(str(output_path/file_name))\n", " print('Anime embeddings CSV exported!')" ] }, { "cell_type": "code", "execution_count": 25, "id": "c91b1867", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.653487Z", "iopub.status.busy": "2023-07-20T14:28:31.652729Z", "iopub.status.idle": "2023-07-20T14:28:31.658277Z", "shell.execute_reply": "2023-07-20T14:28:31.657433Z" }, "papermill": { "duration": 0.021024, "end_time": "2023-07-20T14:28:31.660308", "exception": false, "start_time": "2023-07-20T14:28:31.639284", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "config = {\n", " 'architecture': 'Neural Collaborative Filtering',\n", " 'epochs': 100,\n", " 'batch_size': 2000,\n", " 'hidden_layers': [500, 500, 500],\n", " 'learning_rate': 1e-3,\n", " 'latent_factors': 150,\n", " 'embedding_dropout': 0.05,\n", " 'dropouts': [0.5, 0.5, 0.25],\n", " 'patience': 10\n", "}" ] }, { "cell_type": "code", "execution_count": 26, "id": "3f1ea3db", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.685953Z", "iopub.status.busy": "2023-07-20T14:28:31.685190Z", "iopub.status.idle": "2023-07-20T14:28:31.689968Z", "shell.execute_reply": "2023-07-20T14:28:31.689117Z" }, "papermill": { "duration": 0.019747, "end_time": "2023-07-20T14:28:31.692156", "exception": false, "start_time": "2023-07-20T14:28:31.672409", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "init_arguments = {\n", " 'project': 'anime-collaborative-filtering-system',\n", " 'config': config,\n", " 'name': 'serious',\n", " 'notes': \"I'm using the whole dataset and serious parameters this time!\"\n", "}" ] }, { "cell_type": "code", "execution_count": 27, "id": "9b75fef5", "metadata": { "execution": { "iopub.execute_input": "2023-07-20T14:28:31.717683Z", "iopub.status.busy": "2023-07-20T14:28:31.716930Z", "iopub.status.idle": "2023-07-20T15:52:24.225700Z", "shell.execute_reply": "2023-07-20T15:52:24.224713Z" }, "papermill": { "duration": 5032.524102, "end_time": "2023-07-20T15:52:24.228198", "exception": false, "start_time": "2023-07-20T14:28:31.704096", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33meddiezhuang\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.5\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/kaggle/working/wandb/run-20230720_142831-jeqqjth0\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mserious\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/eddiezhuang/anime-collaborative-filtering-system\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: πŸš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/eddiezhuang/anime-collaborative-filtering-system/runs/jeqqjth0\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "πŸ’ͺ Training! ≧◑≦\n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b2526a62bb494e6d86266bb070ee637e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/100 [00:00