quyanh committed on
Commit
97ce7fb
1 Parent(s): 713f497

initial commit

Demo Data.ipynb ADDED
@@ -0,0 +1,718 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "6c97a769",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Overview data\n",
9
+ "\n",
10
+ "**Note: In this notebook, I assume the dataset is cleaned and ignore EDA.**"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "id": "a54afd58",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import pandas as pd\n",
21
+ "import numpy as np\n",
22
+ "import warnings\n",
23
+ "\n",
24
+ "warnings.filterwarnings(\"ignore\")"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 2,
30
+ "id": "cdb44c97",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "data": {
35
+ "text/html": [
36
+ "<div>\n",
37
+ "<style scoped>\n",
38
+ " .dataframe tbody tr th:only-of-type {\n",
39
+ " vertical-align: middle;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe tbody tr th {\n",
43
+ " vertical-align: top;\n",
44
+ " }\n",
45
+ "\n",
46
+ " .dataframe thead th {\n",
47
+ " text-align: right;\n",
48
+ " }\n",
49
+ "</style>\n",
50
+ "<table border=\"1\" class=\"dataframe\">\n",
51
+ " <thead>\n",
52
+ " <tr style=\"text-align: right;\">\n",
53
+ " <th></th>\n",
54
+ " <th>User-ID</th>\n",
55
+ " <th>Location</th>\n",
56
+ " <th>Age</th>\n",
57
+ " </tr>\n",
58
+ " </thead>\n",
59
+ " <tbody>\n",
60
+ " <tr>\n",
61
+ " <th>0</th>\n",
62
+ " <td>1</td>\n",
63
+ " <td>nyc, new york, usa</td>\n",
64
+ " <td>NaN</td>\n",
65
+ " </tr>\n",
66
+ " <tr>\n",
67
+ " <th>1</th>\n",
68
+ " <td>2</td>\n",
69
+ " <td>stockton, california, usa</td>\n",
70
+ " <td>18.0</td>\n",
71
+ " </tr>\n",
72
+ " <tr>\n",
73
+ " <th>2</th>\n",
74
+ " <td>3</td>\n",
75
+ " <td>moscow, yukon territory, russia</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>3</th>\n",
80
+ " <td>4</td>\n",
81
+ " <td>porto, v.n.gaia, portugal</td>\n",
82
+ " <td>17.0</td>\n",
83
+ " </tr>\n",
84
+ " <tr>\n",
85
+ " <th>4</th>\n",
86
+ " <td>5</td>\n",
87
+ " <td>farnborough, hants, united kingdom</td>\n",
88
+ " <td>NaN</td>\n",
89
+ " </tr>\n",
90
+ " </tbody>\n",
91
+ "</table>\n",
92
+ "</div>"
93
+ ],
94
+ "text/plain": [
95
+ " User-ID Location Age\n",
96
+ "0 1 nyc, new york, usa NaN\n",
97
+ "1 2 stockton, california, usa 18.0\n",
98
+ "2 3 moscow, yukon territory, russia NaN\n",
99
+ "3 4 porto, v.n.gaia, portugal 17.0\n",
100
+ "4 5 farnborough, hants, united kingdom NaN"
101
+ ]
102
+ },
103
+ "execution_count": 2,
104
+ "metadata": {},
105
+ "output_type": "execute_result"
106
+ }
107
+ ],
108
+ "source": [
109
+ "path = \"./dataset\"\n",
110
+ "\n",
111
+ "# user dataset\n",
112
+ "user_df = pd.read_csv(f\"{path}/users.csv\", delimiter=';', encoding='ISO-8859-1')\n",
113
+ "user_df.head()"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 3,
119
+ "id": "fe62dfa3",
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "name": "stderr",
124
+ "output_type": "stream",
125
+ "text": [
126
+ "b'Skipping line 6452: expected 8 fields, saw 9\\nSkipping line 43667: expected 8 fields, saw 10\\nSkipping line 51751: expected 8 fields, saw 9\\n'\n",
127
+ "b'Skipping line 92038: expected 8 fields, saw 9\\nSkipping line 104319: expected 8 fields, saw 9\\nSkipping line 121768: expected 8 fields, saw 9\\n'\n",
128
+ "b'Skipping line 144058: expected 8 fields, saw 9\\nSkipping line 150789: expected 8 fields, saw 9\\nSkipping line 157128: expected 8 fields, saw 9\\nSkipping line 180189: expected 8 fields, saw 9\\nSkipping line 185738: expected 8 fields, saw 9\\n'\n",
129
+ "b'Skipping line 209388: expected 8 fields, saw 9\\nSkipping line 220626: expected 8 fields, saw 9\\nSkipping line 227933: expected 8 fields, saw 11\\nSkipping line 228957: expected 8 fields, saw 10\\nSkipping line 245933: expected 8 fields, saw 9\\nSkipping line 251296: expected 8 fields, saw 9\\nSkipping line 259941: expected 8 fields, saw 9\\nSkipping line 261529: expected 8 fields, saw 9\\n'\n"
130
+ ]
131
+ },
132
+ {
133
+ "data": {
134
+ "text/html": [
135
+ "<div>\n",
136
+ "<style scoped>\n",
137
+ " .dataframe tbody tr th:only-of-type {\n",
138
+ " vertical-align: middle;\n",
139
+ " }\n",
140
+ "\n",
141
+ " .dataframe tbody tr th {\n",
142
+ " vertical-align: top;\n",
143
+ " }\n",
144
+ "\n",
145
+ " .dataframe thead th {\n",
146
+ " text-align: right;\n",
147
+ " }\n",
148
+ "</style>\n",
149
+ "<table border=\"1\" class=\"dataframe\">\n",
150
+ " <thead>\n",
151
+ " <tr style=\"text-align: right;\">\n",
152
+ " <th></th>\n",
153
+ " <th>ISBN</th>\n",
154
+ " <th>Book-Title</th>\n",
155
+ " <th>Book-Author</th>\n",
156
+ " <th>Year-Of-Publication</th>\n",
157
+ " <th>Publisher</th>\n",
158
+ " <th>Image-URL-S</th>\n",
159
+ " <th>Image-URL-M</th>\n",
160
+ " <th>Image-URL-L</th>\n",
161
+ " </tr>\n",
162
+ " </thead>\n",
163
+ " <tbody>\n",
164
+ " <tr>\n",
165
+ " <th>0</th>\n",
166
+ " <td>0195153448</td>\n",
167
+ " <td>Classical Mythology</td>\n",
168
+ " <td>Mark P. O. Morford</td>\n",
169
+ " <td>2002</td>\n",
170
+ " <td>Oxford University Press</td>\n",
171
+ " <td>http://images.amazon.com/images/P/0195153448.0...</td>\n",
172
+ " <td>http://images.amazon.com/images/P/0195153448.0...</td>\n",
173
+ " <td>http://images.amazon.com/images/P/0195153448.0...</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>1</th>\n",
177
+ " <td>0002005018</td>\n",
178
+ " <td>Clara Callan</td>\n",
179
+ " <td>Richard Bruce Wright</td>\n",
180
+ " <td>2001</td>\n",
181
+ " <td>HarperFlamingo Canada</td>\n",
182
+ " <td>http://images.amazon.com/images/P/0002005018.0...</td>\n",
183
+ " <td>http://images.amazon.com/images/P/0002005018.0...</td>\n",
184
+ " <td>http://images.amazon.com/images/P/0002005018.0...</td>\n",
185
+ " </tr>\n",
186
+ " <tr>\n",
187
+ " <th>2</th>\n",
188
+ " <td>0060973129</td>\n",
189
+ " <td>Decision in Normandy</td>\n",
190
+ " <td>Carlo D'Este</td>\n",
191
+ " <td>1991</td>\n",
192
+ " <td>HarperPerennial</td>\n",
193
+ " <td>http://images.amazon.com/images/P/0060973129.0...</td>\n",
194
+ " <td>http://images.amazon.com/images/P/0060973129.0...</td>\n",
195
+ " <td>http://images.amazon.com/images/P/0060973129.0...</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>3</th>\n",
199
+ " <td>0374157065</td>\n",
200
+ " <td>Flu: The Story of the Great Influenza Pandemic...</td>\n",
201
+ " <td>Gina Bari Kolata</td>\n",
202
+ " <td>1999</td>\n",
203
+ " <td>Farrar Straus Giroux</td>\n",
204
+ " <td>http://images.amazon.com/images/P/0374157065.0...</td>\n",
205
+ " <td>http://images.amazon.com/images/P/0374157065.0...</td>\n",
206
+ " <td>http://images.amazon.com/images/P/0374157065.0...</td>\n",
207
+ " </tr>\n",
208
+ " <tr>\n",
209
+ " <th>4</th>\n",
210
+ " <td>0393045218</td>\n",
211
+ " <td>The Mummies of Urumchi</td>\n",
212
+ " <td>E. J. W. Barber</td>\n",
213
+ " <td>1999</td>\n",
214
+ " <td>W. W. Norton &amp;amp; Company</td>\n",
215
+ " <td>http://images.amazon.com/images/P/0393045218.0...</td>\n",
216
+ " <td>http://images.amazon.com/images/P/0393045218.0...</td>\n",
217
+ " <td>http://images.amazon.com/images/P/0393045218.0...</td>\n",
218
+ " </tr>\n",
219
+ " </tbody>\n",
220
+ "</table>\n",
221
+ "</div>"
222
+ ],
223
+ "text/plain": [
224
+ " ISBN Book-Title \\\n",
225
+ "0 0195153448 Classical Mythology \n",
226
+ "1 0002005018 Clara Callan \n",
227
+ "2 0060973129 Decision in Normandy \n",
228
+ "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
229
+ "4 0393045218 The Mummies of Urumchi \n",
230
+ "\n",
231
+ " Book-Author Year-Of-Publication Publisher \\\n",
232
+ "0 Mark P. O. Morford 2002 Oxford University Press \n",
233
+ "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
234
+ "2 Carlo D'Este 1991 HarperPerennial \n",
235
+ "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
236
+ "4 E. J. W. Barber 1999 W. W. Norton &amp; Company \n",
237
+ "\n",
238
+ " Image-URL-S \\\n",
239
+ "0 http://images.amazon.com/images/P/0195153448.0... \n",
240
+ "1 http://images.amazon.com/images/P/0002005018.0... \n",
241
+ "2 http://images.amazon.com/images/P/0060973129.0... \n",
242
+ "3 http://images.amazon.com/images/P/0374157065.0... \n",
243
+ "4 http://images.amazon.com/images/P/0393045218.0... \n",
244
+ "\n",
245
+ " Image-URL-M \\\n",
246
+ "0 http://images.amazon.com/images/P/0195153448.0... \n",
247
+ "1 http://images.amazon.com/images/P/0002005018.0... \n",
248
+ "2 http://images.amazon.com/images/P/0060973129.0... \n",
249
+ "3 http://images.amazon.com/images/P/0374157065.0... \n",
250
+ "4 http://images.amazon.com/images/P/0393045218.0... \n",
251
+ "\n",
252
+ " Image-URL-L \n",
253
+ "0 http://images.amazon.com/images/P/0195153448.0... \n",
254
+ "1 http://images.amazon.com/images/P/0002005018.0... \n",
255
+ "2 http://images.amazon.com/images/P/0060973129.0... \n",
256
+ "3 http://images.amazon.com/images/P/0374157065.0... \n",
257
+ "4 http://images.amazon.com/images/P/0393045218.0... "
258
+ ]
259
+ },
260
+ "execution_count": 3,
261
+ "metadata": {},
262
+ "output_type": "execute_result"
263
+ }
264
+ ],
265
+ "source": [
266
+ "# book dataset\n",
267
+ "book_df = pd.read_csv(f\"{path}/books.csv\", delimiter=';', encoding='ISO-8859-1', error_bad_lines=False)\n",
268
+ "book_df.head()"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": 4,
274
+ "id": "d9fa4750",
275
+ "metadata": {},
276
+ "outputs": [
277
+ {
278
+ "data": {
279
+ "text/html": [
280
+ "<div>\n",
281
+ "<style scoped>\n",
282
+ " .dataframe tbody tr th:only-of-type {\n",
283
+ " vertical-align: middle;\n",
284
+ " }\n",
285
+ "\n",
286
+ " .dataframe tbody tr th {\n",
287
+ " vertical-align: top;\n",
288
+ " }\n",
289
+ "\n",
290
+ " .dataframe thead th {\n",
291
+ " text-align: right;\n",
292
+ " }\n",
293
+ "</style>\n",
294
+ "<table border=\"1\" class=\"dataframe\">\n",
295
+ " <thead>\n",
296
+ " <tr style=\"text-align: right;\">\n",
297
+ " <th></th>\n",
298
+ " <th>User-ID</th>\n",
299
+ " <th>ISBN</th>\n",
300
+ " <th>Book-Rating</th>\n",
301
+ " </tr>\n",
302
+ " </thead>\n",
303
+ " <tbody>\n",
304
+ " <tr>\n",
305
+ " <th>0</th>\n",
306
+ " <td>276725</td>\n",
307
+ " <td>034545104X</td>\n",
308
+ " <td>0</td>\n",
309
+ " </tr>\n",
310
+ " <tr>\n",
311
+ " <th>1</th>\n",
312
+ " <td>276726</td>\n",
313
+ " <td>0155061224</td>\n",
314
+ " <td>5</td>\n",
315
+ " </tr>\n",
316
+ " <tr>\n",
317
+ " <th>2</th>\n",
318
+ " <td>276727</td>\n",
319
+ " <td>0446520802</td>\n",
320
+ " <td>0</td>\n",
321
+ " </tr>\n",
322
+ " <tr>\n",
323
+ " <th>3</th>\n",
324
+ " <td>276729</td>\n",
325
+ " <td>052165615X</td>\n",
326
+ " <td>3</td>\n",
327
+ " </tr>\n",
328
+ " <tr>\n",
329
+ " <th>4</th>\n",
330
+ " <td>276729</td>\n",
331
+ " <td>0521795028</td>\n",
332
+ " <td>6</td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>5</th>\n",
336
+ " <td>276733</td>\n",
337
+ " <td>2080674722</td>\n",
338
+ " <td>0</td>\n",
339
+ " </tr>\n",
340
+ " <tr>\n",
341
+ " <th>6</th>\n",
342
+ " <td>276736</td>\n",
343
+ " <td>3257224281</td>\n",
344
+ " <td>8</td>\n",
345
+ " </tr>\n",
346
+ " <tr>\n",
347
+ " <th>7</th>\n",
348
+ " <td>276737</td>\n",
349
+ " <td>0600570967</td>\n",
350
+ " <td>6</td>\n",
351
+ " </tr>\n",
352
+ " <tr>\n",
353
+ " <th>8</th>\n",
354
+ " <td>276744</td>\n",
355
+ " <td>038550120X</td>\n",
356
+ " <td>7</td>\n",
357
+ " </tr>\n",
358
+ " <tr>\n",
359
+ " <th>9</th>\n",
360
+ " <td>276745</td>\n",
361
+ " <td>342310538</td>\n",
362
+ " <td>10</td>\n",
363
+ " </tr>\n",
364
+ " </tbody>\n",
365
+ "</table>\n",
366
+ "</div>"
367
+ ],
368
+ "text/plain": [
369
+ " User-ID ISBN Book-Rating\n",
370
+ "0 276725 034545104X 0\n",
371
+ "1 276726 0155061224 5\n",
372
+ "2 276727 0446520802 0\n",
373
+ "3 276729 052165615X 3\n",
374
+ "4 276729 0521795028 6\n",
375
+ "5 276733 2080674722 0\n",
376
+ "6 276736 3257224281 8\n",
377
+ "7 276737 0600570967 6\n",
378
+ "8 276744 038550120X 7\n",
379
+ "9 276745 342310538 10"
380
+ ]
381
+ },
382
+ "execution_count": 4,
383
+ "metadata": {},
384
+ "output_type": "execute_result"
385
+ }
386
+ ],
387
+ "source": [
388
+ "# rating dataset\n",
389
+ "rating_df = pd.read_csv(f\"{path}/ratings.csv\", delimiter=';', encoding='ISO-8859-1')\n",
390
+ "rating_df.head(10)"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": 5,
396
+ "id": "53c66ec4",
397
+ "metadata": {},
398
+ "outputs": [
399
+ {
400
+ "data": {
401
+ "text/plain": [
402
+ "Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')"
403
+ ]
404
+ },
405
+ "execution_count": 5,
406
+ "metadata": {},
407
+ "output_type": "execute_result"
408
+ }
409
+ ],
410
+ "source": [
411
+ "rating_df.columns"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": 6,
417
+ "id": "691767c0",
418
+ "metadata": {},
419
+ "outputs": [
420
+ {
421
+ "data": {
422
+ "text/html": [
423
+ "<div>\n",
424
+ "<style scoped>\n",
425
+ " .dataframe tbody tr th:only-of-type {\n",
426
+ " vertical-align: middle;\n",
427
+ " }\n",
428
+ "\n",
429
+ " .dataframe tbody tr th {\n",
430
+ " vertical-align: top;\n",
431
+ " }\n",
432
+ "\n",
433
+ " .dataframe thead th {\n",
434
+ " text-align: right;\n",
435
+ " }\n",
436
+ "</style>\n",
437
+ "<table border=\"1\" class=\"dataframe\">\n",
438
+ " <thead>\n",
439
+ " <tr style=\"text-align: right;\">\n",
440
+ " <th></th>\n",
441
+ " <th>Mean-Rating</th>\n",
442
+ " <th>Num-Rating</th>\n",
443
+ " </tr>\n",
444
+ " <tr>\n",
445
+ " <th>ISBN</th>\n",
446
+ " <th></th>\n",
447
+ " <th></th>\n",
448
+ " </tr>\n",
449
+ " </thead>\n",
450
+ " <tbody>\n",
451
+ " <tr>\n",
452
+ " <th>0330299891</th>\n",
453
+ " <td>3.0</td>\n",
454
+ " <td>2</td>\n",
455
+ " </tr>\n",
456
+ " <tr>\n",
457
+ " <th>0375404120</th>\n",
458
+ " <td>1.5</td>\n",
459
+ " <td>2</td>\n",
460
+ " </tr>\n",
461
+ " <tr>\n",
462
+ " <th>0586045007</th>\n",
463
+ " <td>0.0</td>\n",
464
+ " <td>1</td>\n",
465
+ " </tr>\n",
466
+ " <tr>\n",
467
+ " <th>9022906116</th>\n",
468
+ " <td>3.5</td>\n",
469
+ " <td>2</td>\n",
470
+ " </tr>\n",
471
+ " <tr>\n",
472
+ " <th>9032803328</th>\n",
473
+ " <td>0.0</td>\n",
474
+ " <td>1</td>\n",
475
+ " </tr>\n",
476
+ " </tbody>\n",
477
+ "</table>\n",
478
+ "</div>"
479
+ ],
480
+ "text/plain": [
481
+ " Mean-Rating Num-Rating\n",
482
+ "ISBN \n",
483
+ " 0330299891 3.0 2\n",
484
+ " 0375404120 1.5 2\n",
485
+ " 0586045007 0.0 1\n",
486
+ " 9022906116 3.5 2\n",
487
+ " 9032803328 0.0 1"
488
+ ]
489
+ },
490
+ "execution_count": 6,
491
+ "metadata": {},
492
+ "output_type": "execute_result"
493
+ }
494
+ ],
495
+ "source": [
496
+ "function = {\n",
497
+ " \"Book-Rating\": \"mean\",\n",
498
+ " \"User-ID\": \"count\"\n",
499
+ "}\n",
500
+ "\n",
501
+ "summary_rating = rating_df.groupby(\"ISBN\").agg(function, axis=0)\n",
502
+ "summary_rating = summary_rating.rename(columns={\"Book-Rating\": \"Mean-Rating\", \"User-ID\": \"Num-Rating\"})\n",
503
+ "summary_rating.head()"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "markdown",
508
+ "id": "3e20611a",
509
+ "metadata": {},
510
+ "source": [
511
+ "**Note:** In this repo, I only consider `book_df` and `rating_df`."
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": 7,
517
+ "id": "82e1b680",
518
+ "metadata": {},
519
+ "outputs": [
520
+ {
521
+ "data": {
522
+ "text/html": [
523
+ "<div>\n",
524
+ "<style scoped>\n",
525
+ " .dataframe tbody tr th:only-of-type {\n",
526
+ " vertical-align: middle;\n",
527
+ " }\n",
528
+ "\n",
529
+ " .dataframe tbody tr th {\n",
530
+ " vertical-align: top;\n",
531
+ " }\n",
532
+ "\n",
533
+ " .dataframe thead th {\n",
534
+ " text-align: right;\n",
535
+ " }\n",
536
+ "</style>\n",
537
+ "<table border=\"1\" class=\"dataframe\">\n",
538
+ " <thead>\n",
539
+ " <tr style=\"text-align: right;\">\n",
540
+ " <th></th>\n",
541
+ " <th>ISBN</th>\n",
542
+ " <th>Book-Title</th>\n",
543
+ " <th>Book-Author</th>\n",
544
+ " <th>Year-Of-Publication</th>\n",
545
+ " <th>Publisher</th>\n",
546
+ " <th>Mean-Rating</th>\n",
547
+ " <th>Num-Rating</th>\n",
548
+ " </tr>\n",
549
+ " </thead>\n",
550
+ " <tbody>\n",
551
+ " <tr>\n",
552
+ " <th>0</th>\n",
553
+ " <td>0195153448</td>\n",
554
+ " <td>Classical Mythology</td>\n",
555
+ " <td>Mark P. O. Morford</td>\n",
556
+ " <td>2002</td>\n",
557
+ " <td>Oxford University Press</td>\n",
558
+ " <td>0.000000</td>\n",
559
+ " <td>1.0</td>\n",
560
+ " </tr>\n",
561
+ " <tr>\n",
562
+ " <th>1</th>\n",
563
+ " <td>0002005018</td>\n",
564
+ " <td>Clara Callan</td>\n",
565
+ " <td>Richard Bruce Wright</td>\n",
566
+ " <td>2001</td>\n",
567
+ " <td>HarperFlamingo Canada</td>\n",
568
+ " <td>4.928571</td>\n",
569
+ " <td>14.0</td>\n",
570
+ " </tr>\n",
571
+ " <tr>\n",
572
+ " <th>2</th>\n",
573
+ " <td>0060973129</td>\n",
574
+ " <td>Decision in Normandy</td>\n",
575
+ " <td>Carlo D'Este</td>\n",
576
+ " <td>1991</td>\n",
577
+ " <td>HarperPerennial</td>\n",
578
+ " <td>5.000000</td>\n",
579
+ " <td>3.0</td>\n",
580
+ " </tr>\n",
581
+ " <tr>\n",
582
+ " <th>3</th>\n",
583
+ " <td>0374157065</td>\n",
584
+ " <td>Flu: The Story of the Great Influenza Pandemic...</td>\n",
585
+ " <td>Gina Bari Kolata</td>\n",
586
+ " <td>1999</td>\n",
587
+ " <td>Farrar Straus Giroux</td>\n",
588
+ " <td>4.272727</td>\n",
589
+ " <td>11.0</td>\n",
590
+ " </tr>\n",
591
+ " <tr>\n",
592
+ " <th>4</th>\n",
593
+ " <td>0393045218</td>\n",
594
+ " <td>The Mummies of Urumchi</td>\n",
595
+ " <td>E. J. W. Barber</td>\n",
596
+ " <td>1999</td>\n",
597
+ " <td>W. W. Norton &amp;amp; Company</td>\n",
598
+ " <td>0.000000</td>\n",
599
+ " <td>1.0</td>\n",
600
+ " </tr>\n",
601
+ " </tbody>\n",
602
+ "</table>\n",
603
+ "</div>"
604
+ ],
605
+ "text/plain": [
606
+ " ISBN Book-Title \\\n",
607
+ "0 0195153448 Classical Mythology \n",
608
+ "1 0002005018 Clara Callan \n",
609
+ "2 0060973129 Decision in Normandy \n",
610
+ "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
611
+ "4 0393045218 The Mummies of Urumchi \n",
612
+ "\n",
613
+ " Book-Author Year-Of-Publication Publisher \\\n",
614
+ "0 Mark P. O. Morford 2002 Oxford University Press \n",
615
+ "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
616
+ "2 Carlo D'Este 1991 HarperPerennial \n",
617
+ "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
618
+ "4 E. J. W. Barber 1999 W. W. Norton &amp; Company \n",
619
+ "\n",
620
+ " Mean-Rating Num-Rating \n",
621
+ "0 0.000000 1.0 \n",
622
+ "1 4.928571 14.0 \n",
623
+ "2 5.000000 3.0 \n",
624
+ "3 4.272727 11.0 \n",
625
+ "4 0.000000 1.0 "
626
+ ]
627
+ },
628
+ "execution_count": 7,
629
+ "metadata": {},
630
+ "output_type": "execute_result"
631
+ }
632
+ ],
633
+ "source": [
634
+ "df = book_df.merge(summary_rating, how=\"left\", left_on=\"ISBN\", right_on=\"ISBN\")\n",
635
+ "df.drop(columns=[\"Image-URL-S\", \"Image-URL-M\", \"Image-URL-L\"], inplace=True)\n",
636
+ "df.head()"
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "code",
641
+ "execution_count": 9,
642
+ "id": "fb397a05",
643
+ "metadata": {},
644
+ "outputs": [
645
+ {
646
+ "data": {
647
+ "text/plain": [
648
+ "ISBN 0\n",
649
+ "Book-Title 0\n",
650
+ "Book-Author 1\n",
651
+ "Year-Of-Publication 0\n",
652
+ "Publisher 2\n",
653
+ "Mean-Rating 1209\n",
654
+ "Num-Rating 1209\n",
655
+ "dtype: int64"
656
+ ]
657
+ },
658
+ "execution_count": 9,
659
+ "metadata": {},
660
+ "output_type": "execute_result"
661
+ }
662
+ ],
663
+ "source": [
664
+ "df.isnull().sum()"
665
+ ]
666
+ },
667
+ {
668
+ "cell_type": "code",
669
+ "execution_count": 10,
670
+ "id": "7c7139ed",
671
+ "metadata": {},
672
+ "outputs": [],
673
+ "source": [
674
+ "# Save\n",
675
+ "df.to_csv(f\"{path}/summary_book.csv\", index=False)"
676
+ ]
677
+ }
678
+ ],
679
+ "metadata": {
680
+ "kernelspec": {
681
+ "display_name": "Python 3",
682
+ "language": "python",
683
+ "name": "python3"
684
+ },
685
+ "language_info": {
686
+ "codemirror_mode": {
687
+ "name": "ipython",
688
+ "version": 3
689
+ },
690
+ "file_extension": ".py",
691
+ "mimetype": "text/x-python",
692
+ "name": "python",
693
+ "nbconvert_exporter": "python",
694
+ "pygments_lexer": "ipython3",
695
+ "version": "3.8.10"
696
+ },
697
+ "latex_envs": {
698
+ "LaTeX_envs_menu_present": true,
699
+ "autoclose": false,
700
+ "autocomplete": true,
701
+ "bibliofile": "biblio.bib",
702
+ "cite_by": "apalike",
703
+ "current_citInitial": 1,
704
+ "eqLabelWithNumbers": true,
705
+ "eqNumInitial": 1,
706
+ "hotkeys": {
707
+ "equation": "Ctrl-E",
708
+ "itemize": "Ctrl-I"
709
+ },
710
+ "labels_anchors": false,
711
+ "latex_user_defs": false,
712
+ "report_style_numbering": false,
713
+ "user_envs_cfg": false
714
+ }
715
+ },
716
+ "nbformat": 4,
717
+ "nbformat_minor": 5
718
+ }
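The notebook's summary step is a groupby-aggregate followed by a left merge onto the book table. A minimal sketch of the same pattern on made-up toy ratings (the ISBNs below are reused from the examples above purely for illustration):

import pandas as pd

# Toy ratings: one book rated twice, one rated once
ratings = pd.DataFrame({
    "User-ID": [1, 2, 3],
    "ISBN": ["0195153448", "0195153448", "0002005018"],
    "Book-Rating": [4, 6, 10],
})

summary = ratings.groupby("ISBN").agg({"Book-Rating": "mean", "User-ID": "count"})
summary = summary.rename(columns={"Book-Rating": "Mean-Rating", "User-ID": "Num-Rating"})
print(summary)
#             Mean-Rating  Num-Rating
# ISBN
# 0002005018         10.0           1
# 0195153448          5.0           2

Books that never appear in the ratings table pick up NaN in both summary columns after the left merge, which is why df.isnull().sum() above reports 1209 missing values for Mean-Rating and Num-Rating.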
README.md CHANGED
@@ -1,12 +1,9 @@
  ---
  title: Book Recommender System
- emoji: 📉
- colorFrom: blue
- colorTo: gray
+ emoji: 👀
+ colorFrom: purple
+ colorTo: purple
  sdk: streamlit
- sdk_version: 1.21.0
  app_file: app.py
  pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
app.py ADDED
@@ -0,0 +1,57 @@
1
+ import json
2
+
3
+ import streamlit as st
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+
8
+ # Parameters
9
+ data_dir = './processed'
10
+ weight_dir = './weight'
11
+ info_path = './processed/summary_book.csv'
12
+ num = 10
13
+ lb = 0
14
+
15
+ # Load R matrix from file
16
+ R = np.load(f'{data_dir}/R.npy', allow_pickle=True)
17
+ # Load prediction
18
+ prediction = np.load(f'{weight_dir}/predicted.npy', allow_pickle=True)
19
+ # Load dictionaries from JSON files
20
+ with open(f'{data_dir}/user_id_map.json', 'r') as file:
21
+     user2id = json.load(file)
22
+ with open(f'{data_dir}/book_id_map.json', 'r') as file:
23
+     book2id = json.load(file)
24
+
25
+
26
+ # Define the recommendation function for the Streamlit app
27
+ def recommend_books(user_id):
28
+     # Recommend
29
+     user_idx = user2id[str(user_id)]
30
+     predict = prediction[:, user_idx]  # get prediction for user
31
+     predict_dict = {book: np.round(predict[idx], 2) for book, idx in book2id.items()}
32
+     # Load information about the books
33
+     book_df = pd.read_csv(info_path)
34
+     book_df = book_df[book_df["Num-Rating"] > lb]
35
+     book_df['predict'] = book_df["ISBN"].map(predict_dict)
36
+     df = book_df.nlargest(num, "predict").reset_index(drop=True)
37
+     df["context"] = df.apply(
38
+         lambda book: f"{book['Book-Title']} ({book['Year-Of-Publication']}) - by {book['Book-Author']}", axis=1
39
+     )
40
+
41
+     return df['context'].values
42
+
43
+ st.title('Book Recommender System')
44
+
45
+ # Display a selection box with the available user IDs
46
+ user_id = st.selectbox(
47
+     'Select your ID:',
48
+     user2id.keys()
49
+ )
50
+
51
+ # Set up a button
52
+ if st.button('Recommend'):
53
+     recommendations = recommend_books(user_id)
54
+     st.write('**_Your ID:_**', user_id)
55
+     st.write(f'**_Your top {num} recommendations:_**')
56
+     for rank, rec in enumerate(recommendations):
57
+         st.write(rank + 1, ':', rec)
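recommend_books ranks every book by the selected user's column of the precomputed prediction matrix; the lookup itself is plain NumPy indexing plus a dict comprehension. A minimal sketch with a fabricated 3-book-by-2-user prediction matrix (all values invented):

import numpy as np

prediction = np.array([[4.2, 1.0],   # rows: books
                       [2.7, 9.1],   # columns: users
                       [8.8, 0.3]])
book2id = {"0195153448": 0, "0002005018": 1, "0060973129": 2}

user_idx = 0  # column of the selected user
predict = prediction[:, user_idx]
predict_dict = {isbn: round(float(predict[idx]), 2) for isbn, idx in book2id.items()}
print(sorted(predict_dict.items(), key=lambda kv: kv[1], reverse=True))
# [('0060973129', 8.8), ('0195153448', 4.2), ('0002005018', 2.7)]

In the app the same ranking is done with book_df.nlargest(num, "predict") after filtering out books with Num-Rating <= lb; the Space itself is launched with streamlit run app.py.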
dataset/books.csv ADDED
Binary file (77.8 MB).
 
dataset/ratings.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/users.csv ADDED
The diff for this file is too large to render. See raw diff
 
preprocessing.py ADDED
@@ -0,0 +1,79 @@
1
+ import json
2
+ import yaml
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from pathlib import Path
7
+ from jsonargparse import ArgumentParser
8
+
9
+
10
+ def parse_args():
11
+     """Parse command-line arguments."""
12
+     parser = ArgumentParser()
13
+     parser.add_argument("--rating_path", type=str, default="./dataset/ratings.csv")
14
+     parser.add_argument("--book_path", type=str, default="./dataset/books.csv")
15
+     parser.add_argument("--out_dir", type=str, default="./processed")
16
+     parser.add_argument("--limit", type=int, default=1000)
17
+
18
+     return vars(parser.parse_args())
19
+
20
+
21
+ def main(
22
+     rating_path,
23
+     book_path,
24
+     out_dir,
25
+     limit,
26
+     **kwargs
27
+ ):
28
+     data = pd.read_csv(rating_path, delimiter=';', nrows=limit, encoding='ISO-8859-1')
29
+
30
+     # Make Y
31
+     Y = data.pivot(index='ISBN', columns='User-ID', values='Book-Rating')
32
+     Y = Y.fillna(0)
33
+     Y = Y.values
34
+
35
+     # Make R
36
+     R = np.where(Y != 0, 1, 0)
37
+
38
+     # Save Y and R as dense matrices
39
+     out_dir_path = Path(out_dir)
40
+     if out_dir_path.exists():
41
+         assert out_dir_path.is_dir()
42
+     else:
43
+         out_dir_path.mkdir(parents=True)
44
+     np.save(f'{out_dir_path}/Y.npy', Y)
45
+     np.save(f'{out_dir_path}/R.npy', R)
46
+
47
+     # Create mappings for book and user IDs
48
+     book_lst = data['ISBN'].unique()
49
+     user_lst = data['User-ID'].unique()
50
+     book_id_map = {book_id: i for i, book_id in enumerate(book_lst)}
51
+     user_id_map = {user_id: i for i, user_id in enumerate(user_lst)}
52
+     # Convert keys to JSON-compatible string types
53
+     book_id_map = {str(key): value for key, value in book_id_map.items()}
54
+     user_id_map = {str(key): value for key, value in user_id_map.items()}
55
+
56
+     # Save book_id_map to file
57
+     with open(f'{out_dir_path}/book_id_map.json', 'w') as f:
58
+         json.dump(book_id_map, f)
59
+
60
+     # Save user_id_map to file
61
+     with open(f'{out_dir_path}/user_id_map.json', 'w') as f:
62
+         json.dump(user_id_map, f)
63
+
64
+     # Get summary
65
+     function = {
66
+         "Book-Rating": "mean",
67
+         "User-ID": "count"
68
+     }
69
+
70
+     book_df = pd.read_csv(book_path, delimiter=';', encoding='ISO-8859-1', on_bad_lines='skip')
71
+     summary_rating = data.groupby("ISBN").agg(function)
72
+     summary_rating = summary_rating.rename(columns={"Book-Rating": "Mean-Rating", "User-ID": "Num-Rating"})
73
+     df = book_df.merge(summary_rating, how="left", on="ISBN")
74
+     df.drop(columns=["Image-URL-S", "Image-URL-M", "Image-URL-L"], inplace=True)
75
+     df.to_csv(f"{out_dir_path}/summary_book.csv", index=False)
76
+
77
+
78
+ if __name__ == "__main__":
79
+     main(**parse_args())
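The pivot in preprocessing.py turns the long ratings table into a dense books-by-users matrix Y, and R marks which entries hold a real rating. A toy run of those two lines (invented IDs and ratings):

import numpy as np
import pandas as pd

data = pd.DataFrame({
    "User-ID": [1, 1, 2],
    "ISBN": ["A", "B", "A"],
    "Book-Rating": [5, 0, 8],
})

Y = data.pivot(index="ISBN", columns="User-ID", values="Book-Rating").fillna(0).values
R = np.where(Y != 0, 1, 0)
print(Y)  # [[5. 8.]
          #  [0. 0.]]
print(R)  # [[1 1]
          #  [0 0]]

One consequence worth noting: because missing entries are filled with 0 before R is built, an explicit rating of 0 (which this dataset uses for implicit feedback) becomes indistinguishable from "never rated" and is excluded from training by R.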
processed/R.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1255c1daea3561d8e326acb7271127549923abed46da5de0e092a8664b227f
3
+ size 1293760
processed/Y.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1701a14103e9b7259e27b06c7eb9e0b71af75078e0eef6b2e4b6c163f281f7ee
3
+ size 1293760
processed/book_id_map.json ADDED
@@ -0,0 +1 @@
1
+ {"034545104X": 0, "0155061224": 1, "0446520802": 2, "052165615X": 3, "0521795028": 4, "2080674722": 5, "3257224281": 6, "0600570967": 7, "038550120X": 8, "342310538": 9, "0425115801": 10, "0449006522": 11, "0553561618": 12, "055356451X": 13, "0786013990": 14, "0786014512": 15, "0060517794": 16, "0451192001": 17, "0609801279": 18, "0671537458": 19, "0679776818": 20, "0943066433": 21, "1570231028": 22, "1885408226": 23, "0747558167": 24, "3442437407": 25, "033390804X": 26, "3596218098": 27, "0684867621": 28, "0451166892": 29, "8440682697": 30, "034544003X": 31, "0380000059": 32, "0380711524": 33, "0451167317": 34, "0451454952": 35, "0843920262": 36, "3404122879": 37, "3404182928": 38, "3404611306": 39, "342662429": 40, "3426690179": 41, "3442424216": 42, "3442425573": 43, "3453092007": 44, "3453157745": 45, "3453176944": 46, "3453185137": 47, "3453185323": 48, "3453213025": 49, "3453877241": 50, "3492226604": 51, "3517017442": 52, "3596125006": 53, "B0000BLD7X": 54, "N3453124715": 55, "9029716894": 56, "9057868059": 57, "0140279091": 58, "0553572369": 59, "0571058086": 60, "3499230933": 61, "3596151465": 62, "0099543818": 63, "3404147723": 64, "3423111321": 65, "3442136644": 66, "3492232000": 67, "8434811634": 68, "8484330478": 69, "8484332039": 70, "2864322102": 71, "8402065945": 72, "8423314901": 73, "842333533X": 74, "8427911769": 75, "8433914456": 76, "8437606322": 77, "8445072919": 78, "8466300821": 79, "847765011X": 80, "8478442588": 81, "8495368099": 82, "0345443683": 83, "043935806X": 84, "055310666X": 85, "0330332775": 86, "0330367358": 87, "0006379702": 88, "3423084049": 89, "3442131340": 90, "3446202102": 91, "3453073398": 92, "3453115783": 93, "3499134004": 94, "349915398X": 95, "3548603203": 96, "3764501383": 97, "3791535889": 98, "0061054143": 99, "0061054151": 100, "0061056774": 101, "0671021346": 102, "0671024108": 103, "1562827898": 104, "9726954835": 105, "0064405176": 106, "0439104769": 107, "0440498058": 108, "8807817144": 109, "8845915913": 110, "0395547032": 111, "0440414121": 112, "8879839993": 113, "8401328322": 114, "8401461189": 115, "8401471796": 116, "8423996565": 117, "8424130081": 118, "8426449476": 119, "8426449573": 120, "8478884831": 121, "8478885218": 122, "8478885463": 123, "8478886044": 124, "8495618052": 125, "0091830893": 126, "0586207414": 127, "0812571029": 128, "0671749609": 129, "0140062718": 130, "0140260498": 131, "0060096195": 132, "0141310340": 133, "0142302198": 134, "0156006065": 135, "0375821813": 136, "038076041X": 137, "0439087597": 138, "0439401399": 139, "0552546933": 140, "0689804458": 141, "0699854289": 142, "0786812508": 143, "0786817070": 144, "0805057706": 145, "1551925060": 146, "1573248533": 147, "000651118X": 148, "0385272324": 149, "2038701962": 150, "2070264564": 151, "2070334430": 152, "2070403734": 153, "2080680676": 154, "2232122263": 155, "2253044903": 156, "2253150711": 157, "2266076000": 158, "2277221678": 159, "2290321028": 160, "2842190009": 161, "0440225825": 162, "0316781266": 163, "0345446860": 164, "0671876244": 165, "3125785006": 166, "0380005239": 167, "1593080255": 168, "0330201700": 169, "0385729340": 170, "3809407536": 171, "0446364193": 172, "3257200552": 173, "3379015180": 174, "3404145909": 175, "3404148576": 176, "3404921178": 177, "3423071516": 178, "3423204885": 179, "3423205806": 180, "3426029553": 181, "3426622610": 182, "3426671298": 183, "344215121X": 184, "3442413508": 185, "3442422035": 186, "3442435773": 187, "3442437717": 188, "3442441080": 189, "3442442796": 190, "3442444020": 191, "3442446414": 
192, "3442448530": 193, "3442449820": 194, "3453137442": 195, "3453870190": 196, "3455077331": 197, "347354034X": 198, "3492231322": 199, "349912176X": 200, "3499222213": 201, "3499228297": 202, "3499232529": 203, "3499233436": 204, "3499264528": 205, "3499433443": 206, "3506464078": 207, "3548602967": 208, "3551551677": 209, "3551551685": 210, "3551551693": 211, "3551551936": 212, "359621078X": 213, "3608932240": 214, "360893541X": 215, "3608935428": 216, "3608935436": 217, "374661922X": 218, "3770131495": 219, "3809024589": 220, "3821815191": 221, "3932069234": 222, "3423100424": 223, "3442096596": 224, "3442440777": 225, "3453009304": 226, "3453042905": 227, "3453061187": 228, "3453071174": 229, "3453127013": 230, "3453211014": 231, "3462026062": 232, "349926028X": 233, "3596122279": 234, "3596287200": 235, "3922524443": 236, "0449217264": 237, "0140621741": 238, "055321358X": 239, "8420457477": 240, "8448034023": 241, "0671034944": 242, "074931012X": 243, "0843946415": 244, "2010173929": 245, "20103389": 246, "2012003494": 247, "2013218826": 248, "2013220162": 249, "207033015X": 250, "2070332985": 251, "2203142278": 252, "2205040561": 253, "221096900X": 254, "2253005274": 255, "2723402983": 256, "2747002748": 257, "2800108584": 258, "2800134259": 259, "2800134267": 260, "2800135522": 261, "2800135565": 262, "2800135719": 263, "2800135727": 264, "2803616998": 265, "2907572458": 266, "8439598459": 267, "014014899X": 268, "0140252517": 269, "0140269967": 270, "0140328742": 271, "0140366830": 272, "0140367446": 273, "0425131378": 274, "0517642689": 275, "0600571165": 276, "077104450X": 277, "0789706032": 278, "0836218833": 279, "0836220889": 280, "088365721X": 281, "1559712252": 282, "1567616089": 283, "2890510328": 284, "3257227264": 285, "0141011904": 286, "342313075X": 287, "3423201509": 288, "3423206616": 289, "344242529": 290, "3446200452": 291, "349223903X": 292, "3499231603": 293, "3704320196": 294, "3886807843": 295, "0345423402": 296, "1569312435": 297, "1892213141": 298, "8530805461": 299, "3257218516": 300, "3404126343": 301, "3404131606": 302, "3404139178": 303, "3404143299": 304, "3404614542": 305, "340645724X": 306, "3426192543": 307, "3426702266": 308, "3442421357": 309, "3442443806": 310, "345309221X": 311, "3453108361": 312, "3453126912": 313, "3453130901": 314, "3453132262": 315, "3596129389": 316, "3596237874": 317, "3596247500": 318, "3809410357": 319, "0345249372": 320, "044020562X": 321, "9026935722": 322, "0425167097": 323, "042518109X": 324, "0425188221": 325, "8500010452": 326, "8501023450": 327, "8571648972": 328, "8588615126": 329, "8589885291": 330, "009975181X": 331, "00273755": 332, "014366020444": 333, "0688172377": 334, "8481305464": 335, "8496075850": 336, "8804321008": 337, "8804375299": 338, "8804407808": 339, "8804464895": 340, "8804510579": 341, "8804512652": 342, "8806144146": 343, "8806155873": 344, "8807700735": 345, "8807806746": 346, "8807810751": 347, "880781112X": 348, "8807812495": 349, "8807813157": 350, "8807816059": 351, "8817106100": 352, "8817112917": 353, "8817877028": 354, "8820024381": 355, "883041915X": 356, "8831760122": 357, "8833908488": 358, "8838910987": 359, "8842806978": 360, "8845219747": 361, "8845249689": 362, "8846200624": 363, "88741800047": 364, "8876846565": 365, "8877825200": 366, "8879285513": 367, "8879285645": 368, "8879832905": 369, "8881110288": 370, "888634712X": 371, "8887432252": 372, "8888424121": 373, "0449210197": 374, "8817151068": 375, "0151446474": 376, "8433967606": 377, "8496280012": 378, "899792145": 379, 
"B158991965": 380, "0064430227": 381, "0671723650": 382, "0812533550": 383, "8806162160": 384, "884590184X": 385, "0446613843": 386, "0140270272": 387, "0440241537": 388, "0099460343": 389, "0375901582": 390, "0439317746": 391, "0440228840": 392, "0738205737": 393, "1566911605": 394, "0060542845": 395, "0449148831": 396, "1551666308": 397, "8420430943": 398, "8420636282": 399, "8432205311": 400, "000225669X": 401, "0099549611": 402, "0701162767": 403, "1852422580": 404, "042513976X": 405, "0441008291": 406, "0006511929": 407, "002542730X": 408, "0060520507": 409, "0060930934": 410, "0060951303": 411, "0099414732": 412, "0140154078": 413, "0140327592": 414, "0140367616": 415, "0141181222": 416, "0151010633": 417, "0192834312": 418, "0194216748": 419, "0240514866": 420, "0316666343": 421, "0345391810": 422, "0375400699": 423, "0385504209": 424, "043527242X": 425, "0439284031": 426, "0439286182": 427, "0439286239": 428, "0449221148": 429, "0451527747": 430, "0486282112": 431, "0486424499": 432, "0553275283": 433, "0582530431": 434, "0590502123": 435, "0595132189": 436, "0613329740": 437, "063403541X": 438, "067172939X": 439, "0671729438": 440, "0671746502": 441, "0679721851": 442, "0679745580": 443, "0691000980": 444, "0747545111": 445, "0749931434": 446, "0754000117": 447, "0804106304": 448, "0812583566": 449, "0843128240": 450, "0879517344": 451, "0971880107": 452, "1400001625": 453, "1400002672": 454, "1853262404": 455, "2061007074": 456, "2266095536": 457, "2290309494": 458, "2742739351": 459, "3522149904": 460, "3775713328": 461, "8401009421": 462, "8401327199": 463, "8401462231": 464, "840149236X": 465, "8408011200": 466, "8420427462": 467, "8420432113": 468, "8420444367": 469, "8420789895": 470, "8422655500": 471, "8422657104": 472, "842265783X": 473, "8422693445": 474, "8423325105": 475, "8423662152": 476, "8423951537": 477, "8423970647": 478, "8423976645": 479, "8426109799": 480, "8427007450": 481, "8429712372": 482, "8429714936": 483, "842975295": 484, "8429753419": 485, "8432227706": 486, "8434830809": 487, "8434840391": 488, "8440627203": 489, "8440696833": 490, "8445071572": 491, "8447306194": 492, "8447312054": 493, "8449416078": 494, "8472453723": 495, "8473068971": 496, "8474104823": 497, "8474263123": 498, "8475071163": 499, "8475961290": 500, "8476722338": 501, "8478809783": 502, "8482180088": 503, "8483221306": 504, "8489163499": 505, "8495501090": 506, "8495501198": 507, "8496077152": 508, "8496077209": 509, "8496246140": 510, "8497931467": 511, "950491036X": 512, "9508521481": 513, "9871138016": 514, "0460010239": 515, "0749336145": 516, "1899344705": 517, "0439135494": 518, "059030271X": 519, "0590453661": 520, "0590470108": 521, "0590483404": 522, "0671021354": 523, "0671026283": 524, "0671027506": 525, "0671727109": 526, "0060964049": 527, "0380807343": 528, "0439064864": 529, "2.02.032126.2": 530, "2.264.03602.8": 531, "2020058863": 532, "2020062399": 533, "2020101653": 534, "202011528X": 535, "20202006935": 536, "20203119888": 537, "2020386666": 538, "2020564777": 539, "2020591944": 540, "2070365832": 541, "2070378411": 542, "2070386023": 543, "2070394956": 544, "2070404587": 545, "2070404706": 546, "2070404722": 547, "2070406962": 548, "2070408450": 549, "2070425770": 550, "2070725804": 551, "2080680692": 552, "2226070109": 553, "2226126570": 554, "2226135022": 555, "2253030570": 556, "2253049417": 557, "2253050407": 558, "2253055972": 559, "225307659X": 560, "2253171670": 561, "2253172367": 562, "2264010991": 563, "2264013257": 564, "2264018194": 565, "2264024674": 
566, "226402593X": 567, "2264027568": 568, "2264029463": 569, "226403114": 570, "2264031158": 571, "2264033282": 572, "226403601X": 573, "2264036036": 574, "2266040820": 575, "2266047280": 576, "226604960": 577, "22660861003": 578, "2266096451": 579, "2266102028": 580, "2266102621": 581, "2266104535": 582, "2266105698": 583, "2266121367": 584, "2277302228": 585, "2290303488": 586, "2290308285": 587, "2290312924": 588, "2290315524": 589, "2290321559": 590, "2702424131": 591, "2742724028": 592, "2742724613": 593, "2742729038": 594, "2743602295": 595, "2841469824": 596, "2842611462": 597, "2868696627": 598, "2869304129": 599, "2869304560": 600, "2869304870": 601, "2869305583": 602, "2895400644": 603, "2907572121": 604, "290757213X": 605, "1566190096": 606, "0749317256": 607, "3453150538": 608, "3492224628": 609, "3492224768": 610, "3596147700": 611, "0060011939": 612, "0099283697": 613, "0140187758": 614, "0316154601": 615, "059035342X": 616, "0713628944": 617, "0752844040": 618, "1853260665": 619, "2070414256": 620, "2070419657": 621, "2253063339": 622, "2253137243": 623, "2253144452": 624, "2253152072": 625, "2264027134": 626, "226612269X": 627, "2290318329": 628, "2702400612": 629, "2702401694": 630, "2878580753": 631, "8408040383": 632, "8423310353": 633, "844140321X": 634, "8484601072": 635, "0330373269": 636, "8882461327": 637, "349202436X": 638, "8807813858": 639, "0140182551": 640, "0446310786": 641, "0886776783": 642, "3257008155": 643, "329300301X": 644, "3419528078": 645, "3423118709": 646, "3440054594": 647, "3442723078": 648, "3453034120": 649, "3480204015": 650, "3492238882": 651, "3499121808": 652, "3499153629": 653, "3502513333": 654, "3502517371": 655, "3502551685": 656, "3596119502": 657, "3596221234": 658, "3596850185": 659, "3772402542": 660, "3772420176": 661, "3794170180": 662, "3806852456": 663, "3808572612": 664, "3821812028": 665, "3922708072": 666, "7321578936": 667, "0312966970": 668, "680ISBN359623": 669, "0340818182": 670, "0061096261": 671, "0312169817": 672, "0312864590": 673, "0345348664": 674, "0345348672": 675, "0345354621": 676, "0345385764": 677, "0345435036": 678, "0385264356": 679, "0425104273": 680, "0425172546": 681, "0440218667": 682, "0440220602": 683, "0446522856": 684, "0451180054": 685, "0451198514": 686, "0515131520": 687, "0517093715": 688, "0553266306": 689, "0765342987": 690, "0812550153": 691, "0812550307": 692, "0843949163": 693, "0886773741": 694, "0886774802": 695, "0886774829": 696, "0886775426": 697, "0886775957": 698, "0886777178": 699, "1572971835": 700, "9722900684": 701, "3499221489": 702, "2422614189": 703, "0345425596": 704, "0590259970": 705, "0590260251": 706, "0590417827": 707, "0590426702": 708, "0590436422": 709, "0590436449": 710, "0590436457": 711, "0590436465": 712, "0590436481": 713, "059043649X": 714, "0590436503": 715, "0590442589": 716, "0590442988": 717, "0590442996": 718, "0590448234": 719, "0590448595": 720, "0590456458": 721, "0590456504": 722, "0590470485": 723, "0590483056": 724, "0060930187": 725, "0375760911": 726, "0689817851": 727, "0874869870": 728, "0061081450": 729, "0061083259": 730, "0061087017": 731, "0061094404": 732, "0312925689": 733, "0440295653": 734, "0671793489": 735, "0684845768": 736, "0701169176": 737, "0312244266": 738, "880701601X": 739, "8817134899": 740, "0099268345": 741, "3100970616": 742, "3257062354": 743, "3257228317": 744, "3423128879": 745, "3423202327": 746, "342677609X": 747, "3442054753": 748, "3442727073": 749, "3453132041": 750, "3478387507": 751, "3492045170": 752, "3492230814": 
753, "3499101505": 754, "3545202461": 755, "3548359698": 756, "3550075359": 757, "3596154766": 758, "3596214629": 759, "0671011367": 760, "0618045996": 761, "3822858617": 762, "0864425589": 763, "8817860751": 764, "0452282101": 765, "0671025368": 766, "3257060580": 767, "0553581112": 768, "0805047379": 769, "0892964456": 770, "0373250223": 771, "0451097009": 772, "0451179994": 773, "9681500830": 774, "8432087653": 775, "0553140779": 776, "0425182150": 777, "1883473004": 778, "0061007129": 779, "0061000027": 780, "0812511816": 781, "0833531654": 782, "0880381736": 783, "0880381744": 784, "0722536283": 785, "0060505885": 786, "0061097101": 787, "0299164942": 788, "0312283709": 789, "0312983271": 790, "0380731851": 791, "0446605484": 792, "0446611212": 793, "0451188454": 794, "0451207955": 795, "0609804138": 796, "0671003755": 797, "067104754X": 798, "0743407067": 799, "074343627X": 800, "0786013230": 801, "0812509560": 802, "3426615355": 803, "3442435838": 804, "3442455707": 805, "0075536498": 806, "0099287692": 807, "0099845008": 808, "0330262130": 809, "0385720920": 810, "0393319296": 811, "0553262505": 812, "06514251": 813, "0805062971": 814, "3257228007": 815, "3442430496": 816, "3442446325": 817, "3453171500": 818, "3464371506": 819, "9513098648": 820, "0590108395": 821, "3442451353": 822, "8425330866": 823, "8481301213": 824, "0460905589": 825, "0816704627": 826, "1573229571": 827, "0060595183": 828, "9782922145441": 829, "2830207904": 830, "3499263998": 831, "0434009407": 832, "1841193887": 833, "0375700668": 834, "457871971": 835, "840149768X": 836, "8401499917": 837, "8402007287": 838, "8420600369": 839, "8420603066": 840, "8423918335": 841, "8426105084": 842, "8426429807": 843, "843223138X": 844, "8437608570": 845, "8440630921": 846, "8440630922": 847, "8470394126": 848, "8471662531": 849, "8472230082": 850, "8474541913": 851, "8478091351": 852, "3426193310": 853, "0064404773": 854, "0064407667": 855, "0552545228": 856, "006054094X": 857, "0375706038": 858, "081297106X": 859, "0843951826": 860, "1400032628": 861, "1585861553": 862, "0066210151": 863, "0385334141": 864, "0425191184": 865, "0671027343": 866, "0751503894": 867, "3714500799": 868, "0385503822": 869, "3548208975": 870, "0399138684": 871, "0425189864": 872, "0440236053": 873, "0440241073": 874, "0553586122": 875, "0099935708": 876, "0140118608": 877, "0552998249": 878, "3518408127": 879, "1586609726": 880, "0865472807": 881, "0312960344": 882, "055358068X": 883, "0792270142": 884, "0688174590": 885, "031286504X": 886, "0345342968": 887, "0375756981": 888, "0553382411": 889, "0671027662": 890, "0767903382": 891, "3785527195": 892, "1558744592": 893, "0060002484": 894, "0060094117": 895, "0312253397": 896, "0312331754": 897, "0312874243": 898, "0312979517": 899, "0316152196": 900, "0316154059": 901, "0316287555": 902, "034541389X": 903, "0345450175": 904, "0345452550": 905, "0375727981": 906, "0380975017": 907, "0385305389": 908, "0393045390": 909, "0394543289": 910, "0399135804": 911, "0399138188": 912, "0399141340": 913, "0399146466": 914, "0399146504": 915, "0399146687": 916, "0399147101": 917, "0399147144": 918, "0399147322": 919, "0399147624": 920, "0399148337": 921, "0399148450": 922, "0399148639": 923, "0399148728": 924, "0399149783": 925, "0399150811": 926, "0399150870": 927, "0399151451": 928, "0399151478": 929, "039915177X": 930, "0399151885": 931, "0425183181": 932, "0440111323": 933, "0440122147": 934, "0440221463": 935, "0441005470": 936, "0446519480": 937, "0449221512": 938, "055358295X": 939, "0670894184": 
940, "0671024094": 941, "0679450408": 942, "0684801663": 943, "0684846608": 944, "0684871726": 945, "0743201604": 946, "074320607X": 947, "0743407377": 948, "0743486226": 949, "0804109990": 950, "0812571118": 951, "0969691319": 952, "8475251471": 953, "0380724987": 954, "0380726246": 955, "0380816059": 956, "0399139419": 957, "0425175405": 958, "0425182932": 959, "0440225701": 960, "0446602485": 961, "0446603406": 962, "0451091949": 963, "0553095439": 964, "0553227041": 965, "0553263226": 966, "0553295098": 967, "0553564994": 968, "067102423X": 969, "0671032658": 970, "0671653849": 971, "0722509049": 972, "0812500067": 973, "0812516001": 974, "0812568710": 975, "0843921609": 976, "0890875588": 977, "1557730091": 978, "1558172882": 979, "0425178102": 980, "0446609404": 981, "0446610038": 982, "0451178017": 983, "0553211056": 984, "0553285920": 985}
processed/summary_book.csv ADDED
The diff for this file is too large to render. See raw diff
 
processed/user_id_map.json ADDED
@@ -0,0 +1 @@
1
+ {"276725": 0, "276726": 1, "276727": 2, "276729": 3, "276733": 4, "276736": 5, "276737": 6, "276744": 7, "276745": 8, "276746": 9, "276747": 10, "276748": 11, "276751": 12, "276754": 13, "276755": 14, "276760": 15, "276762": 16, "276765": 17, "276768": 18, "276772": 19, "276774": 20, "276780": 21, "276786": 22, "276788": 23, "276796": 24, "276798": 25, "276800": 26, "276803": 27, "276804": 28, "276806": 29, "276808": 30, "276811": 31, "276812": 32, "276813": 33, "276814": 34, "276817": 35, "276820": 36, "276822": 37, "276827": 38, "276828": 39, "276830": 40, "276832": 41, "276833": 42, "276835": 43, "276837": 44, "276838": 45, "276840": 46, "276842": 47, "276847": 48, "276848": 49, "276850": 50, "276852": 51, "276853": 52, "276854": 53, "276856": 54, "276857": 55, "276859": 56, "276861": 57, "276862": 58, "276863": 59, "276866": 60, "276869": 61, "276870": 62, "276872": 63, "276873": 64, "276875": 65, "276878": 66, "276879": 67, "276884": 68, "276887": 69, "276888": 70, "276889": 71, "276890": 72, "276896": 73, "276904": 74, "276905": 75, "276911": 76, "276912": 77, "276915": 78, "276916": 79, "276925": 80, "276927": 81, "276928": 82, "276929": 83, "276934": 84, "276936": 85, "276939": 86, "276943": 87, "276946": 88, "276949": 89, "276950": 90, "276953": 91, "276954": 92, "276957": 93, "276959": 94, "276963": 95, "276964": 96, "276965": 97, "276975": 98, "276981": 99, "276984": 100, "276986": 101, "276988": 102, "276989": 103, "276990": 104, "276992": 105, "276994": 106, "276997": 107, "276998": 108, "277002": 109, "277007": 110, "277009": 111, "277010": 112, "277012": 113, "277018": 114, "277019": 115, "277022": 116, "277023": 117, "277028": 118, "277031": 119, "277032": 120, "277035": 121, "277036": 122, "277040": 123, "277042": 124, "277048": 125, "277051": 126, "277052": 127, "277053": 128, "277056": 129, "277058": 130, "277064": 131, "277065": 132, "277072": 133, "277073": 134, "277074": 135, "277075": 136, "277079": 137, "277085": 138, "277087": 139, "277090": 140, "277094": 141, "277096": 142, "277102": 143, "277107": 144, "277109": 145, "277114": 146, "277116": 147, "277123": 148, "277124": 149, "277128": 150, "277129": 151, "277134": 152, "277135": 153, "277139": 154, "277142": 155, "277143": 156, "277149": 157, "277155": 158, "277157": 159, "277159": 160, "277165": 161, "277168": 162, "277170": 163}
recommend.py ADDED
@@ -0,0 +1,55 @@
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from jsonargparse import ArgumentParser
6
+
7
+
8
+ def parse_args():
9
+     """Parse command-line arguments."""
10
+     parser = ArgumentParser()
11
+     parser.add_argument("--data_dir", type=str, default="./processed")
12
+     parser.add_argument("--weight_dir", type=str, default="./weight")
13
+     parser.add_argument("--info_path", type=str, default="./processed/summary_book.csv")
14
+     parser.add_argument("--user_id", default="276729")
15
+     parser.add_argument("--num", type=int, default=10)
16
+     parser.add_argument("--lb", type=int, default=0)
17
+
18
+     return vars(parser.parse_args())
19
+
20
+ def main(
21
+     data_dir,
22
+     weight_dir,
23
+     info_path,
24
+     user_id,
25
+     num,
26
+     lb,
27
+     **kwargs
28
+ ):
29
+     # Load R matrix from file
30
+     R = np.load(f'{data_dir}/R.npy', allow_pickle=True)
31
+     # Load prediction
32
+     prediction = np.load(f'{weight_dir}/predicted.npy', allow_pickle=True)
33
+     # Load dictionaries from JSON files
34
+     with open(f'{data_dir}/user_id_map.json', 'r') as file:
35
+         user2id = json.load(file)
36
+     with open(f'{data_dir}/book_id_map.json', 'r') as file:
37
+         book2id = json.load(file)
38
+
39
+     # Recommend
40
+     user_idx = user2id[str(user_id)]
41
+     predict = prediction[:, user_idx]  # get prediction for user
42
+     predict_dict = {book: np.round(predict[idx], 2) for book, idx in book2id.items()}
43
+     # Load information about the books
44
+     book_df = pd.read_csv(info_path)
45
+     book_df = book_df[book_df["Num-Rating"] > lb]
46
+     book_df['predict'] = book_df["ISBN"].map(predict_dict)
47
+     recommendations = book_df.nlargest(num, "predict").reset_index(drop=True)
48
+     recommendations["context"] = recommendations.apply(
49
+         lambda book: f"{book['Book-Title']} ({book['Year-Of-Publication']}) - by {book['Book-Author']}", axis=1
50
+     )
51
+     print(recommendations)
52
+
53
+
54
+ if __name__ == "__main__":
55
+     main(**parse_args())
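With the defaults above, a typical invocation is python recommend.py --data_dir ./processed --weight_dir ./weight --info_path ./processed/summary_book.csv --user_id 276729 --num 10 --lb 0; since every argument carries a default, any of them can be omitted. The user ID must be a key of user_id_map.json, i.e. a user that appeared in the preprocessed ratings.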
requirements.txt ADDED
@@ -0,0 +1,51 @@
1
+ altair==4.2.2
2
+ attrs==23.1.0
3
+ backports.zoneinfo==0.2.1
4
+ blinker==1.6.2
5
+ cachetools==5.3.0
6
+ certifi==2023.5.7
7
+ charset-normalizer==3.1.0
8
+ click==8.1.3
9
+ decorator==5.1.1
10
+ entrypoints==0.4
11
+ gitdb==4.0.10
12
+ GitPython==3.1.31
13
+ idna==3.4
14
+ importlib-metadata==6.6.0
15
+ importlib-resources==5.12.0
16
+ Jinja2==3.1.2
17
+ jsonargparse==4.21.1
18
+ jsonschema==4.17.3
19
+ markdown-it-py==2.2.0
20
+ MarkupSafe==2.1.2
21
+ mdurl==0.1.2
22
+ numpy==1.24.3
23
+ packaging==23.1
24
+ pandas==2.0.1
25
+ Pillow==9.5.0
26
+ pkgutil-resolve-name==1.3.10
27
+ protobuf==3.20.3
28
+ pyarrow==12.0.0
29
+ pydeck==0.8.1b0
30
+ Pygments==2.15.1
31
+ Pympler==1.0.1
32
+ pyrsistent==0.19.3
33
+ python-dateutil==2.8.2
34
+ pytz==2023.3
35
+ PyYAML==6.0
36
+ requests==2.31.0
37
+ rich==13.3.5
38
+ six==1.16.0
39
+ smmap==5.0.0
40
+ streamlit==1.22.0
41
+ tenacity==8.2.2
42
+ toml==0.10.2
43
+ toolz==0.12.0
44
+ tornado==6.3.2
45
+ typing-extensions==4.6.2
46
+ tzdata==2023.3
47
+ tzlocal==5.0.1
48
+ urllib3==2.0.2
49
+ validators==0.20.0
50
+ watchdog==3.0.0
51
+ zipp==3.15.0
train.py ADDED
@@ -0,0 +1,86 @@
1
+ import numpy as np
2
+ import tensorflow as tf
3
+
4
+ from tensorflow import keras
5
+ from pathlib import Path
6
+ from jsonargparse import ArgumentParser
7
+ from utils_c import normalize, cost_function
8
+
9
+
10
+ def parse_args():
11
+     """Parse command-line arguments."""
12
+     parser = ArgumentParser()
13
+     parser.add_argument("--data_dir", type=str, default="./processed")
14
+     parser.add_argument("--out_dir", type=str, default="./weight")
15
+     parser.add_argument("--num_features", type=int, default=10)
16
+     parser.add_argument("--num_iterators", type=int, default=200)
17
+     parser.add_argument("--learning_rate", type=float, default=1e-1)
18
+     parser.add_argument("--lambda_", type=float, default=2.0)
19
+     parser.add_argument("--seed", type=int, default=1234)
20
+     parser.add_argument("--freq", type=int, default=20)
21
+
22
+     return vars(parser.parse_args())
23
+
24
+ def main(
25
+     data_dir,
26
+     out_dir,
27
+     num_features,
28
+     num_iterators,
29
+     learning_rate,
30
+     lambda_,
31
+     seed,
32
+     freq
33
+ ):
34
+     # Load R matrix from file
35
+     R = np.load(f'{data_dir}/R.npy', allow_pickle=True)
36
+     # Load Y matrix from file
37
+     Y = np.load(f'{data_dir}/Y.npy', allow_pickle=True)
38
+     # Normalize the dataset
39
+     Y_norm, Y_mean = normalize(Y, R)
40
+
41
+     num_books, num_users = Y.shape
42
+     # Set initial parameters (W, X); use tf.Variable to track these variables
43
+     tf.random.set_seed(seed)  # for consistent results
44
+
45
+     W = tf.Variable(tf.random.normal((num_users, num_features), dtype=tf.float64), name='W')
46
+     X = tf.Variable(tf.random.normal((num_books, num_features), dtype=tf.float64), name='X')
47
+     b = tf.Variable(tf.random.normal((1, num_users), dtype=tf.float64), name='b')
48
+
49
+     # Instantiate an optimizer.
50
+     optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
51
+     for step in range(num_iterators):
52
+         # Use TensorFlow's GradientTape
53
+         # to record the operations used to compute the cost
54
+         with tf.GradientTape() as tape:
55
+             # Compute the cost (forward pass included in cost)
56
+             cost_value = cost_function(X, W, b, Y_norm, R, lambda_)
57
+
58
+         # Use the gradient tape to automatically retrieve
59
+         # the gradients of the trainable variables with respect to the loss
60
+         grads = tape.gradient(cost_value, [X, W, b])
61
+
62
+         # Run one step of gradient descent by updating
63
+         # the value of the variables to minimize the loss.
64
+         optimizer.apply_gradients(zip(grads, [X, W, b]))
65
+
66
+         # Log periodically.
67
+         if step % freq == 0:
68
+             print(f"Training loss at iteration {step}: {cost_value:0.1f}")
69
+
70
+     predict = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()
71
+     predict = predict + Y_mean
72
+
73
+     # Save weights
74
+     out_dir = Path(out_dir)
75
+     if out_dir.exists():
76
+         assert out_dir.is_dir()
77
+     else:
78
+         out_dir.mkdir(parents=True)
79
+     np.save(f'{out_dir}/W.npy', W)
80
+     np.save(f'{out_dir}/X.npy', X)
81
+     np.save(f'{out_dir}/b.npy', b)
82
+     np.save(f'{out_dir}/predicted.npy', predict)
83
+
84
+
85
+ if __name__ == "__main__":
86
+     main(**parse_args())
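train.py factorizes the mean-normalized matrix, so the per-book mean has to be added back at prediction time. A small numeric sketch of that round trip, mirroring normalize() from utils_c.py on toy values:

import numpy as np

Y = np.array([[4.0, 6.0],
              [0.0, 0.0]])
R = np.array([[1, 1],
              [0, 0]])

# Same computation as normalize() in utils_c.py
Y_mean = (np.sum(Y * R, axis=1) / (np.sum(R, axis=1) + 1e-12)).reshape(-1, 1)
Y_norm = Y - Y_mean * R
print(Y_mean.ravel())  # [5. 0.] -> per-book mean over rated entries only
print(Y_norm[0])       # [-1. 1.] -> rated entries become deviations from the mean

# After training, X @ W.T + b approximates Y_norm, so the script computes
# predict = X @ W.T + b + Y_mean to return to the original rating scale.

The mean-normalization also gives a sensible default for users with no ratings: their raw prediction is near zero, so adding Y_mean back recommends each book at roughly its average rating.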
utils_c.py ADDED
@@ -0,0 +1,40 @@
1
+ import numpy as np
2
+ import tensorflow as tf
3
+
4
+
5
+ def normalize(Y, R):
6
+     """
7
+     Preprocess data by subtracting the mean rating for every book (every row),
8
+     using only real ratings, i.e. entries with R(i,j) = 1.
9
+
10
+     [Y_norm, Y_mean] = normalize(Y, R) normalizes Y so that each book
11
+     has an average rating of 0. Unrated books then sit at the mean rating (0).
12
+
13
+     Returns the per-book mean rating in Y_mean.
14
+     """
15
+     Y_mean = (np.sum(Y * R, axis=1) / (np.sum(R, axis=1) + 1e-12)).reshape(-1, 1)
16
+     Y_norm = Y - np.multiply(Y_mean, R)
17
+
18
+     return Y_norm, Y_mean
19
+
20
+ def cost_function(X, W, b, Y, R, lambda_):
21
+     """
22
+     Returns the cost for collaborative filtering.
23
+     Vectorized for speed; uses TensorFlow operations to be compatible with a custom training loop.
24
+
25
+     Args:
26
+         X (ndarray (num_books, num_features)): matrix of item features
27
+         W (ndarray (num_users, num_features)): matrix of user parameters
28
+         b (ndarray (1, num_users)): vector of user biases
29
+         Y (ndarray (num_books, num_users)): matrix of user ratings of books
30
+         R (ndarray (num_books, num_users)): matrix where R(i, j) = 1 if the i-th book was rated by the j-th user
31
+         lambda_ (float): regularization parameter
32
+
33
+     Returns:
34
+         J (float): cost
35
+     """
36
+
37
+     j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y) * R
38
+     J = 0.5 * tf.reduce_sum(j ** 2) + (lambda_ / 2) * (tf.reduce_sum(X ** 2) + tf.reduce_sum(W ** 2))
39
+
40
+     return J
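In equation form, cost_function computes the standard regularized collaborative-filtering objective, where multiplying by R restricts the sum to observed ratings r(i,j) = 1:

J = \frac{1}{2} \sum_{(i,j)\,:\,r(i,j)=1} \left( \mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b_j - y^{(i,j)} \right)^2 + \frac{\lambda}{2} \left( \sum_{j,k} \bigl( w_k^{(j)} \bigr)^2 + \sum_{i,k} \bigl( x_k^{(i)} \bigr)^2 \right)

Note that the user bias b_j is left out of the regularization term (biases are conventionally not regularized), which matches the code above.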
weight/W.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df3995b7b6243c4b68b3cecabb10414d982c1cf1baf4533e6c8b8fadd3dc751
3
+ size 13248
weight/X.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f0e114c605d0b60edbf581ae35380ec3f1a1271f280d3f17c415a52828358f
3
+ size 79008
weight/b.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d69013d3a83302293d5e37e4fbbb2dd026297f6499cd71d9501fc91adc0d817f
3
+ size 1440
weight/predicted.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d02e08da558c5ad31c1eb64d15a6227b570ef73c4d0597d5ab49a5aa7f0310f
3
+ size 1293760