{ "cells": [ { "cell_type": "markdown", "id": "a3022e2a", "metadata": {}, "source": [ "# BOOKS Recommender system using clustering | collaborative based" ] }, { "cell_type": "code", "execution_count": 1, "id": "ca603e9c", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 3, "id": "b850f997", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Chirag\\AppData\\Local\\Temp\\ipykernel_6300\\499239923.py:1: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", "\n", "\n", " books = pd.read_csv('data/BX-Books.csv', sep = \";\" ,error_bad_lines=False,encoding='latin-1')\n", "b'Skipping line 6452: expected 8 fields, saw 9\\nSkipping line 43667: expected 8 fields, saw 10\\nSkipping line 51751: expected 8 fields, saw 9\\n'\n", "b'Skipping line 92038: expected 8 fields, saw 9\\nSkipping line 104319: expected 8 fields, saw 9\\nSkipping line 121768: expected 8 fields, saw 9\\n'\n", "b'Skipping line 144058: expected 8 fields, saw 9\\nSkipping line 150789: expected 8 fields, saw 9\\nSkipping line 157128: expected 8 fields, saw 9\\nSkipping line 180189: expected 8 fields, saw 9\\nSkipping line 185738: expected 8 fields, saw 9\\n'\n", "b'Skipping line 209388: expected 8 fields, saw 9\\nSkipping line 220626: expected 8 fields, saw 9\\nSkipping line 227933: expected 8 fields, saw 11\\nSkipping line 228957: expected 8 fields, saw 10\\nSkipping line 245933: expected 8 fields, saw 9\\nSkipping line 251296: expected 8 fields, saw 9\\nSkipping line 259941: expected 8 fields, saw 9\\nSkipping line 261529: expected 8 fields, saw 9\\n'\n", "C:\\Users\\Chirag\\AppData\\Local\\Temp\\ipykernel_6300\\499239923.py:1: DtypeWarning: Columns (3) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", " books = pd.read_csv('data/BX-Books.csv', sep = \";\" ,error_bad_lines=False,encoding='latin-1')\n" ] } ], "source": [ "books = pd.read_csv('data/BX-Books.csv', sep = \";\" ,on_bad_lines='warn',encoding='latin-1')" ] }, { "cell_type": "code", "execution_count": 4, "id": "726fa091", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisherImage-URL-SImage-URL-MImage-URL-L
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...
\n", "
" ], "text/plain": [ " ISBN Book-Title \\\n", "0 0195153448 Classical Mythology \n", "1 0002005018 Clara Callan \n", "2 0060973129 Decision in Normandy \n", "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", "4 0393045218 The Mummies of Urumchi \n", "\n", " Book-Author Year-Of-Publication Publisher \\\n", "0 Mark P. O. Morford 2002 Oxford University Press \n", "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", "2 Carlo D'Este 1991 HarperPerennial \n", "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", "4 E. J. W. Barber 1999 W. W. Norton & Company \n", "\n", " Image-URL-S \\\n", "0 http://images.amazon.com/images/P/0195153448.0... \n", "1 http://images.amazon.com/images/P/0002005018.0... \n", "2 http://images.amazon.com/images/P/0060973129.0... \n", "3 http://images.amazon.com/images/P/0374157065.0... \n", "4 http://images.amazon.com/images/P/0393045218.0... \n", "\n", " Image-URL-M \\\n", "0 http://images.amazon.com/images/P/0195153448.0... \n", "1 http://images.amazon.com/images/P/0002005018.0... \n", "2 http://images.amazon.com/images/P/0060973129.0... \n", "3 http://images.amazon.com/images/P/0374157065.0... \n", "4 http://images.amazon.com/images/P/0393045218.0... \n", "\n", " Image-URL-L \n", "0 http://images.amazon.com/images/P/0195153448.0... \n", "1 http://images.amazon.com/images/P/0002005018.0... \n", "2 http://images.amazon.com/images/P/0060973129.0... \n", "3 http://images.amazon.com/images/P/0374157065.0... \n", "4 http://images.amazon.com/images/P/0393045218.0... 
" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "e79586b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(271360, 8)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.shape" ] }, { "cell_type": "code", "execution_count": 7, "id": "67039a00", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',\n", " 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],\n", " dtype='object')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.columns" ] }, { "cell_type": "code", "execution_count": 8, "id": "4933dda7", "metadata": {}, "outputs": [], "source": [ "books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',\n", " 'Image-URL-L']]" ] }, { "cell_type": "code", "execution_count": 9, "id": "4b643469", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisherImage-URL-L
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...
\n", "
" ], "text/plain": [ " ISBN Book-Title \\\n", "0 0195153448 Classical Mythology \n", "1 0002005018 Clara Callan \n", "2 0060973129 Decision in Normandy \n", "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", "4 0393045218 The Mummies of Urumchi \n", "\n", " Book-Author Year-Of-Publication Publisher \\\n", "0 Mark P. O. Morford 2002 Oxford University Press \n", "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", "2 Carlo D'Este 1991 HarperPerennial \n", "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", "4 E. J. W. Barber 1999 W. W. Norton & Company \n", "\n", " Image-URL-L \n", "0 http://images.amazon.com/images/P/0195153448.0... \n", "1 http://images.amazon.com/images/P/0002005018.0... \n", "2 http://images.amazon.com/images/P/0060973129.0... \n", "3 http://images.amazon.com/images/P/0374157065.0... \n", "4 http://images.amazon.com/images/P/0393045218.0... " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "8aaac73d", "metadata": {}, "outputs": [], "source": [ "books.rename(columns={\n", " \n", " \"Book-Title\" : \"title\",\n", " \"Book-Author\" : 'author',\n", " \"Year-Of-Publication\" : \"year\",\n", " \"Publisher\" : \"publisher\",\n", " \"Image-URL-L\" : \"img_url\"\n", "},inplace=True)" ] }, { "cell_type": "code", "execution_count": 12, "id": "60467a1e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISBNtitleauthoryearpublisherimg_url
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...
\n", "
" ], "text/plain": [ " ISBN title \\\n", "0 0195153448 Classical Mythology \n", "1 0002005018 Clara Callan \n", "2 0060973129 Decision in Normandy \n", "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", "4 0393045218 The Mummies of Urumchi \n", "\n", " author year publisher \\\n", "0 Mark P. O. Morford 2002 Oxford University Press \n", "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", "2 Carlo D'Este 1991 HarperPerennial \n", "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", "4 E. J. W. Barber 1999 W. W. Norton & Company \n", "\n", " img_url \n", "0 http://images.amazon.com/images/P/0195153448.0... \n", "1 http://images.amazon.com/images/P/0002005018.0... \n", "2 http://images.amazon.com/images/P/0060973129.0... \n", "3 http://images.amazon.com/images/P/0374157065.0... \n", "4 http://images.amazon.com/images/P/0393045218.0... " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books.head()" ] }, { "cell_type": "code", "execution_count": 13, "id": "28c563c7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Chirag\\AppData\\Local\\Temp\\ipykernel_6300\\3855865208.py:1: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", "\n", "\n", " users = pd.read_csv('data/BX-Users.csv', sep = \";\" ,error_bad_lines=False,encoding='latin-1')\n" ] } ], "source": [ "users = pd.read_csv('data/BX-Users.csv', sep = \";\" ,on_bad_lines='warn',encoding='latin-1')" ] }, { "cell_type": "code", "execution_count": 14, "id": "deab77ad", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDLocationAge
01nyc, new york, usaNaN
12stockton, california, usa18.0
23moscow, yukon territory, russiaNaN
34porto, v.n.gaia, portugal17.0
45farnborough, hants, united kingdomNaN
\n", "
" ], "text/plain": [ " User-ID Location Age\n", "0 1 nyc, new york, usa NaN\n", "1 2 stockton, california, usa 18.0\n", "2 3 moscow, yukon territory, russia NaN\n", "3 4 porto, v.n.gaia, portugal 17.0\n", "4 5 farnborough, hants, united kingdom NaN" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users.head()" ] }, { "cell_type": "code", "execution_count": 15, "id": "e5881c1f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(278858, 3)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users.shape" ] }, { "cell_type": "code", "execution_count": 16, "id": "9b99bbc3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Chirag\\AppData\\Local\\Temp\\ipykernel_6300\\1992283511.py:1: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.\n", "\n", "\n", " ratings = pd.read_csv('data/BX-Book-Ratings.csv', sep = \";\" ,error_bad_lines=False,encoding='latin-1')\n" ] } ], "source": [ "ratings = pd.read_csv('data/BX-Book-Ratings.csv', sep = \";\" ,on_bad_lines='warn',encoding='latin-1')" ] }, { "cell_type": "code", "execution_count": 17, "id": "47c65bb2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-Rating
0276725034545104X0
127672601550612245
227672704465208020
3276729052165615X3
427672905217950286
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating\n", "0 276725 034545104X 0\n", "1 276726 0155061224 5\n", "2 276727 0446520802 0\n", "3 276729 052165615X 3\n", "4 276729 0521795028 6" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "87c1b251", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1149780, 3)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.shape\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "8c632209", "metadata": {}, "outputs": [], "source": [ "ratings.rename(columns={\n", " \"User-ID\" : \"user_id\",\n", " \"Book-Rating\" : \"rating\"\n", "},inplace = True)" ] }, { "cell_type": "code", "execution_count": 21, "id": "08efd352", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNrating
0276725034545104X0
127672601550612245
227672704465208020
3276729052165615X3
427672905217950286
\n", "
" ], "text/plain": [ " user_id ISBN rating\n", "0 276725 034545104X 0\n", "1 276726 0155061224 5\n", "2 276727 0446520802 0\n", "3 276729 052165615X 3\n", "4 276729 0521795028 6" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.head()" ] }, { "cell_type": "code", "execution_count": 23, "id": "c6f2e5fe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11676 13602\n", "198711 7550\n", "153662 6109\n", "98391 5891\n", "35859 5850\n", " ... \n", "116180 1\n", "116166 1\n", "116154 1\n", "116137 1\n", "276723 1\n", "Name: user_id, Length: 105283, dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings['user_id'].value_counts()" ] }, { "cell_type": "code", "execution_count": 24, "id": "50af5f4c", "metadata": {}, "outputs": [], "source": [ "x = ratings['user_id'].value_counts() > 200" ] }, { "cell_type": "code", "execution_count": 25, "id": "859569fd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(899,)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x[x].shape" ] }, { "cell_type": "code", "execution_count": 26, "id": "f87e961b", "metadata": {}, "outputs": [], "source": [ "y = x[x].index" ] }, { "cell_type": "code", "execution_count": 27, "id": "722837bd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 11676, 198711, 153662, 98391, 35859, 212898, 278418, 76352,\n", " 110973, 235105,\n", " ...\n", " 260183, 73681, 44296, 155916, 9856, 274808, 28634, 59727,\n", " 268622, 188951],\n", " dtype='int64', length=899)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y" ] }, { "cell_type": "code", "execution_count": 29, "id": "2b46c4c2", "metadata": {}, "outputs": [], "source": [ "ratings = ratings[ratings['user_id'].isin(y)]" ] }, { "cell_type": "code", "execution_count": 30, "id": "5b96eac4", "metadata": {}, "outputs": [ { 
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNrating
1456277427002542730X10
145727742700262174570
1458277427003008685X8
145927742700306153210
146027742700600020500
............
114761227597038290218600
114761327597047700195720
11476142759708960860970
114761527597096263407628
114761627597096263449900
\n", "

526356 rows × 3 columns

\n", "
" ], "text/plain": [ " user_id ISBN rating\n", "1456 277427 002542730X 10\n", "1457 277427 0026217457 0\n", "1458 277427 003008685X 8\n", "1459 277427 0030615321 0\n", "1460 277427 0060002050 0\n", "... ... ... ...\n", "1147612 275970 3829021860 0\n", "1147613 275970 4770019572 0\n", "1147614 275970 896086097 0\n", "1147615 275970 9626340762 8\n", "1147616 275970 9626344990 0\n", "\n", "[526356 rows x 3 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings" ] }, { "cell_type": "code", "execution_count": 31, "id": "218ccd07", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(526356, 3)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.shape" ] }, { "cell_type": "code", "execution_count": 32, "id": "5ec779da", "metadata": {}, "outputs": [], "source": [ "ratings_with_books = ratings.merge(books, on = \"ISBN\")" ] }, { "cell_type": "code", "execution_count": 33, "id": "9f6f8c49", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNratingtitleauthoryearpublisherimg_url
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
...........................
48766627597018921450220Here Is New YorkE. B. White1999Little Bookroomhttp://images.amazon.com/images/P/1892145022.0...
48766727597019318681230There's a Porcupine in My Outhouse: Misadventu...Mike Tougias2002Capital Books (VA)http://images.amazon.com/images/P/1931868123.0...
487668275970341108621110Die Biene.Sybil Gr�¤fin Sch�¶nfeldt1993Bibliographisches Institut, Mannheimhttp://images.amazon.com/images/P/3411086211.0...
48766927597038290218600The Penis BookJoseph Cohen1999Konemannhttp://images.amazon.com/images/P/3829021860.0...
48767027597047700195720MusashiEiji Yoshikawa1995Kodansha International (JPN)http://images.amazon.com/images/P/4770019572.0...
\n", "

487671 rows × 8 columns

\n", "
" ], "text/plain": [ " user_id ISBN rating \\\n", "0 277427 002542730X 10 \n", "1 3363 002542730X 0 \n", "2 11676 002542730X 6 \n", "3 12538 002542730X 10 \n", "4 13552 002542730X 0 \n", "... ... ... ... \n", "487666 275970 1892145022 0 \n", "487667 275970 1931868123 0 \n", "487668 275970 3411086211 10 \n", "487669 275970 3829021860 0 \n", "487670 275970 4770019572 0 \n", "\n", " title \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... \n", "1 Politically Correct Bedtime Stories: Modern Ta... \n", "2 Politically Correct Bedtime Stories: Modern Ta... \n", "3 Politically Correct Bedtime Stories: Modern Ta... \n", "4 Politically Correct Bedtime Stories: Modern Ta... \n", "... ... \n", "487666 Here Is New York \n", "487667 There's a Porcupine in My Outhouse: Misadventu... \n", "487668 Die Biene. \n", "487669 The Penis Book \n", "487670 Musashi \n", "\n", " author year \\\n", "0 James Finn Garner 1994 \n", "1 James Finn Garner 1994 \n", "2 James Finn Garner 1994 \n", "3 James Finn Garner 1994 \n", "4 James Finn Garner 1994 \n", "... ... ... \n", "487666 E. B. White 1999 \n", "487667 Mike Tougias 2002 \n", "487668 Sybil Gr�¤fin Sch�¶nfeldt 1993 \n", "487669 Joseph Cohen 1999 \n", "487670 Eiji Yoshikawa 1995 \n", "\n", " publisher \\\n", "0 John Wiley & Sons Inc \n", "1 John Wiley & Sons Inc \n", "2 John Wiley & Sons Inc \n", "3 John Wiley & Sons Inc \n", "4 John Wiley & Sons Inc \n", "... ... \n", "487666 Little Bookroom \n", "487667 Capital Books (VA) \n", "487668 Bibliographisches Institut, Mannheim \n", "487669 Konemann \n", "487670 Kodansha International (JPN) \n", "\n", " img_url \n", "0 http://images.amazon.com/images/P/002542730X.0... \n", "1 http://images.amazon.com/images/P/002542730X.0... \n", "2 http://images.amazon.com/images/P/002542730X.0... \n", "3 http://images.amazon.com/images/P/002542730X.0... \n", "4 http://images.amazon.com/images/P/002542730X.0... \n", "... ... \n", "487666 http://images.amazon.com/images/P/1892145022.0... 
\n", "487667 http://images.amazon.com/images/P/1931868123.0... \n", "487668 http://images.amazon.com/images/P/3411086211.0... \n", "487669 http://images.amazon.com/images/P/3829021860.0... \n", "487670 http://images.amazon.com/images/P/4770019572.0... \n", "\n", "[487671 rows x 8 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings_with_books" ] }, { "cell_type": "code", "execution_count": 34, "id": "b3e78a43", "metadata": {}, "outputs": [], "source": [ "x= ratings['user_id'].value_counts() > 200" ] }, { "cell_type": "code", "execution_count": 35, "id": "3ae79ca5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11676 True\n", "198711 True\n", "153662 True\n", "98391 True\n", "35859 True\n", " ... \n", "59727 True\n", "188951 True\n", "268622 True\n", "9856 True\n", "155916 True\n", "Name: user_id, Length: 899, dtype: bool" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 36, "id": "9ef399cd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(899,)" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x[x].shape" ] }, { "cell_type": "code", "execution_count": 37, "id": "bc17d6c1", "metadata": {}, "outputs": [], "source": [ "y = x[x].index" ] }, { "cell_type": "code", "execution_count": 38, "id": "db83e33e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 11676, 198711, 153662, 98391, 35859, 212898, 278418, 76352,\n", " 110973, 235105,\n", " ...\n", " 88793, 274808, 44296, 28634, 73681, 59727, 188951, 268622,\n", " 9856, 155916],\n", " dtype='int64', length=899)" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y" ] }, { "cell_type": "code", "execution_count": 40, "id": "7668d2b0", "metadata": {}, "outputs": [], "source": [ "ratings = ratings[ratings['user_id'].isin(y)]" ] }, { 
"cell_type": "code", "execution_count": 41, "id": "15ad27f4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNrating
1456277427002542730X10
145727742700262174570
1458277427003008685X8
145927742700306153210
146027742700600020500
............
114761227597038290218600
114761327597047700195720
11476142759708960860970
114761527597096263407628
114761627597096263449900
\n", "

526356 rows × 3 columns

\n", "
" ], "text/plain": [ " user_id ISBN rating\n", "1456 277427 002542730X 10\n", "1457 277427 0026217457 0\n", "1458 277427 003008685X 8\n", "1459 277427 0030615321 0\n", "1460 277427 0060002050 0\n", "... ... ... ...\n", "1147612 275970 3829021860 0\n", "1147613 275970 4770019572 0\n", "1147614 275970 896086097 0\n", "1147615 275970 9626340762 8\n", "1147616 275970 9626344990 0\n", "\n", "[526356 rows x 3 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings" ] }, { "cell_type": "code", "execution_count": 44, "id": "c4c6274e", "metadata": {}, "outputs": [], "source": [ "num_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()" ] }, { "cell_type": "code", "execution_count": 46, "id": "3050e19d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlerating
0A Light in the Storm: The Civil War Diary of ...2
1Always Have Popsicles1
2Apple Magic (The Collector's series)1
3Beyond IBM: Leadership Marketing and Finance ...1
4Clifford Visita El Hospital (Clifford El Gran...1
\n", "
" ], "text/plain": [ " title rating\n", "0 A Light in the Storm: The Civil War Diary of ... 2\n", "1 Always Have Popsicles 1\n", "2 Apple Magic (The Collector's series) 1\n", "3 Beyond IBM: Leadership Marketing and Finance ... 1\n", "4 Clifford Visita El Hospital (Clifford El Gran... 1" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "num_rating.head()" ] }, { "cell_type": "code", "execution_count": 48, "id": "d287a22d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNrating_xtitleauthoryearpublisherimg_urlrating_y
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
..............................
48766627597018921450220Here Is New YorkE. B. White1999Little Bookroomhttp://images.amazon.com/images/P/1892145022.0...1
48766727597019318681230There's a Porcupine in My Outhouse: Misadventu...Mike Tougias2002Capital Books (VA)http://images.amazon.com/images/P/1931868123.0...1
487668275970341108621110Die Biene.Sybil Gr�¤fin Sch�¶nfeldt1993Bibliographisches Institut, Mannheimhttp://images.amazon.com/images/P/3411086211.0...1
48766927597038290218600The Penis BookJoseph Cohen1999Konemannhttp://images.amazon.com/images/P/3829021860.0...1
48767027597047700195720MusashiEiji Yoshikawa1995Kodansha International (JPN)http://images.amazon.com/images/P/4770019572.0...1
\n", "

487671 rows × 9 columns

\n", "
" ], "text/plain": [ " user_id ISBN rating_x \\\n", "0 277427 002542730X 10 \n", "1 3363 002542730X 0 \n", "2 11676 002542730X 6 \n", "3 12538 002542730X 10 \n", "4 13552 002542730X 0 \n", "... ... ... ... \n", "487666 275970 1892145022 0 \n", "487667 275970 1931868123 0 \n", "487668 275970 3411086211 10 \n", "487669 275970 3829021860 0 \n", "487670 275970 4770019572 0 \n", "\n", " title \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... \n", "1 Politically Correct Bedtime Stories: Modern Ta... \n", "2 Politically Correct Bedtime Stories: Modern Ta... \n", "3 Politically Correct Bedtime Stories: Modern Ta... \n", "4 Politically Correct Bedtime Stories: Modern Ta... \n", "... ... \n", "487666 Here Is New York \n", "487667 There's a Porcupine in My Outhouse: Misadventu... \n", "487668 Die Biene. \n", "487669 The Penis Book \n", "487670 Musashi \n", "\n", " author year \\\n", "0 James Finn Garner 1994 \n", "1 James Finn Garner 1994 \n", "2 James Finn Garner 1994 \n", "3 James Finn Garner 1994 \n", "4 James Finn Garner 1994 \n", "... ... ... \n", "487666 E. B. White 1999 \n", "487667 Mike Tougias 2002 \n", "487668 Sybil Gr�¤fin Sch�¶nfeldt 1993 \n", "487669 Joseph Cohen 1999 \n", "487670 Eiji Yoshikawa 1995 \n", "\n", " publisher \\\n", "0 John Wiley & Sons Inc \n", "1 John Wiley & Sons Inc \n", "2 John Wiley & Sons Inc \n", "3 John Wiley & Sons Inc \n", "4 John Wiley & Sons Inc \n", "... ... \n", "487666 Little Bookroom \n", "487667 Capital Books (VA) \n", "487668 Bibliographisches Institut, Mannheim \n", "487669 Konemann \n", "487670 Kodansha International (JPN) \n", "\n", " img_url rating_y \n", "0 http://images.amazon.com/images/P/002542730X.0... 82 \n", "1 http://images.amazon.com/images/P/002542730X.0... 82 \n", "2 http://images.amazon.com/images/P/002542730X.0... 82 \n", "3 http://images.amazon.com/images/P/002542730X.0... 82 \n", "4 http://images.amazon.com/images/P/002542730X.0... 82 \n", "... ... ... 
\n", "487666 http://images.amazon.com/images/P/1892145022.0... 1 \n", "487667 http://images.amazon.com/images/P/1931868123.0... 1 \n", "487668 http://images.amazon.com/images/P/3411086211.0... 1 \n", "487669 http://images.amazon.com/images/P/3829021860.0... 1 \n", "487670 http://images.amazon.com/images/P/4770019572.0... 1 \n", "\n", "[487671 rows x 9 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating = ratings_with_books.merge(num_rating, on = 'title')\n", "final_rating" ] }, { "cell_type": "code", "execution_count": 51, "id": "179591e9", "metadata": {}, "outputs": [], "source": [ "final_rating = final_rating[final_rating['rating_y']>=50]" ] }, { "cell_type": "code", "execution_count": 52, "id": "4ab478d3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(61853, 9)" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating.shape" ] }, { "cell_type": "code", "execution_count": 54, "id": "4bc81238", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Chirag\\AppData\\Local\\Temp\\ipykernel_6300\\1573401051.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " final_rating.drop_duplicates(['user_id','title'],inplace=True)\n" ] } ], "source": [ "final_rating.drop_duplicates(['user_id','title'],inplace=True)" ] }, { "cell_type": "code", "execution_count": 55, "id": "d19aeae9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idISBNrating_xtitleauthoryearpublisherimg_urlrating_y
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
..............................
23670125548905535799837And Then You DieIris Johansen1998Bantamhttp://images.amazon.com/images/P/0553579983.0...50
23670225640705535799830And Then You DieIris Johansen1998Bantamhttp://images.amazon.com/images/P/0553579983.0...50
23670325720405535799830And Then You DieIris Johansen1998Bantamhttp://images.amazon.com/images/P/0553579983.0...50
23670426182905535799830And Then You DieIris Johansen1998Bantamhttp://images.amazon.com/images/P/0553579983.0...50
23670527397905535799830And Then You DieIris Johansen1998Bantamhttp://images.amazon.com/images/P/0553579983.0...50
\n", "

59850 rows × 9 columns

\n", "
" ], "text/plain": [ " user_id ISBN rating_x \\\n", "0 277427 002542730X 10 \n", "1 3363 002542730X 0 \n", "2 11676 002542730X 6 \n", "3 12538 002542730X 10 \n", "4 13552 002542730X 0 \n", "... ... ... ... \n", "236701 255489 0553579983 7 \n", "236702 256407 0553579983 0 \n", "236703 257204 0553579983 0 \n", "236704 261829 0553579983 0 \n", "236705 273979 0553579983 0 \n", "\n", " title author \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "... ... ... \n", "236701 And Then You Die Iris Johansen \n", "236702 And Then You Die Iris Johansen \n", "236703 And Then You Die Iris Johansen \n", "236704 And Then You Die Iris Johansen \n", "236705 And Then You Die Iris Johansen \n", "\n", " year publisher \\\n", "0 1994 John Wiley & Sons Inc \n", "1 1994 John Wiley & Sons Inc \n", "2 1994 John Wiley & Sons Inc \n", "3 1994 John Wiley & Sons Inc \n", "4 1994 John Wiley & Sons Inc \n", "... ... ... \n", "236701 1998 Bantam \n", "236702 1998 Bantam \n", "236703 1998 Bantam \n", "236704 1998 Bantam \n", "236705 1998 Bantam \n", "\n", " img_url rating_y \n", "0 http://images.amazon.com/images/P/002542730X.0... 82 \n", "1 http://images.amazon.com/images/P/002542730X.0... 82 \n", "2 http://images.amazon.com/images/P/002542730X.0... 82 \n", "3 http://images.amazon.com/images/P/002542730X.0... 82 \n", "4 http://images.amazon.com/images/P/002542730X.0... 82 \n", "... ... ... \n", "236701 http://images.amazon.com/images/P/0553579983.0... 50 \n", "236702 http://images.amazon.com/images/P/0553579983.0... 50 \n", "236703 http://images.amazon.com/images/P/0553579983.0... 50 \n", "236704 http://images.amazon.com/images/P/0553579983.0... 
50 \n", "236705 http://images.amazon.com/images/P/0553579983.0... 50 \n", "\n", "[59850 rows x 9 columns]" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating" ] }, { "cell_type": "code", "execution_count": 57, "id": "be3f1522", "metadata": {}, "outputs": [], "source": [ "book_pivot = final_rating.pivot_table(columns='user_id', index ='title', values = 'rating_x')" ] }, { "cell_type": "code", "execution_count": 58, "id": "8397e228", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_id254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
title
19849.0NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
1st to Die: A NovelNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2nd ChanceNaN10.0NaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaN0.0NaNNaNNaNNaN0.0NaN
4 BlondesNaNNaNNaNNaNNaNNaNNaNNaNNaN0.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
84 Charing Cross RoadNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN10.0NaNNaNNaNNaN
..................................................................
Year of WondersNaNNaNNaN7.0NaNNaNNaNNaN7.0NaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
You Belong To MeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Zen and the Art of Motorcycle Maintenance: An Inquiry into ValuesNaNNaNNaNNaN0.0NaNNaNNaNNaN0.0...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
ZoyaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\\O\\\" Is for Outlaw\"NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN8.0NaNNaNNaNNaNNaNNaNNaN
\n", "

742 rows × 888 columns

\n", "
" ], "text/plain": [ "user_id 254 2276 2766 \\\n", "title \n", "1984 9.0 NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN 10.0 NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "user_id 2977 3363 3757 \\\n", "title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders 7.0 NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "user_id 4017 4385 6242 \\\n", "title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN 7.0 \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "user_id 6251 ... 274004 \\\n", "title ... \n", "1984 NaN ... NaN \n", "1st to Die: A Novel NaN ... NaN \n", "2nd Chance NaN ... NaN \n", "4 Blondes 0.0 ... NaN \n", "84 Charing Cross Road NaN ... NaN \n", "... ... ... ... \n", "Year of Wonders NaN ... NaN \n", "You Belong To Me NaN ... NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n", "Zoya NaN ... NaN \n", "\\O\\\" Is for Outlaw\" NaN ... NaN \n", "\n", "user_id 274061 274301 274308 \\\n", "title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN 0.0 \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... 
\n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n", "\n", "user_id 274808 275970 277427 \\\n", "title \n", "1984 NaN 0.0 NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN 10.0 NaN \n", "... ... ... ... \n", "Year of Wonders NaN 0.0 NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "user_id 277478 277639 278418 \n", "title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN 0.0 NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "[742 rows x 888 columns]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot" ] }, { "cell_type": "code", "execution_count": 59, "id": "56f068ca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(742, 888)" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot.shape" ] }, { "cell_type": "code", "execution_count": 60, "id": "18474ec7", "metadata": {}, "outputs": [], "source": [ "book_pivot.fillna(0,inplace=True)" ] }, { "cell_type": "code", "execution_count": 61, "id": "6bf46f17", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_id254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
title
19849.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1st to Die: A Novel0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
2nd Chance0.010.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4 Blondes0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
84 Charing Cross Road0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.010.00.00.00.00.0
..................................................................
Year of Wonders0.00.00.07.00.00.00.00.07.00.0...0.00.00.00.00.00.00.00.00.00.0
You Belong To Me0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zoya0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\\O\\\" Is for Outlaw\"0.00.00.00.00.00.00.00.00.00.0...0.00.08.00.00.00.00.00.00.00.0
\n", "

742 rows × 888 columns

\n", "
" ], "text/plain": [ "user_id 254 2276 2766 \\\n", "title \n", "1984 9.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 10.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "user_id 2977 3363 3757 \\\n", "title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 7.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "user_id 4017 4385 6242 \\\n", "title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 7.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "user_id 6251 ... 274004 \\\n", "title ... \n", "1984 0.0 ... 0.0 \n", "1st to Die: A Novel 0.0 ... 0.0 \n", "2nd Chance 0.0 ... 0.0 \n", "4 Blondes 0.0 ... 0.0 \n", "84 Charing Cross Road 0.0 ... 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 ... 0.0 \n", "You Belong To Me 0.0 ... 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n", "Zoya 0.0 ... 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n", "\n", "user_id 274061 274301 274308 \\\n", "title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... 
\n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n", "\n", "user_id 274808 275970 277427 \\\n", "title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 10.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "user_id 277478 277639 278418 \n", "title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "[742 rows x 888 columns]" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot" ] }, { "cell_type": "code", "execution_count": 62, "id": "136cdcc6", "metadata": {}, "outputs": [], "source": [ "from scipy.sparse import csr_matrix" ] }, { "cell_type": "code", "execution_count": 63, "id": "18966457", "metadata": {}, "outputs": [], "source": [ "book_sparse = csr_matrix(book_pivot)" ] }, { "cell_type": "code", "execution_count": 64, "id": "4d3807fb", "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import NearestNeighbors\n", "model = NearestNeighbors(algorithm='brute')" ] }, { "cell_type": "code", "execution_count": 65, "id": "4f90bb09", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NearestNeighbors(algorithm='brute')" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"model.fit(book_sparse)" ] }, { "cell_type": "code", "execution_count": 66, "id": "665da4a5", "metadata": {}, "outputs": [], "source": [ "distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1),n_neighbors= 6)" ] }, { "cell_type": "code", "execution_count": 67, "id": "7e73f0fc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0. , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,\n", " 77.28518616]])" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distance" ] }, { "cell_type": "code", "execution_count": 68, "id": "514b3304", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[237, 240, 238, 241, 184, 536]], dtype=int64)" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "suggestion" ] }, { "cell_type": "code", "execution_count": 69, "id": "9f912987", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n", " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n", " 'Harry Potter and the Goblet of Fire (Book 4)',\n", " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n", " 'The Cradle Will Fall'],\n", " dtype='object', name='title')\n" ] } ], "source": [ "for i in range(len(suggestion)):\n", " print(book_pivot.index[suggestion[i]])" ] }, { "cell_type": "code", "execution_count": 70, "id": "ee80a699", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'4 Blondes'" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot.index[3]" ] }, { "cell_type": "code", "execution_count": 71, "id": "5bdf8b35", "metadata": {}, "outputs": [], "source": [ "books_names = book_pivot.index" ] }, { "cell_type": "code", "execution_count": 74, "id": "b29d7b24", "metadata": {}, "outputs": [], "source": [ "import pickle \n", "pickle.dump(model, open('artifacts/model.pkl','wb'))\n", 
"pickle.dump(books_names, open('artifacts/books_names.pkl','wb'))\n", "pickle.dump(final_rating,open('artifacts/final_rating.pkl','wb'))\n", "pickle.dump(book_pivot, open('artifacts/book_pivot.pkl','wb'))" ] }, { "cell_type": "code", "execution_count": 76, "id": "8a4bd345", "metadata": {}, "outputs": [], "source": [ "def recommend_book(book_name):\n", " book_id = np.where(book_pivot.index == book_name)[0][0]\n", " distance, suggestion = model.kneighbours(book_pivot.iloc[237,:].values.reshape(1,-1),n_neighbors=6)\n", " for i in range(len(suggestion)):\n", " books = book_pivot.index[suggestion[i]]\n", " for j in books:\n", " print(j)" ] }, { "cell_type": "code", "execution_count": null, "id": "593d138f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }