YAMITEK committed on
Commit
5a413ea
·
verified ·
1 Parent(s): 0f95b52

Upload 11 files

Browse files
Files changed (12) hide show
  1. .gitattributes +3 -0
  2. Books.csv +3 -0
  3. Document.docx +0 -0
  4. Ratings.csv +3 -0
  5. Users.csv +3 -0
  6. app.py +21 -0
  7. book.ipynb +2784 -0
  8. book_names.pkl +3 -0
  9. book_pivot_tale.pkl +3 -0
  10. final_rating.pkl +3 -0
  11. model.pkl +3 -0
  12. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Books.csv filter=lfs diff=lfs merge=lfs -text
37
+ Ratings.csv filter=lfs diff=lfs merge=lfs -text
38
+ Users.csv filter=lfs diff=lfs merge=lfs -text
Books.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a406befc83452ab2dded8050dec47bccb6aa8110edb2512b62d8402f1a3d8dd
3
+ size 73293635
Document.docx ADDED
Binary file (19.8 kB). View file
 
Ratings.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb819408aa8d42e7675200b26d48d66660b6e712bbd82585e5b564053d29de9
3
+ size 22633892
Users.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82fe997ee9bfc455d0090b67d235694b82a9479d03e036665945ac9d30981bc
3
+ size 11017438
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit front end for the book recommender.

Loads the fitted nearest-neighbour model and its supporting artifacts from the
working directory, lets the user pick a title, and shows the titles whose rows
in the pivot table are closest to the selected one.
"""
import streamlit as st
import numpy as np
from joblib import load

# How many titles to show per request. One extra neighbour is requested from the
# model because the selected book is normally returned as its own nearest match.
N_RECOMMENDATIONS = 5

st.title("book recomandation")

# NOTE(review): joblib.load unpickles arbitrary Python objects. These files ship
# with the app; never point these calls at files from an untrusted source.
model = load("model.pkl")
books_name = load("book_names.pkl")
# Not referenced anywhere in this script; still loaded so a missing artifact is
# reported at start-up exactly as before.
final_rating = load("final_rating.pkl")
book_pivot_tale = load("book_pivot_tale.pkl")


def recommended_book(book_name, n_recommendations=N_RECOMMENDATIONS):
    """Return titles similar to ``book_name``.

    Args:
        book_name: Title to look up in the index of ``book_pivot_tale``.
        n_recommendations: Maximum number of titles to return.

    Returns:
        A list of at most ``n_recommendations`` titles, in the order returned by
        the model, with ``book_name`` itself removed. The list is empty when
        ``book_name`` is not present in the pivot table index.
    """
    matches = np.where(book_pivot_tale.index == book_name)[0]
    if matches.size == 0:
        # Unknown title: let the caller report it instead of raising IndexError.
        return []

    # kneighbors expects a 2-D array, hence the reshape of the single row.
    query = book_pivot_tale.iloc[matches[0], :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=n_recommendations + 1)

    # One query row was passed in, so the neighbours are in the first row.
    titles = book_pivot_tale.index[suggestion[0]]
    return [title for title in titles if title != book_name][:n_recommendations]


select_book = st.selectbox("select book", options=books_name)

if st.button("recommend"):
    recommendations = recommended_book(select_book)
    if recommendations:
        st.success(recommendations)
    else:
        st.error("No recommendations are available for the selected book.")
book.ipynb ADDED
@@ -0,0 +1,2784 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## 1.Required packages"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 119,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pandas as pd\n",
17
+ "import numpy as np\n",
18
+ "import matplotlib.pyplot as plt\n",
19
+ "import seaborn as sns\n",
20
+ "from scipy.sparse import csr_matrix\n",
21
+ "from sklearn.neighbors import NearestNeighbors\n",
22
+ "import pickle"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "## 2.Load data"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 84,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stderr",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_8204\\1552887261.py:1: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
42
+ " book_df=pd.read_csv(\"books.csv\")\n"
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "book_df=pd.read_csv(\"books.csv\")\n",
48
+ "users_df = pd.read_csv(\"Users.csv\")\n",
49
+ "rating_df = pd.read_csv(\"Ratings.csv\")"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "markdown",
54
+ "metadata": {},
55
+ "source": [
56
+ "## 3.Basic info"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 85,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "(271360, 8)\n",
69
+ "(278858, 3)\n",
70
+ "(1149780, 3)\n"
71
+ ]
72
+ }
73
+ ],
74
+ "source": [
75
+ "print(book_df.shape)\n",
76
+ "print(users_df.shape)\n",
77
+ "print(rating_df.shape)\n"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 86,
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "<class 'pandas.core.frame.DataFrame'>\n",
90
+ "RangeIndex: 271360 entries, 0 to 271359\n",
91
+ "Data columns (total 8 columns):\n",
92
+ " # Column Non-Null Count Dtype \n",
93
+ "--- ------ -------------- ----- \n",
94
+ " 0 ISBN 271360 non-null object\n",
95
+ " 1 Book-Title 271360 non-null object\n",
96
+ " 2 Book-Author 271358 non-null object\n",
97
+ " 3 Year-Of-Publication 271360 non-null object\n",
98
+ " 4 Publisher 271358 non-null object\n",
99
+ " 5 Image-URL-S 271360 non-null object\n",
100
+ " 6 Image-URL-M 271360 non-null object\n",
101
+ " 7 Image-URL-L 271357 non-null object\n",
102
+ "dtypes: object(8)\n",
103
+ "memory usage: 16.6+ MB\n",
104
+ "None\n",
105
+ "====================================================\n",
106
+ "<class 'pandas.core.frame.DataFrame'>\n",
107
+ "RangeIndex: 278858 entries, 0 to 278857\n",
108
+ "Data columns (total 3 columns):\n",
109
+ " # Column Non-Null Count Dtype \n",
110
+ "--- ------ -------------- ----- \n",
111
+ " 0 User-ID 278858 non-null int64 \n",
112
+ " 1 Location 278858 non-null object \n",
113
+ " 2 Age 168096 non-null float64\n",
114
+ "dtypes: float64(1), int64(1), object(1)\n",
115
+ "memory usage: 6.4+ MB\n",
116
+ "None\n",
117
+ "====================================================\n",
118
+ "<class 'pandas.core.frame.DataFrame'>\n",
119
+ "RangeIndex: 1149780 entries, 0 to 1149779\n",
120
+ "Data columns (total 3 columns):\n",
121
+ " # Column Non-Null Count Dtype \n",
122
+ "--- ------ -------------- ----- \n",
123
+ " 0 User-ID 1149780 non-null int64 \n",
124
+ " 1 ISBN 1149780 non-null object\n",
125
+ " 2 Book-Rating 1149780 non-null int64 \n",
126
+ "dtypes: int64(2), object(1)\n",
127
+ "memory usage: 26.3+ MB\n",
128
+ "None\n",
129
+ "====================================================\n"
130
+ ]
131
+ }
132
+ ],
133
+ "source": [
134
+ "print(book_df.info())\n",
135
+ "print(\"====================================================\")\n",
136
+ "print(users_df.info())\n",
137
+ "print(\"====================================================\")\n",
138
+ "print(rating_df.info())\n",
139
+ "print(\"====================================================\")\n"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "markdown",
144
+ "metadata": {},
145
+ "source": [
146
+ "## 4.Preprocess (EDA)"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 87,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "book_df.drop([\"Image-URL-S\",\"Image-URL-M\",\"Image-URL-L\"],axis=1,inplace=True)"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 88,
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "data": {
165
+ "text/html": [
166
+ "<div>\n",
167
+ "<style scoped>\n",
168
+ " .dataframe tbody tr th:only-of-type {\n",
169
+ " vertical-align: middle;\n",
170
+ " }\n",
171
+ "\n",
172
+ " .dataframe tbody tr th {\n",
173
+ " vertical-align: top;\n",
174
+ " }\n",
175
+ "\n",
176
+ " .dataframe thead th {\n",
177
+ " text-align: right;\n",
178
+ " }\n",
179
+ "</style>\n",
180
+ "<table border=\"1\" class=\"dataframe\">\n",
181
+ " <thead>\n",
182
+ " <tr style=\"text-align: right;\">\n",
183
+ " <th></th>\n",
184
+ " <th>ISBN</th>\n",
185
+ " <th>Book-Title</th>\n",
186
+ " <th>Book-Author</th>\n",
187
+ " <th>Year-Of-Publication</th>\n",
188
+ " <th>Publisher</th>\n",
189
+ " </tr>\n",
190
+ " </thead>\n",
191
+ " <tbody>\n",
192
+ " <tr>\n",
193
+ " <th>0</th>\n",
194
+ " <td>0195153448</td>\n",
195
+ " <td>Classical Mythology</td>\n",
196
+ " <td>Mark P. O. Morford</td>\n",
197
+ " <td>2002</td>\n",
198
+ " <td>Oxford University Press</td>\n",
199
+ " </tr>\n",
200
+ " <tr>\n",
201
+ " <th>1</th>\n",
202
+ " <td>0002005018</td>\n",
203
+ " <td>Clara Callan</td>\n",
204
+ " <td>Richard Bruce Wright</td>\n",
205
+ " <td>2001</td>\n",
206
+ " <td>HarperFlamingo Canada</td>\n",
207
+ " </tr>\n",
208
+ " <tr>\n",
209
+ " <th>2</th>\n",
210
+ " <td>0060973129</td>\n",
211
+ " <td>Decision in Normandy</td>\n",
212
+ " <td>Carlo D'Este</td>\n",
213
+ " <td>1991</td>\n",
214
+ " <td>HarperPerennial</td>\n",
215
+ " </tr>\n",
216
+ " <tr>\n",
217
+ " <th>3</th>\n",
218
+ " <td>0374157065</td>\n",
219
+ " <td>Flu: The Story of the Great Influenza Pandemic...</td>\n",
220
+ " <td>Gina Bari Kolata</td>\n",
221
+ " <td>1999</td>\n",
222
+ " <td>Farrar Straus Giroux</td>\n",
223
+ " </tr>\n",
224
+ " <tr>\n",
225
+ " <th>4</th>\n",
226
+ " <td>0393045218</td>\n",
227
+ " <td>The Mummies of Urumchi</td>\n",
228
+ " <td>E. J. W. Barber</td>\n",
229
+ " <td>1999</td>\n",
230
+ " <td>W. W. Norton &amp;amp; Company</td>\n",
231
+ " </tr>\n",
232
+ " </tbody>\n",
233
+ "</table>\n",
234
+ "</div>"
235
+ ],
236
+ "text/plain": [
237
+ " ISBN Book-Title \\\n",
238
+ "0 0195153448 Classical Mythology \n",
239
+ "1 0002005018 Clara Callan \n",
240
+ "2 0060973129 Decision in Normandy \n",
241
+ "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
242
+ "4 0393045218 The Mummies of Urumchi \n",
243
+ "\n",
244
+ " Book-Author Year-Of-Publication Publisher \n",
245
+ "0 Mark P. O. Morford 2002 Oxford University Press \n",
246
+ "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
247
+ "2 Carlo D'Este 1991 HarperPerennial \n",
248
+ "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
249
+ "4 E. J. W. Barber 1999 W. W. Norton &amp; Company "
250
+ ]
251
+ },
252
+ "execution_count": 88,
253
+ "metadata": {},
254
+ "output_type": "execute_result"
255
+ }
256
+ ],
257
+ "source": [
258
+ "book_df.head()"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 89,
264
+ "metadata": {},
265
+ "outputs": [
266
+ {
267
+ "data": {
268
+ "text/html": [
269
+ "<div>\n",
270
+ "<style scoped>\n",
271
+ " .dataframe tbody tr th:only-of-type {\n",
272
+ " vertical-align: middle;\n",
273
+ " }\n",
274
+ "\n",
275
+ " .dataframe tbody tr th {\n",
276
+ " vertical-align: top;\n",
277
+ " }\n",
278
+ "\n",
279
+ " .dataframe thead th {\n",
280
+ " text-align: right;\n",
281
+ " }\n",
282
+ "</style>\n",
283
+ "<table border=\"1\" class=\"dataframe\">\n",
284
+ " <thead>\n",
285
+ " <tr style=\"text-align: right;\">\n",
286
+ " <th></th>\n",
287
+ " <th>User-ID</th>\n",
288
+ " <th>Location</th>\n",
289
+ " <th>Age</th>\n",
290
+ " </tr>\n",
291
+ " </thead>\n",
292
+ " <tbody>\n",
293
+ " <tr>\n",
294
+ " <th>0</th>\n",
295
+ " <td>1</td>\n",
296
+ " <td>nyc, new york, usa</td>\n",
297
+ " <td>NaN</td>\n",
298
+ " </tr>\n",
299
+ " <tr>\n",
300
+ " <th>1</th>\n",
301
+ " <td>2</td>\n",
302
+ " <td>stockton, california, usa</td>\n",
303
+ " <td>18.0</td>\n",
304
+ " </tr>\n",
305
+ " <tr>\n",
306
+ " <th>2</th>\n",
307
+ " <td>3</td>\n",
308
+ " <td>moscow, yukon territory, russia</td>\n",
309
+ " <td>NaN</td>\n",
310
+ " </tr>\n",
311
+ " <tr>\n",
312
+ " <th>3</th>\n",
313
+ " <td>4</td>\n",
314
+ " <td>porto, v.n.gaia, portugal</td>\n",
315
+ " <td>17.0</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>4</th>\n",
319
+ " <td>5</td>\n",
320
+ " <td>farnborough, hants, united kingdom</td>\n",
321
+ " <td>NaN</td>\n",
322
+ " </tr>\n",
323
+ " </tbody>\n",
324
+ "</table>\n",
325
+ "</div>"
326
+ ],
327
+ "text/plain": [
328
+ " User-ID Location Age\n",
329
+ "0 1 nyc, new york, usa NaN\n",
330
+ "1 2 stockton, california, usa 18.0\n",
331
+ "2 3 moscow, yukon territory, russia NaN\n",
332
+ "3 4 porto, v.n.gaia, portugal 17.0\n",
333
+ "4 5 farnborough, hants, united kingdom NaN"
334
+ ]
335
+ },
336
+ "execution_count": 89,
337
+ "metadata": {},
338
+ "output_type": "execute_result"
339
+ }
340
+ ],
341
+ "source": [
342
+ "users_df.head(5)"
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "markdown",
347
+ "metadata": {},
348
+ "source": [
349
+ "## 4.1 checking for null values"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": 90,
355
+ "metadata": {},
356
+ "outputs": [
357
+ {
358
+ "data": {
359
+ "text/plain": [
360
+ "User-ID 0\n",
361
+ "Location 0\n",
362
+ "Age 110762\n",
363
+ "dtype: int64"
364
+ ]
365
+ },
366
+ "execution_count": 90,
367
+ "metadata": {},
368
+ "output_type": "execute_result"
369
+ }
370
+ ],
371
+ "source": [
372
+ "users_df.isnull().sum()"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": 91,
378
+ "metadata": {},
379
+ "outputs": [
380
+ {
381
+ "data": {
382
+ "text/plain": [
383
+ "278858"
384
+ ]
385
+ },
386
+ "execution_count": 91,
387
+ "metadata": {},
388
+ "output_type": "execute_result"
389
+ }
390
+ ],
391
+ "source": [
392
+ "users_df[\"User-ID\"].nunique()"
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "markdown",
397
+ "metadata": {},
398
+ "source": [
399
+ "- total 278858 users."
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 92,
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "data": {
409
+ "text/html": [
410
+ "<div>\n",
411
+ "<style scoped>\n",
412
+ " .dataframe tbody tr th:only-of-type {\n",
413
+ " vertical-align: middle;\n",
414
+ " }\n",
415
+ "\n",
416
+ " .dataframe tbody tr th {\n",
417
+ " vertical-align: top;\n",
418
+ " }\n",
419
+ "\n",
420
+ " .dataframe thead th {\n",
421
+ " text-align: right;\n",
422
+ " }\n",
423
+ "</style>\n",
424
+ "<table border=\"1\" class=\"dataframe\">\n",
425
+ " <thead>\n",
426
+ " <tr style=\"text-align: right;\">\n",
427
+ " <th></th>\n",
428
+ " <th>User-ID</th>\n",
429
+ " <th>ISBN</th>\n",
430
+ " <th>Book-Rating</th>\n",
431
+ " </tr>\n",
432
+ " </thead>\n",
433
+ " <tbody>\n",
434
+ " <tr>\n",
435
+ " <th>0</th>\n",
436
+ " <td>276725</td>\n",
437
+ " <td>034545104X</td>\n",
438
+ " <td>0</td>\n",
439
+ " </tr>\n",
440
+ " <tr>\n",
441
+ " <th>1</th>\n",
442
+ " <td>276726</td>\n",
443
+ " <td>0155061224</td>\n",
444
+ " <td>5</td>\n",
445
+ " </tr>\n",
446
+ " </tbody>\n",
447
+ "</table>\n",
448
+ "</div>"
449
+ ],
450
+ "text/plain": [
451
+ " User-ID ISBN Book-Rating\n",
452
+ "0 276725 034545104X 0\n",
453
+ "1 276726 0155061224 5"
454
+ ]
455
+ },
456
+ "execution_count": 92,
457
+ "metadata": {},
458
+ "output_type": "execute_result"
459
+ }
460
+ ],
461
+ "source": [
462
+ "rating_df.head(2)"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 96,
468
+ "metadata": {},
469
+ "outputs": [
470
+ {
471
+ "data": {
472
+ "text/plain": [
473
+ "User-ID\n",
474
+ "11676 13602\n",
475
+ "198711 7550\n",
476
+ "153662 6109\n",
477
+ "98391 5891\n",
478
+ "35859 5850\n",
479
+ " ... \n",
480
+ "274808 201\n",
481
+ "28634 201\n",
482
+ "59727 201\n",
483
+ "268622 201\n",
484
+ "188951 201\n",
485
+ "Name: count, Length: 899, dtype: int64"
486
+ ]
487
+ },
488
+ "execution_count": 96,
489
+ "metadata": {},
490
+ "output_type": "execute_result"
491
+ }
492
+ ],
493
+ "source": [
494
+ "review_count=rating_df[\"User-ID\"].value_counts()\n",
495
+ "msot_review_users=review_count[review_count>200]\n",
496
+ "msot_review_users"
497
+ ]
498
+ },
499
+ {
500
+ "cell_type": "markdown",
501
+ "metadata": {},
502
+ "source": [
503
+ "## 4.2 Finding users who reviewed more than 200 books"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": 97,
509
+ "metadata": {},
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/html": [
514
+ "<div>\n",
515
+ "<style scoped>\n",
516
+ " .dataframe tbody tr th:only-of-type {\n",
517
+ " vertical-align: middle;\n",
518
+ " }\n",
519
+ "\n",
520
+ " .dataframe tbody tr th {\n",
521
+ " vertical-align: top;\n",
522
+ " }\n",
523
+ "\n",
524
+ " .dataframe thead th {\n",
525
+ " text-align: right;\n",
526
+ " }\n",
527
+ "</style>\n",
528
+ "<table border=\"1\" class=\"dataframe\">\n",
529
+ " <thead>\n",
530
+ " <tr style=\"text-align: right;\">\n",
531
+ " <th></th>\n",
532
+ " <th>User-ID</th>\n",
533
+ " <th>ISBN</th>\n",
534
+ " <th>Book-Rating</th>\n",
535
+ " </tr>\n",
536
+ " </thead>\n",
537
+ " <tbody>\n",
538
+ " <tr>\n",
539
+ " <th>1456</th>\n",
540
+ " <td>277427</td>\n",
541
+ " <td>002542730X</td>\n",
542
+ " <td>10</td>\n",
543
+ " </tr>\n",
544
+ " <tr>\n",
545
+ " <th>1457</th>\n",
546
+ " <td>277427</td>\n",
547
+ " <td>0026217457</td>\n",
548
+ " <td>0</td>\n",
549
+ " </tr>\n",
550
+ " <tr>\n",
551
+ " <th>1458</th>\n",
552
+ " <td>277427</td>\n",
553
+ " <td>003008685X</td>\n",
554
+ " <td>8</td>\n",
555
+ " </tr>\n",
556
+ " <tr>\n",
557
+ " <th>1459</th>\n",
558
+ " <td>277427</td>\n",
559
+ " <td>0030615321</td>\n",
560
+ " <td>0</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>1460</th>\n",
564
+ " <td>277427</td>\n",
565
+ " <td>0060002050</td>\n",
566
+ " <td>0</td>\n",
567
+ " </tr>\n",
568
+ " </tbody>\n",
569
+ "</table>\n",
570
+ "</div>"
571
+ ],
572
+ "text/plain": [
573
+ " User-ID ISBN Book-Rating\n",
574
+ "1456 277427 002542730X 10\n",
575
+ "1457 277427 0026217457 0\n",
576
+ "1458 277427 003008685X 8\n",
577
+ "1459 277427 0030615321 0\n",
578
+ "1460 277427 0060002050 0"
579
+ ]
580
+ },
581
+ "execution_count": 97,
582
+ "metadata": {},
583
+ "output_type": "execute_result"
584
+ }
585
+ ],
586
+ "source": [
587
+ "# these are the ratings from users who rated more than 200 books\n",
588
+ "ratings=rating_df[rating_df[\"User-ID\"].isin(msot_review_users.index)]\n",
589
+ "ratings.head()"
590
+ ]
591
+ },
592
+ {
593
+ "cell_type": "code",
594
+ "execution_count": 98,
595
+ "metadata": {},
596
+ "outputs": [
597
+ {
598
+ "data": {
599
+ "text/plain": [
600
+ "(526356, 3)"
601
+ ]
602
+ },
603
+ "execution_count": 98,
604
+ "metadata": {},
605
+ "output_type": "execute_result"
606
+ }
607
+ ],
608
+ "source": [
609
+ "ratings.shape"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "execution_count": 99,
615
+ "metadata": {},
616
+ "outputs": [
617
+ {
618
+ "data": {
619
+ "text/html": [
620
+ "<div>\n",
621
+ "<style scoped>\n",
622
+ " .dataframe tbody tr th:only-of-type {\n",
623
+ " vertical-align: middle;\n",
624
+ " }\n",
625
+ "\n",
626
+ " .dataframe tbody tr th {\n",
627
+ " vertical-align: top;\n",
628
+ " }\n",
629
+ "\n",
630
+ " .dataframe thead th {\n",
631
+ " text-align: right;\n",
632
+ " }\n",
633
+ "</style>\n",
634
+ "<table border=\"1\" class=\"dataframe\">\n",
635
+ " <thead>\n",
636
+ " <tr style=\"text-align: right;\">\n",
637
+ " <th></th>\n",
638
+ " <th>ISBN</th>\n",
639
+ " <th>Book-Title</th>\n",
640
+ " <th>Book-Author</th>\n",
641
+ " <th>Year-Of-Publication</th>\n",
642
+ " <th>Publisher</th>\n",
643
+ " </tr>\n",
644
+ " </thead>\n",
645
+ " <tbody>\n",
646
+ " <tr>\n",
647
+ " <th>0</th>\n",
648
+ " <td>0195153448</td>\n",
649
+ " <td>Classical Mythology</td>\n",
650
+ " <td>Mark P. O. Morford</td>\n",
651
+ " <td>2002</td>\n",
652
+ " <td>Oxford University Press</td>\n",
653
+ " </tr>\n",
654
+ " <tr>\n",
655
+ " <th>1</th>\n",
656
+ " <td>0002005018</td>\n",
657
+ " <td>Clara Callan</td>\n",
658
+ " <td>Richard Bruce Wright</td>\n",
659
+ " <td>2001</td>\n",
660
+ " <td>HarperFlamingo Canada</td>\n",
661
+ " </tr>\n",
662
+ " <tr>\n",
663
+ " <th>2</th>\n",
664
+ " <td>0060973129</td>\n",
665
+ " <td>Decision in Normandy</td>\n",
666
+ " <td>Carlo D'Este</td>\n",
667
+ " <td>1991</td>\n",
668
+ " <td>HarperPerennial</td>\n",
669
+ " </tr>\n",
670
+ " <tr>\n",
671
+ " <th>3</th>\n",
672
+ " <td>0374157065</td>\n",
673
+ " <td>Flu: The Story of the Great Influenza Pandemic...</td>\n",
674
+ " <td>Gina Bari Kolata</td>\n",
675
+ " <td>1999</td>\n",
676
+ " <td>Farrar Straus Giroux</td>\n",
677
+ " </tr>\n",
678
+ " <tr>\n",
679
+ " <th>4</th>\n",
680
+ " <td>0393045218</td>\n",
681
+ " <td>The Mummies of Urumchi</td>\n",
682
+ " <td>E. J. W. Barber</td>\n",
683
+ " <td>1999</td>\n",
684
+ " <td>W. W. Norton &amp;amp; Company</td>\n",
685
+ " </tr>\n",
686
+ " </tbody>\n",
687
+ "</table>\n",
688
+ "</div>"
689
+ ],
690
+ "text/plain": [
691
+ " ISBN Book-Title \\\n",
692
+ "0 0195153448 Classical Mythology \n",
693
+ "1 0002005018 Clara Callan \n",
694
+ "2 0060973129 Decision in Normandy \n",
695
+ "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
696
+ "4 0393045218 The Mummies of Urumchi \n",
697
+ "\n",
698
+ " Book-Author Year-Of-Publication Publisher \n",
699
+ "0 Mark P. O. Morford 2002 Oxford University Press \n",
700
+ "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
701
+ "2 Carlo D'Este 1991 HarperPerennial \n",
702
+ "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
703
+ "4 E. J. W. Barber 1999 W. W. Norton &amp; Company "
704
+ ]
705
+ },
706
+ "execution_count": 99,
707
+ "metadata": {},
708
+ "output_type": "execute_result"
709
+ }
710
+ ],
711
+ "source": [
712
+ "book_df.head()"
713
+ ]
714
+ },
715
+ {
716
+ "cell_type": "code",
717
+ "execution_count": 100,
718
+ "metadata": {},
719
+ "outputs": [
720
+ {
721
+ "data": {
722
+ "text/html": [
723
+ "<div>\n",
724
+ "<style scoped>\n",
725
+ " .dataframe tbody tr th:only-of-type {\n",
726
+ " vertical-align: middle;\n",
727
+ " }\n",
728
+ "\n",
729
+ " .dataframe tbody tr th {\n",
730
+ " vertical-align: top;\n",
731
+ " }\n",
732
+ "\n",
733
+ " .dataframe thead th {\n",
734
+ " text-align: right;\n",
735
+ " }\n",
736
+ "</style>\n",
737
+ "<table border=\"1\" class=\"dataframe\">\n",
738
+ " <thead>\n",
739
+ " <tr style=\"text-align: right;\">\n",
740
+ " <th></th>\n",
741
+ " <th>User-ID</th>\n",
742
+ " <th>ISBN</th>\n",
743
+ " <th>Book-Rating</th>\n",
744
+ " <th>Book-Title</th>\n",
745
+ " <th>Book-Author</th>\n",
746
+ " <th>Year-Of-Publication</th>\n",
747
+ " <th>Publisher</th>\n",
748
+ " </tr>\n",
749
+ " </thead>\n",
750
+ " <tbody>\n",
751
+ " <tr>\n",
752
+ " <th>0</th>\n",
753
+ " <td>277427</td>\n",
754
+ " <td>002542730X</td>\n",
755
+ " <td>10</td>\n",
756
+ " <td>Politically Correct Bedtime Stories: Modern Ta...</td>\n",
757
+ " <td>James Finn Garner</td>\n",
758
+ " <td>1994</td>\n",
759
+ " <td>John Wiley &amp;amp; Sons Inc</td>\n",
760
+ " </tr>\n",
761
+ " <tr>\n",
762
+ " <th>1</th>\n",
763
+ " <td>277427</td>\n",
764
+ " <td>0026217457</td>\n",
765
+ " <td>0</td>\n",
766
+ " <td>Vegetarian Times Complete Cookbook</td>\n",
767
+ " <td>Lucy Moll</td>\n",
768
+ " <td>1995</td>\n",
769
+ " <td>John Wiley &amp;amp; Sons</td>\n",
770
+ " </tr>\n",
771
+ " </tbody>\n",
772
+ "</table>\n",
773
+ "</div>"
774
+ ],
775
+ "text/plain": [
776
+ " User-ID ISBN Book-Rating \\\n",
777
+ "0 277427 002542730X 10 \n",
778
+ "1 277427 0026217457 0 \n",
779
+ "\n",
780
+ " Book-Title Book-Author \\\n",
781
+ "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n",
782
+ "1 Vegetarian Times Complete Cookbook Lucy Moll \n",
783
+ "\n",
784
+ " Year-Of-Publication Publisher \n",
785
+ "0 1994 John Wiley &amp; Sons Inc \n",
786
+ "1 1995 John Wiley &amp; Sons "
787
+ ]
788
+ },
789
+ "execution_count": 100,
790
+ "metadata": {},
791
+ "output_type": "execute_result"
792
+ }
793
+ ],
794
+ "source": [
795
+ "rating_with_books = ratings.merge(book_df, on=\"ISBN\")\n",
796
+ "rating_with_books.head(2)"
797
+ ]
798
+ },
799
+ {
800
+ "cell_type": "markdown",
801
+ "metadata": {},
802
+ "source": [
803
+ "## 4.3 Counting the ratings for each book"
804
+ ]
805
+ },
806
+ {
807
+ "cell_type": "code",
808
+ "execution_count": null,
809
+ "metadata": {},
810
+ "outputs": [
811
+ {
812
+ "data": {
813
+ "text/html": [
814
+ "<div>\n",
815
+ "<style scoped>\n",
816
+ " .dataframe tbody tr th:only-of-type {\n",
817
+ " vertical-align: middle;\n",
818
+ " }\n",
819
+ "\n",
820
+ " .dataframe tbody tr th {\n",
821
+ " vertical-align: top;\n",
822
+ " }\n",
823
+ "\n",
824
+ " .dataframe thead th {\n",
825
+ " text-align: right;\n",
826
+ " }\n",
827
+ "</style>\n",
828
+ "<table border=\"1\" class=\"dataframe\">\n",
829
+ " <thead>\n",
830
+ " <tr style=\"text-align: right;\">\n",
831
+ " <th></th>\n",
832
+ " <th>Book-Title</th>\n",
833
+ " <th>num_of_rating</th>\n",
834
+ " </tr>\n",
835
+ " </thead>\n",
836
+ " <tbody>\n",
837
+ " <tr>\n",
838
+ " <th>0</th>\n",
839
+ " <td>A Light in the Storm: The Civil War Diary of ...</td>\n",
840
+ " <td>2</td>\n",
841
+ " </tr>\n",
842
+ " <tr>\n",
843
+ " <th>1</th>\n",
844
+ " <td>Always Have Popsicles</td>\n",
845
+ " <td>1</td>\n",
846
+ " </tr>\n",
847
+ " <tr>\n",
848
+ " <th>2</th>\n",
849
+ " <td>Apple Magic (The Collector's series)</td>\n",
850
+ " <td>1</td>\n",
851
+ " </tr>\n",
852
+ " <tr>\n",
853
+ " <th>3</th>\n",
854
+ " <td>Beyond IBM: Leadership Marketing and Finance ...</td>\n",
855
+ " <td>1</td>\n",
856
+ " </tr>\n",
857
+ " <tr>\n",
858
+ " <th>4</th>\n",
859
+ " <td>Clifford Visita El Hospital (Clifford El Gran...</td>\n",
860
+ " <td>1</td>\n",
861
+ " </tr>\n",
862
+ " </tbody>\n",
863
+ "</table>\n",
864
+ "</div>"
865
+ ],
866
+ "text/plain": [
867
+ " Book-Title num_of_rating\n",
868
+ "0 A Light in the Storm: The Civil War Diary of ... 2\n",
869
+ "1 Always Have Popsicles 1\n",
870
+ "2 Apple Magic (The Collector's series) 1\n",
871
+ "3 Beyond IBM: Leadership Marketing and Finance ... 1\n",
872
+ "4 Clifford Visita El Hospital (Clifford El Gran... 1"
873
+ ]
874
+ },
875
+ "execution_count": 102,
876
+ "metadata": {},
877
+ "output_type": "execute_result"
878
+ }
879
+ ],
880
+ "source": [
881
+ "num_rating = rating_with_books.groupby(\"Book-Title\")[\"Book-Rating\"].count().reset_index()\n",
882
+ "num_rating.rename(columns={\"Book-Rating\":\"num_of_rating\"},inplace=True)\n",
883
+ "num_rating.head(5)"
884
+ ]
885
+ },
886
+ {
887
+ "cell_type": "markdown",
888
+ "metadata": {},
889
+ "source": [
890
+ "## 4.4 Merge rating with book and number of ratings"
891
+ ]
892
+ },
893
+ {
894
+ "cell_type": "code",
895
+ "execution_count": 103,
896
+ "metadata": {},
897
+ "outputs": [
898
+ {
899
+ "data": {
900
+ "text/html": [
901
+ "<div>\n",
902
+ "<style scoped>\n",
903
+ " .dataframe tbody tr th:only-of-type {\n",
904
+ " vertical-align: middle;\n",
905
+ " }\n",
906
+ "\n",
907
+ " .dataframe tbody tr th {\n",
908
+ " vertical-align: top;\n",
909
+ " }\n",
910
+ "\n",
911
+ " .dataframe thead th {\n",
912
+ " text-align: right;\n",
913
+ " }\n",
914
+ "</style>\n",
915
+ "<table border=\"1\" class=\"dataframe\">\n",
916
+ " <thead>\n",
917
+ " <tr style=\"text-align: right;\">\n",
918
+ " <th></th>\n",
919
+ " <th>User-ID</th>\n",
920
+ " <th>ISBN</th>\n",
921
+ " <th>Book-Rating</th>\n",
922
+ " <th>Book-Title</th>\n",
923
+ " <th>Book-Author</th>\n",
924
+ " <th>Year-Of-Publication</th>\n",
925
+ " <th>Publisher</th>\n",
926
+ " <th>num_of_rating</th>\n",
927
+ " </tr>\n",
928
+ " </thead>\n",
929
+ " <tbody>\n",
930
+ " <tr>\n",
931
+ " <th>0</th>\n",
932
+ " <td>277427</td>\n",
933
+ " <td>002542730X</td>\n",
934
+ " <td>10</td>\n",
935
+ " <td>Politically Correct Bedtime Stories: Modern Ta...</td>\n",
936
+ " <td>James Finn Garner</td>\n",
937
+ " <td>1994</td>\n",
938
+ " <td>John Wiley &amp;amp; Sons Inc</td>\n",
939
+ " <td>82</td>\n",
940
+ " </tr>\n",
941
+ " <tr>\n",
942
+ " <th>1</th>\n",
943
+ " <td>277427</td>\n",
944
+ " <td>0026217457</td>\n",
945
+ " <td>0</td>\n",
946
+ " <td>Vegetarian Times Complete Cookbook</td>\n",
947
+ " <td>Lucy Moll</td>\n",
948
+ " <td>1995</td>\n",
949
+ " <td>John Wiley &amp;amp; Sons</td>\n",
950
+ " <td>7</td>\n",
951
+ " </tr>\n",
952
+ " <tr>\n",
953
+ " <th>2</th>\n",
954
+ " <td>277427</td>\n",
955
+ " <td>003008685X</td>\n",
956
+ " <td>8</td>\n",
957
+ " <td>Pioneers</td>\n",
958
+ " <td>James Fenimore Cooper</td>\n",
959
+ " <td>1974</td>\n",
960
+ " <td>Thomson Learning</td>\n",
961
+ " <td>1</td>\n",
962
+ " </tr>\n",
963
+ " <tr>\n",
964
+ " <th>3</th>\n",
965
+ " <td>277427</td>\n",
966
+ " <td>0030615321</td>\n",
967
+ " <td>0</td>\n",
968
+ " <td>Ask for May, Settle for June (A Doonesbury book)</td>\n",
969
+ " <td>G. B. Trudeau</td>\n",
970
+ " <td>1982</td>\n",
971
+ " <td>Henry Holt &amp;amp; Co</td>\n",
972
+ " <td>1</td>\n",
973
+ " </tr>\n",
974
+ " <tr>\n",
975
+ " <th>4</th>\n",
976
+ " <td>277427</td>\n",
977
+ " <td>0060002050</td>\n",
978
+ " <td>0</td>\n",
979
+ " <td>On a Wicked Dawn (Cynster Novels)</td>\n",
980
+ " <td>Stephanie Laurens</td>\n",
981
+ " <td>2002</td>\n",
982
+ " <td>Avon Books</td>\n",
983
+ " <td>13</td>\n",
984
+ " </tr>\n",
985
+ " </tbody>\n",
986
+ "</table>\n",
987
+ "</div>"
988
+ ],
989
+ "text/plain": [
990
+ " User-ID ISBN Book-Rating \\\n",
991
+ "0 277427 002542730X 10 \n",
992
+ "1 277427 0026217457 0 \n",
993
+ "2 277427 003008685X 8 \n",
994
+ "3 277427 0030615321 0 \n",
995
+ "4 277427 0060002050 0 \n",
996
+ "\n",
997
+ " Book-Title Book-Author \\\n",
998
+ "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n",
999
+ "1 Vegetarian Times Complete Cookbook Lucy Moll \n",
1000
+ "2 Pioneers James Fenimore Cooper \n",
1001
+ "3 Ask for May, Settle for June (A Doonesbury book) G. B. Trudeau \n",
1002
+ "4 On a Wicked Dawn (Cynster Novels) Stephanie Laurens \n",
1003
+ "\n",
1004
+ " Year-Of-Publication Publisher num_of_rating \n",
1005
+ "0 1994 John Wiley &amp; Sons Inc 82 \n",
1006
+ "1 1995 John Wiley &amp; Sons 7 \n",
1007
+ "2 1974 Thomson Learning 1 \n",
1008
+ "3 1982 Henry Holt &amp; Co 1 \n",
1009
+ "4 2002 Avon Books 13 "
1010
+ ]
1011
+ },
1012
+ "execution_count": 103,
1013
+ "metadata": {},
1014
+ "output_type": "execute_result"
1015
+ }
1016
+ ],
1017
+ "source": [
1018
+ "final_rating=rating_with_books.merge(num_rating, on=\"Book-Title\")\n",
1019
+ "final_rating.head()"
1020
+ ]
1021
+ },
1022
+ {
1023
+ "cell_type": "code",
1024
+ "execution_count": 104,
1025
+ "metadata": {},
1026
+ "outputs": [
1027
+ {
1028
+ "data": {
1029
+ "text/plain": [
1030
+ "(487671, 8)"
1031
+ ]
1032
+ },
1033
+ "execution_count": 104,
1034
+ "metadata": {},
1035
+ "output_type": "execute_result"
1036
+ }
1037
+ ],
1038
+ "source": [
1039
+ "final_rating.shape"
1040
+ ]
1041
+ },
1042
+ {
1043
+ "cell_type": "code",
1044
+ "execution_count": 105,
1045
+ "metadata": {},
1046
+ "outputs": [
1047
+ {
1048
+ "data": {
1049
+ "text/html": [
1050
+ "<div>\n",
1051
+ "<style scoped>\n",
1052
+ " .dataframe tbody tr th:only-of-type {\n",
1053
+ " vertical-align: middle;\n",
1054
+ " }\n",
1055
+ "\n",
1056
+ " .dataframe tbody tr th {\n",
1057
+ " vertical-align: top;\n",
1058
+ " }\n",
1059
+ "\n",
1060
+ " .dataframe thead th {\n",
1061
+ " text-align: right;\n",
1062
+ " }\n",
1063
+ "</style>\n",
1064
+ "<table border=\"1\" class=\"dataframe\">\n",
1065
+ " <thead>\n",
1066
+ " <tr style=\"text-align: right;\">\n",
1067
+ " <th></th>\n",
1068
+ " <th>User-ID</th>\n",
1069
+ " <th>ISBN</th>\n",
1070
+ " <th>Book-Rating</th>\n",
1071
+ " <th>Book-Title</th>\n",
1072
+ " <th>Book-Author</th>\n",
1073
+ " <th>Year-Of-Publication</th>\n",
1074
+ " <th>Publisher</th>\n",
1075
+ " <th>num_of_rating</th>\n",
1076
+ " </tr>\n",
1077
+ " </thead>\n",
1078
+ " <tbody>\n",
1079
+ " <tr>\n",
1080
+ " <th>0</th>\n",
1081
+ " <td>277427</td>\n",
1082
+ " <td>002542730X</td>\n",
1083
+ " <td>10</td>\n",
1084
+ " <td>Politically Correct Bedtime Stories: Modern Ta...</td>\n",
1085
+ " <td>James Finn Garner</td>\n",
1086
+ " <td>1994</td>\n",
1087
+ " <td>John Wiley &amp;amp; Sons Inc</td>\n",
1088
+ " <td>82</td>\n",
1089
+ " </tr>\n",
1090
+ " <tr>\n",
1091
+ " <th>13</th>\n",
1092
+ " <td>277427</td>\n",
1093
+ " <td>0060930535</td>\n",
1094
+ " <td>0</td>\n",
1095
+ " <td>The Poisonwood Bible: A Novel</td>\n",
1096
+ " <td>Barbara Kingsolver</td>\n",
1097
+ " <td>1999</td>\n",
1098
+ " <td>Perennial</td>\n",
1099
+ " <td>133</td>\n",
1100
+ " </tr>\n",
1101
+ " <tr>\n",
1102
+ " <th>15</th>\n",
1103
+ " <td>277427</td>\n",
1104
+ " <td>0060934417</td>\n",
1105
+ " <td>0</td>\n",
1106
+ " <td>Bel Canto: A Novel</td>\n",
1107
+ " <td>Ann Patchett</td>\n",
1108
+ " <td>2002</td>\n",
1109
+ " <td>Perennial</td>\n",
1110
+ " <td>108</td>\n",
1111
+ " </tr>\n",
1112
+ " </tbody>\n",
1113
+ "</table>\n",
1114
+ "</div>"
1115
+ ],
1116
+ "text/plain": [
1117
+ " User-ID ISBN Book-Rating \\\n",
1118
+ "0 277427 002542730X 10 \n",
1119
+ "13 277427 0060930535 0 \n",
1120
+ "15 277427 0060934417 0 \n",
1121
+ "\n",
1122
+ " Book-Title Book-Author \\\n",
1123
+ "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n",
1124
+ "13 The Poisonwood Bible: A Novel Barbara Kingsolver \n",
1125
+ "15 Bel Canto: A Novel Ann Patchett \n",
1126
+ "\n",
1127
+ " Year-Of-Publication Publisher num_of_rating \n",
1128
+ "0 1994 John Wiley &amp; Sons Inc 82 \n",
1129
+ "13 1999 Perennial 133 \n",
1130
+ "15 2002 Perennial 108 "
1131
+ ]
1132
+ },
1133
+ "execution_count": 105,
1134
+ "metadata": {},
1135
+ "output_type": "execute_result"
1136
+ }
1137
+ ],
1138
+ "source": [
1139
+ "final_rating = final_rating[final_rating[\"num_of_rating\"]>=50]\n",
1140
+ "final_rating.head(3)"
1141
+ ]
1142
+ },
1143
+ {
1144
+ "cell_type": "code",
1145
+ "execution_count": 106,
1146
+ "metadata": {},
1147
+ "outputs": [
1148
+ {
1149
+ "data": {
1150
+ "text/plain": [
1151
+ "(61853, 8)"
1152
+ ]
1153
+ },
1154
+ "execution_count": 106,
1155
+ "metadata": {},
1156
+ "output_type": "execute_result"
1157
+ }
1158
+ ],
1159
+ "source": [
1160
+ "final_rating.shape"
1161
+ ]
1162
+ },
1163
+ {
1164
+ "cell_type": "code",
1165
+ "execution_count": 108,
1166
+ "metadata": {},
1167
+ "outputs": [],
1168
+ "source": [
1169
+ "final_rating.drop_duplicates([\"User-ID\",\"Book-Title\"],inplace=True)"
1170
+ ]
1171
+ },
1172
+ {
1173
+ "cell_type": "code",
1174
+ "execution_count": null,
1175
+ "metadata": {},
1176
+ "outputs": [
1177
+ {
1178
+ "data": {
1179
+ "text/plain": [
1180
+ "(59850, 8)"
1181
+ ]
1182
+ },
1183
+ "execution_count": 109,
1184
+ "metadata": {},
1185
+ "output_type": "execute_result"
1186
+ }
1187
+ ],
1188
+ "source": [
1189
+ "final_rating.shape"
1190
+ ]
1191
+ },
1192
+ {
1193
+ "cell_type": "markdown",
1194
+ "metadata": {},
1195
+ "source": [
1196
+ "## 4.5 Creating the pivot table"
1197
+ ]
1198
+ },
1199
+ {
1200
+ "cell_type": "code",
1201
+ "execution_count": 110,
1202
+ "metadata": {},
1203
+ "outputs": [
1204
+ {
1205
+ "data": {
1206
+ "text/html": [
1207
+ "<div>\n",
1208
+ "<style scoped>\n",
1209
+ " .dataframe tbody tr th:only-of-type {\n",
1210
+ " vertical-align: middle;\n",
1211
+ " }\n",
1212
+ "\n",
1213
+ " .dataframe tbody tr th {\n",
1214
+ " vertical-align: top;\n",
1215
+ " }\n",
1216
+ "\n",
1217
+ " .dataframe thead th {\n",
1218
+ " text-align: right;\n",
1219
+ " }\n",
1220
+ "</style>\n",
1221
+ "<table border=\"1\" class=\"dataframe\">\n",
1222
+ " <thead>\n",
1223
+ " <tr style=\"text-align: right;\">\n",
1224
+ " <th>User-ID</th>\n",
1225
+ " <th>254</th>\n",
1226
+ " <th>2276</th>\n",
1227
+ " <th>2766</th>\n",
1228
+ " <th>2977</th>\n",
1229
+ " <th>3363</th>\n",
1230
+ " <th>3757</th>\n",
1231
+ " <th>4017</th>\n",
1232
+ " <th>4385</th>\n",
1233
+ " <th>6242</th>\n",
1234
+ " <th>6251</th>\n",
1235
+ " <th>...</th>\n",
1236
+ " <th>274004</th>\n",
1237
+ " <th>274061</th>\n",
1238
+ " <th>274301</th>\n",
1239
+ " <th>274308</th>\n",
1240
+ " <th>274808</th>\n",
1241
+ " <th>275970</th>\n",
1242
+ " <th>277427</th>\n",
1243
+ " <th>277478</th>\n",
1244
+ " <th>277639</th>\n",
1245
+ " <th>278418</th>\n",
1246
+ " </tr>\n",
1247
+ " <tr>\n",
1248
+ " <th>Book-Title</th>\n",
1249
+ " <th></th>\n",
1250
+ " <th></th>\n",
1251
+ " <th></th>\n",
1252
+ " <th></th>\n",
1253
+ " <th></th>\n",
1254
+ " <th></th>\n",
1255
+ " <th></th>\n",
1256
+ " <th></th>\n",
1257
+ " <th></th>\n",
1258
+ " <th></th>\n",
1259
+ " <th></th>\n",
1260
+ " <th></th>\n",
1261
+ " <th></th>\n",
1262
+ " <th></th>\n",
1263
+ " <th></th>\n",
1264
+ " <th></th>\n",
1265
+ " <th></th>\n",
1266
+ " <th></th>\n",
1267
+ " <th></th>\n",
1268
+ " <th></th>\n",
1269
+ " <th></th>\n",
1270
+ " </tr>\n",
1271
+ " </thead>\n",
1272
+ " <tbody>\n",
1273
+ " <tr>\n",
1274
+ " <th>1984</th>\n",
1275
+ " <td>9.0</td>\n",
1276
+ " <td>NaN</td>\n",
1277
+ " <td>NaN</td>\n",
1278
+ " <td>NaN</td>\n",
1279
+ " <td>NaN</td>\n",
1280
+ " <td>NaN</td>\n",
1281
+ " <td>NaN</td>\n",
1282
+ " <td>NaN</td>\n",
1283
+ " <td>NaN</td>\n",
1284
+ " <td>NaN</td>\n",
1285
+ " <td>...</td>\n",
1286
+ " <td>NaN</td>\n",
1287
+ " <td>NaN</td>\n",
1288
+ " <td>NaN</td>\n",
1289
+ " <td>NaN</td>\n",
1290
+ " <td>NaN</td>\n",
1291
+ " <td>0.0</td>\n",
1292
+ " <td>NaN</td>\n",
1293
+ " <td>NaN</td>\n",
1294
+ " <td>NaN</td>\n",
1295
+ " <td>NaN</td>\n",
1296
+ " </tr>\n",
1297
+ " <tr>\n",
1298
+ " <th>1st to Die: A Novel</th>\n",
1299
+ " <td>NaN</td>\n",
1300
+ " <td>NaN</td>\n",
1301
+ " <td>NaN</td>\n",
1302
+ " <td>NaN</td>\n",
1303
+ " <td>NaN</td>\n",
1304
+ " <td>NaN</td>\n",
1305
+ " <td>NaN</td>\n",
1306
+ " <td>NaN</td>\n",
1307
+ " <td>NaN</td>\n",
1308
+ " <td>NaN</td>\n",
1309
+ " <td>...</td>\n",
1310
+ " <td>NaN</td>\n",
1311
+ " <td>NaN</td>\n",
1312
+ " <td>NaN</td>\n",
1313
+ " <td>NaN</td>\n",
1314
+ " <td>NaN</td>\n",
1315
+ " <td>NaN</td>\n",
1316
+ " <td>NaN</td>\n",
1317
+ " <td>NaN</td>\n",
1318
+ " <td>NaN</td>\n",
1319
+ " <td>NaN</td>\n",
1320
+ " </tr>\n",
1321
+ " <tr>\n",
1322
+ " <th>2nd Chance</th>\n",
1323
+ " <td>NaN</td>\n",
1324
+ " <td>10.0</td>\n",
1325
+ " <td>NaN</td>\n",
1326
+ " <td>NaN</td>\n",
1327
+ " <td>NaN</td>\n",
1328
+ " <td>NaN</td>\n",
1329
+ " <td>NaN</td>\n",
1330
+ " <td>NaN</td>\n",
1331
+ " <td>NaN</td>\n",
1332
+ " <td>NaN</td>\n",
1333
+ " <td>...</td>\n",
1334
+ " <td>NaN</td>\n",
1335
+ " <td>NaN</td>\n",
1336
+ " <td>NaN</td>\n",
1337
+ " <td>0.0</td>\n",
1338
+ " <td>NaN</td>\n",
1339
+ " <td>NaN</td>\n",
1340
+ " <td>NaN</td>\n",
1341
+ " <td>NaN</td>\n",
1342
+ " <td>0.0</td>\n",
1343
+ " <td>NaN</td>\n",
1344
+ " </tr>\n",
1345
+ " <tr>\n",
1346
+ " <th>4 Blondes</th>\n",
1347
+ " <td>NaN</td>\n",
1348
+ " <td>NaN</td>\n",
1349
+ " <td>NaN</td>\n",
1350
+ " <td>NaN</td>\n",
1351
+ " <td>NaN</td>\n",
1352
+ " <td>NaN</td>\n",
1353
+ " <td>NaN</td>\n",
1354
+ " <td>NaN</td>\n",
1355
+ " <td>NaN</td>\n",
1356
+ " <td>0.0</td>\n",
1357
+ " <td>...</td>\n",
1358
+ " <td>NaN</td>\n",
1359
+ " <td>NaN</td>\n",
1360
+ " <td>NaN</td>\n",
1361
+ " <td>NaN</td>\n",
1362
+ " <td>NaN</td>\n",
1363
+ " <td>NaN</td>\n",
1364
+ " <td>NaN</td>\n",
1365
+ " <td>NaN</td>\n",
1366
+ " <td>NaN</td>\n",
1367
+ " <td>NaN</td>\n",
1368
+ " </tr>\n",
1369
+ " <tr>\n",
1370
+ " <th>84 Charing Cross Road</th>\n",
1371
+ " <td>NaN</td>\n",
1372
+ " <td>NaN</td>\n",
1373
+ " <td>NaN</td>\n",
1374
+ " <td>NaN</td>\n",
1375
+ " <td>NaN</td>\n",
1376
+ " <td>NaN</td>\n",
1377
+ " <td>NaN</td>\n",
1378
+ " <td>NaN</td>\n",
1379
+ " <td>NaN</td>\n",
1380
+ " <td>NaN</td>\n",
1381
+ " <td>...</td>\n",
1382
+ " <td>NaN</td>\n",
1383
+ " <td>NaN</td>\n",
1384
+ " <td>NaN</td>\n",
1385
+ " <td>NaN</td>\n",
1386
+ " <td>NaN</td>\n",
1387
+ " <td>10.0</td>\n",
1388
+ " <td>NaN</td>\n",
1389
+ " <td>NaN</td>\n",
1390
+ " <td>NaN</td>\n",
1391
+ " <td>NaN</td>\n",
1392
+ " </tr>\n",
1393
+ " <tr>\n",
1394
+ " <th>...</th>\n",
1395
+ " <td>...</td>\n",
1396
+ " <td>...</td>\n",
1397
+ " <td>...</td>\n",
1398
+ " <td>...</td>\n",
1399
+ " <td>...</td>\n",
1400
+ " <td>...</td>\n",
1401
+ " <td>...</td>\n",
1402
+ " <td>...</td>\n",
1403
+ " <td>...</td>\n",
1404
+ " <td>...</td>\n",
1405
+ " <td>...</td>\n",
1406
+ " <td>...</td>\n",
1407
+ " <td>...</td>\n",
1408
+ " <td>...</td>\n",
1409
+ " <td>...</td>\n",
1410
+ " <td>...</td>\n",
1411
+ " <td>...</td>\n",
1412
+ " <td>...</td>\n",
1413
+ " <td>...</td>\n",
1414
+ " <td>...</td>\n",
1415
+ " <td>...</td>\n",
1416
+ " </tr>\n",
1417
+ " <tr>\n",
1418
+ " <th>Year of Wonders</th>\n",
1419
+ " <td>NaN</td>\n",
1420
+ " <td>NaN</td>\n",
1421
+ " <td>NaN</td>\n",
1422
+ " <td>7.0</td>\n",
1423
+ " <td>NaN</td>\n",
1424
+ " <td>NaN</td>\n",
1425
+ " <td>NaN</td>\n",
1426
+ " <td>NaN</td>\n",
1427
+ " <td>7.0</td>\n",
1428
+ " <td>NaN</td>\n",
1429
+ " <td>...</td>\n",
1430
+ " <td>NaN</td>\n",
1431
+ " <td>NaN</td>\n",
1432
+ " <td>NaN</td>\n",
1433
+ " <td>NaN</td>\n",
1434
+ " <td>NaN</td>\n",
1435
+ " <td>0.0</td>\n",
1436
+ " <td>NaN</td>\n",
1437
+ " <td>NaN</td>\n",
1438
+ " <td>NaN</td>\n",
1439
+ " <td>NaN</td>\n",
1440
+ " </tr>\n",
1441
+ " <tr>\n",
1442
+ " <th>You Belong To Me</th>\n",
1443
+ " <td>NaN</td>\n",
1444
+ " <td>NaN</td>\n",
1445
+ " <td>NaN</td>\n",
1446
+ " <td>NaN</td>\n",
1447
+ " <td>NaN</td>\n",
1448
+ " <td>NaN</td>\n",
1449
+ " <td>NaN</td>\n",
1450
+ " <td>NaN</td>\n",
1451
+ " <td>NaN</td>\n",
1452
+ " <td>NaN</td>\n",
1453
+ " <td>...</td>\n",
1454
+ " <td>NaN</td>\n",
1455
+ " <td>NaN</td>\n",
1456
+ " <td>NaN</td>\n",
1457
+ " <td>NaN</td>\n",
1458
+ " <td>NaN</td>\n",
1459
+ " <td>NaN</td>\n",
1460
+ " <td>NaN</td>\n",
1461
+ " <td>NaN</td>\n",
1462
+ " <td>NaN</td>\n",
1463
+ " <td>NaN</td>\n",
1464
+ " </tr>\n",
1465
+ " <tr>\n",
1466
+ " <th>Zen and the Art of Motorcycle Maintenance: An Inquiry into Values</th>\n",
1467
+ " <td>NaN</td>\n",
1468
+ " <td>NaN</td>\n",
1469
+ " <td>NaN</td>\n",
1470
+ " <td>NaN</td>\n",
1471
+ " <td>0.0</td>\n",
1472
+ " <td>NaN</td>\n",
1473
+ " <td>NaN</td>\n",
1474
+ " <td>NaN</td>\n",
1475
+ " <td>NaN</td>\n",
1476
+ " <td>0.0</td>\n",
1477
+ " <td>...</td>\n",
1478
+ " <td>NaN</td>\n",
1479
+ " <td>NaN</td>\n",
1480
+ " <td>NaN</td>\n",
1481
+ " <td>NaN</td>\n",
1482
+ " <td>NaN</td>\n",
1483
+ " <td>0.0</td>\n",
1484
+ " <td>NaN</td>\n",
1485
+ " <td>NaN</td>\n",
1486
+ " <td>NaN</td>\n",
1487
+ " <td>NaN</td>\n",
1488
+ " </tr>\n",
1489
+ " <tr>\n",
1490
+ " <th>Zoya</th>\n",
1491
+ " <td>NaN</td>\n",
1492
+ " <td>NaN</td>\n",
1493
+ " <td>NaN</td>\n",
1494
+ " <td>NaN</td>\n",
1495
+ " <td>NaN</td>\n",
1496
+ " <td>NaN</td>\n",
1497
+ " <td>NaN</td>\n",
1498
+ " <td>NaN</td>\n",
1499
+ " <td>NaN</td>\n",
1500
+ " <td>NaN</td>\n",
1501
+ " <td>...</td>\n",
1502
+ " <td>NaN</td>\n",
1503
+ " <td>NaN</td>\n",
1504
+ " <td>NaN</td>\n",
1505
+ " <td>NaN</td>\n",
1506
+ " <td>NaN</td>\n",
1507
+ " <td>NaN</td>\n",
1508
+ " <td>NaN</td>\n",
1509
+ " <td>NaN</td>\n",
1510
+ " <td>NaN</td>\n",
1511
+ " <td>NaN</td>\n",
1512
+ " </tr>\n",
1513
+ " <tr>\n",
1514
+ " <th>\\O\\\" Is for Outlaw\"</th>\n",
1515
+ " <td>NaN</td>\n",
1516
+ " <td>NaN</td>\n",
1517
+ " <td>NaN</td>\n",
1518
+ " <td>NaN</td>\n",
1519
+ " <td>NaN</td>\n",
1520
+ " <td>NaN</td>\n",
1521
+ " <td>NaN</td>\n",
1522
+ " <td>NaN</td>\n",
1523
+ " <td>NaN</td>\n",
1524
+ " <td>NaN</td>\n",
1525
+ " <td>...</td>\n",
1526
+ " <td>NaN</td>\n",
1527
+ " <td>NaN</td>\n",
1528
+ " <td>8.0</td>\n",
1529
+ " <td>NaN</td>\n",
1530
+ " <td>NaN</td>\n",
1531
+ " <td>NaN</td>\n",
1532
+ " <td>NaN</td>\n",
1533
+ " <td>NaN</td>\n",
1534
+ " <td>NaN</td>\n",
1535
+ " <td>NaN</td>\n",
1536
+ " </tr>\n",
1537
+ " </tbody>\n",
1538
+ "</table>\n",
1539
+ "<p>742 rows × 888 columns</p>\n",
1540
+ "</div>"
1541
+ ],
1542
+ "text/plain": [
1543
+ "User-ID 254 2276 2766 \\\n",
1544
+ "Book-Title \n",
1545
+ "1984 9.0 NaN NaN \n",
1546
+ "1st to Die: A Novel NaN NaN NaN \n",
1547
+ "2nd Chance NaN 10.0 NaN \n",
1548
+ "4 Blondes NaN NaN NaN \n",
1549
+ "84 Charing Cross Road NaN NaN NaN \n",
1550
+ "... ... ... ... \n",
1551
+ "Year of Wonders NaN NaN NaN \n",
1552
+ "You Belong To Me NaN NaN NaN \n",
1553
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
1554
+ "Zoya NaN NaN NaN \n",
1555
+ "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
1556
+ "\n",
1557
+ "User-ID 2977 3363 3757 \\\n",
1558
+ "Book-Title \n",
1559
+ "1984 NaN NaN NaN \n",
1560
+ "1st to Die: A Novel NaN NaN NaN \n",
1561
+ "2nd Chance NaN NaN NaN \n",
1562
+ "4 Blondes NaN NaN NaN \n",
1563
+ "84 Charing Cross Road NaN NaN NaN \n",
1564
+ "... ... ... ... \n",
1565
+ "Year of Wonders 7.0 NaN NaN \n",
1566
+ "You Belong To Me NaN NaN NaN \n",
1567
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
1568
+ "Zoya NaN NaN NaN \n",
1569
+ "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
1570
+ "\n",
1571
+ "User-ID 4017 4385 6242 \\\n",
1572
+ "Book-Title \n",
1573
+ "1984 NaN NaN NaN \n",
1574
+ "1st to Die: A Novel NaN NaN NaN \n",
1575
+ "2nd Chance NaN NaN NaN \n",
1576
+ "4 Blondes NaN NaN NaN \n",
1577
+ "84 Charing Cross Road NaN NaN NaN \n",
1578
+ "... ... ... ... \n",
1579
+ "Year of Wonders NaN NaN 7.0 \n",
1580
+ "You Belong To Me NaN NaN NaN \n",
1581
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
1582
+ "Zoya NaN NaN NaN \n",
1583
+ "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
1584
+ "\n",
1585
+ "User-ID 6251 ... 274004 \\\n",
1586
+ "Book-Title ... \n",
1587
+ "1984 NaN ... NaN \n",
1588
+ "1st to Die: A Novel NaN ... NaN \n",
1589
+ "2nd Chance NaN ... NaN \n",
1590
+ "4 Blondes 0.0 ... NaN \n",
1591
+ "84 Charing Cross Road NaN ... NaN \n",
1592
+ "... ... ... ... \n",
1593
+ "Year of Wonders NaN ... NaN \n",
1594
+ "You Belong To Me NaN ... NaN \n",
1595
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n",
1596
+ "Zoya NaN ... NaN \n",
1597
+ "\\O\\\" Is for Outlaw\" NaN ... NaN \n",
1598
+ "\n",
1599
+ "User-ID 274061 274301 274308 \\\n",
1600
+ "Book-Title \n",
1601
+ "1984 NaN NaN NaN \n",
1602
+ "1st to Die: A Novel NaN NaN NaN \n",
1603
+ "2nd Chance NaN NaN 0.0 \n",
1604
+ "4 Blondes NaN NaN NaN \n",
1605
+ "84 Charing Cross Road NaN NaN NaN \n",
1606
+ "... ... ... ... \n",
1607
+ "Year of Wonders NaN NaN NaN \n",
1608
+ "You Belong To Me NaN NaN NaN \n",
1609
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
1610
+ "Zoya NaN NaN NaN \n",
1611
+ "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n",
1612
+ "\n",
1613
+ "User-ID 274808 275970 277427 \\\n",
1614
+ "Book-Title \n",
1615
+ "1984 NaN 0.0 NaN \n",
1616
+ "1st to Die: A Novel NaN NaN NaN \n",
1617
+ "2nd Chance NaN NaN NaN \n",
1618
+ "4 Blondes NaN NaN NaN \n",
1619
+ "84 Charing Cross Road NaN 10.0 NaN \n",
1620
+ "... ... ... ... \n",
1621
+ "Year of Wonders NaN 0.0 NaN \n",
1622
+ "You Belong To Me NaN NaN NaN \n",
1623
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
1624
+ "Zoya NaN NaN NaN \n",
1625
+ "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
1626
+ "\n",
1627
+ "User-ID 277478 277639 278418 \n",
1628
+ "Book-Title \n",
1629
+ "1984 NaN NaN NaN \n",
1630
+ "1st to Die: A Novel NaN NaN NaN \n",
1631
+ "2nd Chance NaN 0.0 NaN \n",
1632
+ "4 Blondes NaN NaN NaN \n",
1633
+ "84 Charing Cross Road NaN NaN NaN \n",
1634
+ "... ... ... ... \n",
1635
+ "Year of Wonders NaN NaN NaN \n",
1636
+ "You Belong To Me NaN NaN NaN \n",
1637
+ "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
1638
+ "Zoya NaN NaN NaN \n",
1639
+ "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
1640
+ "\n",
1641
+ "[742 rows x 888 columns]"
1642
+ ]
1643
+ },
1644
+ "execution_count": 110,
1645
+ "metadata": {},
1646
+ "output_type": "execute_result"
1647
+ }
1648
+ ],
1649
+ "source": [
1650
+ "book_pivot_tale = final_rating.pivot_table(columns=\"User-ID\",\n",
1651
+ " index=\"Book-Title\",\n",
1652
+ " values=\"Book-Rating\")\n",
1653
+ "\n",
1654
+ "book_pivot_tale"
1655
+ ]
1656
+ },
1657
+ {
1658
+ "cell_type": "code",
1659
+ "execution_count": 112,
1660
+ "metadata": {},
1661
+ "outputs": [],
1662
+ "source": [
1663
+ "book_pivot_tale.fillna(0,inplace=True)"
1664
+ ]
1665
+ },
1666
+ {
1667
+ "cell_type": "code",
1668
+ "execution_count": 113,
1669
+ "metadata": {},
1670
+ "outputs": [
1671
+ {
1672
+ "data": {
1673
+ "text/html": [
1674
+ "<div>\n",
1675
+ "<style scoped>\n",
1676
+ " .dataframe tbody tr th:only-of-type {\n",
1677
+ " vertical-align: middle;\n",
1678
+ " }\n",
1679
+ "\n",
1680
+ " .dataframe tbody tr th {\n",
1681
+ " vertical-align: top;\n",
1682
+ " }\n",
1683
+ "\n",
1684
+ " .dataframe thead th {\n",
1685
+ " text-align: right;\n",
1686
+ " }\n",
1687
+ "</style>\n",
1688
+ "<table border=\"1\" class=\"dataframe\">\n",
1689
+ " <thead>\n",
1690
+ " <tr style=\"text-align: right;\">\n",
1691
+ " <th>User-ID</th>\n",
1692
+ " <th>254</th>\n",
1693
+ " <th>2276</th>\n",
1694
+ " <th>2766</th>\n",
1695
+ " <th>2977</th>\n",
1696
+ " <th>3363</th>\n",
1697
+ " <th>3757</th>\n",
1698
+ " <th>4017</th>\n",
1699
+ " <th>4385</th>\n",
1700
+ " <th>6242</th>\n",
1701
+ " <th>6251</th>\n",
1702
+ " <th>...</th>\n",
1703
+ " <th>274004</th>\n",
1704
+ " <th>274061</th>\n",
1705
+ " <th>274301</th>\n",
1706
+ " <th>274308</th>\n",
1707
+ " <th>274808</th>\n",
1708
+ " <th>275970</th>\n",
1709
+ " <th>277427</th>\n",
1710
+ " <th>277478</th>\n",
1711
+ " <th>277639</th>\n",
1712
+ " <th>278418</th>\n",
1713
+ " </tr>\n",
1714
+ " <tr>\n",
1715
+ " <th>Book-Title</th>\n",
1716
+ " <th></th>\n",
1717
+ " <th></th>\n",
1718
+ " <th></th>\n",
1719
+ " <th></th>\n",
1720
+ " <th></th>\n",
1721
+ " <th></th>\n",
1722
+ " <th></th>\n",
1723
+ " <th></th>\n",
1724
+ " <th></th>\n",
1725
+ " <th></th>\n",
1726
+ " <th></th>\n",
1727
+ " <th></th>\n",
1728
+ " <th></th>\n",
1729
+ " <th></th>\n",
1730
+ " <th></th>\n",
1731
+ " <th></th>\n",
1732
+ " <th></th>\n",
1733
+ " <th></th>\n",
1734
+ " <th></th>\n",
1735
+ " <th></th>\n",
1736
+ " <th></th>\n",
1737
+ " </tr>\n",
1738
+ " </thead>\n",
1739
+ " <tbody>\n",
1740
+ " <tr>\n",
1741
+ " <th>1984</th>\n",
1742
+ " <td>9.0</td>\n",
1743
+ " <td>0.0</td>\n",
1744
+ " <td>0.0</td>\n",
1745
+ " <td>0.0</td>\n",
1746
+ " <td>0.0</td>\n",
1747
+ " <td>0.0</td>\n",
1748
+ " <td>0.0</td>\n",
1749
+ " <td>0.0</td>\n",
1750
+ " <td>0.0</td>\n",
1751
+ " <td>0.0</td>\n",
1752
+ " <td>...</td>\n",
1753
+ " <td>0.0</td>\n",
1754
+ " <td>0.0</td>\n",
1755
+ " <td>0.0</td>\n",
1756
+ " <td>0.0</td>\n",
1757
+ " <td>0.0</td>\n",
1758
+ " <td>0.0</td>\n",
1759
+ " <td>0.0</td>\n",
1760
+ " <td>0.0</td>\n",
1761
+ " <td>0.0</td>\n",
1762
+ " <td>0.0</td>\n",
1763
+ " </tr>\n",
1764
+ " <tr>\n",
1765
+ " <th>1st to Die: A Novel</th>\n",
1766
+ " <td>0.0</td>\n",
1767
+ " <td>0.0</td>\n",
1768
+ " <td>0.0</td>\n",
1769
+ " <td>0.0</td>\n",
1770
+ " <td>0.0</td>\n",
1771
+ " <td>0.0</td>\n",
1772
+ " <td>0.0</td>\n",
1773
+ " <td>0.0</td>\n",
1774
+ " <td>0.0</td>\n",
1775
+ " <td>0.0</td>\n",
1776
+ " <td>...</td>\n",
1777
+ " <td>0.0</td>\n",
1778
+ " <td>0.0</td>\n",
1779
+ " <td>0.0</td>\n",
1780
+ " <td>0.0</td>\n",
1781
+ " <td>0.0</td>\n",
1782
+ " <td>0.0</td>\n",
1783
+ " <td>0.0</td>\n",
1784
+ " <td>0.0</td>\n",
1785
+ " <td>0.0</td>\n",
1786
+ " <td>0.0</td>\n",
1787
+ " </tr>\n",
1788
+ " <tr>\n",
1789
+ " <th>2nd Chance</th>\n",
1790
+ " <td>0.0</td>\n",
1791
+ " <td>10.0</td>\n",
1792
+ " <td>0.0</td>\n",
1793
+ " <td>0.0</td>\n",
1794
+ " <td>0.0</td>\n",
1795
+ " <td>0.0</td>\n",
1796
+ " <td>0.0</td>\n",
1797
+ " <td>0.0</td>\n",
1798
+ " <td>0.0</td>\n",
1799
+ " <td>0.0</td>\n",
1800
+ " <td>...</td>\n",
1801
+ " <td>0.0</td>\n",
1802
+ " <td>0.0</td>\n",
1803
+ " <td>0.0</td>\n",
1804
+ " <td>0.0</td>\n",
1805
+ " <td>0.0</td>\n",
1806
+ " <td>0.0</td>\n",
1807
+ " <td>0.0</td>\n",
1808
+ " <td>0.0</td>\n",
1809
+ " <td>0.0</td>\n",
1810
+ " <td>0.0</td>\n",
1811
+ " </tr>\n",
1812
+ " <tr>\n",
1813
+ " <th>4 Blondes</th>\n",
1814
+ " <td>0.0</td>\n",
1815
+ " <td>0.0</td>\n",
1816
+ " <td>0.0</td>\n",
1817
+ " <td>0.0</td>\n",
1818
+ " <td>0.0</td>\n",
1819
+ " <td>0.0</td>\n",
1820
+ " <td>0.0</td>\n",
1821
+ " <td>0.0</td>\n",
1822
+ " <td>0.0</td>\n",
1823
+ " <td>0.0</td>\n",
1824
+ " <td>...</td>\n",
1825
+ " <td>0.0</td>\n",
1826
+ " <td>0.0</td>\n",
1827
+ " <td>0.0</td>\n",
1828
+ " <td>0.0</td>\n",
1829
+ " <td>0.0</td>\n",
1830
+ " <td>0.0</td>\n",
1831
+ " <td>0.0</td>\n",
1832
+ " <td>0.0</td>\n",
1833
+ " <td>0.0</td>\n",
1834
+ " <td>0.0</td>\n",
1835
+ " </tr>\n",
1836
+ " <tr>\n",
1837
+ " <th>84 Charing Cross Road</th>\n",
1838
+ " <td>0.0</td>\n",
1839
+ " <td>0.0</td>\n",
1840
+ " <td>0.0</td>\n",
1841
+ " <td>0.0</td>\n",
1842
+ " <td>0.0</td>\n",
1843
+ " <td>0.0</td>\n",
1844
+ " <td>0.0</td>\n",
1845
+ " <td>0.0</td>\n",
1846
+ " <td>0.0</td>\n",
1847
+ " <td>0.0</td>\n",
1848
+ " <td>...</td>\n",
1849
+ " <td>0.0</td>\n",
1850
+ " <td>0.0</td>\n",
1851
+ " <td>0.0</td>\n",
1852
+ " <td>0.0</td>\n",
1853
+ " <td>0.0</td>\n",
1854
+ " <td>10.0</td>\n",
1855
+ " <td>0.0</td>\n",
1856
+ " <td>0.0</td>\n",
1857
+ " <td>0.0</td>\n",
1858
+ " <td>0.0</td>\n",
1859
+ " </tr>\n",
1860
+ " <tr>\n",
1861
+ " <th>...</th>\n",
1862
+ " <td>...</td>\n",
1863
+ " <td>...</td>\n",
1864
+ " <td>...</td>\n",
1865
+ " <td>...</td>\n",
1866
+ " <td>...</td>\n",
1867
+ " <td>...</td>\n",
1868
+ " <td>...</td>\n",
1869
+ " <td>...</td>\n",
1870
+ " <td>...</td>\n",
1871
+ " <td>...</td>\n",
1872
+ " <td>...</td>\n",
1873
+ " <td>...</td>\n",
1874
+ " <td>...</td>\n",
1875
+ " <td>...</td>\n",
1876
+ " <td>...</td>\n",
1877
+ " <td>...</td>\n",
1878
+ " <td>...</td>\n",
1879
+ " <td>...</td>\n",
1880
+ " <td>...</td>\n",
1881
+ " <td>...</td>\n",
1882
+ " <td>...</td>\n",
1883
+ " </tr>\n",
1884
+ " <tr>\n",
1885
+ " <th>Year of Wonders</th>\n",
1886
+ " <td>0.0</td>\n",
1887
+ " <td>0.0</td>\n",
1888
+ " <td>0.0</td>\n",
1889
+ " <td>7.0</td>\n",
1890
+ " <td>0.0</td>\n",
1891
+ " <td>0.0</td>\n",
1892
+ " <td>0.0</td>\n",
1893
+ " <td>0.0</td>\n",
1894
+ " <td>7.0</td>\n",
1895
+ " <td>0.0</td>\n",
1896
+ " <td>...</td>\n",
1897
+ " <td>0.0</td>\n",
1898
+ " <td>0.0</td>\n",
1899
+ " <td>0.0</td>\n",
1900
+ " <td>0.0</td>\n",
1901
+ " <td>0.0</td>\n",
1902
+ " <td>0.0</td>\n",
1903
+ " <td>0.0</td>\n",
1904
+ " <td>0.0</td>\n",
1905
+ " <td>0.0</td>\n",
1906
+ " <td>0.0</td>\n",
1907
+ " </tr>\n",
1908
+ " <tr>\n",
1909
+ " <th>You Belong To Me</th>\n",
1910
+ " <td>0.0</td>\n",
1911
+ " <td>0.0</td>\n",
1912
+ " <td>0.0</td>\n",
1913
+ " <td>0.0</td>\n",
1914
+ " <td>0.0</td>\n",
1915
+ " <td>0.0</td>\n",
1916
+ " <td>0.0</td>\n",
1917
+ " <td>0.0</td>\n",
1918
+ " <td>0.0</td>\n",
1919
+ " <td>0.0</td>\n",
1920
+ " <td>...</td>\n",
1921
+ " <td>0.0</td>\n",
1922
+ " <td>0.0</td>\n",
1923
+ " <td>0.0</td>\n",
1924
+ " <td>0.0</td>\n",
1925
+ " <td>0.0</td>\n",
1926
+ " <td>0.0</td>\n",
1927
+ " <td>0.0</td>\n",
1928
+ " <td>0.0</td>\n",
1929
+ " <td>0.0</td>\n",
1930
+ " <td>0.0</td>\n",
1931
+ " </tr>\n",
1932
+ " <tr>\n",
1933
+ " <th>Zen and the Art of Motorcycle Maintenance: An Inquiry into Values</th>\n",
1934
+ " <td>0.0</td>\n",
1935
+ " <td>0.0</td>\n",
1936
+ " <td>0.0</td>\n",
1937
+ " <td>0.0</td>\n",
1938
+ " <td>0.0</td>\n",
1939
+ " <td>0.0</td>\n",
1940
+ " <td>0.0</td>\n",
1941
+ " <td>0.0</td>\n",
1942
+ " <td>0.0</td>\n",
1943
+ " <td>0.0</td>\n",
1944
+ " <td>...</td>\n",
1945
+ " <td>0.0</td>\n",
1946
+ " <td>0.0</td>\n",
1947
+ " <td>0.0</td>\n",
1948
+ " <td>0.0</td>\n",
1949
+ " <td>0.0</td>\n",
1950
+ " <td>0.0</td>\n",
1951
+ " <td>0.0</td>\n",
1952
+ " <td>0.0</td>\n",
1953
+ " <td>0.0</td>\n",
1954
+ " <td>0.0</td>\n",
1955
+ " </tr>\n",
1956
+ " <tr>\n",
1957
+ " <th>Zoya</th>\n",
1958
+ " <td>0.0</td>\n",
1959
+ " <td>0.0</td>\n",
1960
+ " <td>0.0</td>\n",
1961
+ " <td>0.0</td>\n",
1962
+ " <td>0.0</td>\n",
1963
+ " <td>0.0</td>\n",
1964
+ " <td>0.0</td>\n",
1965
+ " <td>0.0</td>\n",
1966
+ " <td>0.0</td>\n",
1967
+ " <td>0.0</td>\n",
1968
+ " <td>...</td>\n",
1969
+ " <td>0.0</td>\n",
1970
+ " <td>0.0</td>\n",
1971
+ " <td>0.0</td>\n",
1972
+ " <td>0.0</td>\n",
1973
+ " <td>0.0</td>\n",
1974
+ " <td>0.0</td>\n",
1975
+ " <td>0.0</td>\n",
1976
+ " <td>0.0</td>\n",
1977
+ " <td>0.0</td>\n",
1978
+ " <td>0.0</td>\n",
1979
+ " </tr>\n",
1980
+ " <tr>\n",
1981
+ " <th>\\O\\\" Is for Outlaw\"</th>\n",
1982
+ " <td>0.0</td>\n",
1983
+ " <td>0.0</td>\n",
1984
+ " <td>0.0</td>\n",
1985
+ " <td>0.0</td>\n",
1986
+ " <td>0.0</td>\n",
1987
+ " <td>0.0</td>\n",
1988
+ " <td>0.0</td>\n",
1989
+ " <td>0.0</td>\n",
1990
+ " <td>0.0</td>\n",
1991
+ " <td>0.0</td>\n",
1992
+ " <td>...</td>\n",
1993
+ " <td>0.0</td>\n",
1994
+ " <td>0.0</td>\n",
1995
+ " <td>8.0</td>\n",
1996
+ " <td>0.0</td>\n",
1997
+ " <td>0.0</td>\n",
1998
+ " <td>0.0</td>\n",
1999
+ " <td>0.0</td>\n",
2000
+ " <td>0.0</td>\n",
2001
+ " <td>0.0</td>\n",
2002
+ " <td>0.0</td>\n",
2003
+ " </tr>\n",
2004
+ " </tbody>\n",
2005
+ "</table>\n",
2006
+ "<p>742 rows × 888 columns</p>\n",
2007
+ "</div>"
2008
+ ],
2009
+ "text/plain": [
2010
+ "User-ID 254 2276 2766 \\\n",
2011
+ "Book-Title \n",
2012
+ "1984 9.0 0.0 0.0 \n",
2013
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2014
+ "2nd Chance 0.0 10.0 0.0 \n",
2015
+ "4 Blondes 0.0 0.0 0.0 \n",
2016
+ "84 Charing Cross Road 0.0 0.0 0.0 \n",
2017
+ "... ... ... ... \n",
2018
+ "Year of Wonders 0.0 0.0 0.0 \n",
2019
+ "You Belong To Me 0.0 0.0 0.0 \n",
2020
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2021
+ "Zoya 0.0 0.0 0.0 \n",
2022
+ "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2023
+ "\n",
2024
+ "User-ID 2977 3363 3757 \\\n",
2025
+ "Book-Title \n",
2026
+ "1984 0.0 0.0 0.0 \n",
2027
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2028
+ "2nd Chance 0.0 0.0 0.0 \n",
2029
+ "4 Blondes 0.0 0.0 0.0 \n",
2030
+ "84 Charing Cross Road 0.0 0.0 0.0 \n",
2031
+ "... ... ... ... \n",
2032
+ "Year of Wonders 7.0 0.0 0.0 \n",
2033
+ "You Belong To Me 0.0 0.0 0.0 \n",
2034
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2035
+ "Zoya 0.0 0.0 0.0 \n",
2036
+ "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2037
+ "\n",
2038
+ "User-ID 4017 4385 6242 \\\n",
2039
+ "Book-Title \n",
2040
+ "1984 0.0 0.0 0.0 \n",
2041
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2042
+ "2nd Chance 0.0 0.0 0.0 \n",
2043
+ "4 Blondes 0.0 0.0 0.0 \n",
2044
+ "84 Charing Cross Road 0.0 0.0 0.0 \n",
2045
+ "... ... ... ... \n",
2046
+ "Year of Wonders 0.0 0.0 7.0 \n",
2047
+ "You Belong To Me 0.0 0.0 0.0 \n",
2048
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2049
+ "Zoya 0.0 0.0 0.0 \n",
2050
+ "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2051
+ "\n",
2052
+ "User-ID 6251 ... 274004 \\\n",
2053
+ "Book-Title ... \n",
2054
+ "1984 0.0 ... 0.0 \n",
2055
+ "1st to Die: A Novel 0.0 ... 0.0 \n",
2056
+ "2nd Chance 0.0 ... 0.0 \n",
2057
+ "4 Blondes 0.0 ... 0.0 \n",
2058
+ "84 Charing Cross Road 0.0 ... 0.0 \n",
2059
+ "... ... ... ... \n",
2060
+ "Year of Wonders 0.0 ... 0.0 \n",
2061
+ "You Belong To Me 0.0 ... 0.0 \n",
2062
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n",
2063
+ "Zoya 0.0 ... 0.0 \n",
2064
+ "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n",
2065
+ "\n",
2066
+ "User-ID 274061 274301 274308 \\\n",
2067
+ "Book-Title \n",
2068
+ "1984 0.0 0.0 0.0 \n",
2069
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2070
+ "2nd Chance 0.0 0.0 0.0 \n",
2071
+ "4 Blondes 0.0 0.0 0.0 \n",
2072
+ "84 Charing Cross Road 0.0 0.0 0.0 \n",
2073
+ "... ... ... ... \n",
2074
+ "Year of Wonders 0.0 0.0 0.0 \n",
2075
+ "You Belong To Me 0.0 0.0 0.0 \n",
2076
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2077
+ "Zoya 0.0 0.0 0.0 \n",
2078
+ "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n",
2079
+ "\n",
2080
+ "User-ID 274808 275970 277427 \\\n",
2081
+ "Book-Title \n",
2082
+ "1984 0.0 0.0 0.0 \n",
2083
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2084
+ "2nd Chance 0.0 0.0 0.0 \n",
2085
+ "4 Blondes 0.0 0.0 0.0 \n",
2086
+ "84 Charing Cross Road 0.0 10.0 0.0 \n",
2087
+ "... ... ... ... \n",
2088
+ "Year of Wonders 0.0 0.0 0.0 \n",
2089
+ "You Belong To Me 0.0 0.0 0.0 \n",
2090
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2091
+ "Zoya 0.0 0.0 0.0 \n",
2092
+ "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2093
+ "\n",
2094
+ "User-ID 277478 277639 278418 \n",
2095
+ "Book-Title \n",
2096
+ "1984 0.0 0.0 0.0 \n",
2097
+ "1st to Die: A Novel 0.0 0.0 0.0 \n",
2098
+ "2nd Chance 0.0 0.0 0.0 \n",
2099
+ "4 Blondes 0.0 0.0 0.0 \n",
2100
+ "84 Charing Cross Road 0.0 0.0 0.0 \n",
2101
+ "... ... ... ... \n",
2102
+ "Year of Wonders 0.0 0.0 0.0 \n",
2103
+ "You Belong To Me 0.0 0.0 0.0 \n",
2104
+ "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2105
+ "Zoya 0.0 0.0 0.0 \n",
2106
+ "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2107
+ "\n",
2108
+ "[742 rows x 888 columns]"
2109
+ ]
2110
+ },
2111
+ "execution_count": 113,
2112
+ "metadata": {},
2113
+ "output_type": "execute_result"
2114
+ }
2115
+ ],
2116
+ "source": [
2117
+ "book_pivot_tale"
2118
+ ]
2119
+ },
2120
+ {
2121
+ "cell_type": "markdown",
2122
+ "metadata": {},
2123
+ "source": [
2124
+ "## 4.6 scipy matrix creation"
2125
+ ]
2126
+ },
2127
+ {
2128
+ "cell_type": "code",
2129
+ "execution_count": 118,
2130
+ "metadata": {},
2131
+ "outputs": [
2132
+ {
2133
+ "data": {
2134
+ "text/plain": [
2135
+ "<742x888 sparse matrix of type '<class 'numpy.float64'>'\n",
2136
+ "\twith 14961 stored elements in Compressed Sparse Row format>"
2137
+ ]
2138
+ },
2139
+ "execution_count": 118,
2140
+ "metadata": {},
2141
+ "output_type": "execute_result"
2142
+ }
2143
+ ],
2144
+ "source": [
2145
+ "books_sparse_table = csr_matrix(book_pivot_tale)\n",
2146
+ "books_sparse_table"
2147
+ ]
2148
+ },
2149
+ {
2150
+ "cell_type": "markdown",
2151
+ "metadata": {},
2152
+ "source": [
2153
+ "## 5. Model Building"
2154
+ ]
2155
+ },
2156
+ {
2157
+ "cell_type": "code",
2158
+ "execution_count": 124,
2159
+ "metadata": {},
2160
+ "outputs": [],
2161
+ "source": [
2162
+ "model = NearestNeighbors(algorithm=\"brute\")"
2163
+ ]
2164
+ },
2165
+ {
2166
+ "cell_type": "code",
2167
+ "execution_count": 125,
2168
+ "metadata": {},
2169
+ "outputs": [
2170
+ {
2171
+ "data": {
2172
+ "text/html": [
2173
+ "<style>#sk-container-id-2 {\n",
2174
+ " /* Definition of color scheme common for light and dark mode */\n",
2175
+ " --sklearn-color-text: black;\n",
2176
+ " --sklearn-color-line: gray;\n",
2177
+ " /* Definition of color scheme for unfitted estimators */\n",
2178
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
2179
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
2180
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
2181
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
2182
+ " /* Definition of color scheme for fitted estimators */\n",
2183
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
2184
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
2185
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
2186
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
2187
+ "\n",
2188
+ " /* Specific color for light theme */\n",
2189
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2190
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
2191
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2192
+ " --sklearn-color-icon: #696969;\n",
2193
+ "\n",
2194
+ " @media (prefers-color-scheme: dark) {\n",
2195
+ " /* Redefinition of color scheme for dark theme */\n",
2196
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2197
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
2198
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2199
+ " --sklearn-color-icon: #878787;\n",
2200
+ " }\n",
2201
+ "}\n",
2202
+ "\n",
2203
+ "#sk-container-id-2 {\n",
2204
+ " color: var(--sklearn-color-text);\n",
2205
+ "}\n",
2206
+ "\n",
2207
+ "#sk-container-id-2 pre {\n",
2208
+ " padding: 0;\n",
2209
+ "}\n",
2210
+ "\n",
2211
+ "#sk-container-id-2 input.sk-hidden--visually {\n",
2212
+ " border: 0;\n",
2213
+ " clip: rect(1px 1px 1px 1px);\n",
2214
+ " clip: rect(1px, 1px, 1px, 1px);\n",
2215
+ " height: 1px;\n",
2216
+ " margin: -1px;\n",
2217
+ " overflow: hidden;\n",
2218
+ " padding: 0;\n",
2219
+ " position: absolute;\n",
2220
+ " width: 1px;\n",
2221
+ "}\n",
2222
+ "\n",
2223
+ "#sk-container-id-2 div.sk-dashed-wrapped {\n",
2224
+ " border: 1px dashed var(--sklearn-color-line);\n",
2225
+ " margin: 0 0.4em 0.5em 0.4em;\n",
2226
+ " box-sizing: border-box;\n",
2227
+ " padding-bottom: 0.4em;\n",
2228
+ " background-color: var(--sklearn-color-background);\n",
2229
+ "}\n",
2230
+ "\n",
2231
+ "#sk-container-id-2 div.sk-container {\n",
2232
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
2233
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
2234
+ " so we also need the `!important` here to be able to override the\n",
2235
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
2236
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
2237
+ " display: inline-block !important;\n",
2238
+ " position: relative;\n",
2239
+ "}\n",
2240
+ "\n",
2241
+ "#sk-container-id-2 div.sk-text-repr-fallback {\n",
2242
+ " display: none;\n",
2243
+ "}\n",
2244
+ "\n",
2245
+ "div.sk-parallel-item,\n",
2246
+ "div.sk-serial,\n",
2247
+ "div.sk-item {\n",
2248
+ " /* draw centered vertical line to link estimators */\n",
2249
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
2250
+ " background-size: 2px 100%;\n",
2251
+ " background-repeat: no-repeat;\n",
2252
+ " background-position: center center;\n",
2253
+ "}\n",
2254
+ "\n",
2255
+ "/* Parallel-specific style estimator block */\n",
2256
+ "\n",
2257
+ "#sk-container-id-2 div.sk-parallel-item::after {\n",
2258
+ " content: \"\";\n",
2259
+ " width: 100%;\n",
2260
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
2261
+ " flex-grow: 1;\n",
2262
+ "}\n",
2263
+ "\n",
2264
+ "#sk-container-id-2 div.sk-parallel {\n",
2265
+ " display: flex;\n",
2266
+ " align-items: stretch;\n",
2267
+ " justify-content: center;\n",
2268
+ " background-color: var(--sklearn-color-background);\n",
2269
+ " position: relative;\n",
2270
+ "}\n",
2271
+ "\n",
2272
+ "#sk-container-id-2 div.sk-parallel-item {\n",
2273
+ " display: flex;\n",
2274
+ " flex-direction: column;\n",
2275
+ "}\n",
2276
+ "\n",
2277
+ "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
2278
+ " align-self: flex-end;\n",
2279
+ " width: 50%;\n",
2280
+ "}\n",
2281
+ "\n",
2282
+ "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
2283
+ " align-self: flex-start;\n",
2284
+ " width: 50%;\n",
2285
+ "}\n",
2286
+ "\n",
2287
+ "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
2288
+ " width: 0;\n",
2289
+ "}\n",
2290
+ "\n",
2291
+ "/* Serial-specific style estimator block */\n",
2292
+ "\n",
2293
+ "#sk-container-id-2 div.sk-serial {\n",
2294
+ " display: flex;\n",
2295
+ " flex-direction: column;\n",
2296
+ " align-items: center;\n",
2297
+ " background-color: var(--sklearn-color-background);\n",
2298
+ " padding-right: 1em;\n",
2299
+ " padding-left: 1em;\n",
2300
+ "}\n",
2301
+ "\n",
2302
+ "\n",
2303
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
2304
+ "clickable and can be expanded/collapsed.\n",
2305
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
2306
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
2307
+ "*/\n",
2308
+ "\n",
2309
+ "/* Pipeline and ColumnTransformer style (default) */\n",
2310
+ "\n",
2311
+ "#sk-container-id-2 div.sk-toggleable {\n",
2312
+ " /* Default theme specific background. It is overwritten whether we have a\n",
2313
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
2314
+ " background-color: var(--sklearn-color-background);\n",
2315
+ "}\n",
2316
+ "\n",
2317
+ "/* Toggleable label */\n",
2318
+ "#sk-container-id-2 label.sk-toggleable__label {\n",
2319
+ " cursor: pointer;\n",
2320
+ " display: block;\n",
2321
+ " width: 100%;\n",
2322
+ " margin-bottom: 0;\n",
2323
+ " padding: 0.5em;\n",
2324
+ " box-sizing: border-box;\n",
2325
+ " text-align: center;\n",
2326
+ "}\n",
2327
+ "\n",
2328
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
2329
+ " /* Arrow on the left of the label */\n",
2330
+ " content: \"▸\";\n",
2331
+ " float: left;\n",
2332
+ " margin-right: 0.25em;\n",
2333
+ " color: var(--sklearn-color-icon);\n",
2334
+ "}\n",
2335
+ "\n",
2336
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
2337
+ " color: var(--sklearn-color-text);\n",
2338
+ "}\n",
2339
+ "\n",
2340
+ "/* Toggleable content - dropdown */\n",
2341
+ "\n",
2342
+ "#sk-container-id-2 div.sk-toggleable__content {\n",
2343
+ " max-height: 0;\n",
2344
+ " max-width: 0;\n",
2345
+ " overflow: hidden;\n",
2346
+ " text-align: left;\n",
2347
+ " /* unfitted */\n",
2348
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
2349
+ "}\n",
2350
+ "\n",
2351
+ "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
2352
+ " /* fitted */\n",
2353
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
2354
+ "}\n",
2355
+ "\n",
2356
+ "#sk-container-id-2 div.sk-toggleable__content pre {\n",
2357
+ " margin: 0.2em;\n",
2358
+ " border-radius: 0.25em;\n",
2359
+ " color: var(--sklearn-color-text);\n",
2360
+ " /* unfitted */\n",
2361
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
2362
+ "}\n",
2363
+ "\n",
2364
+ "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
2365
+ " /* unfitted */\n",
2366
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
2367
+ "}\n",
2368
+ "\n",
2369
+ "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2370
+ " /* Expand drop-down */\n",
2371
+ " max-height: 200px;\n",
2372
+ " max-width: 100%;\n",
2373
+ " overflow: auto;\n",
2374
+ "}\n",
2375
+ "\n",
2376
+ "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2377
+ " content: \"▾\";\n",
2378
+ "}\n",
2379
+ "\n",
2380
+ "/* Pipeline/ColumnTransformer-specific style */\n",
2381
+ "\n",
2382
+ "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2383
+ " color: var(--sklearn-color-text);\n",
2384
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
2385
+ "}\n",
2386
+ "\n",
2387
+ "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2388
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
2389
+ "}\n",
2390
+ "\n",
2391
+ "/* Estimator-specific style */\n",
2392
+ "\n",
2393
+ "/* Colorize estimator box */\n",
2394
+ "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2395
+ " /* unfitted */\n",
2396
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
2397
+ "}\n",
2398
+ "\n",
2399
+ "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2400
+ " /* fitted */\n",
2401
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
2402
+ "}\n",
2403
+ "\n",
2404
+ "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
2405
+ "#sk-container-id-2 div.sk-label label {\n",
2406
+ " /* The background is the default theme color */\n",
2407
+ " color: var(--sklearn-color-text-on-default-background);\n",
2408
+ "}\n",
2409
+ "\n",
2410
+ "/* On hover, darken the color of the background */\n",
2411
+ "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
2412
+ " color: var(--sklearn-color-text);\n",
2413
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
2414
+ "}\n",
2415
+ "\n",
2416
+ "/* Label box, darken color on hover, fitted */\n",
2417
+ "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2418
+ " color: var(--sklearn-color-text);\n",
2419
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
2420
+ "}\n",
2421
+ "\n",
2422
+ "/* Estimator label */\n",
2423
+ "\n",
2424
+ "#sk-container-id-2 div.sk-label label {\n",
2425
+ " font-family: monospace;\n",
2426
+ " font-weight: bold;\n",
2427
+ " display: inline-block;\n",
2428
+ " line-height: 1.2em;\n",
2429
+ "}\n",
2430
+ "\n",
2431
+ "#sk-container-id-2 div.sk-label-container {\n",
2432
+ " text-align: center;\n",
2433
+ "}\n",
2434
+ "\n",
2435
+ "/* Estimator-specific */\n",
2436
+ "#sk-container-id-2 div.sk-estimator {\n",
2437
+ " font-family: monospace;\n",
2438
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
2439
+ " border-radius: 0.25em;\n",
2440
+ " box-sizing: border-box;\n",
2441
+ " margin-bottom: 0.5em;\n",
2442
+ " /* unfitted */\n",
2443
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
2444
+ "}\n",
2445
+ "\n",
2446
+ "#sk-container-id-2 div.sk-estimator.fitted {\n",
2447
+ " /* fitted */\n",
2448
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
2449
+ "}\n",
2450
+ "\n",
2451
+ "/* on hover */\n",
2452
+ "#sk-container-id-2 div.sk-estimator:hover {\n",
2453
+ " /* unfitted */\n",
2454
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
2455
+ "}\n",
2456
+ "\n",
2457
+ "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
2458
+ " /* fitted */\n",
2459
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
2460
+ "}\n",
2461
+ "\n",
2462
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
2463
+ "\n",
2464
+ "/* Common style for \"i\" and \"?\" */\n",
2465
+ "\n",
2466
+ ".sk-estimator-doc-link,\n",
2467
+ "a:link.sk-estimator-doc-link,\n",
2468
+ "a:visited.sk-estimator-doc-link {\n",
2469
+ " float: right;\n",
2470
+ " font-size: smaller;\n",
2471
+ " line-height: 1em;\n",
2472
+ " font-family: monospace;\n",
2473
+ " background-color: var(--sklearn-color-background);\n",
2474
+ " border-radius: 1em;\n",
2475
+ " height: 1em;\n",
2476
+ " width: 1em;\n",
2477
+ " text-decoration: none !important;\n",
2478
+ " margin-left: 1ex;\n",
2479
+ " /* unfitted */\n",
2480
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2481
+ " color: var(--sklearn-color-unfitted-level-1);\n",
2482
+ "}\n",
2483
+ "\n",
2484
+ ".sk-estimator-doc-link.fitted,\n",
2485
+ "a:link.sk-estimator-doc-link.fitted,\n",
2486
+ "a:visited.sk-estimator-doc-link.fitted {\n",
2487
+ " /* fitted */\n",
2488
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2489
+ " color: var(--sklearn-color-fitted-level-1);\n",
2490
+ "}\n",
2491
+ "\n",
2492
+ "/* On hover */\n",
2493
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
2494
+ ".sk-estimator-doc-link:hover,\n",
2495
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
2496
+ ".sk-estimator-doc-link:hover {\n",
2497
+ " /* unfitted */\n",
2498
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
2499
+ " color: var(--sklearn-color-background);\n",
2500
+ " text-decoration: none;\n",
2501
+ "}\n",
2502
+ "\n",
2503
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
2504
+ ".sk-estimator-doc-link.fitted:hover,\n",
2505
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
2506
+ ".sk-estimator-doc-link.fitted:hover {\n",
2507
+ " /* fitted */\n",
2508
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
2509
+ " color: var(--sklearn-color-background);\n",
2510
+ " text-decoration: none;\n",
2511
+ "}\n",
2512
+ "\n",
2513
+ "/* Span, style for the box shown on hovering the info icon */\n",
2514
+ ".sk-estimator-doc-link span {\n",
2515
+ " display: none;\n",
2516
+ " z-index: 9999;\n",
2517
+ " position: relative;\n",
2518
+ " font-weight: normal;\n",
2519
+ " right: .2ex;\n",
2520
+ " padding: .5ex;\n",
2521
+ " margin: .5ex;\n",
2522
+ " width: min-content;\n",
2523
+ " min-width: 20ex;\n",
2524
+ " max-width: 50ex;\n",
2525
+ " color: var(--sklearn-color-text);\n",
2526
+ " box-shadow: 2pt 2pt 4pt #999;\n",
2527
+ " /* unfitted */\n",
2528
+ " background: var(--sklearn-color-unfitted-level-0);\n",
2529
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
2530
+ "}\n",
2531
+ "\n",
2532
+ ".sk-estimator-doc-link.fitted span {\n",
2533
+ " /* fitted */\n",
2534
+ " background: var(--sklearn-color-fitted-level-0);\n",
2535
+ " border: var(--sklearn-color-fitted-level-3);\n",
2536
+ "}\n",
2537
+ "\n",
2538
+ ".sk-estimator-doc-link:hover span {\n",
2539
+ " display: block;\n",
2540
+ "}\n",
2541
+ "\n",
2542
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
2543
+ "\n",
2544
+ "#sk-container-id-2 a.estimator_doc_link {\n",
2545
+ " float: right;\n",
2546
+ " font-size: 1rem;\n",
2547
+ " line-height: 1em;\n",
2548
+ " font-family: monospace;\n",
2549
+ " background-color: var(--sklearn-color-background);\n",
2550
+ " border-radius: 1rem;\n",
2551
+ " height: 1rem;\n",
2552
+ " width: 1rem;\n",
2553
+ " text-decoration: none;\n",
2554
+ " /* unfitted */\n",
2555
+ " color: var(--sklearn-color-unfitted-level-1);\n",
2556
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2557
+ "}\n",
2558
+ "\n",
2559
+ "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
2560
+ " /* fitted */\n",
2561
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2562
+ " color: var(--sklearn-color-fitted-level-1);\n",
2563
+ "}\n",
2564
+ "\n",
2565
+ "/* On hover */\n",
2566
+ "#sk-container-id-2 a.estimator_doc_link:hover {\n",
2567
+ " /* unfitted */\n",
2568
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
2569
+ " color: var(--sklearn-color-background);\n",
2570
+ " text-decoration: none;\n",
2571
+ "}\n",
2572
+ "\n",
2573
+ "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
2574
+ " /* fitted */\n",
2575
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
2576
+ "}\n",
2577
+ "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>NearestNeighbors(algorithm=&#x27;brute&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;NearestNeighbors<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.neighbors.NearestNeighbors.html\">?<span>Documentation for NearestNeighbors</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>NearestNeighbors(algorithm=&#x27;brute&#x27;)</pre></div> </div></div></div></div>"
2578
+ ],
2579
+ "text/plain": [
2580
+ "NearestNeighbors(algorithm='brute')"
2581
+ ]
2582
+ },
2583
+ "execution_count": 125,
2584
+ "metadata": {},
2585
+ "output_type": "execute_result"
2586
+ }
2587
+ ],
2588
+ "source": [
2589
+ "model.fit(books_sparse_table)"
2590
+ ]
2591
+ },
2592
+ {
2593
+ "cell_type": "markdown",
2594
+ "metadata": {},
2595
+ "source": [
2596
+ "## 6. suggestions"
2597
+ ]
2598
+ },
2599
+ {
2600
+ "cell_type": "code",
2601
+ "execution_count": 130,
2602
+ "metadata": {},
2603
+ "outputs": [],
2604
+ "source": [
2605
+ "distance,suggestion=model.kneighbors(book_pivot_tale.iloc[200,:].values.reshape(1,-1),n_neighbors=6)"
2606
+ ]
2607
+ },
2608
+ {
2609
+ "cell_type": "code",
2610
+ "execution_count": 133,
2611
+ "metadata": {},
2612
+ "outputs": [
2613
+ {
2614
+ "data": {
2615
+ "text/plain": [
2616
+ "array([[ 0. , 22.24859546, 22.60530911, 23.43074903, 23.68543856,\n",
2617
+ " 24.14539294]])"
2618
+ ]
2619
+ },
2620
+ "execution_count": 133,
2621
+ "metadata": {},
2622
+ "output_type": "execute_result"
2623
+ }
2624
+ ],
2625
+ "source": [
2626
+ "# distances to the most similar books\n",
2627
+ "distance"
2628
+ ]
2629
+ },
2630
+ {
2631
+ "cell_type": "code",
2632
+ "execution_count": 135,
2633
+ "metadata": {},
2634
+ "outputs": [
2635
+ {
2636
+ "data": {
2637
+ "text/plain": [
2638
+ "array([[200, 372, 485, 320, 184, 536]], dtype=int64)"
2639
+ ]
2640
+ },
2641
+ "execution_count": 135,
2642
+ "metadata": {},
2643
+ "output_type": "execute_result"
2644
+ }
2645
+ ],
2646
+ "source": [
2647
+ "# row positions (in book_pivot_tale) of the suggested books\n",
2648
+ "suggestion"
2649
+ ]
2650
+ },
2651
+ {
2652
+ "cell_type": "code",
2653
+ "execution_count": 137,
2654
+ "metadata": {},
2655
+ "outputs": [
2656
+ {
2657
+ "name": "stdout",
2658
+ "output_type": "stream",
2659
+ "text": [
2660
+ "Index(['Fatal Cure', 'No Safe Place', 'Table For Two', 'Long After Midnight',\n",
2661
+ " 'Exclusive', 'The Cradle Will Fall'],\n",
2662
+ " dtype='object', name='Book-Title')\n"
2663
+ ]
2664
+ }
2665
+ ],
2666
+ "source": [
2667
+ "for i in suggestion:\n",
2668
+ " print(book_pivot_tale.index[i])"
2669
+ ]
2670
+ },
2671
+ {
2672
+ "cell_type": "code",
2673
+ "execution_count": 139,
2674
+ "metadata": {},
2675
+ "outputs": [
2676
+ {
2677
+ "data": {
2678
+ "text/plain": [
2679
+ "Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',\n",
2680
+ " '84 Charing Cross Road', 'A Bend in the Road', 'A Case of Need',\n",
2681
+ " 'A Child Called \\It\\\": One Child's Courage to Survive\"',\n",
2682
+ " 'A Civil Action', 'A Cry In The Night',\n",
2683
+ " ...\n",
2684
+ " 'Winter Solstice', 'Wish You Well', 'Without Remorse',\n",
2685
+ " 'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',\n",
2686
+ " 'Year of Wonders', 'You Belong To Me',\n",
2687
+ " 'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',\n",
2688
+ " 'Zoya', '\\O\\\" Is for Outlaw\"'],\n",
2689
+ " dtype='object', name='Book-Title', length=742)"
2690
+ ]
2691
+ },
2692
+ "execution_count": 139,
2693
+ "metadata": {},
2694
+ "output_type": "execute_result"
2695
+ }
2696
+ ],
2697
+ "source": [
2698
+ "book_names = book_pivot_tale.index\n",
2699
+ "book_names"
2700
+ ]
2701
+ },
2702
+ {
2703
+ "cell_type": "markdown",
2704
+ "metadata": {},
2705
+ "source": [
2706
+ "## 7. Save the model and data artifacts"
2707
+ ]
2708
+ },
2709
+ {
2710
+ "cell_type": "code",
2711
+ "execution_count": 142,
2712
+ "metadata": {},
2713
+ "outputs": [],
2714
+ "source": [
2715
+ "pickle.dump(model,open(\"model.pkl\",\"wb\"))\n",
2716
+ "pickle.dump(book_names,open(\"book_names.pkl\",\"wb\"))\n",
2717
+ "pickle.dump(final_rating,open(\"final_rating.pkl\",\"wb\"))\n",
2718
+ "pickle.dump(book_pivot_tale,open(\"book_pivot_tale.pkl\",\"wb\"))"
2719
+ ]
2720
+ },
2721
+ {
2722
+ "cell_type": "markdown",
2723
+ "metadata": {},
2724
+ "source": [
2725
+ "## 8. Creating recommendations"
2726
+ ]
2727
+ },
2728
+ {
2729
+ "cell_type": "code",
2730
+ "execution_count": 150,
2731
+ "metadata": {},
2732
+ "outputs": [
2733
+ {
2734
+ "name": "stdout",
2735
+ "output_type": "stream",
2736
+ "text": [
2737
+ "Index(['Zoya', 'Fine Things', 'Exclusive', 'Secrets', 'The Cradle Will Fall',\n",
2738
+ " 'No Safe Place'],\n",
2739
+ " dtype='object', name='Book-Title')\n"
2740
+ ]
2741
+ }
2742
+ ],
2743
+ "source": [
2744
+ "def recommended_book(book_name):\n",
2745
+ " book_id = np.where(book_pivot_tale.index==book_name)[0][0]\n",
2746
+ " distance,suggestion=model.kneighbors(book_pivot_tale.iloc[book_id,:].values.reshape(1,-1),n_neighbors=6)\n",
2747
+ " for i in suggestion:\n",
2748
+ " books=book_pivot_tale.index[i]\n",
2749
+ " print(books)\n",
2750
+ "\n",
2751
+ " \n",
2752
+ "recommended_book(\"Zoya\")"
2753
+ ]
2754
+ },
2755
+ {
2756
+ "cell_type": "code",
2757
+ "execution_count": null,
2758
+ "metadata": {},
2759
+ "outputs": [],
2760
+ "source": []
2761
+ }
2762
+ ],
2763
+ "metadata": {
2764
+ "kernelspec": {
2765
+ "display_name": "Python 3",
2766
+ "language": "python",
2767
+ "name": "python3"
2768
+ },
2769
+ "language_info": {
2770
+ "codemirror_mode": {
2771
+ "name": "ipython",
2772
+ "version": 3
2773
+ },
2774
+ "file_extension": ".py",
2775
+ "mimetype": "text/x-python",
2776
+ "name": "python",
2777
+ "nbconvert_exporter": "python",
2778
+ "pygments_lexer": "ipython3",
2779
+ "version": "3.11.9"
2780
+ }
2781
+ },
2782
+ "nbformat": 4,
2783
+ "nbformat_minor": 2
2784
+ }
book_names.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29fca171e591029b06fb72f2916ec19d88e0c7d1e749fb33c2cd5c1a61d55f5
3
+ size 20136
book_pivot_tale.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb3379fb8d2b5e230d5de222add2402e6e0c6fa3bf683a166fbeb76696bea95
3
+ size 5298858
final_rating.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f285ec5b1ee0669a89d7400dee085b6cd8b7100791b30e0af98d46abe5566e37
3
+ size 3385955
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:803fafb9fded8b774e0f284a0449e2579ad30b71f828e6e8259d67b534fabb6a
3
+ size 183218
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ joblib==1.4.2
2
+ nltk==3.9.1
3
+ numpy==2.2.1
4
+ pandas==2.2.3
5
+ scikit-learn==1.6.0
6
+ streamlit==1.41.1