{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdmovieIdratingtimestamp
0114.0964982703
1134.0964981247
2164.0964982224
31475.0964983815
41505.0964982931
\n", "
" ], "text/plain": [ " userId movieId rating timestamp\n", "0 1 1 4.0 964982703\n", "1 1 3 4.0 964981247\n", "2 1 6 4.0 964982224\n", "3 1 47 5.0 964983815\n", "4 1 50 5.0 964982931" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings = pd.read_csv('../data/reduced/ratings_m10.csv')\n", "ratings.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdmovieIdratingtimestamp
0114.0964982703
1134.0964981247
2164.0964982224
31475.0964983815
41505.0964982931
...............
811116101590933.01493847704
811126101641795.01493845631
811136101665284.01493879365
811146101682505.01494273047
811156101682525.01493846352
\n", "

81116 rows × 4 columns

\n", "
" ], "text/plain": [ " userId movieId rating timestamp\n", "0 1 1 4.0 964982703\n", "1 1 3 4.0 964981247\n", "2 1 6 4.0 964982224\n", "3 1 47 5.0 964983815\n", "4 1 50 5.0 964982931\n", "... ... ... ... ...\n", "81111 610 159093 3.0 1493847704\n", "81112 610 164179 5.0 1493845631\n", "81113 610 166528 4.0 1493879365\n", "81114 610 168250 5.0 1494273047\n", "81115 610 168252 5.0 1493846352\n", "\n", "[81116 rows x 4 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.reindex()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tmdbIdimdbIdcastdirectorkeywordsoverviewtitlegenresyear
movieId
117529135397369610['Chris Pratt', 'Bryce Dallas Howard', 'Irrfan...Colin Trevorrow['monster', 'dna', 'tyrannosaurus rex', 'veloc...Twenty-two years after the events of Jurassic ...Jurassic World['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...2015
122882763411392190['Tom Hardy', 'Charlize Theron', 'Hugh Keays-B...George Miller['future', 'chase', 'post-apocalyptic', 'dysto...An apocalyptic story set in the furthest reach...Mad Max: Fury Road['Action', 'Adventure', 'Sci-Fi', 'Thriller']2015
1228861406072488496['Harrison Ford', 'Mark Hamill', 'Carrie Fishe...J.J. Abrams['android', 'spaceship', 'jedi', 'space opera'...Thirty years after defeating the Galactic Empi...Star Wars: Episode VII - The Force Awakens['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '...2015
1393852819571663202['Leonardo DiCaprio', 'Tom Hardy', 'Will Poult...Alejandro González Iñárritu['father-son relationship', 'rape', 'based on ...In the 1820s, a frontiersman, Hugh Glass, sets...The Revenant['Adventure', 'Drama']2015
1341302862173659388['Matt Damon', 'Jessica Chastain', 'Kristen Wi...Ridley Scott['based on novel', 'mars', 'nasa', 'isolation'...During a manned mission to Mars, Astronaut Mar...The Martian['Adventure', 'Drama', 'Sci-Fi']2015
\n", "
" ], "text/plain": [ " tmdbId imdbId cast \\\n", "movieId \n", "117529 135397 369610 ['Chris Pratt', 'Bryce Dallas Howard', 'Irrfan... \n", "122882 76341 1392190 ['Tom Hardy', 'Charlize Theron', 'Hugh Keays-B... \n", "122886 140607 2488496 ['Harrison Ford', 'Mark Hamill', 'Carrie Fishe... \n", "139385 281957 1663202 ['Leonardo DiCaprio', 'Tom Hardy', 'Will Poult... \n", "134130 286217 3659388 ['Matt Damon', 'Jessica Chastain', 'Kristen Wi... \n", "\n", " director \\\n", "movieId \n", "117529 Colin Trevorrow \n", "122882 George Miller \n", "122886 J.J. Abrams \n", "139385 Alejandro González Iñárritu \n", "134130 Ridley Scott \n", "\n", " keywords \\\n", "movieId \n", "117529 ['monster', 'dna', 'tyrannosaurus rex', 'veloc... \n", "122882 ['future', 'chase', 'post-apocalyptic', 'dysto... \n", "122886 ['android', 'spaceship', 'jedi', 'space opera'... \n", "139385 ['father-son relationship', 'rape', 'based on ... \n", "134130 ['based on novel', 'mars', 'nasa', 'isolation'... \n", "\n", " overview \\\n", "movieId \n", "117529 Twenty-two years after the events of Jurassic ... \n", "122882 An apocalyptic story set in the furthest reach... \n", "122886 Thirty years after defeating the Galactic Empi... \n", "139385 In the 1820s, a frontiersman, Hugh Glass, sets... \n", "134130 During a manned mission to Mars, Astronaut Mar... \n", "\n", " title \\\n", "movieId \n", "117529 Jurassic World \n", "122882 Mad Max: Fury Road \n", "122886 Star Wars: Episode VII - The Force Awakens \n", "139385 The Revenant \n", "134130 The Martian \n", "\n", " genres year \n", "movieId \n", "117529 ['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th... 2015 \n", "122882 ['Action', 'Adventure', 'Sci-Fi', 'Thriller'] 2015 \n", "122886 ['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '... 2015 \n", "139385 ['Adventure', 'Drama'] 2015 \n", "134130 ['Adventure', 'Drama', 'Sci-Fi'] 2015 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies = pd.read_csv('../data/reduced/movies_m10_rich_pre.csv', index_col='movieId')\n", "movies.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlegenreskeywords
movieId
117529Jurassic World['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...['monster', 'dna', 'tyrannosaurus rex', 'veloc...
122882Mad Max: Fury Road['Action', 'Adventure', 'Sci-Fi', 'Thriller']['future', 'chase', 'post-apocalyptic', 'dysto...
122886Star Wars: Episode VII - The Force Awakens['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '...['android', 'spaceship', 'jedi', 'space opera'...
139385The Revenant['Adventure', 'Drama']['father-son relationship', 'rape', 'based on ...
134130The Martian['Adventure', 'Drama', 'Sci-Fi']['based on novel', 'mars', 'nasa', 'isolation'...
\n", "
" ], "text/plain": [ " title \\\n", "movieId \n", "117529 Jurassic World \n", "122882 Mad Max: Fury Road \n", "122886 Star Wars: Episode VII - The Force Awakens \n", "139385 The Revenant \n", "134130 The Martian \n", "\n", " genres \\\n", "movieId \n", "117529 ['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th... \n", "122882 ['Action', 'Adventure', 'Sci-Fi', 'Thriller'] \n", "122886 ['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '... \n", "139385 ['Adventure', 'Drama'] \n", "134130 ['Adventure', 'Drama', 'Sci-Fi'] \n", "\n", " keywords \n", "movieId \n", "117529 ['monster', 'dna', 'tyrannosaurus rex', 'veloc... \n", "122882 ['future', 'chase', 'post-apocalyptic', 'dysto... \n", "122886 ['android', 'spaceship', 'jedi', 'space opera'... \n", "139385 ['father-son relationship', 'rape', 'based on ... \n", "134130 ['based on novel', 'mars', 'nasa', 'isolation'... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_redu = movies[['title', 'genres', 'keywords']]\n", "movies_redu.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdmovieIdratingtimestamptitlegenreskeywords
0114.0964982703Toy Story['Adventure', 'Animation', 'Children', 'Comedy...['jealousy', 'toy', 'boy', 'friendship', 'frie...
1514.0847434962Toy Story['Adventure', 'Animation', 'Children', 'Comedy...['jealousy', 'toy', 'boy', 'friendship', 'frie...
2714.51106635946Toy Story['Adventure', 'Animation', 'Children', 'Comedy...['jealousy', 'toy', 'boy', 'friendship', 'frie...
31512.51510577970Toy Story['Adventure', 'Animation', 'Children', 'Comedy...['jealousy', 'toy', 'boy', 'friendship', 'frie...
41714.51305696483Toy Story['Adventure', 'Animation', 'Children', 'Comedy...['jealousy', 'toy', 'boy', 'friendship', 'frie...
\n", "
" ], "text/plain": [ " userId movieId rating timestamp title \\\n", "0 1 1 4.0 964982703 Toy Story \n", "1 5 1 4.0 847434962 Toy Story \n", "2 7 1 4.5 1106635946 Toy Story \n", "3 15 1 2.5 1510577970 Toy Story \n", "4 17 1 4.5 1305696483 Toy Story \n", "\n", " genres \\\n", "0 ['Adventure', 'Animation', 'Children', 'Comedy... \n", "1 ['Adventure', 'Animation', 'Children', 'Comedy... \n", "2 ['Adventure', 'Animation', 'Children', 'Comedy... \n", "3 ['Adventure', 'Animation', 'Children', 'Comedy... \n", "4 ['Adventure', 'Animation', 'Children', 'Comedy... \n", "\n", " keywords \n", "0 ['jealousy', 'toy', 'boy', 'friendship', 'frie... \n", "1 ['jealousy', 'toy', 'boy', 'friendship', 'frie... \n", "2 ['jealousy', 'toy', 'boy', 'friendship', 'frie... \n", "3 ['jealousy', 'toy', 'boy', 'friendship', 'frie... \n", "4 ['jealousy', 'toy', 'boy', 'friendship', 'frie... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_redu_ratings = ratings.merge(movies_redu, on='movieId')\n", "movies_redu_ratings.head(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdmovieIdratingtimestamp
count75043.00000075043.00000075043.0000007.504300e+04
mean318.73315314432.3153133.5494581.195556e+09
std181.67597528044.9684711.0199712.170623e+08
min1.0000001.0000000.5000008.281246e+08
25%167.0000001028.0000003.0000009.978116e+08
50%314.0000002502.0000003.5000001.180013e+09
75%474.0000006003.0000004.0000001.430507e+09
max610.000000148626.0000005.0000001.537799e+09
\n", "
" ], "text/plain": [ " userId movieId rating timestamp\n", "count 75043.000000 75043.000000 75043.000000 7.504300e+04\n", "mean 318.733153 14432.315313 3.549458 1.195556e+09\n", "std 181.675975 28044.968471 1.019971 2.170623e+08\n", "min 1.000000 1.000000 0.500000 8.281246e+08\n", "25% 167.000000 1028.000000 3.000000 9.978116e+08\n", "50% 314.000000 2502.000000 3.500000 1.180013e+09\n", "75% 474.000000 6003.000000 4.000000 1.430507e+09\n", "max 610.000000 148626.000000 5.000000 1.537799e+09" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_redu_ratings.describe()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 75043 entries, 0 to 75042\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 userId 75043 non-null int64 \n", " 1 movieId 75043 non-null int64 \n", " 2 rating 75043 non-null float64\n", " 3 timestamp 75043 non-null int64 \n", " 4 title 75043 non-null object \n", " 5 genres 75043 non-null object \n", " 6 keywords 75043 non-null object \n", "dtypes: float64(1), int64(3), object(3)\n", "memory usage: 4.0+ MB\n" ] } ], "source": [ "movies_redu_ratings.info()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movieId1235679101112...134853135133135887136020138036139385139644140110142488148626
userId
14.00.04.00.04.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
54.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
..................................................................
6062.50.00.00.00.02.50.00.02.50.0...0.00.00.00.00.00.00.00.00.00.0
6074.00.00.00.00.00.00.00.03.00.0...0.00.00.00.00.00.00.00.00.00.0
6082.52.02.00.00.00.00.04.00.00.0...0.00.00.00.00.00.00.00.00.00.0
6093.00.00.00.00.00.00.04.00.00.0...0.00.00.00.00.00.00.00.00.00.0
6105.00.00.00.05.00.00.00.00.00.0...3.54.00.03.53.54.54.50.03.54.0
\n", "

610 rows × 2026 columns

\n", "
" ], "text/plain": [ "movieId 1 2 3 5 6 7 9 10 \\\n", "userId \n", "1 4.0 0.0 4.0 0.0 4.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "5 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... \n", "606 2.5 0.0 0.0 0.0 0.0 2.5 0.0 0.0 \n", "607 4.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "608 2.5 2.0 2.0 0.0 0.0 0.0 0.0 4.0 \n", "609 3.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 \n", "610 5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 \n", "\n", "movieId 11 12 ... 134853 135133 135887 136020 138036 139385 \\\n", "userId ... \n", "1 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "5 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... ... ... ... ... ... \n", "606 2.5 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "607 3.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "608 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "609 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 \n", "610 0.0 0.0 ... 3.5 4.0 0.0 3.5 3.5 4.5 \n", "\n", "movieId 139644 140110 142488 148626 \n", "userId \n", "1 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 \n", "5 0.0 0.0 0.0 0.0 \n", "... ... ... ... ... \n", "606 0.0 0.0 0.0 0.0 \n", "607 0.0 0.0 0.0 0.0 \n", "608 0.0 0.0 0.0 0.0 \n", "609 0.0 0.0 0.0 0.0 \n", "610 4.5 0.0 3.5 4.0 \n", "\n", "[610 rows x 2026 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_movie_mat = movies_redu_ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)\n", "user_movie_mat" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 2026 entries, 117529 to 6305\n", "Series name: title\n", "Non-Null Count Dtype \n", "-------------- ----- \n", "2026 non-null object\n", "dtypes: object(1)\n", "memory usage: 31.7+ KB\n" ] } ], "source": [ "movies['title'].info()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "610" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings['userId'].nunique()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=10)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=10)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import NearestNeighbors\n", "\n", "knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10, n_jobs=-1)\n", "\n", "# Fitting the model on our matrix\n", "knn.fit(user_movie_mat)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\felma\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\fuzzywuzzy\\fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n", " warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n" ] } ], "source": [ "from fuzzywuzzy import process" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "def movie_recommender_engine(movie_name, matrix, cf_model, n_recs):\n", " # Fit model on matrix\n", " knn.fit(matrix.T)\n", " \n", " # Extract input movie ID\n", " movie_id = process.extractOne(movie_name, movies_redu['title'])[2]\n", "\n", " knn_input = np.array(matrix[movie_id]).reshape(1, -1)\n", " #print(len(knn_input[0]))\n", " \n", " # Calculate neighbour distances\n", " distances, indices = knn.kneighbors(knn_input, n_neighbors=n_recs)\n", " movie_rec_ids = sorted(list(indices.squeeze().tolist()))\n", " movie_rec_ids = movie_rec_ids\n", " print(movie_rec_ids)\n", " # List to store recommendations\n", " cf_recs = []\n", " for i in movie_rec_ids:\n", " cf_recs.append({'Title':movies_redu.iloc[i]['title']})\n", " \n", " # Select top number of recommendations needed\n", " df = pd.DataFrame(cf_recs, index = range(1,n_recs+1))\n", " \n", " return df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0, 536, 569, 759, 879, 971, 982, 1122, 1211, 1271]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Title
1Jurassic World
2Little Women
3Amazing Spider-Man, The
4Ender's Game
5Borat: Cultural Learnings of America for Make ...
6Secret Window
7Hotel Rwanda
8Revenge of the Nerds
9First Knight
10White Men Can't Jump
\n", "
" ], "text/plain": [ " Title\n", "1 Jurassic World \n", "2 Little Women \n", "3 Amazing Spider-Man, The \n", "4 Ender's Game \n", "5 Borat: Cultural Learnings of America for Make ...\n", "6 Secret Window \n", "7 Hotel Rwanda \n", "8 Revenge of the Nerds \n", "9 First Knight \n", "10 White Men Can't Jump " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movie_recommender_engine('toy story 2', user_movie_mat, knn, 10)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }