{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | userId | \n", "movieId | \n", "rating | \n", "timestamp | \n", "
---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "4.0 | \n", "964982703 | \n", "
1 | \n", "1 | \n", "3 | \n", "4.0 | \n", "964981247 | \n", "
2 | \n", "1 | \n", "6 | \n", "4.0 | \n", "964982224 | \n", "
3 | \n", "1 | \n", "47 | \n", "5.0 | \n", "964983815 | \n", "
4 | \n", "1 | \n", "50 | \n", "5.0 | \n", "964982931 | \n", "
\n", " | userId | \n", "movieId | \n", "rating | \n", "timestamp | \n", "
---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "4.0 | \n", "964982703 | \n", "
1 | \n", "1 | \n", "3 | \n", "4.0 | \n", "964981247 | \n", "
2 | \n", "1 | \n", "6 | \n", "4.0 | \n", "964982224 | \n", "
3 | \n", "1 | \n", "47 | \n", "5.0 | \n", "964983815 | \n", "
4 | \n", "1 | \n", "50 | \n", "5.0 | \n", "964982931 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
81111 | \n", "610 | \n", "159093 | \n", "3.0 | \n", "1493847704 | \n", "
81112 | \n", "610 | \n", "164179 | \n", "5.0 | \n", "1493845631 | \n", "
81113 | \n", "610 | \n", "166528 | \n", "4.0 | \n", "1493879365 | \n", "
81114 | \n", "610 | \n", "168250 | \n", "5.0 | \n", "1494273047 | \n", "
81115 | \n", "610 | \n", "168252 | \n", "5.0 | \n", "1493846352 | \n", "
81116 rows × 4 columns
\n", "\n", " | tmdbId | \n", "imdbId | \n", "cast | \n", "director | \n", "keywords | \n", "overview | \n", "title | \n", "genres | \n", "year | \n", "
---|---|---|---|---|---|---|---|---|---|
movieId | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
117529 | \n", "135397 | \n", "369610 | \n", "['Chris Pratt', 'Bryce Dallas Howard', 'Irrfan... | \n", "Colin Trevorrow | \n", "['monster', 'dna', 'tyrannosaurus rex', 'veloc... | \n", "Twenty-two years after the events of Jurassic ... | \n", "Jurassic World | \n", "['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th... | \n", "2015 | \n", "
122882 | \n", "76341 | \n", "1392190 | \n", "['Tom Hardy', 'Charlize Theron', 'Hugh Keays-B... | \n", "George Miller | \n", "['future', 'chase', 'post-apocalyptic', 'dysto... | \n", "An apocalyptic story set in the furthest reach... | \n", "Mad Max: Fury Road | \n", "['Action', 'Adventure', 'Sci-Fi', 'Thriller'] | \n", "2015 | \n", "
122886 | \n", "140607 | \n", "2488496 | \n", "['Harrison Ford', 'Mark Hamill', 'Carrie Fishe... | \n", "J.J. Abrams | \n", "['android', 'spaceship', 'jedi', 'space opera'... | \n", "Thirty years after defeating the Galactic Empi... | \n", "Star Wars: Episode VII - The Force Awakens | \n", "['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '... | \n", "2015 | \n", "
139385 | \n", "281957 | \n", "1663202 | \n", "['Leonardo DiCaprio', 'Tom Hardy', 'Will Poult... | \n", "Alejandro González Iñárritu | \n", "['father-son relationship', 'rape', 'based on ... | \n", "In the 1820s, a frontiersman, Hugh Glass, sets... | \n", "The Revenant | \n", "['Adventure', 'Drama'] | \n", "2015 | \n", "
134130 | \n", "286217 | \n", "3659388 | \n", "['Matt Damon', 'Jessica Chastain', 'Kristen Wi... | \n", "Ridley Scott | \n", "['based on novel', 'mars', 'nasa', 'isolation'... | \n", "During a manned mission to Mars, Astronaut Mar... | \n", "The Martian | \n", "['Adventure', 'Drama', 'Sci-Fi'] | \n", "2015 | \n", "
\n", " | title | \n", "genres | \n", "keywords | \n", "
---|---|---|---|
movieId | \n", "\n", " | \n", " | \n", " |
117529 | \n", "Jurassic World | \n", "['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th... | \n", "['monster', 'dna', 'tyrannosaurus rex', 'veloc... | \n", "
122882 | \n", "Mad Max: Fury Road | \n", "['Action', 'Adventure', 'Sci-Fi', 'Thriller'] | \n", "['future', 'chase', 'post-apocalyptic', 'dysto... | \n", "
122886 | \n", "Star Wars: Episode VII - The Force Awakens | \n", "['Action', 'Adventure', 'Fantasy', 'Sci-Fi', '... | \n", "['android', 'spaceship', 'jedi', 'space opera'... | \n", "
139385 | \n", "The Revenant | \n", "['Adventure', 'Drama'] | \n", "['father-son relationship', 'rape', 'based on ... | \n", "
134130 | \n", "The Martian | \n", "['Adventure', 'Drama', 'Sci-Fi'] | \n", "['based on novel', 'mars', 'nasa', 'isolation'... | \n", "
\n", " | userId | \n", "movieId | \n", "rating | \n", "timestamp | \n", "title | \n", "genres | \n", "keywords | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "4.0 | \n", "964982703 | \n", "Toy Story | \n", "['Adventure', 'Animation', 'Children', 'Comedy... | \n", "['jealousy', 'toy', 'boy', 'friendship', 'frie... | \n", "
1 | \n", "5 | \n", "1 | \n", "4.0 | \n", "847434962 | \n", "Toy Story | \n", "['Adventure', 'Animation', 'Children', 'Comedy... | \n", "['jealousy', 'toy', 'boy', 'friendship', 'frie... | \n", "
2 | \n", "7 | \n", "1 | \n", "4.5 | \n", "1106635946 | \n", "Toy Story | \n", "['Adventure', 'Animation', 'Children', 'Comedy... | \n", "['jealousy', 'toy', 'boy', 'friendship', 'frie... | \n", "
3 | \n", "15 | \n", "1 | \n", "2.5 | \n", "1510577970 | \n", "Toy Story | \n", "['Adventure', 'Animation', 'Children', 'Comedy... | \n", "['jealousy', 'toy', 'boy', 'friendship', 'frie... | \n", "
4 | \n", "17 | \n", "1 | \n", "4.5 | \n", "1305696483 | \n", "Toy Story | \n", "['Adventure', 'Animation', 'Children', 'Comedy... | \n", "['jealousy', 'toy', 'boy', 'friendship', 'frie... | \n", "
\n", " | userId | \n", "movieId | \n", "rating | \n", "timestamp | \n", "
---|---|---|---|---|
count | \n", "75043.000000 | \n", "75043.000000 | \n", "75043.000000 | \n", "7.504300e+04 | \n", "
mean | \n", "318.733153 | \n", "14432.315313 | \n", "3.549458 | \n", "1.195556e+09 | \n", "
std | \n", "181.675975 | \n", "28044.968471 | \n", "1.019971 | \n", "2.170623e+08 | \n", "
min | \n", "1.000000 | \n", "1.000000 | \n", "0.500000 | \n", "8.281246e+08 | \n", "
25% | \n", "167.000000 | \n", "1028.000000 | \n", "3.000000 | \n", "9.978116e+08 | \n", "
50% | \n", "314.000000 | \n", "2502.000000 | \n", "3.500000 | \n", "1.180013e+09 | \n", "
75% | \n", "474.000000 | \n", "6003.000000 | \n", "4.000000 | \n", "1.430507e+09 | \n", "
max | \n", "610.000000 | \n", "148626.000000 | \n", "5.000000 | \n", "1.537799e+09 | \n", "
movieId | \n", "1 | \n", "2 | \n", "3 | \n", "5 | \n", "6 | \n", "7 | \n", "9 | \n", "10 | \n", "11 | \n", "12 | \n", "... | \n", "134853 | \n", "135133 | \n", "135887 | \n", "136020 | \n", "138036 | \n", "139385 | \n", "139644 | \n", "140110 | \n", "142488 | \n", "148626 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
userId | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1 | \n", "4.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
5 | \n", "4.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
606 | \n", "2.5 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "2.5 | \n", "0.0 | \n", "0.0 | \n", "2.5 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
607 | \n", "4.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "3.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
608 | \n", "2.5 | \n", "2.0 | \n", "2.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
609 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
610 | \n", "5.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "5.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "3.5 | \n", "4.0 | \n", "0.0 | \n", "3.5 | \n", "3.5 | \n", "4.5 | \n", "4.5 | \n", "0.0 | \n", "3.5 | \n", "4.0 | \n", "
610 rows × 2026 columns
\n", "NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=10)
\n", " | Title | \n", "
---|---|
1 | \n", "Jurassic World | \n", "
2 | \n", "Little Women | \n", "
3 | \n", "Amazing Spider-Man, The | \n", "
4 | \n", "Ender's Game | \n", "
5 | \n", "Borat: Cultural Learnings of America for Make ... | \n", "
6 | \n", "Secret Window | \n", "
7 | \n", "Hotel Rwanda | \n", "
8 | \n", "Revenge of the Nerds | \n", "
9 | \n", "First Knight | \n", "
10 | \n", "White Men Can't Jump | \n", "