{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Movie Recommendor Sysytem" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import ast\n", "import nltk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "movies=pd.read_csv(\"C://Users//ARYA SHARMA//Downloads//archive//tmdb_5000_movies.csv\")\n", "credits=pd.read_csv(\"C://Users//ARYA SHARMA//Downloads//archive//tmdb_5000_credits.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companiesproduction_countriesrelease_daterevenueruntimespoken_languagesstatustaglinetitlevote_averagevote_count
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...enAvatarIn the 22nd century, a paraplegic Marine is di...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289...[{\"iso_3166_1\": \"US\", \"name\": \"United States o...2009-12-102787965087162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...ReleasedEnter the World of Pandora.Avatar7.211800
1300000000[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...http://disney.go.com/disneypictures/pirates/285[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...enPirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...139.082615[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...[{\"iso_3166_1\": \"US\", \"name\": \"United States o...2007-05-19961000000169.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}]ReleasedAt the end of the world, the adventure begins.Pirates of the Caribbean: At World's End6.94500
2245000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.sonypictures.com/movies/spectre/206647[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...enSpectreA cryptic message from Bond’s past sends him o...107.376788[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...2015-10-26880674609148.0[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...ReleasedA Plan No One EscapesSpectre6.34466
3250000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...http://www.thedarkknightrises.com/49026[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...enThe Dark Knight RisesFollowing the death of District Attorney Harve...112.312950[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...[{\"iso_3166_1\": \"US\", \"name\": \"United States o...2012-07-161084939099165.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}]ReleasedThe Legend EndsThe Dark Knight Rises7.69106
4260000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://movies.disney.com/john-carter49529[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...enJohn CarterJohn Carter is a war-weary, former military ca...43.926995[{\"name\": \"Walt Disney Pictures\", \"id\": 2}][{\"iso_3166_1\": \"US\", \"name\": \"United States o...2012-03-07284139100132.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}]ReleasedLost in our world, found in another.John Carter6.12124
\n", "
" ], "text/plain": [ " budget genres \\\n", "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "1 300000000 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n", "2 245000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "3 250000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n", "4 260000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "\n", " homepage id \\\n", "0 http://www.avatarmovie.com/ 19995 \n", "1 http://disney.go.com/disneypictures/pirates/ 285 \n", "2 http://www.sonypictures.com/movies/spectre/ 206647 \n", "3 http://www.thedarkknightrises.com/ 49026 \n", "4 http://movies.disney.com/john-carter 49529 \n", "\n", " keywords original_language \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n", "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... en \n", "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... en \n", "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... en \n", "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... en \n", "\n", " original_title \\\n", "0 Avatar \n", "1 Pirates of the Caribbean: At World's End \n", "2 Spectre \n", "3 The Dark Knight Rises \n", "4 John Carter \n", "\n", " overview popularity \\\n", "0 In the 22nd century, a paraplegic Marine is di... 150.437577 \n", "1 Captain Barbossa, long believed to be dead, ha... 139.082615 \n", "2 A cryptic message from Bond’s past sends him o... 107.376788 \n", "3 Following the death of District Attorney Harve... 112.312950 \n", "4 John Carter is a war-weary, former military ca... 43.926995 \n", "\n", " production_companies \\\n", "0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n", "1 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"... \n", "2 [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam... \n", "3 [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"... 
\n", "4 [{\"name\": \"Walt Disney Pictures\", \"id\": 2}] \n", "\n", " production_countries release_date revenue \\\n", "0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n", "1 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2007-05-19 961000000 \n", "2 [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"... 2015-10-26 880674609 \n", "3 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2012-07-16 1084939099 \n", "4 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2012-03-07 284139100 \n", "\n", " runtime spoken_languages status \\\n", "0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n", "1 169.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n", "2 148.0 [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},... Released \n", "3 165.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n", "4 132.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}] Released \n", "\n", " tagline \\\n", "0 Enter the World of Pandora. \n", "1 At the end of the world, the adventure begins. \n", "2 A Plan No One Escapes \n", "3 The Legend Ends \n", "4 Lost in our world, found in another. 
\n", "\n", " title vote_average vote_count \n", "0 Avatar 7.2 11800 \n", "1 Pirates of the Caribbean: At World's End 6.9 4500 \n", "2 Spectre 6.3 4466 \n", "3 The Dark Knight Rises 7.6 9106 \n", "4 John Carter 6.1 2124 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4803, 20)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4803, 4)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "credits.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "movies=movies.merge(credits,on='title')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4809, 23)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.shape" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 4809 entries, 0 to 4808\n", "Data columns (total 23 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 budget 4809 non-null int64 \n", " 1 genres 4809 non-null object \n", " 2 homepage 1713 non-null object \n", " 3 id 4809 non-null int64 \n", " 4 keywords 4809 non-null object \n", " 5 original_language 4809 non-null object \n", " 6 original_title 4809 non-null object \n", " 7 overview 4806 non-null object \n", " 8 popularity 4809 non-null float64\n", " 9 production_companies 4809 non-null object \n", " 10 production_countries 4809 non-null object \n", " 11 release_date 4808 non-null object \n", " 12 revenue 4809 non-null int64 \n", " 13 runtime 4807 
# Discard every column that is not used to build the content tags. What
# remains is: genres, keywords, overview, title, movie_id, cast, crew.
movies = movies.drop(
    columns=[
        'budget',
        'homepage',
        'original_language',
        'original_title',
        'popularity',
        'production_countries',
        'release_date',
        'tagline',
        'vote_count',
        'production_companies',
        'id',
        'revenue',
        'runtime',
        'spoken_languages',
        'status',
        'vote_average',
    ]
)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrew
0[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...In the 22nd century, a paraplegic Marine is di...Avatar19995[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...Captain Barbossa, long believed to be dead, ha...Pirates of the Caribbean: At World's End285[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...A cryptic message from Bond’s past sends him o...Spectre206647[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
3[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...Following the death of District Attorney Harve...The Dark Knight Rises49026[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
4[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...John Carter is a war-weary, former military ca...John Carter49529[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", "
" ], "text/plain": [ " genres \\\n", "0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n", "2 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "3 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n", "4 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", "\n", " keywords \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n", "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n", "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n", "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n", "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n", "\n", " overview \\\n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast \\\n", "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "genres 0\n", "keywords 0\n", "overview 3\n", "title 0\n", "movie_id 0\n", "cast 0\n", "crew 0\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "movies.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "genres 0\n", "keywords 0\n", "overview 0\n", "title 0\n", "movie_id 0\n", "cast 0\n", "crew 0\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.iloc[0].genres" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "def convert(obj):\n", " L=[]\n", " for i in ast.literal_eval(obj):\n", " L.append(i['name'])\n", " return L \n", " " ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "movies['genres']=movies['genres'].apply(convert)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrew
0[Action, Adventure, Fantasy, Science Fiction][{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...In the 22nd century, a paraplegic Marine is di...Avatar19995[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1[Adventure, Fantasy, Action][{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...Captain Barbossa, long believed to be dead, ha...Pirates of the Caribbean: At World's End285[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2[Action, Adventure, Crime][{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...A cryptic message from Bond’s past sends him o...Spectre206647[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
3[Action, Crime, Drama, Thriller][{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...Following the death of District Attorney Harve...The Dark Knight Rises49026[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
4[Action, Adventure, Science Fiction][{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...John Carter is a war-weary, former military ca...John Carter49529[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", "
" ], "text/plain": [ " genres \\\n", "0 [Action, Adventure, Fantasy, Science Fiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, Science Fiction] \n", "\n", " keywords \\\n", "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n", "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n", "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n", "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n", "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n", "\n", " overview \\\n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast \\\n", "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "movies['keywords']=movies['keywords'].apply(convert)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "def convert3(obj):\n", " L=[]\n", " counter=0\n", " for i in ast.literal_eval(obj):\n", " if counter!=3:\n", " L.append(i['name'])\n", " counter+=1\n", " else:\n", " break\n", " \n", " return L \n", " " ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "movies['cast']=movies['cast'].apply(convert3)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrew
0[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...In the 22nd century, a paraplegic Marine is di...Avatar19995[Sam Worthington, Zoe Saldana, Sigourney Weaver][{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...Captain Barbossa, long believed to be dead, ha...Pirates of the Caribbean: At World's End285[Johnny Depp, Orlando Bloom, Keira Knightley][{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...A cryptic message from Bond’s past sends him o...Spectre206647[Daniel Craig, Christoph Waltz, Léa Seydoux][{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
3[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...Following the death of District Attorney Harve...The Dark Knight Rises49026[Christian Bale, Michael Caine, Gary Oldman][{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
4[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...John Carter is a war-weary, former military ca...John Carter49529[Taylor Kitsch, Lynn Collins, Samantha Morton][{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", "
" ], "text/plain": [ " genres \\\n", "0 [Action, Adventure, Fantasy, Science Fiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, Science Fiction] \n", "\n", " keywords \\\n", "0 [culture clash, future, space war, space colon... \n", "1 [ocean, drug abuse, exotic island, east india ... \n", "2 [spy, based on novel, secret agent, sequel, mi... \n", "3 [dc comics, crime fighter, terrorist, secret i... \n", "4 [based on novel, mars, medallion, space travel... \n", "\n", " overview \\\n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast \\\n", "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] \n", "1 [Johnny Depp, Orlando Bloom, Keira Knightley] \n", "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] \n", "3 [Christian Bale, Michael Caine, Gary Oldman] \n", "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] \n", "\n", " crew \n", "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "def fetch_director(obj):\n", " L=[]\n", " for i in ast.literal_eval(obj):\n", " if i['job']=='Director':\n", " L.append(i['name'])\n", " break\n", " return L" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "movies['crew']=movies['crew'].apply(fetch_director)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrew
0[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...In the 22nd century, a paraplegic Marine is di...Avatar19995[Sam Worthington, Zoe Saldana, Sigourney Weaver][James Cameron]
1[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...Captain Barbossa, long believed to be dead, ha...Pirates of the Caribbean: At World's End285[Johnny Depp, Orlando Bloom, Keira Knightley][Gore Verbinski]
2[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...A cryptic message from Bond’s past sends him o...Spectre206647[Daniel Craig, Christoph Waltz, Léa Seydoux][Sam Mendes]
3[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...Following the death of District Attorney Harve...The Dark Knight Rises49026[Christian Bale, Michael Caine, Gary Oldman][Christopher Nolan]
4[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...John Carter is a war-weary, former military ca...John Carter49529[Taylor Kitsch, Lynn Collins, Samantha Morton][Andrew Stanton]
\n", "
" ], "text/plain": [ " genres \\\n", "0 [Action, Adventure, Fantasy, Science Fiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, Science Fiction] \n", "\n", " keywords \\\n", "0 [culture clash, future, space war, space colon... \n", "1 [ocean, drug abuse, exotic island, east india ... \n", "2 [spy, based on novel, secret agent, sequel, mi... \n", "3 [dc comics, crime fighter, terrorist, secret i... \n", "4 [based on novel, mars, medallion, space travel... \n", "\n", " overview \\\n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast crew \n", "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] [James Cameron] \n", "1 [Johnny Depp, Orlando Bloom, Keira Knightley] [Gore Verbinski] \n", "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] [Sam Mendes] \n", "3 [Christian Bale, Michael Caine, Gary Oldman] [Christopher Nolan] \n", "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] [Andrew Stanton] " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "movies['overview']=movies['overview'].apply(lambda x:x.split())" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "movies['genre']=movies['genres'].apply(lambda x:[i.replace(\" \",\"\") for i in x] )\n", "movies['keywords']=movies['keywords'].apply(lambda x:[i.replace(\" \",\"\") for i in x] )\n", 
"movies['cast']=movies['cast'].apply(lambda x:[i.replace(\" \",\"\") for i in x] )\n", "movies['crew']=movies['crew'].apply(lambda x:[i.replace(\" \",\"\") for i in x] )" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrewgenre
0[Action, Adventure, Fantasy, Science Fiction][cultureclash, future, spacewar, spacecolony, ...[In, the, 22nd, century,, a, paraplegic, Marin...Avatar19995[SamWorthington, ZoeSaldana, SigourneyWeaver][JamesCameron][Action, Adventure, Fantasy, ScienceFiction]
1[Adventure, Fantasy, Action][ocean, drugabuse, exoticisland, eastindiatrad...[Captain, Barbossa,, long, believed, to, be, d...Pirates of the Caribbean: At World's End285[JohnnyDepp, OrlandoBloom, KeiraKnightley][GoreVerbinski][Adventure, Fantasy, Action]
2[Action, Adventure, Crime][spy, basedonnovel, secretagent, sequel, mi6, ...[A, cryptic, message, from, Bond’s, past, send...Spectre206647[DanielCraig, ChristophWaltz, LéaSeydoux][SamMendes][Action, Adventure, Crime]
3[Action, Crime, Drama, Thriller][dccomics, crimefighter, terrorist, secretiden...[Following, the, death, of, District, Attorney...The Dark Knight Rises49026[ChristianBale, MichaelCaine, GaryOldman][ChristopherNolan][Action, Crime, Drama, Thriller]
4[Action, Adventure, Science Fiction][basedonnovel, mars, medallion, spacetravel, p...[John, Carter, is, a, war-weary,, former, mili...John Carter49529[TaylorKitsch, LynnCollins, SamanthaMorton][AndrewStanton][Action, Adventure, ScienceFiction]
\n", "
" ], "text/plain": [ " genres \\\n", "0 [Action, Adventure, Fantasy, Science Fiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, Science Fiction] \n", "\n", " keywords \\\n", "0 [cultureclash, future, spacewar, spacecolony, ... \n", "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n", "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n", "3 [dccomics, crimefighter, terrorist, secretiden... \n", "4 [basedonnovel, mars, medallion, spacetravel, p... \n", "\n", " overview \\\n", "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", "1 [Captain, Barbossa,, long, believed, to, be, d... \n", "2 [A, cryptic, message, from, Bond’s, past, send... \n", "3 [Following, the, death, of, District, Attorney... \n", "4 [John, Carter, is, a, war-weary,, former, mili... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast crew \\\n", "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n", "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n", "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n", "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n", "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] \n", "\n", " genre \n", "0 [Action, Adventure, Fantasy, ScienceFiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, ScienceFiction] " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "movies['tags']=movies['overview']+movies['genres']+movies['keywords']+movies['cast']+movies['crew']" ] }, { "cell_type": "code", 
"execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genreskeywordsoverviewtitlemovie_idcastcrewgenretags
0[Action, Adventure, Fantasy, Science Fiction][cultureclash, future, spacewar, spacecolony, ...[In, the, 22nd, century,, a, paraplegic, Marin...Avatar19995[SamWorthington, ZoeSaldana, SigourneyWeaver][JamesCameron][Action, Adventure, Fantasy, ScienceFiction][In, the, 22nd, century,, a, paraplegic, Marin...
1[Adventure, Fantasy, Action][ocean, drugabuse, exoticisland, eastindiatrad...[Captain, Barbossa,, long, believed, to, be, d...Pirates of the Caribbean: At World's End285[JohnnyDepp, OrlandoBloom, KeiraKnightley][GoreVerbinski][Adventure, Fantasy, Action][Captain, Barbossa,, long, believed, to, be, d...
2[Action, Adventure, Crime][spy, basedonnovel, secretagent, sequel, mi6, ...[A, cryptic, message, from, Bond’s, past, send...Spectre206647[DanielCraig, ChristophWaltz, LéaSeydoux][SamMendes][Action, Adventure, Crime][A, cryptic, message, from, Bond’s, past, send...
3[Action, Crime, Drama, Thriller][dccomics, crimefighter, terrorist, secretiden...[Following, the, death, of, District, Attorney...The Dark Knight Rises49026[ChristianBale, MichaelCaine, GaryOldman][ChristopherNolan][Action, Crime, Drama, Thriller][Following, the, death, of, District, Attorney...
4[Action, Adventure, Science Fiction][basedonnovel, mars, medallion, spacetravel, p...[John, Carter, is, a, war-weary,, former, mili...John Carter49529[TaylorKitsch, LynnCollins, SamanthaMorton][AndrewStanton][Action, Adventure, ScienceFiction][John, Carter, is, a, war-weary,, former, mili...
\n", "
" ], "text/plain": [ " genres \\\n", "0 [Action, Adventure, Fantasy, Science Fiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, Science Fiction] \n", "\n", " keywords \\\n", "0 [cultureclash, future, spacewar, spacecolony, ... \n", "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n", "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n", "3 [dccomics, crimefighter, terrorist, secretiden... \n", "4 [basedonnovel, mars, medallion, spacetravel, p... \n", "\n", " overview \\\n", "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", "1 [Captain, Barbossa,, long, believed, to, be, d... \n", "2 [A, cryptic, message, from, Bond’s, past, send... \n", "3 [Following, the, death, of, District, Attorney... \n", "4 [John, Carter, is, a, war-weary,, former, mili... \n", "\n", " title movie_id \\\n", "0 Avatar 19995 \n", "1 Pirates of the Caribbean: At World's End 285 \n", "2 Spectre 206647 \n", "3 The Dark Knight Rises 49026 \n", "4 John Carter 49529 \n", "\n", " cast crew \\\n", "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n", "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n", "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n", "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n", "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] \n", "\n", " genre \\\n", "0 [Action, Adventure, Fantasy, ScienceFiction] \n", "1 [Adventure, Fantasy, Action] \n", "2 [Action, Adventure, Crime] \n", "3 [Action, Crime, Drama, Thriller] \n", "4 [Action, Adventure, ScienceFiction] \n", "\n", " tags \n", "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", "1 [Captain, Barbossa,, long, believed, to, be, d... \n", "2 [A, cryptic, message, from, Bond’s, past, send... \n", "3 [Following, the, death, of, District, Attorney... \n", "4 [John, Carter, is, a, war-weary,, former, mili... 
" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.head()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "new_df=movies[['movie_id','title','tags']]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitletags
019995Avatar[In, the, 22nd, century,, a, paraplegic, Marin...
1285Pirates of the Caribbean: At World's End[Captain, Barbossa,, long, believed, to, be, d...
2206647Spectre[A, cryptic, message, from, Bond’s, past, send...
349026The Dark Knight Rises[Following, the, death, of, District, Attorney...
449529John Carter[John, Carter, is, a, war-weary,, former, mili...
............
48049367El Mariachi[El, Mariachi, just, wants, to, play, his, gui...
480572766Newlyweds[A, newlywed, couple's, honeymoon, is, upended...
4806231617Signed, Sealed, Delivered[\"Signed,, Sealed,, Delivered\", introduces, a,...
4807126186Shanghai Calling[When, ambitious, New, York, attorney, Sam, is...
480825975My Date with Drew[Ever, since, the, second, grade, when, he, fi...
\n", "

4806 rows × 3 columns

\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "... ... ... \n", "4804 9367 El Mariachi \n", "4805 72766 Newlyweds \n", "4806 231617 Signed, Sealed, Delivered \n", "4807 126186 Shanghai Calling \n", "4808 25975 My Date with Drew \n", "\n", " tags \n", "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", "1 [Captain, Barbossa,, long, believed, to, be, d... \n", "2 [A, cryptic, message, from, Bond’s, past, send... \n", "3 [Following, the, death, of, District, Attorney... \n", "4 [John, Carter, is, a, war-weary,, former, mili... \n", "... ... \n", "4804 [El, Mariachi, just, wants, to, play, his, gui... \n", "4805 [A, newlywed, couple's, honeymoon, is, upended... \n", "4806 [\"Signed,, Sealed,, Delivered\", introduces, a,... \n", "4807 [When, ambitious, New, York, attorney, Sam, is... \n", "4808 [Ever, since, the, second, grade, when, he, fi... \n", "\n", "[4806 rows x 3 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ARYA SHARMA\\AppData\\Local\\Temp\\ipykernel_19872\\487797088.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " new_df['tags']=new_df['tags'].apply(lambda x:\" \".join(x))\n" ] } ], "source": [ "new_df['tags']=new_df['tags'].apply(lambda x:\" \".join(x))" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitletags
019995AvatarIn the 22nd century, a paraplegic Marine is di...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...
2206647SpectreA cryptic message from Bond’s past sends him o...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...
449529John CarterJohn Carter is a war-weary, former military ca...
\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "\n", " tags \n", "0 In the 22nd century, a paraplegic Marine is di... \n", "1 Captain Barbossa, long believed to be dead, ha... \n", "2 A cryptic message from Bond’s past sends him o... \n", "3 Following the death of District Attorney Harve... \n", "4 John Carter is a war-weary, former military ca... " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.head()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Action Adventure Fantasy Science Fiction cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d SamWorthington ZoeSaldana SigourneyWeaver JamesCameron'" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df['tags'][0]" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ARYA SHARMA\\AppData\\Local\\Temp\\ipykernel_19872\\3923394162.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " new_df['tags']=new_df['tags'].apply(lambda x : x.lower())\n" ] } ], "source": [ "new_df['tags']=new_df['tags'].apply(lambda x : x.lower())" ] }, { "cell_type": "code", 
"execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitletags
019995Avatarin the 22nd century, a paraplegic marine is di...
1285Pirates of the Caribbean: At World's Endcaptain barbossa, long believed to be dead, ha...
2206647Spectrea cryptic message from bond’s past sends him o...
349026The Dark Knight Risesfollowing the death of district attorney harve...
449529John Carterjohn carter is a war-weary, former military ca...
\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "\n", " tags \n", "0 in the 22nd century, a paraplegic marine is di... \n", "1 captain barbossa, long believed to be dead, ha... \n", "2 a cryptic message from bond’s past sends him o... \n", "3 following the death of district attorney harve... \n", "4 john carter is a war-weary, former military ca... " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.head()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "from nltk.stem.porter import PorterStemmer\n", "ps=PorterStemmer()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "def stem(text):\n", " y=[]\n", " for i in text.split():\n", " y.append(ps.stem(i))\n", " return \" \".join(y) " ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ARYA SHARMA\\AppData\\Local\\Temp\\ipykernel_19872\\3514595201.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " new_df['tags']=new_df['tags'].apply(stem)\n" ] } ], "source": [ "new_df['tags']=new_df['tags'].apply(stem)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "from sklearn.feature_extraction.text import CountVectorizer\n", "cv=CountVectorizer(max_features=7000,stop_words='english')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "vectors= cv.fit_transform(new_df['tags']).toarray()" ] }, 
{ "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ARYA SHARMA\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\utils\\deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.\n", " warnings.warn(msg, category=FutureWarning)\n" ] }, { "data": { "text/plain": [ "['000',\n", " '007',\n", " '10',\n", " '100',\n", " '10th',\n", " '11',\n", " '12',\n", " '12th',\n", " '13',\n", " '14',\n", " '15',\n", " '150',\n", " '16',\n", " '16th',\n", " '17',\n", " '17th',\n", " '18',\n", " '1890',\n", " '18th',\n", " '18thcenturi',\n", " '19',\n", " '1910',\n", " '1920',\n", " '1930',\n", " '1930s',\n", " '1937',\n", " '1940',\n", " '1941',\n", " '1944',\n", " '1945',\n", " '1950',\n", " '1950s',\n", " '1955',\n", " '1959',\n", " '1960',\n", " '1960s',\n", " '1962',\n", " '1964',\n", " '1965',\n", " '1967',\n", " '1970',\n", " '1970s',\n", " '1971',\n", " '1972',\n", " '1973',\n", " '1974',\n", " '1976',\n", " '1977',\n", " '1979',\n", " '1980',\n", " '1980s',\n", " '1984',\n", " '1985',\n", " '1990',\n", " '1994',\n", " '1995',\n", " '1997',\n", " '1999',\n", " '19th',\n", " '19thcenturi',\n", " '20',\n", " '200',\n", " '2000',\n", " '2001',\n", " '2002',\n", " '2003',\n", " '2008',\n", " '2009',\n", " '20th',\n", " '21st',\n", " '21stcenturi',\n", " '23',\n", " '24',\n", " '25',\n", " '27',\n", " '30',\n", " '300',\n", " '35',\n", " '3d',\n", " '40',\n", " '47',\n", " '50',\n", " '500',\n", " '51',\n", " '60',\n", " '60s',\n", " '70',\n", " '7th',\n", " '80',\n", " 'aaron',\n", " 'aaroneckhart',\n", " 'aarontaylor',\n", " 'abandon',\n", " 'abbi',\n", " 'abduct',\n", " 'abigailbreslin',\n", " 'abil',\n", " 'abl',\n", " 'aboard',\n", " 'aborigin',\n", " 'abov',\n", " 'abroad',\n", " 'absolut',\n", " 'absurd',\n", " 'abus',\n", " 
'abuse',\n", " 'academ',\n", " 'academi',\n", " 'academy',\n", " 'accept',\n", " 'access',\n", " 'accid',\n", " 'accident',\n", " 'acclaim',\n", " 'accompani',\n", " 'accomplish',\n", " 'account',\n", " 'accus',\n", " 'ace',\n", " 'achiev',\n", " 'acquaint',\n", " 'acquir',\n", " 'act',\n", " 'action',\n", " 'actionhero',\n", " 'activ',\n", " 'activist',\n", " 'activities',\n", " 'actor',\n", " 'actress',\n", " 'actual',\n", " 'ad',\n", " 'ada',\n", " 'adam',\n", " 'adammckay',\n", " 'adamsandl',\n", " 'adamshankman',\n", " 'adapt',\n", " 'add',\n", " 'addict',\n", " 'addiction',\n", " 'addit',\n", " 'addl',\n", " 'adjust',\n", " 'admir',\n", " 'admit',\n", " 'adolesc',\n", " 'adopt',\n", " 'ador',\n", " 'adrienbrodi',\n", " 'adrift',\n", " 'adult',\n", " 'adultanim',\n", " 'adulteri',\n", " 'adulthood',\n", " 'adults',\n", " 'advanc',\n", " 'advantag',\n", " 'advantage',\n", " 'adventur',\n", " 'adventure',\n", " 'adventures',\n", " 'advertis',\n", " 'advic',\n", " 'advice',\n", " 'advis',\n", " 'affair',\n", " 'affect',\n", " 'affection',\n", " 'afflict',\n", " 'affluent',\n", " 'afford',\n", " 'afghanistan',\n", " 'afraid',\n", " 'africa',\n", " 'african',\n", " 'africanamerican',\n", " 'aftercreditssting',\n", " 'afterlif',\n", " 'aftermath',\n", " 'afternoon',\n", " 'ag',\n", " 'age',\n", " 'agediffer',\n", " 'agenc',\n", " 'agency',\n", " 'agenda',\n", " 'agent',\n", " 'agents',\n", " 'aggress',\n", " 'aggressionbyanim',\n", " 'ago',\n", " 'agre',\n", " 'ahead',\n", " 'aid',\n", " 'aidanquinn',\n", " 'ail',\n", " 'aim',\n", " 'air',\n", " 'airborn',\n", " 'aircraft',\n", " 'airplan',\n", " 'airplanecrash',\n", " 'airport',\n", " 'aka',\n", " 'al',\n", " 'alabama',\n", " 'alan',\n", " 'alanarkin',\n", " 'alaska',\n", " 'alaskan',\n", " 'albert',\n", " 'albertbrook',\n", " 'albertfinney',\n", " 'alcatraz',\n", " 'alcohol',\n", " 'alcoholabus',\n", " 'alecbaldwin',\n", " 'alejandrogonzáleziñárritu',\n", " 'alex',\n", " 'alexanderpayn',\n", " 
'alexanderskarsgård',\n", " 'alexapenavega',\n", " 'alexia',\n", " 'alexisbledel',\n", " 'alexkendrick',\n", " 'alexpettyf',\n", " 'alexproya',\n", " 'alfi',\n", " 'alfredhitchcock',\n", " 'alfredmolina',\n", " 'algeria',\n", " 'ali',\n", " 'alic',\n", " 'alice',\n", " 'alicebraga',\n", " 'alien',\n", " 'alieninvas',\n", " 'alienlife',\n", " 'alienplanet',\n", " 'aliens',\n", " 'alik',\n", " 'alike',\n", " 'alilart',\n", " 'aliv',\n", " 'alive',\n", " 'alleg',\n", " 'allegedli',\n", " 'allen',\n", " 'alli',\n", " 'allianc',\n", " 'allies',\n", " 'allow',\n", " 'alon',\n", " 'alongsid',\n", " 'alpacino',\n", " 'alpha',\n", " 'alreadi',\n", " 'alt',\n", " 'alter',\n", " 'altern',\n", " 'alternatehistori',\n", " 'alternater',\n", " 'alway',\n", " 'alyssa',\n", " 'alzheimer',\n", " 'amanda',\n", " 'amandabyn',\n", " 'amandapeet',\n", " 'amandaseyfri',\n", " 'amateur',\n", " 'amaz',\n", " 'amazon',\n", " 'ambassador',\n", " 'amberheard',\n", " 'ambit',\n", " 'ambiti',\n", " 'ambul',\n", " 'ambush',\n", " 'america',\n", " 'american',\n", " 'americanabroad',\n", " 'americancivilwar',\n", " 'americandream',\n", " 'americanfootbal',\n", " 'americanfootballplay',\n", " 'ami',\n", " 'amid',\n", " 'amidst',\n", " 'amnesia',\n", " 'amp',\n", " 'amsterdam',\n", " 'amus',\n", " 'amusementpark',\n", " 'amy',\n", " 'amyadam',\n", " 'amysmart',\n", " 'ana',\n", " 'anakin',\n", " 'analyst',\n", " 'anarchiccomedi',\n", " 'ancient',\n", " 'ancientgreec',\n", " 'ancientrom',\n", " 'ancientworld',\n", " 'anderson',\n", " 'andi',\n", " 'andiemacdowel',\n", " 'andrew',\n", " 'andrewadamson',\n", " 'andrewgarfield',\n", " 'andrewniccol',\n", " 'android',\n", " 'andrzejbartkowiak',\n", " 'andy',\n", " 'andyfickman',\n", " 'andygarcía',\n", " 'andyserki',\n", " 'andytenn',\n", " 'angel',\n", " 'angela',\n", " 'angelabassett',\n", " 'angeles',\n", " 'angelinajoli',\n", " 'anger',\n", " 'angle',\n", " 'angri',\n", " 'ani',\n", " 'anim',\n", " 'animal',\n", " 'animalattack',\n", " 
'animalhorror',\n", " 'animals',\n", " 'anjelicahuston',\n", " 'ann',\n", " 'anna',\n", " 'annabel',\n", " 'annafari',\n", " 'annafriel',\n", " 'annakendrick',\n", " 'annasophiarobb',\n", " 'anne',\n", " 'annefletch',\n", " 'annehathaway',\n", " 'annehech',\n", " 'annemoss',\n", " 'annetteben',\n", " 'anni',\n", " 'annie',\n", " 'anniversari',\n", " 'announc',\n", " 'annual',\n", " 'anonym',\n", " 'anoth',\n", " 'answer',\n", " 'answers',\n", " 'ant',\n", " 'antarct',\n", " 'antarctica',\n", " 'antholog',\n", " 'anthoni',\n", " 'anthonyanderson',\n", " 'anthonyhopkin',\n", " 'anthonymacki',\n", " 'anthropomorph',\n", " 'anti',\n", " 'antic',\n", " 'antihero',\n", " 'antiqu',\n", " 'antisemit',\n", " 'antiterror',\n", " 'antoinefuqua',\n", " 'anton',\n", " 'antonio',\n", " 'antoniobandera',\n", " 'antonyelchin',\n", " 'anyon',\n", " 'anyth',\n", " 'apart',\n", " 'apartheid',\n", " 'apartment',\n", " 'ape',\n", " 'apes',\n", " 'apocalyps',\n", " 'apocalypse',\n", " 'apocalypt',\n", " 'apollo',\n", " 'appar',\n", " 'appear',\n", " 'appl',\n", " 'apple',\n", " 'appli',\n", " 'appoint',\n", " 'appreci',\n", " 'apprehend',\n", " 'apprentic',\n", " 'approach',\n", " 'approaches',\n", " 'april',\n", " 'aquarium',\n", " 'arab',\n", " 'arbitrarylaw',\n", " 'arch',\n", " 'archaeologist',\n", " 'archeolog',\n", " 'archer',\n", " 'archeri',\n", " 'architect',\n", " 'arctic',\n", " 'area',\n", " 'aren',\n", " 'arena',\n", " 'argument',\n", " 'aris',\n", " 'aristocrat',\n", " 'arizona',\n", " 'arm',\n", " 'armi',\n", " 'armiehamm',\n", " 'armor',\n", " 'armsdeal',\n", " 'army',\n", " 'arnold',\n", " 'arnoldschwarzenegg',\n", " 'arrang',\n", " 'arrangedmarriag',\n", " 'array',\n", " 'arrest',\n", " 'arrested',\n", " 'arriv',\n", " 'arrive',\n", " 'arrives',\n", " 'arrog',\n", " 'arson',\n", " 'art',\n", " 'artgalleri',\n", " 'arthur',\n", " 'articl',\n", " 'artifact',\n", " 'artifici',\n", " 'artificialintellig',\n", " 'artist',\n", " 'artists',\n", " 'ash',\n", " 'ashley',\n", " 
'ashleyjudd',\n", " 'ashtonkutch',\n", " 'asia',\n", " 'asian',\n", " 'asid',\n", " 'ask',\n", " 'aspect',\n", " 'aspir',\n", " 'assassin',\n", " 'assassins',\n", " 'assault',\n", " 'assembl',\n", " 'assign',\n", " 'assignment',\n", " 'assist',\n", " 'assistant',\n", " 'associ',\n", " 'assort',\n", " 'assum',\n", " 'asteroid',\n", " 'astronaut',\n", " 'asylum',\n", " 'atheist',\n", " 'athlet',\n", " 'atlant',\n", " 'atlanta',\n", " 'atom',\n", " 'atomegoyan',\n", " 'atomicbomb',\n", " 'attach',\n", " 'attack',\n", " 'attacks',\n", " 'attempt',\n", " 'attemptedmurd',\n", " 'attempttoescap',\n", " 'attend',\n", " 'attent',\n", " 'attic',\n", " 'attitud',\n", " 'attorney',\n", " 'attract',\n", " 'auction',\n", " 'audienc',\n", " 'audit',\n", " 'audreytaut',\n", " 'august',\n", " 'aunt',\n", " 'austin',\n", " 'australia',\n", " 'australian',\n", " 'austria',\n", " 'austrian',\n", " 'author',\n", " 'authorities',\n", " 'authority',\n", " 'autism',\n", " 'auto',\n", " 'autobiograph',\n", " 'automobilerac',\n", " 'avail',\n", " 'aveng',\n", " 'averag',\n", " 'aviat',\n", " 'avoid',\n", " 'await',\n", " 'awak',\n", " 'awaken',\n", " 'awar',\n", " 'award',\n", " 'away',\n", " 'awe',\n", " 'awesom',\n", " 'awkward',\n", " 'awri',\n", " 'awry',\n", " 'ax',\n", " 'axe',\n", " 'axel',\n", " 'babe',\n", " 'babi',\n", " 'baby',\n", " 'babysitt',\n", " 'bachelor',\n", " 'backdrop',\n", " 'background',\n", " 'backpack',\n", " 'backstag',\n", " 'backwat',\n", " 'backyard',\n", " 'bad',\n", " 'badli',\n", " 'bag',\n", " 'bahama',\n", " 'bail',\n", " 'bailey',\n", " 'baker',\n", " 'balanc',\n", " 'ball',\n", " 'ballet',\n", " 'balloon',\n", " 'baltimor',\n", " 'ban',\n", " 'band',\n", " 'bandit',\n", " 'bangkok',\n", " 'banish',\n", " 'bank',\n", " 'banker',\n", " 'bankrobb',\n", " 'bankrobberi',\n", " 'bar',\n", " 'barbara',\n", " 'barbarian',\n", " 'barbershop',\n", " 'barbrastreisand',\n", " 'bare',\n", " 'bargain',\n", " 'barn',\n", " 'barney',\n", " 'baron',\n", " 'barri',\n", " 
'barrier',\n", " 'barry',\n", " 'barrylevinson',\n", " 'barrypepp',\n", " 'barrysonnenfeld',\n", " 'bars',\n", " 'bas',\n", " 'base',\n", " 'basebal',\n", " 'basedonchildren',\n", " 'basedoncomicbook',\n", " 'basedonfilm',\n", " 'basedongraphicnovel',\n", " 'basedonnovel',\n", " 'basedonplay',\n", " 'basedonstagemus',\n", " 'basedontrueev',\n", " 'basedontruestori',\n", " 'basedontvseri',\n", " 'basedonvideogam',\n", " 'basedonyoungadultnovel',\n", " 'basement',\n", " 'basketbal',\n", " 'basketball',\n", " 'bat',\n", " 'bathtub',\n", " 'batman',\n", " 'battalion',\n", " 'battl',\n", " 'battle',\n", " 'battlefield',\n", " 'bay',\n", " 'beach',\n", " 'beam',\n", " 'bear',\n", " 'beard',\n", " 'beast',\n", " 'beat',\n", " 'beaten',\n", " 'beau',\n", " 'beauti',\n", " 'beautiful',\n", " 'beautifulwoman',\n", " 'beauty',\n", " 'becam',\n", " 'becaus',\n", " 'becki',\n", " 'becom',\n", " 'becominganadult',\n", " 'bed',\n", " 'bedroom',\n", " 'bee',\n", " 'beer',\n", " 'befor',\n", " 'befriend',\n", " 'beg',\n", " 'began',\n", " 'begin',\n", " 'begins',\n", " 'beguil',\n", " 'behavior',\n", " 'behead',\n", " 'belgium',\n", " 'belief',\n", " 'beliefs',\n", " 'believ',\n", " 'bell',\n", " 'bella',\n", " 'belong',\n", " 'belov',\n", " 'ben',\n", " 'benaffleck',\n", " 'bend',\n", " 'beneath',\n", " 'benefit',\n", " 'benfost',\n", " 'beniciodeltoro',\n", " 'benjamin',\n", " 'benjaminbratt',\n", " 'benkingsley',\n", " 'bennett',\n", " 'benstil',\n", " 'bent',\n", " 'berlin',\n", " 'berniemac',\n", " 'best',\n", " 'bestfriend',\n", " 'bestfriendsinlov',\n", " 'bet',\n", " 'beth',\n", " 'betray',\n", " 'betrayal',\n", " 'bettemidl',\n", " 'better',\n", " 'betti',\n", " 'bettythoma',\n", " 'beverli',\n", " 'bibl',\n", " 'biblic',\n", " 'bicker',\n", " 'bicycl',\n", " 'bid',\n", " 'big',\n", " 'bigger',\n", " 'biggest',\n", " 'bike',\n", " 'biker',\n", " 'bikini',\n", " 'bilbo',\n", " 'billcondon',\n", " 'billhad',\n", " 'billi',\n", " 'billion',\n", " 'billionair',\n", " 
'billmoseley',\n", " 'billmurray',\n", " 'billnighi',\n", " 'billpaxton',\n", " 'billpullman',\n", " 'billybobthornton',\n", " 'billyconnolli',\n", " 'billycrudup',\n", " 'billycryst',\n", " 'bind',\n", " 'biograph',\n", " 'biographi',\n", " 'biolog',\n", " 'biologist',\n", " 'biopic',\n", " 'bird',\n", " 'birth',\n", " 'birthday',\n", " 'bisexu',\n", " 'bishop',\n", " 'bit',\n", " 'bite',\n", " 'bitten',\n", " 'bitter',\n", " 'bittersweet',\n", " 'bizarr',\n", " 'black',\n", " 'blackandwhit',\n", " 'blackhol',\n", " 'blackmag',\n", " 'blackmail',\n", " 'blackpeopl',\n", " 'blacksmith',\n", " 'blade',\n", " 'blake',\n", " 'blame',\n", " 'blart',\n", " 'blast',\n", " 'blaxploit',\n", " 'blaze',\n", " 'blend',\n", " 'bless',\n", " 'blind',\n", " 'blinddat',\n", " 'blindnessandimpairedvis',\n", " 'bliss',\n", " 'blizzard',\n", " 'block',\n", " 'blofeld',\n", " 'blog',\n", " 'blond',\n", " 'blonde',\n", " 'blood',\n", " 'bloodi',\n", " 'bloodsplatt',\n", " 'bloodthirsti',\n", " 'bloom',\n", " 'blossom',\n", " 'blow',\n", " 'blu',\n", " 'blue',\n", " 'blunt',\n", " 'blur',\n", " 'board',\n", " 'boardingschool',\n", " 'boat',\n", " 'bob',\n", " 'bobbi',\n", " 'bobby',\n", " 'bobbyfarrelli',\n", " 'bobhoskin',\n", " 'bodi',\n", " 'body',\n", " 'bodyguard',\n", " 'bodyhorror',\n", " 'bohemian',\n", " 'bold',\n", " 'bollywood',\n", " 'bolt',\n", " 'bomb',\n", " 'bomber',\n", " 'bond',\n", " 'bone',\n", " 'bonniehunt',\n", " 'boo',\n", " 'book',\n", " 'books',\n", " 'boot',\n", " 'border',\n", " 'bore',\n", " 'boredom',\n", " 'boring',\n", " 'born',\n", " 'borrow',\n", " 'boss',\n", " 'boston',\n", " 'botch',\n", " 'bottl',\n", " 'bounc',\n", " 'bound',\n", " 'boundari',\n", " 'bounti',\n", " 'bountyhunt',\n", " 'bourn',\n", " 'bout',\n", " 'bowl',\n", " 'box',\n", " 'boxer',\n", " 'boxingmatch',\n", " 'boy',\n", " 'boyd',\n", " 'boyfriend',\n", " 'boys',\n", " 'brad',\n", " 'bradanderson',\n", " 'bradleycoop',\n", " 'bradpitt',\n", " 'brain',\n", " 'braintumor',\n", " 
'brainwash',\n", " 'brand',\n", " 'brandon',\n", " 'brave',\n", " 'braveri',\n", " 'brazil',\n", " 'brazilian',\n", " 'break',\n", " 'breakdown',\n", " 'breakingthefourthwal',\n", " 'breakup',\n", " 'breast',\n", " 'breath',\n", " 'breed',\n", " 'brendan',\n", " 'brendanfras',\n", " 'brendangleeson',\n", " 'brent',\n", " 'brentspin',\n", " 'brettratn',\n", " 'brew',\n", " 'brian',\n", " 'briancox',\n", " 'briandepalma',\n", " 'brianhelgeland',\n", " 'brianlev',\n", " 'brianrobbin',\n", " 'brick',\n", " 'bride',\n", " 'bridesmaid',\n", " 'bridg',\n", " 'bridge',\n", " 'bridgetfonda',\n", " 'brief',\n", " 'brielarson',\n", " 'brien',\n", " 'bright',\n", " 'brilliant',\n", " 'bring',\n", " 'brink',\n", " 'britain',\n", " 'british',\n", " 'britisharmi',\n", " 'britishsecretservic',\n", " 'brittanymurphi',\n", " 'broadcast',\n", " 'broadway',\n", " 'broke',\n", " 'broken',\n", " 'broker',\n", " 'bronson',\n", " 'bronx',\n", " 'brood',\n", " 'brook',\n", " 'brooklyn',\n", " 'brooklynnewyorkc',\n", " 'broom',\n", " 'brothel',\n", " 'brother',\n", " 'brotherbrotherrelationship',\n", " 'brotherhood',\n", " 'brothers',\n", " 'brothersisterrelationship',\n", " 'brought',\n", " 'brown',\n", " 'bruce',\n", " 'brucecampbel',\n", " 'brucedern',\n", " 'brucegreenwood',\n", " 'brucewilli',\n", " 'brutal',\n", " 'bryancranston',\n", " 'bryansing',\n", " 'brycedallashoward',\n", " 'bu',\n", " 'buck',\n", " 'bud',\n", " 'budapest',\n", " 'buddi',\n", " 'buddy',\n", " 'buddycomedi',\n", " 'buddycop',\n", " 'budget',\n", " 'buffalo',\n", " 'bug',\n", " 'build',\n", " 'building',\n", " 'built',\n", " 'bull',\n", " 'bullet',\n", " 'bulli',\n", " 'bum',\n", " 'bumbl',\n", " 'bump',\n", " 'bunch',\n", " 'bunker',\n", " 'bunni',\n", " 'bureaucraci',\n", " 'burglar',\n", " 'buri',\n", " 'burn',\n", " 'burnedal',\n", " 'burtlancast',\n", " 'burton',\n", " 'burtreynold',\n", " 'bush',\n", " 'busi',\n", " 'business',\n", " 'businessman',\n", " 'businesswoman',\n", " 'bust',\n", " 'butch',\n", " 
'butcher',\n", " 'butler',\n", " 'butt',\n", " 'button',\n", " 'buy',\n", " 'buzz',\n", " 'cabin',\n", " 'cabl',\n", " 'caesar',\n", " 'cage',\n", " 'cain',\n", " 'cairo',\n", " 'cal',\n", " 'cale',\n", " 'caleb',\n", " 'california',\n", " 'calm',\n", " 'calvin',\n", " 'cambodia',\n", " 'camcord',\n", " 'came',\n", " 'camelot',\n", " 'camera',\n", " 'cameraman',\n", " 'cameron',\n", " 'cameroncrow',\n", " 'camerondiaz',\n", " 'camgigandet',\n", " 'camillabel',\n", " 'camp',\n", " 'campaign',\n", " 'campbel',\n", " 'campbell',\n", " 'campbellscott',\n", " 'campu',\n", " 'canada',\n", " 'canadian',\n", " 'cancer',\n", " 'candi',\n", " 'candid',\n", " 'candlelightvigil',\n", " 'canin',\n", " 'cannabi',\n", " 'cannib',\n", " 'canuxploit',\n", " 'canyon',\n", " 'capabl',\n", " 'cape',\n", " 'caper',\n", " 'capit',\n", " 'capt',\n", " 'captain',\n", " 'captiv',\n", " 'captur',\n", " 'capture',\n", " 'captured',\n", " 'car',\n", " 'caraccid',\n", " 'carbomb',\n", " 'carchas',\n", " 'carcrash',\n", " 'card',\n", " 'care',\n", " 'career',\n", " 'carefre',\n", " 'caretak',\n", " 'careymulligan',\n", " 'cargo',\n", " 'caribbean',\n", " 'carjourney',\n", " 'carl',\n", " 'carlagugino',\n", " 'carlo',\n", " 'carmen',\n", " 'carniv',\n", " 'carol',\n", " 'carolina',\n", " 'carrac',\n", " 'carri',\n", " 'carrie',\n", " 'carriefish',\n", " 'cartel',\n", " 'carter',\n", " 'cartoon',\n", " 'caryelw',\n", " 'case',\n", " 'casey',\n", " 'caseyaffleck',\n", " 'cash',\n", " ...]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# get_feature_names() is deprecated since scikit-learn 1.0 and removed in 1.2\n", "cv.get_feature_names_out()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics.pairwise import cosine_similarity" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "similarity=cosine_similarity(vectors)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"(4806, 4806)" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cosine_similarity(vectors).shape" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "def recommend(movie):\n", " movie_index=new_df[new_df['title']==movie].index[0]\n", " distances=similarity[movie_index]\n", " movies_list=sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:6]\n", " \n", " for i in movies_list:\n", " print(new_df.iloc[i[0]].title)\n", " \n", " " ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Dark Knight\n", "Batman\n", "Batman\n", "The Dark Knight Rises\n", "10th & Wolf\n" ] } ], "source": [ "recommend('Batman Begins')" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "import pickle" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "pickle.dump(new_df.to_dict(),open('movies_dict.pkl','wb'))" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitletags
019995Avatarin the 22nd century, a parapleg marin is dispa...
1285Pirates of the Caribbean: At World's Endcaptain barbossa, long believ to be dead, ha c...
2206647Spectrea cryptic messag from bond’ past send him on a...
349026The Dark Knight Risesfollow the death of district attorney harvey d...
449529John Carterjohn carter is a war-weary, former militari ca...
............
48049367El Mariachiel mariachi just want to play hi guitar and ca...
480572766Newlywedsa newlyw couple' honeymoon is upend by the arr...
4806231617Signed, Sealed, Delivered\"signed, sealed, delivered\" introduc a dedic q...
4807126186Shanghai Callingwhen ambiti new york attorney sam is sent to s...
480825975My Date with Drewever sinc the second grade when he first saw h...
\n", "

4806 rows × 3 columns

\n", "
" ], "text/plain": [ " movie_id title \\\n", "0 19995 Avatar \n", "1 285 Pirates of the Caribbean: At World's End \n", "2 206647 Spectre \n", "3 49026 The Dark Knight Rises \n", "4 49529 John Carter \n", "... ... ... \n", "4804 9367 El Mariachi \n", "4805 72766 Newlyweds \n", "4806 231617 Signed, Sealed, Delivered \n", "4807 126186 Shanghai Calling \n", "4808 25975 My Date with Drew \n", "\n", " tags \n", "0 in the 22nd century, a parapleg marin is dispa... \n", "1 captain barbossa, long believ to be dead, ha c... \n", "2 a cryptic messag from bond’ past send him on a... \n", "3 follow the death of district attorney harvey d... \n", "4 john carter is a war-weary, former militari ca... \n", "... ... \n", "4804 el mariachi just want to play hi guitar and ca... \n", "4805 a newlyw couple' honeymoon is upend by the arr... \n", "4806 \"signed, sealed, delivered\" introduc a dedic q... \n", "4807 when ambiti new york attorney sam is sent to s... \n", "4808 ever sinc the second grade when he first saw h... \n", "\n", "[4806 rows x 3 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ],
 "source": [ "new_df" ] },
 { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [],
 "source": [
  "# Save the cosine-similarity matrix; the with-block closes the file so the\n",
  "# pickle is completely written before anything else reads it.\n",
  "with open('similarity.pkl','wb') as similarity_file:\n",
  "    pickle.dump(similarity,similarity_file)"
 ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.7" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "547e4bcef05e70932573b937d3cf0146e04e40231df58f37570a8721868a7605" } } }, "nbformat": 4, "nbformat_minor": 2 }