{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['bollywood_full_1950-2019.csv',\n", " 'common_words.csv',\n", " 'MovieGenre.csv',\n", " 'Top_10000_Movies.csv']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dirc = 'data'\n", "os.listdir(dirc)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
title_ximdb_idposter_pathwiki_linktitle_yoriginal_titleis_adultyear_of_releaseruntimegenresimdb_ratingimdb_votesstorysummarytaglineactorswins_nominationsrelease_date
2559Waaris (1988 film)tt0367214https://upload.wikimedia.org/wikipedia/en/thum...https://en.wikipedia.org/wiki/Waaris_(1988_film)WaarisWaaris01988150Drama6.7121.0Multi-millionaire widower Kishan Singh prepare...Multi-millionaire widower Kishan Singh prepare...NaNSmita Patil|Amrita Singh|Raj Babbar|Kulbhushan...NaN6 May 1988 (India)
\n", "
" ], "text/plain": [ " title_x imdb_id \\\n", "2559 Waaris (1988 film) tt0367214 \n", "\n", " poster_path \\\n", "2559 https://upload.wikimedia.org/wikipedia/en/thum... \n", "\n", " wiki_link title_y original_title \\\n", "2559 https://en.wikipedia.org/wiki/Waaris_(1988_film) Waaris Waaris \n", "\n", " is_adult year_of_release runtime genres imdb_rating imdb_votes \\\n", "2559 0 1988 150 Drama 6.7 121.0 \n", "\n", " story \\\n", "2559 Multi-millionaire widower Kishan Singh prepare... \n", "\n", " summary tagline \\\n", "2559 Multi-millionaire widower Kishan Singh prepare... NaN \n", "\n", " actors wins_nominations \\\n", "2559 Smita Patil|Amrita Singh|Raj Babbar|Kulbhushan... NaN \n", "\n", " release_date \n", "2559 6 May 1988 (India) " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('data/bollywood_full_1950-2019.csv')\n", "df.sample()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4330, 18)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 4330 entries, 0 to 4329\n", "Data columns (total 18 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 title_x 4330 non-null object \n", " 1 imdb_id 4330 non-null object \n", " 2 poster_path 3580 non-null object \n", " 3 wiki_link 4330 non-null object \n", " 4 title_y 4330 non-null object \n", " 5 original_title 4330 non-null object \n", " 6 is_adult 4330 non-null int64 \n", " 7 year_of_release 4330 non-null object \n", " 8 runtime 4330 non-null object \n", " 9 genres 4330 non-null object \n", " 10 imdb_rating 4317 non-null float64\n", " 11 imdb_votes 4317 non-null float64\n", " 12 story 4065 non-null object \n", " 13 summary 4329 non-null object \n", " 14 tagline 685 non-null object \n", " 15 actors 4320 non-null object \n", " 16 wins_nominations 1344 non-null object \n", " 17 release_date 3049 non-null object \n", "dtypes: float64(2), int64(1), object(15)\n", "memory usage: 609.0+ KB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "l = []\n", "def year(df) : \n", " try : \n", " if int(df['year_of_release']) > 1999 : \n", " l.append(df.name)\n", " except : \n", " pass" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 None\n", "1 None\n", "2 None\n", "3 None\n", "4 None\n", " ... \n", "4325 None\n", "4326 None\n", "4327 None\n", "4328 None\n", "4329 None\n", "Length: 4330, dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.apply(year, axis=1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1687" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(l)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "new_df = df.iloc[l,:]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1687, 18)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.shape" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "new_df.reset_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indextitle_ximdb_idposter_pathwiki_linktitle_yoriginal_titleis_adultyear_of_releaseruntimegenresimdb_ratingimdb_votesstorysummarytaglineactorswins_nominationsrelease_date
11451151Adharm (2006 film)tt2435680https://upload.wikimedia.org/wikipedia/en/thum...https://en.wikipedia.org/wiki/Adharm_(2006_film)AdharmAdharm02006\\NAction5.17.0This story is inspired by true incident that h...This story is inspired by true incident that h...NaNRahul Dev|Mayuri Kango|Rocky Khanna|Anupam Khe...NaNNaN
\n", "
" ], "text/plain": [ " index title_x imdb_id \\\n", "1145 1151 Adharm (2006 film) tt2435680 \n", "\n", " poster_path \\\n", "1145 https://upload.wikimedia.org/wikipedia/en/thum... \n", "\n", " wiki_link title_y original_title \\\n", "1145 https://en.wikipedia.org/wiki/Adharm_(2006_film) Adharm Adharm \n", "\n", " is_adult year_of_release runtime genres imdb_rating imdb_votes \\\n", "1145 0 2006 \\N Action 5.1 7.0 \n", "\n", " story \\\n", "1145 This story is inspired by true incident that h... \n", "\n", " summary tagline \\\n", "1145 This story is inspired by true incident that h... NaN \n", "\n", " actors wins_nominations \\\n", "1145 Rahul Dev|Mayuri Kango|Rocky Khanna|Anupam Khe... NaN \n", "\n", " release_date \n", "1145 NaN " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.sample()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://en.wikipedia.org/wiki/Uri:_The_Surgical_Strike'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df[\"wiki_link\"][0]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index 0\n", "title_x 0\n", "imdb_id 0\n", "poster_path 111\n", "wiki_link 0\n", "title_y 0\n", "original_title 0\n", "is_adult 0\n", "year_of_release 0\n", "runtime 0\n", "genres 0\n", "imdb_rating 12\n", "imdb_votes 12\n", "story 20\n", "summary 0\n", "tagline 1106\n", "actors 5\n", "wins_nominations 954\n", "release_date 113\n", "dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.isnull().sum()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Considering Only Important features :" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['index', 'title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y',\n", " 'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres',\n", " 'imdb_rating', 'imdb_votes', 'story', 'summary', 'tagline', 'actors',\n", " 'wins_nominations', 'release_date'],\n", " dtype='object')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df.columns" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# poster_path (required to show poster in the end (GUI))\n", "# original_title\n", "# is_adult \n", "# genres\n", "# imdb_rating\n", "# imdb_votes\n", "# story\n", "# summary\n", "# actors" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactors
1017https://upload.wikimedia.org/wikipedia/en/thum...Chamku02008Action|Crime|Thriller4.0474.0After witnessing his entire Bhagpur Bihar-bas...A traumatized child becomes a Naxalite and up...Bobby Deol|Priyanka Chopra|Irrfan Khan|Akhilen...
\n", "
" ], "text/plain": [ " poster_path original_title \\\n", "1017 https://upload.wikimedia.org/wikipedia/en/thum... Chamku \n", "\n", " is_adult year_of_release genres imdb_rating \\\n", "1017 0 2008 Action|Crime|Thriller 4.0 \n", "\n", " imdb_votes story \\\n", "1017 474.0 After witnessing his entire Bhagpur Bihar-bas... \n", "\n", " summary \\\n", "1017 A traumatized child becomes a Naxalite and up... \n", "\n", " actors \n", "1017 Bobby Deol|Priyanka Chopra|Irrfan Khan|Akhilen... " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = new_df.drop(columns=[\"index\",\"title_x\",\"imdb_id\",\"title_y\",\"runtime\",\"tagline\",'wins_nominations', 'release_date', \"wiki_link\"])\n", "data.sample()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## ______________________________________________________________________________" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Dealing with NULL values :" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "poster_path 111\n", "original_title 0\n", "is_adult 0\n", "year_of_release 0\n", "genres 0\n", "imdb_rating 12\n", "imdb_votes 12\n", "story 20\n", "summary 0\n", "actors 5\n", "dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://upload.wikimedia.org/wikipedia/en/thumb/3/3b/URI_-_New_poster.jpg/220px-URI_-_New_poster.jpg'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[\"poster_path\"][0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# Replacing the NULL poster links with 'image not available link' :\n", "\n", "data[\"poster_path\"].fillna(\"https://indianbankseauction.com/PropertyImages/nopreview.jpeg\", inplace=True)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "data['imdb_rating'].fillna(0, inplace=True)\n", "data['imdb_votes'].fillna(0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "data.fillna(\"\", inplace=True)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "poster_path 0\n", "original_title 0\n", "is_adult 0\n", "year_of_release 0\n", "genres 0\n", "imdb_rating 0\n", "imdb_votes 0\n", "story 0\n", "summary 0\n", "actors 0\n", "dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1687, 10)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "data.reset_index(drop = True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactors
0https://upload.wikimedia.org/wikipedia/en/thum...Uri: The Surgical Strike02019Action|Drama|War8.435112.0Divided over five chapters the film chronicle...Indian army special forces execute a covert op...Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga...
\n", "
" ], "text/plain": [ " poster_path \\\n", "0 https://upload.wikimedia.org/wikipedia/en/thum... \n", "\n", " original_title is_adult year_of_release genres \\\n", "0 Uri: The Surgical Strike 0 2019 Action|Drama|War \n", "\n", " imdb_rating imdb_votes story \\\n", "0 8.4 35112.0 Divided over five chapters the film chronicle... \n", "\n", " summary \\\n", "0 Indian army special forces execute a covert op... \n", "\n", " actors \n", "0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head(1)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "def splitter(row):\n", " return (str(row).split(\"|\"))\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "data[\"genres\"] = data[\"genres\"].apply(splitter)\n", "data[\"actors\"] = data[\"actors\"].apply(splitter)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "data[\"num_actors\"] = data[\"actors\"].apply(lambda x: len(x))" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max(data[\"num_actors\"])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# so as we can see no. of actors in a movie can be unto 15 that's why we will \n", "# create a func. to take only upto 4 actors into the consideration. \n", "\n", "data[\"actors\"] = data[\"actors\"].apply(lambda x: x[:4])\n" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactors
0https://upload.wikimedia.org/wikipedia/en/thum...Uri: The Surgical Strike02019[Action, Drama, War]8.435112.0Divided over five chapters the film chronicle...Indian army special forces execute a covert op...[Vicky Kaushal, Paresh Rawal, Mohit Raina, Yam...
\n", "
" ], "text/plain": [ " poster_path \\\n", "0 https://upload.wikimedia.org/wikipedia/en/thum... \n", "\n", " original_title is_adult year_of_release genres \\\n", "0 Uri: The Surgical Strike 0 2019 [Action, Drama, War] \n", "\n", " imdb_rating imdb_votes story \\\n", "0 8.4 35112.0 Divided over five chapters the film chronicle... \n", "\n", " summary \\\n", "0 Indian army special forces execute a covert op... \n", "\n", " actors \n", "0 [Vicky Kaushal, Paresh Rawal, Mohit Raina, Yam... " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.drop(columns=\"num_actors\", inplace=True)\n", "data.head(1)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "data[\"story\"] = data[\"story\"].apply(lambda x: str(x).split())\n", "data[\"summary\"] = data[\"summary\"].apply(lambda x: str(x).split())" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactors
0https://upload.wikimedia.org/wikipedia/en/thum...Uri: The Surgical Strike02019[Action, Drama, War]8.435112.0[Divided, over, five, chapters, the, film, chr...[Indian, army, special, forces, execute, a, co...[Vicky Kaushal, Paresh Rawal, Mohit Raina, Yam...
1https://indianbankseauction.com/PropertyImages...Battalion 60902019[War]4.173.0[The, story, revolves, around, a, cricket, mat...[The, story, of, Battalion, 609, revolves, aro...[Vicky Ahuja, Shoaib Ibrahim, Shrikant Kamat, ...
\n", "
" ], "text/plain": [ " poster_path \\\n", "0 https://upload.wikimedia.org/wikipedia/en/thum... \n", "1 https://indianbankseauction.com/PropertyImages... \n", "\n", " original_title is_adult year_of_release genres \\\n", "0 Uri: The Surgical Strike 0 2019 [Action, Drama, War] \n", "1 Battalion 609 0 2019 [War] \n", "\n", " imdb_rating imdb_votes story \\\n", "0 8.4 35112.0 [Divided, over, five, chapters, the, film, chr... \n", "1 4.1 73.0 [The, story, revolves, around, a, cricket, mat... \n", "\n", " summary \\\n", "0 [Indian, army, special, forces, execute, a, co... \n", "1 [The, story, of, Battalion, 609, revolves, aro... \n", "\n", " actors \n", "0 [Vicky Kaushal, Paresh Rawal, Mohit Raina, Yam... \n", "1 [Vicky Ahuja, Shoaib Ibrahim, Shrikant Kamat, ... " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head(2)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactors
1189https://upload.wikimedia.org/wikipedia/en/thum...Naksha02006[Action, Adventure, Fantasy]3.1916.0[Bhagwan, Shri, Kishan, had, predicted, that, ...[Bhagwan, Shri, Kishan, had, predicted, that, ...[SunnyDeol, VivekOberoi, SameeraReddy, JackieS...
\n", "
" ], "text/plain": [ " poster_path original_title \\\n", "1189 https://upload.wikimedia.org/wikipedia/en/thum... Naksha \n", "\n", " is_adult year_of_release genres imdb_rating \\\n", "1189 0 2006 [Action, Adventure, Fantasy] 3.1 \n", "\n", " imdb_votes story \\\n", "1189 916.0 [Bhagwan, Shri, Kishan, had, predicted, that, ... \n", "\n", " summary \\\n", "1189 [Bhagwan, Shri, Kishan, had, predicted, that, ... \n", "\n", " actors \n", "1189 [SunnyDeol, VivekOberoi, SameeraReddy, JackieS... " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[\"genres\"] = data[\"genres\"].apply(lambda x: [i.replace(\" \",\"\") for i in x])\n", "data[\"story\"] = data[\"story\"].apply(lambda x: [i.replace(\" \",\"\") for i in x])\n", "data[\"summary\"] = data[\"summary\"].apply(lambda x: [i.replace(\" \",\"\") for i in x])\n", "data[\"actors\"] = data[\"actors\"].apply(lambda x: [i.replace(\" \",\"\") for i in x])\n", "data.sample()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactorstitle
0https://upload.wikimedia.org/wikipedia/en/thum...Uri: The Surgical Strike02019[Action, Drama, War]8.435112.0[Divided, over, five, chapters, the, film, chr...[Indian, army, special, forces, execute, a, co...[VickyKaushal, PareshRawal, MohitRaina, YamiGa...[Uri:, The, Surgical, Strike]
\n", "
" ], "text/plain": [ " poster_path \\\n", "0 https://upload.wikimedia.org/wikipedia/en/thum... \n", "\n", " original_title is_adult year_of_release genres \\\n", "0 Uri: The Surgical Strike 0 2019 [Action, Drama, War] \n", "\n", " imdb_rating imdb_votes story \\\n", "0 8.4 35112.0 [Divided, over, five, chapters, the, film, chr... \n", "\n", " summary \\\n", "0 [Indian, army, special, forces, execute, a, co... \n", "\n", " actors \\\n", "0 [VickyKaushal, PareshRawal, MohitRaina, YamiGa... \n", "\n", " title \n", "0 [Uri:, The, Surgical, Strike] " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[\"title\"] = data[\"original_title\"].apply(lambda x: str(x).split())\n", "data.head(1)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "data[\"tags\"] = data[\"genres\"] + data[\"story\"] + data[\"summary\"]+ data[\"actors\"] + data[\"title\"]" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "movies = data.drop(columns=[ \"story\",\"actors\", \"title\", \"is_adult\"])" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleyear_of_releasegenresimdb_ratingimdb_votessummarytags
197https://upload.wikimedia.org/wikipedia/en/thum...Raabta2017[Action, Adventure, Drama]4.12667.0[In, 2017, Shiv, and, Saira, fall, for, each, ...action adventure drama in 2017 shiv and saira ...
\n", "
" ], "text/plain": [ " poster_path original_title \\\n", "197 https://upload.wikimedia.org/wikipedia/en/thum... Raabta \n", "\n", " year_of_release genres imdb_rating imdb_votes \\\n", "197 2017 [Action, Adventure, Drama] 4.1 2667.0 \n", "\n", " summary \\\n", "197 [In, 2017, Shiv, and, Saira, fall, for, each, ... \n", "\n", " tags \n", "197 action adventure drama in 2017 shiv and saira ... " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[\"tags\"] = movies[\"tags\"].apply(lambda x: (\" \".join(x)).lower())\n", "movies.sample()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"crime thriller akash a blind pianist is preparing for a london concert.sophie's scooter bumps into akash and upon finding he a pianist she takes him to her restaurant franco's run by her father and offers him a job.akash meets 70s actor pramod sinha at the restaurant who asks him to do a private concert of his songs at his residence to surprise his wife simi. next day upon reaching his house a shocking surprise is waiting for akash. a series of mysterious events changes the life of a blind pianist who must now report a crime that he should technically know nothing of. ayushmannkhurrana tabu radhikaapte anildhawan andhadhun\"" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[\"tags\"][145]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "from nltk.stem.porter import PorterStemmer\n", "ps = PorterStemmer()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "def stem(text):\n", " word=[]\n", "\n", " for i in text.split():\n", " word.append(ps.stem(i))\n", "\n", " return \" \".join(word)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "movies[\"tags\"] = movies[\"tags\"].apply(stem)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"crime thriller akash a blind pianist is prepar for a london concert.sophie' scooter bump into akash and upon find he a pianist she take him to her restaur franco' run by her father and offer him a job.akash meet 70 actor pramod sinha at the restaur who ask him to do a privat concert of hi song at hi resid to surpris hi wife simi. next day upon reach hi hous a shock surpris is wait for akash. a seri of mysteri event chang the life of a blind pianist who must now report a crime that he should technic know noth of. ayushmannkhurrana tabu radhikaapt anildhawan andhadhun\"" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[\"tags\"][145]" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "from sklearn.feature_extraction.text import CountVectorizer\n", "cv = CountVectorizer(max_features=5000, stop_words='english')" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "vectors = cv.fit_transform(movies[\"tags\"]).toarray()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1687, 5000)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vectors.shape" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", " ...,\n", " [0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vectors" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "frozenset({'a',\n", " 'about',\n", " 'above',\n", " 'across',\n", " 'after',\n", " 'afterwards',\n", " 'again',\n", " 'against',\n", " 'all',\n", " 'almost',\n", " 'alone',\n", " 'along',\n", " 'already',\n", " 'also',\n", " 'although',\n", " 'always',\n", " 'am',\n", " 'among',\n", " 'amongst',\n", " 'amoungst',\n", " 'amount',\n", " 'an',\n", " 'and',\n", " 'another',\n", " 'any',\n", " 'anyhow',\n", " 'anyone',\n", " 'anything',\n", " 'anyway',\n", " 'anywhere',\n", " 'are',\n", " 'around',\n", " 'as',\n", " 'at',\n", " 'back',\n", " 'be',\n", " 'became',\n", " 'because',\n", " 'become',\n", " 'becomes',\n", " 'becoming',\n", " 'been',\n", " 'before',\n", " 'beforehand',\n", " 'behind',\n", " 'being',\n", " 'below',\n", " 'beside',\n", " 'besides',\n", " 'between',\n", " 'beyond',\n", " 'bill',\n", " 'both',\n", " 'bottom',\n", " 'but',\n", " 'by',\n", " 'call',\n", " 'can',\n", " 'cannot',\n", " 'cant',\n", " 'co',\n", " 'con',\n", " 'could',\n", " 'couldnt',\n", " 'cry',\n", " 'de',\n", " 'describe',\n", " 'detail',\n", " 'do',\n", " 'done',\n", " 'down',\n", " 'due',\n", " 'during',\n", " 'each',\n", " 'eg',\n", " 'eight',\n", " 'either',\n", " 'eleven',\n", " 'else',\n", " 'elsewhere',\n", " 'empty',\n", " 'enough',\n", " 'etc',\n", " 'even',\n", " 'ever',\n", " 'every',\n", " 'everyone',\n", " 'everything',\n", " 'everywhere',\n", " 'except',\n", " 'few',\n", " 'fifteen',\n", " 'fifty',\n", " 'fill',\n", " 'find',\n", " 'fire',\n", " 'first',\n", " 'five',\n", " 'for',\n", " 'former',\n", " 'formerly',\n", " 'forty',\n", " 'found',\n", " 'four',\n", " 'from',\n", " 'front',\n", " 'full',\n", " 'further',\n", " 'get',\n", " 'give',\n", " 'go',\n", " 'had',\n", " 'has',\n", " 'hasnt',\n", " 'have',\n", " 'he',\n", " 'hence',\n", " 'her',\n", " 'here',\n", " 'hereafter',\n", " 'hereby',\n", " 'herein',\n", " 'hereupon',\n", " 'hers',\n", " 'herself',\n", " 'him',\n", " 'himself',\n", " 'his',\n", " 'how',\n", " 'however',\n", " 'hundred',\n", " 'i',\n", " 'ie',\n", " 'if',\n", " 'in',\n", " 'inc',\n", " 'indeed',\n", " 'interest',\n", " 'into',\n", " 'is',\n", " 'it',\n", " 'its',\n", " 'itself',\n", " 'keep',\n", " 'last',\n", " 'latter',\n", " 'latterly',\n", " 'least',\n", " 'less',\n", " 'ltd',\n", " 'made',\n", " 'many',\n", " 'may',\n", " 'me',\n", " 'meanwhile',\n", " 'might',\n", " 'mill',\n", " 'mine',\n", " 'more',\n", " 'moreover',\n", " 'most',\n", " 'mostly',\n", " 'move',\n", " 'much',\n", " 'must',\n", " 'my',\n", " 'myself',\n", " 'name',\n", " 'namely',\n", " 'neither',\n", " 'never',\n", " 'nevertheless',\n", " 'next',\n", " 'nine',\n", " 'no',\n", " 'nobody',\n", " 'none',\n", " 'noone',\n", " 'nor',\n", " 'not',\n", " 'nothing',\n", " 'now',\n", " 'nowhere',\n", " 'of',\n", " 'off',\n", " 'often',\n", " 'on',\n", " 'once',\n", " 'one',\n", " 'only',\n", " 'onto',\n", " 'or',\n", " 'other',\n", " 'others',\n", " 'otherwise',\n", " 'our',\n", " 'ours',\n", " 'ourselves',\n", " 'out',\n", " 'over',\n", " 'own',\n", " 'part',\n", " 'per',\n", " 'perhaps',\n", " 'please',\n", " 'put',\n", " 'rather',\n", " 're',\n", " 'same',\n", " 'see',\n", " 'seem',\n", " 'seemed',\n", " 'seeming',\n", " 'seems',\n", " 'serious',\n", " 'several',\n", " 'she',\n", " 'should',\n", " 'show',\n", " 'side',\n", " 'since',\n", " 'sincere',\n", " 'six',\n", " 'sixty',\n", " 'so',\n", " 'some',\n", " 'somehow',\n", " 'someone',\n", " 'something',\n", " 'sometime',\n", " 'sometimes',\n", " 'somewhere',\n", " 'still',\n", " 'such',\n", " 'system',\n", " 'take',\n", " 'ten',\n", " 'than',\n", " 'that',\n", " 'the',\n", " 'their',\n", " 'them',\n", " 'themselves',\n", " 'then',\n", " 'thence',\n", " 'there',\n", " 'thereafter',\n", " 'thereby',\n", " 'therefore',\n", " 'therein',\n", " 'thereupon',\n", " 'these',\n", " 'they',\n", " 'thick',\n", " 'thin',\n", " 'third',\n", " 'this',\n", " 'those',\n", " 'though',\n", " 'three',\n", " 'through',\n", " 'throughout',\n", " 'thru',\n", " 'thus',\n", " 'to',\n", " 'together',\n", " 'too',\n", " 'top',\n", " 'toward',\n", " 'towards',\n", " 'twelve',\n", " 'twenty',\n", " 'two',\n", " 'un',\n", " 'under',\n", " 'until',\n", " 'up',\n", " 'upon',\n", " 'us',\n", " 'very',\n", " 'via',\n", " 'was',\n", " 'we',\n", " 'well',\n", " 'were',\n", " 'what',\n", " 'whatever',\n", " 'when',\n", " 'whence',\n", " 'whenever',\n", " 'where',\n", " 'whereafter',\n", " 'whereas',\n", " 'whereby',\n", " 'wherein',\n", " 'whereupon',\n", " 'wherever',\n", " 'whether',\n", " 'which',\n", " 'while',\n", " 'whither',\n", " 'who',\n", " 'whoever',\n", " 'whole',\n", " 'whom',\n", " 'whose',\n", " 'why',\n", " 'will',\n", " 'with',\n", " 'within',\n", " 'without',\n", " 'would',\n", " 'yet',\n", " 'you',\n", " 'your',\n", " 'yours',\n", " 'yourself',\n", " 'yourselves'})" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cv.get_stop_words()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "most_common_words = cv.get_feature_names_out()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0001010010th1111th121251313th...zeenatzeenatamanzerozindazindagizintazoonizoyazubeidazulfi
00000100000...0000000000
10000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000000
..................................................................
16820000000000...0000000000
16830000000000...0000000000
16840000000000...0000000000
16850000000000...0000000000
16860000000000...0000000000
\n", "

1687 rows × 5000 columns

\n", "
" ], "text/plain": [ " 000 10 100 10th 11 11th 12 125 13 13th ... zeenat \\\n", "0 0 0 0 0 1 0 0 0 0 0 ... 0 \n", "1 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "2 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "3 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "4 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "... ... .. ... ... .. ... .. ... .. ... ... ... \n", "1682 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "1683 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "1684 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "1685 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "1686 0 0 0 0 0 0 0 0 0 0 ... 0 \n", "\n", " zeenataman zero zinda zindagi zinta zooni zoya zubeida zulfi \n", "0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... ... \n", "1682 0 0 0 0 0 0 0 0 0 \n", "1683 0 0 0 0 0 0 0 0 0 \n", "1684 0 0 0 0 0 0 0 0 0 \n", "1685 0 0 0 0 0 0 0 0 0 \n", "1686 0 0 0 0 0 0 0 0 0 \n", "\n", "[1687 rows x 5000 columns]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cs = pd.DataFrame(vectors, columns=most_common_words)\n", "cs.to_csv(\"data\\common_words.csv\")\n", "cs" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# Euclidian distance is not a good method for high-demensions, so we'll use Cosine Similarity.\n", "from sklearn.metrics.pairwise import cosine_similarity" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "similarity = cosine_similarity(vectors) # Relation of a movie with other movies." ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1. , 0.30833492, 0.06123412, ..., 0.02261078, 0.01049901,\n", " 0.06855751])" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# As, correlation of movie 1 with movie 1 will always be equal to 1, so all the diagonal elements would be 1.as_integer_ratio\n", "\n", "similarity[0]" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1687, 1687)" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "similarity.shape" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0, 1.0),\n", " (218, 0.34341479757995447),\n", " (1, 0.3083349194223892),\n", " (166, 0.30604692017553103),\n", " (1487, 0.3031960178847318),\n", " (1535, 0.2721655269759087),\n", " (1426, 0.22416986876995007),\n", " (127, 0.22371868507134143),\n", " (22, 0.21667041532219725),\n", " (667, 0.21650635094610968),\n", " (1497, 0.21103396143486278),\n", " (269, 0.20775007961358385),\n", " (1444, 0.20659326226697075),\n", " (1038, 0.2056971041128498),\n", " (1313, 0.1965021149155211),\n", " (255, 0.19444444444444448),\n", " (1301, 0.18371173070873836),\n", " (715, 0.18219813149998287),\n", " (835, 0.17891501337389773),\n", " (1331, 0.17757784879581398),\n", " (287, 0.17690759253434063),\n", " (373, 0.17391639824998362),\n", " (335, 0.17213259316477408),\n", " (924, 0.1700204568168902),\n", " (105, 0.1683064344266991),\n", " (865, 0.16601941503058407),\n", " (894, 0.1653060996890293),\n", " (158, 0.16453801395794543),\n", " (50, 0.16239245061540775),\n", " (58, 0.16139400871168066),\n", " (1360, 0.16118270179676272),\n", " (950, 0.15811388300841894),\n", " (497, 0.15555555555555556),\n", " (168, 0.1547461151475432),\n", " (364, 0.15408338784034142),\n", " (1483, 0.1534255013186446),\n", " (41, 0.1529438225803745),\n", " (595, 0.1527777777777778),\n", " (624, 0.15097027121927947),\n", " (440, 0.15059374348566512),\n", " (1145, 0.1484575537959652),\n", " (1009, 0.14638501094227996),\n", " (1328, 0.14483345242495155),\n", " (42, 0.14349100534871306),\n", " (1369, 0.14285714285714285),\n", " (483, 0.1396930126874916),\n", " (1353, 0.1388888888888889),\n", " (1241, 0.13882112167876096),\n", " (1634, 0.13636480060602701),\n", " (909, 0.1357813616483921),\n", " (577, 0.13552389967500503),\n", " (1238, 0.135381903394124),\n", " (659, 0.13489077471234956),\n", " (68, 0.13471506281091267),\n", " (1671, 0.13295400586957762),\n", " (574, 0.13230943668928924),\n", " (496, 0.1312159702703695),\n", " (493, 0.13105951658213078),\n", " (968, 0.13074409009212268),\n", " (1390, 0.13013931011614385),\n", " (1569, 0.12803687993289598),\n", " (1236, 0.1267916223003882),\n", " (416, 0.1252448582170299),\n", " (760, 0.12487810821089254),\n", " (779, 0.12487810821089254),\n", " (351, 0.1242259987499883),\n", " (1553, 0.12376844287208426),\n", " (122, 0.1200137166371826),\n", " (1070, 0.11952286093343938),\n", " (443, 0.11909826683508273),\n", " (438, 0.11873260194275806),\n", " (525, 0.11844484241978356),\n", " (457, 0.11755760264309971),\n", " (98, 0.11688906406161201),\n", " (293, 0.11669000700233415),\n", " (411, 0.11669000700233415),\n", " (1018, 0.11637608921255717),\n", " (1326, 0.11586227443356203),\n", " (1357, 0.11509484552229768),\n", " (1400, 0.11494271803476605),\n", " (1401, 0.11494271803476605),\n", " (219, 0.11480778669894409),\n", " (663, 0.1147550621098494),\n", " (470, 0.11419811830458859),\n", " (331, 0.11396057645963795),\n", " (144, 0.11385500851066219),\n", " (805, 0.1137539387983225),\n", " (285, 0.11274690420042435),\n", " (176, 0.11270870433342904),\n", " (131, 0.11262289639991432),\n", " (1324, 0.11106914279569094),\n", " (796, 0.11013785635132559),\n", " (833, 0.1097134314340639),\n", " (899, 0.10929387566615228),\n", " (7, 0.10926834468453098),\n", " (1397, 0.10910894511799618),\n", " (621, 0.10786315658047482),\n", " (1233, 0.10691671651659736),\n", " (915, 0.10650160840378228),\n", " (1194, 0.10611908999450222),\n", " (1159, 0.10610212315611824),\n", " (1191, 0.10580184237878974),\n", " (1329, 0.10549170295056474),\n", " (292, 0.10532224809726146),\n", " (1293, 0.10524493839518115),\n", " (366, 0.10499013139145202),\n", " (367, 0.10499013139145202),\n", " (332, 0.10497277621629561),\n", " (931, 0.1049727762162956),\n", " (491, 0.10497277621629558),\n", " (359, 0.10480469092042208),\n", " (432, 0.1047565601757848),\n", " (531, 0.10437071518085826),\n", " (1203, 0.10429644129291062),\n", " (1106, 0.10277830647412974),\n", " (78, 0.10206207261596575),\n", " (1456, 0.10125804187397412),\n", " (552, 0.10041067934611822),\n", " (1660, 0.10041067934611822),\n", " (33, 0.1002194704003772),\n", " (318, 0.1002194704003772),\n", " (474, 0.10015420209622192),\n", " (934, 0.09999999999999998),\n", " (1262, 0.09960238411119947),\n", " (1466, 0.09951829597116882),\n", " (879, 0.09938079899999067),\n", " (115, 0.09913531291324379),\n", " (1274, 0.09829463743659808),\n", " (532, 0.09820927516479827),\n", " (782, 0.09798005352332918),\n", " (1057, 0.0975900072948533),\n", " (456, 0.09743547036924464),\n", " (408, 0.09698572899377438),\n", " (687, 0.09698572899377436),\n", " (1017, 0.09622504486493763),\n", " (883, 0.09566067023247539),\n", " (1340, 0.0951061109496652),\n", " (14, 0.09496714704969829),\n", " (1373, 0.09483242373202184),\n", " (1661, 0.09476225544736291),\n", " (23, 0.09459675396808044),\n", " (1256, 0.09455143815519593),\n", " (582, 0.09435641951204965),\n", " (1252, 0.09365858115816941),\n", " (537, 0.0935568998979686),\n", " (187, 0.09340808971963417),\n", " (1623, 0.09304842103984709),\n", " (1317, 0.09296222517045284),\n", " (1399, 0.09296222517045283),\n", " (473, 0.09247943325597503),\n", " (279, 0.09245003270420485),\n", " (654, 0.09212846639876111),\n", " (1601, 0.09200874124564723),\n", " (82, 0.09179850920431568),\n", " (890, 0.09143835580986637),\n", " (268, 0.09128709291752768),\n", " (1421, 0.09128709291752768),\n", " (513, 0.09016696346674324),\n", " (507, 0.09012301173931253),\n", " (481, 0.09001028747788695),\n", " (551, 0.08980265101338746),\n", " (1524, 0.08961112680829966),\n", " (111, 0.08948747402853659),\n", " (651, 0.08948747402853657),\n", " (1298, 0.08934271123648291),\n", " (1639, 0.08921722983855179),\n", " (606, 0.08913971702649112),\n", " (477, 0.0890870806374748),\n", " (798, 0.08908708063747478),\n", " (892, 0.08882529023711003),\n", " (1120, 0.08882529023711003),\n", " (1185, 0.08867576348258963),\n", " (124, 0.08793155726408239),\n", " (1657, 0.08779776400125336),\n", " (1383, 0.08748177652797064),\n", " (1065, 0.08744344477119373),\n", " (152, 0.08716272672808179),\n", " (157, 0.08703882797784893),\n", " (183, 0.08703882797784893),\n", " (611, 0.08703882797784893),\n", " (940, 0.0868810058571145),\n", " (1342, 0.08671369113740707),\n", " (699, 0.08632206622970622),\n", " (1163, 0.08632206622970622),\n", " (1512, 0.0863220662297062),\n", " (1428, 0.08613751405140559),\n", " (1380, 0.08554415042912275),\n", " (435, 0.08486251286955257),\n", " (1549, 0.08451542547285168),\n", " (426, 0.08399210511316162),\n", " (797, 0.0839921051131616),\n", " (216, 0.08394256685753218),\n", " (674, 0.08341955511876983),\n", " (560, 0.08333333333333334),\n", " (199, 0.08312577800896942),\n", " (855, 0.08300970751529203),\n", " (1559, 0.08240220541217404),\n", " (670, 0.08206099398622184),\n", " (1031, 0.08190729279494122),\n", " (104, 0.08168300490163391),\n", " (215, 0.08168300490163391),\n", " (1438, 0.08164965809277261),\n", " (789, 0.08161701781239863),\n", " (70, 0.08146881698903526),\n", " (110, 0.08144629623753681),\n", " (1584, 0.08108546209455464),\n", " (1166, 0.0807502638519182),\n", " (378, 0.08058638469539994),\n", " (1602, 0.08040844011283461),\n", " (908, 0.08018753738744802),\n", " (77, 0.08006407690254358),\n", " (1044, 0.08006407690254357),\n", " (545, 0.08001322641986387),\n", " (1625, 0.07911417286705225),\n", " (361, 0.07830153977496952),\n", " (419, 0.07772873369453565),\n", " (424, 0.07754035086653924),\n", " (181, 0.07745966692414834),\n", " (517, 0.07745966692414832),\n", " (579, 0.07736784709142126),\n", " (133, 0.07726438749546088),\n", " (1155, 0.076847327936784),\n", " (99, 0.07684732793678399),\n", " (921, 0.07654655446197431),\n", " (1679, 0.07613869876268811),\n", " (1577, 0.07613869876268808),\n", " (1076, 0.07607257743127308),\n", " (881, 0.07580980435789034),\n", " (1500, 0.07580980435789032),\n", " (809, 0.07566499084579775),\n", " (1218, 0.07522257782070334),\n", " (386, 0.07516460280028289),\n", " (1332, 0.07491109582924912),\n", " (1321, 0.0747696205456059),\n", " (1505, 0.07453559924999298),\n", " (12, 0.07423923386456233),\n", " (917, 0.07423923386456233),\n", " (1673, 0.07410810764999062),\n", " (555, 0.07407407407407407),\n", " (709, 0.07372097807744857),\n", " (1268, 0.07358513094805799),\n", " (681, 0.0734930919740164),\n", " (225, 0.07345531603115878),\n", " (1557, 0.07333055637076138),\n", " (1307, 0.07302967433402215),\n", " (1183, 0.07294801825540143),\n", " (1598, 0.07273929674533079),\n", " (1498, 0.07267523746672638),\n", " (148, 0.07241672621247579),\n", " (1344, 0.07241672621247579),\n", " (1654, 0.07241672621247579),\n", " (138, 0.07216878364870322),\n", " (689, 0.0721021519443032),\n", " (1210, 0.07202013512085205),\n", " (347, 0.07180336973794409),\n", " (1096, 0.07178425360775541),\n", " (1117, 0.07178425360775541),\n", " (795, 0.07152351257047909),\n", " (162, 0.07147416898918632),\n", " (620, 0.07147416898918632),\n", " (1107, 0.07136240321480627),\n", " (165, 0.07106690545187014),\n", " (596, 0.07093804422989572),\n", " (1035, 0.07086586712703119),\n", " (169, 0.07017932675554325),\n", " (893, 0.07009925201600498),\n", " (479, 0.0700140042014005),\n", " (1423, 0.06999342092763468),\n", " (1306, 0.06978631577988531),\n", " (1085, 0.06969444979857556),\n", " (454, 0.06944444444444446),\n", " (1543, 0.06944444444444445),\n", " (718, 0.06900655593423542),\n", " (725, 0.06875537172325702),\n", " (961, 0.06874649261535878),\n", " (1618, 0.06865346047348511),\n", " (1686, 0.0685575058158106),\n", " (800, 0.068308735943808),\n", " (1534, 0.0682523632789935),\n", " (427, 0.06804138174397717),\n", " (436, 0.06804138174397717),\n", " (1099, 0.06804138174397717),\n", " (744, 0.06804138174397716),\n", " (1597, 0.06787191353128069),\n", " (1384, 0.0676115519099702),\n", " (390, 0.0675737378399486),\n", " (71, 0.06737100695453693),\n", " (874, 0.06737100695453693),\n", " (128, 0.06735753140545635),\n", " (929, 0.06735753140545635),\n", " (888, 0.0672577696407091),\n", " (535, 0.06721326105145359),\n", " (178, 0.06720648654545931),\n", " (75, 0.06672006408545297),\n", " (175, 0.06672006408545297),\n", " (1105, 0.06672006408545297),\n", " (121, 0.06666666666666665),\n", " (1627, 0.06641465930020336),\n", " (170, 0.06600983619844494),\n", " (1325, 0.06584269219592206),\n", " (678, 0.0657951694959769),\n", " (1646, 0.06577808649698821),\n", " (623, 0.06573421981221797),\n", " (1614, 0.06573421981221797),\n", " (296, 0.06542341797740515),\n", " (751, 0.06537204504606134),\n", " (1053, 0.06502285955340587),\n", " (567, 0.0649569802461631),\n", " (1352, 0.06487491201346025),\n", " (460, 0.06481481481481483),\n", " (73, 0.06454972243679027),\n", " (1350, 0.0643711613134312),\n", " (1664, 0.06429306248420497),\n", " (1403, 0.06401843996644799),\n", " (1028, 0.06393293950763894),\n", " (38, 0.06372659274182271),\n", " (109, 0.06364688465216445),\n", " (49, 0.06337242505244779),\n", " (1455, 0.06337242505244779),\n", " (134, 0.06336463543591632),\n", " (978, 0.06327719971683327),\n", " (164, 0.06317483696490861),\n", " (1337, 0.06314335411250997),\n", " (864, 0.06308610821751877),\n", " (799, 0.06306450264538696),\n", " (1272, 0.0627702980962258),\n", " (254, 0.06264546476465861),\n", " (538, 0.06262242910851495),\n", " (519, 0.06250000000000001),\n", " (948, 0.06250000000000001),\n", " (1405, 0.062257280636469035),\n", " (767, 0.06203228069323139),\n", " (974, 0.06189844605901729),\n", " (1244, 0.06189844605901729),\n", " (588, 0.06163335513613657),\n", " (849, 0.06163335513613656),\n", " (1672, 0.06160177257954774),\n", " (949, 0.061545745489666376),\n", " (1496, 0.061545745489666376),\n", " (975, 0.06154574548966636),\n", " (1636, 0.061475926130276456),\n", " (383, 0.06143675701121218),\n", " (944, 0.06123724356957945),\n", " (2, 0.06123411945950577),\n", " (203, 0.06119900613621046),\n", " (657, 0.06098264357375362),\n", " (791, 0.060858061945018464),\n", " (952, 0.060858061945018464),\n", " (295, 0.06085806194501846),\n", " (1566, 0.06085806194501846),\n", " (1567, 0.06085806194501846),\n", " (858, 0.06085806194501845),\n", " (1064, 0.06073424073445792),\n", " (252, 0.06063390625908324),\n", " (1254, 0.060633906259083235),\n", " (65, 0.060616080621108986),\n", " (349, 0.060616080621108986),\n", " (660, 0.06052275326688025),\n", " (1253, 0.06048884885927481),\n", " (379, 0.06033884900318473),\n", " (802, 0.060192926542884606),\n", " (1442, 0.05976143046671969),\n", " (401, 0.059676239503286085),\n", " (1667, 0.05954913341754137),\n", " (898, 0.05953620902598002),\n", " (1005, 0.059391387091649865),\n", " (1621, 0.059391387091649865),\n", " (1286, 0.05933490454240869),\n", " (756, 0.059304999573790707),\n", " (1149, 0.059062442321861836),\n", " (820, 0.05892556509887897),\n", " (852, 0.05892556509887897),\n", " (765, 0.05892556509887896),\n", " (1021, 0.05892556509887896),\n", " (824, 0.05885054843562047),\n", " (362, 0.05869672708837492),\n", " (711, 0.058696727088374916),\n", " (1345, 0.058627205890120715),\n", " (920, 0.05818804460627859),\n", " (126, 0.05802588531856595),\n", " (414, 0.05802588531856595),\n", " (492, 0.05802588531856595),\n", " (1246, 0.057993350477029436),\n", " (448, 0.05797890810338324),\n", " (11, 0.057972132749994555),\n", " (572, 0.05773502691896258),\n", " (1537, 0.05753758026694707),\n", " (288, 0.0573775310549247),\n", " (325, 0.0573775310549247),\n", " (992, 0.05737753105492469),\n", " (1551, 0.057338538504682214),\n", " (229, 0.05721881005638328),\n", " (1066, 0.057218810056383275),\n", " (728, 0.057099059152294296),\n", " (1378, 0.05706139900145506),\n", " (1243, 0.05673521058541754),\n", " (375, 0.05672527447140345),\n", " (816, 0.05661385170722979),\n", " (1261, 0.05659684799706785),\n", " (566, 0.056343616981901115),\n", " (34, 0.056131276171213614),\n", " (450, 0.05607721540920443),\n", " (1592, 0.05602046370727958),\n", " (1588, 0.05599473674210774),\n", " (1420, 0.05593738517329483),\n", " (542, 0.055929671267835365),\n", " (399, 0.05581455721859476),\n", " (253, 0.055555555555555566),\n", " (863, 0.055555555555555566),\n", " (1585, 0.05555555555555556),\n", " (719, 0.05555555555555555),\n", " (1016, 0.05555555555555555),\n", " (976, 0.055441595321592964),\n", " (1619, 0.05532833351724881),\n", " (392, 0.0553001263609331),\n", " (1479, 0.05499719409228703),\n", " (1137, 0.0549857399228215),\n", " (1362, 0.0549857399228215),\n", " (1019, 0.05482930791331408),\n", " (482, 0.05463417234226549),\n", " (163, 0.05443310539518174),\n", " (706, 0.05416296424682583),\n", " (1508, 0.05416296424682583),\n", " (778, 0.05392918574411949),\n", " (857, 0.053896805563629545),\n", " (712, 0.05383819020581655),\n", " (1467, 0.05362412100731557),\n", " (1407, 0.05360562674188975),\n", " (547, 0.05358677475370628),\n", " (639, 0.05345835825829868),\n", " (57, 0.05337605126836238),\n", " (1385, 0.053376051268362375),\n", " (770, 0.053250804201891155),\n", " (1104, 0.05324820425539145),\n", " (24, 0.05314940034527339),\n", " (265, 0.05313139275527819),\n", " (282, 0.05292561240249631),\n", " (312, 0.052870105714542946),\n", " (964, 0.052870105714542946),\n", " (36, 0.05281035421037316),\n", " (1503, 0.05274585147528237),\n", " (580, 0.05249506569572601),\n", " (625, 0.05249506569572601),\n", " (990, 0.05243776708047439),\n", " (1174, 0.05243776708047438),\n", " (1605, 0.05213745913371834),\n", " (672, 0.052057920629535355),\n", " (418, 0.05196746370519363),\n", " (1144, 0.05196746370519363),\n", " (745, 0.05184758473652127),\n", " (1015, 0.05184758473652126),\n", " (289, 0.05178237582537449),\n", " (376, 0.05175491695067656),\n", " (1574, 0.05166561721947523),\n", " (1431, 0.051434449987363975),\n", " (759, 0.05128812124138864),\n", " (44, 0.05103103630798288),\n", " (697, 0.05103103630798288),\n", " (463, 0.05099917178017538),\n", " (815, 0.05099917178017538),\n", " (1437, 0.05083285677753489),\n", " (1363, 0.05071505162084871),\n", " (870, 0.050486604280923476),\n", " (1223, 0.050486604280923476),\n", " (54, 0.050435627722400206),\n", " (149, 0.050435627722400206),\n", " (1025, 0.050435627722400206),\n", " (1568, 0.05034538273357869),\n", " (871, 0.0501097352001886),\n", " (907, 0.0501097352001886),\n", " (1146, 0.0501097352001886),\n", " (377, 0.05006645879933536),\n", " (723, 0.05006645879933536),\n", " (503, 0.05006261743217588),\n", " (1365, 0.05000390670782329),\n", " (1239, 0.04996877926639076),\n", " (1570, 0.04989036167410414),\n", " (1242, 0.04975185951049945),\n", " (943, 0.04969039949999533),\n", " (1119, 0.04969039949999533),\n", " (51, 0.049690399499995326),\n", " (59, 0.049690399499995326),\n", " (221, 0.049690399499995326),\n", " (705, 0.049669963389939134),\n", " (132, 0.049624277847951134),\n", " (1513, 0.04958847036804648),\n", " (698, 0.049587200220373454),\n", " (453, 0.04936240647992436),\n", " (549, 0.04936240647992436),\n", " (1138, 0.04928141806363819),\n", " (1474, 0.04928141806363819),\n", " (942, 0.04914731871829904),\n", " (622, 0.049104637582399135),\n", " (1258, 0.049104637582399135),\n", " (937, 0.04904082386137497),\n", " (780, 0.04899539464934427),\n", " (1579, 0.048970210687439175),\n", " (10, 0.04888237167378443),\n", " (828, 0.04835490443213829),\n", " (1356, 0.04794633014853843),\n", " (838, 0.04787374913511885),\n", " (1416, 0.04787374913511885),\n", " (1318, 0.0478689131586294),\n", " (1477, 0.0478689131586294),\n", " (1164, 0.04778184825674965),\n", " (1013, 0.04763849587291913),\n", " (1115, 0.04746792363392696),\n", " (142, 0.047467923633926955),\n", " (872, 0.04745789978762495),\n", " (914, 0.04740667699904787),\n", " (932, 0.04740667699904787),\n", " (274, 0.047178209756024825),\n", " (731, 0.04702304105723989),\n", " (1495, 0.04695301415158427),\n", " (120, 0.046829290579084706),\n", " (246, 0.046829290579084706),\n", " (1020, 0.046731012841003686),\n", " (1311, 0.04667600280093366),\n", " (1554, 0.046524210519923545),\n", " (691, 0.04633653645712773),\n", " (1153, 0.046225016352102424),\n", " (1514, 0.04608343863411092),\n", " (1372, 0.04607756775840914),\n", " (501, 0.046004370622823615),\n", " (772, 0.045873490213598364),\n", " (469, 0.04578685464956301),\n", " (55, 0.04567924732183544),\n", " (1169, 0.045643546458763846),\n", " (1222, 0.04564354645876384),\n", " (1092, 0.04562027674200355),\n", " (1139, 0.04562027674200354),\n", " (1677, 0.04546206046583175),\n", " (724, 0.04536092116265145),\n", " (561, 0.04504915855996573),\n", " (946, 0.04504915855996573),\n", " (459, 0.04486515850484713),\n", " (919, 0.04480556340414984),\n", " (911, 0.044743737014268294),\n", " (935, 0.044743737014268294),\n", " (345, 0.0445435403187374),\n", " (1628, 0.0445435403187374),\n", " (994, 0.044480042723635316),\n", " (1475, 0.044480042723635316),\n", " (753, 0.044291166954394494),\n", " (1026, 0.044291166954394494),\n", " (1054, 0.044291166954394494),\n", " (1069, 0.044151078568834795),\n", " (680, 0.04415107856883479),\n", " (433, 0.04410467700208026),\n", " (546, 0.04402254531628119),\n", " (1481, 0.04402254531628119),\n", " (783, 0.04392052305789416),\n", " (281, 0.043863446330651264),\n", " (1520, 0.043846018300165025),\n", " (1184, 0.04376881095324085),\n", " (1434, 0.04376881095324085),\n", " (1062, 0.04366140266629866),\n", " (372, 0.04358136336404089),\n", " (686, 0.04358136336404089),\n", " (1611, 0.04351941398892446),\n", " (1540, 0.0433816005246128),\n", " (1225, 0.043121240805683414),\n", " (205, 0.04303314829119352),\n", " (471, 0.04303314829119352),\n", " (637, 0.04303314829119352),\n", " (1001, 0.04303314829119352),\n", " (1170, 0.04303314829119352),\n", " (792, 0.04286204165613664),\n", " (1032, 0.042766686606638946),\n", " (452, 0.04269295996488),\n", " (1078, 0.042686101107101804),\n", " (409, 0.04256282653793743),\n", " (1263, 0.04252586358998573),\n", " (340, 0.04251569682960854),\n", " (63, 0.04250511420422255),\n", " (172, 0.04250511420422255),\n", " (258, 0.04250511420422255),\n", " (405, 0.04250511420422255),\n", " (1547, 0.04250511420422255),\n", " (977, 0.04244763599780089),\n", " (1051, 0.04236071398127907),\n", " (437, 0.04224828336829853),\n", " (1178, 0.04222003309207491),\n", " (884, 0.04219747362848612),\n", " (1289, 0.04219747362848612),\n", " (1312, 0.04209845712841021),\n", " (1071, 0.04187657563580051),\n", " (92, 0.04177559931443569),\n", " (321, 0.0417482860723433),\n", " (669, 0.0417482860723433),\n", " (1077, 0.04171884786014657),\n", " (1460, 0.04171884786014657),\n", " (114, 0.04166666666666667),\n", " (374, 0.04166666666666667),\n", " (563, 0.04166666666666667),\n", " (1527, 0.04156288900448471),\n", " (1255, 0.04129955231527935),\n", " (1180, 0.041256147624028086),\n", " (636, 0.041239304942116126),\n", " (518, 0.041030496993110906),\n", " (1682, 0.041030496993110906),\n", " (710, 0.040841502450816955),\n", " (922, 0.04080850890619932),\n", " (985, 0.04080850890619932),\n", " (1220, 0.040799337424140304),\n", " (1565, 0.040799337424140304),\n", " (650, 0.04051805319385961),\n", " (1355, 0.04039836272875922),\n", " (260, 0.040348502177920166),\n", " (360, 0.040348502177920166),\n", " (1036, 0.040348502177920166),\n", " (1669, 0.040348502177920166),\n", " (510, 0.04009376869372401),\n", " (1167, 0.04009376869372401),\n", " (241, 0.04003203845127179),\n", " (283, 0.04003203845127179),\n", " (790, 0.04003203845127178),\n", " (523, 0.03991228933928331),\n", " (873, 0.03991228933928331),\n", " (1284, 0.03991228933928331),\n", " (1182, 0.03981820688062471),\n", " (1257, 0.03969942227836091),\n", " (6, 0.03965257928590721),\n", " (353, 0.03965257928590721),\n", " (403, 0.03965257928590721),\n", " (902, 0.03965257928590721),\n", " (1499, 0.03955938860646178),\n", " (1179, 0.039553704377880604),\n", " (1388, 0.03946685189819293),\n", " (1111, 0.039415314153366854),\n", " (499, 0.03937496154790789),\n", " (343, 0.03928371006591931),\n", " (1168, 0.03928371006591931),\n", " (1539, 0.03928371006591931),\n", " (1635, 0.03928371006591931),\n", " (601, 0.03918180662561556),\n", " (1330, 0.03915341547791163),\n", " (1084, 0.039080692117369954),\n", " (618, 0.038924947208076155),\n", " (1411, 0.038924947208076155),\n", " (1412, 0.038924947208076155),\n", " (748, 0.03889666900077805),\n", " (1112, 0.03889666900077805),\n", " (1201, 0.03889666900077805),\n", " (1300, 0.038880789567986955),\n", " (95, 0.03877017543326962),\n", " (1192, 0.03877017543326962),\n", " (45, 0.03868392354571063),\n", " (1638, 0.03864490798001402),\n", " (853, 0.03829899930809508),\n", " (1128, 0.038152379685825785),\n", " (382, 0.037917979599440835),\n", " (302, 0.03774256780481986),\n", " (918, 0.03774256780481986),\n", " (1121, 0.037742567804819854),\n", " (1008, 0.03766217885773547),\n", " (1083, 0.03745554791462456),\n", " (1488, 0.03742405942825599),\n", " (1067, 0.03736323588785367),\n", " (1375, 0.037267799624996496),\n", " (1123, 0.037230587634064186),\n", " (1143, 0.03703703703703704),\n", " (1082, 0.037037037037037035),\n", " (13, 0.03696106354772864),\n", " (1010, 0.036866750907288734),\n", " (848, 0.03668545443023433),\n", " (1124, 0.036661778755338326),\n", " (244, 0.03657927344217893),\n", " (1264, 0.03657927344217892),\n", " (1616, 0.0365331082900794),\n", " (112, 0.03647400912770071),\n", " (235, 0.036369648372665396),\n", " (330, 0.036369648372665396),\n", " (478, 0.036369648372665396),\n", " (609, 0.036369648372665396),\n", " (773, 0.036369648372665396),\n", " (810, 0.036369648372665396),\n", " (607, 0.03636964837266539),\n", " (1295, 0.03636964837266539),\n", " (1415, 0.03636964837266539),\n", " (1319, 0.03626617832410372),\n", " (1354, 0.03608439182435161),\n", " (1600, 0.03608439182435161),\n", " (758, 0.036049204695725014),\n", " (8, 0.03594425773447947),\n", " (261, 0.035761756285239546),\n", " (1271, 0.035761756285239546),\n", " (156, 0.03573708449459316),\n", " (653, 0.03573708449459316),\n", " (722, 0.03558403417890825),\n", " (441, 0.03546902211494786),\n", " (1140, 0.03546902211494786),\n", " (1043, 0.03537302999816741),\n", " (1217, 0.03537302999816741),\n", " (1215, 0.03533326266687867),\n", " (1465, 0.03528374160166421),\n", " (694, 0.03520166719043814),\n", " (1630, 0.03513641844631533),\n", " (46, 0.03508966337777163),\n", " (231, 0.03500700210070025),\n", " (291, 0.03500700210070025),\n", " (272, 0.034904492357641294),\n", " (1622, 0.034904492357641294),\n", " (1093, 0.03487900532075779),\n", " (299, 0.03456505649101418),\n", " (341, 0.03456505649101418),\n", " (717, 0.03456505649101418),\n", " (1406, 0.03454277801206993),\n", " (257, 0.03450327796711771),\n", " (1472, 0.03450327796711771),\n", " (1648, 0.0344540929414468),\n", " (89, 0.03442651863295482),\n", " (430, 0.03442651863295482),\n", " (358, 0.034289633324909315),\n", " (250, 0.0342787529079053),\n", " (130, 0.03425943549137658),\n", " (565, 0.03425943549137658),\n", " (1410, 0.034192080827592425),\n", " (420, 0.034154367971904),\n", " (230, 0.034106062877919084),\n", " (1452, 0.03406416781097072),\n", " (388, 0.03402069087198859),\n", " (368, 0.034020690871988585),\n", " (1647, 0.034020690871988585),\n", " (509, 0.03388857118502326),\n", " (1419, 0.03385185265426614),\n", " (428, 0.0337868689199743),\n", " (1302, 0.03376837076971283),\n", " (837, 0.03375797890278889),\n", " (1533, 0.03361463227264072),\n", " (113, 0.033501260508640406),\n", " (851, 0.033501260508640406),\n", " (981, 0.033501260508640406),\n", " (1490, 0.033501260508640406),\n", " (732, 0.033333333333333326),\n", " (84, 0.03328012765961966),\n", " (1379, 0.03327791628198609),\n", " (129, 0.03312693299999689),\n", " (1613, 0.03312693299999689),\n", " (1377, 0.033122026396548515),\n", " (300, 0.03311330892662609),\n", " (1655, 0.03311330892662609),\n", " (1259, 0.03305898024536432),\n", " (16, 0.03289758474798845),\n", " (40, 0.03289758474798845),\n", " (342, 0.03289758474798845),\n", " (631, 0.03289758474798845),\n", " (1366, 0.03288904324849411),\n", " (1663, 0.0328124679565899),\n", " (556, 0.032660908402900854),\n", " (1676, 0.03264680712495945),\n", " (1109, 0.032585911958573965),\n", " (354, 0.03253000243161777),\n", " (94, 0.03247849012308155),\n", " (485, 0.032414442555087686),\n", " (303, 0.03229101076217202),\n", " (1339, 0.03222454963485546),\n", " (1234, 0.032186969972351175),\n", " (1275, 0.03214653124210248),\n", " (1059, 0.032075014954979206),\n", " (1189, 0.032075014954979206),\n", " (1030, 0.031863296370911356),\n", " (882, 0.03185456550449977),\n", " (1470, 0.03185456550449977),\n", " (1591, 0.03178208630818641),\n", " (544, 0.03172445781778671),\n", " (96, 0.031686212526223896),\n", " (671, 0.031686212526223896),\n", " (841, 0.031686212526223896),\n", " (1029, 0.03165571568323276),\n", " (150, 0.031497039417435604),\n", " (916, 0.031497039417435604),\n", " (19, 0.031322732382329306),\n", " (206, 0.031322732382329306),\n", " (684, 0.03131121455425748),\n", " (227, 0.031219527052723135),\n", " (823, 0.031219527052723135),\n", " (1464, 0.031219527052723135),\n", " (1433, 0.031128640318234518),\n", " (768, 0.03111733520062244),\n", " (905, 0.03111733520062244),\n", " (1607, 0.03111733520062244),\n", " (1586, 0.031056499687497078),\n", " (1304, 0.031056499687497074),\n", " (209, 0.03099200013773341),\n", " (160, 0.030949223029508643),\n", " (371, 0.030949223029508643),\n", " (239, 0.030816677568068284),\n", " (612, 0.030816677568068284),\n", " (15, 0.03077287274483318),\n", " (1335, 0.03077287274483318),\n", " (1270, 0.030718378505606093),\n", " (1260, 0.03061326493211021),\n", " (1530, 0.03061326493211021),\n", " (329, 0.03052456976637534),\n", " (1110, 0.030513909884867668),\n", " (39, 0.030429030972509232),\n", " (200, 0.030429030972509232),\n", " (1175, 0.03033438367955267),\n", " (822, 0.030261376633440126),\n", " (847, 0.030261376633440126),\n", " (1484, 0.030261376633440126),\n", " (599, 0.030147708776408974),\n", " (310, 0.03012923025259114),\n", " (1187, 0.03007032652029301),\n", " (136, 0.030055654488914414),\n", " (529, 0.030055654488914414),\n", " (581, 0.030055654488914414),\n", " (1046, 0.03001176691940082),\n", " (107, 0.02996443833169965),\n", " (413, 0.029934217004462485),\n", " (1292, 0.029895666805590637),\n", " (1417, 0.029895666805590634),\n", " (26, 0.02987404766321472),\n", " (1516, 0.029784470107251354),\n", " (530, 0.02978447010725135),\n", " (740, 0.029695693545824933),\n", " (613, 0.02961744388795462),\n", " (923, 0.02961744388795462),\n", " (1235, 0.029607706112288502),\n", " (1294, 0.029607706112288502),\n", " (1538, 0.029607706112288502),\n", " (1408, 0.029520496184669842),\n", " (1454, 0.029520496184669842),\n", " (926, 0.029462782549439483),\n", " (444, 0.029434052379223197),\n", " (1186, 0.029434052379223197),\n", " (154, 0.02934836354418746),\n", " (614, 0.02934836354418746),\n", " (1296, 0.02934836354418746),\n", " (20, 0.029310519088027467),\n", " (641, 0.02926341875374314),\n", " (885, 0.02926341875374314),\n", " (1521, 0.0292262759929956),\n", " (602, 0.029160592175990215),\n", " (951, 0.029160592175990215),\n", " (702, 0.029095718698132315),\n", " (1125, 0.029095718698132315),\n", " (541, 0.029012942659282975),\n", " (646, 0.029012942659282975),\n", " (1358, 0.029012942659282975),\n", " (1392, 0.029012942659282975),\n", " (1492, 0.029012942659282972),\n", " (1349, 0.028939954008661634),\n", " (598, 0.028930869106849753),\n", " (730, 0.02886751345948129),\n", " (1364, 0.0288494881605277),\n", " (1074, 0.02875273163926476),\n", " (324, 0.02868876552746235),\n", " (1063, 0.02868876552746235),\n", " (1091, 0.02868876552746235),\n", " (1522, 0.02868876552746235),\n", " (1656, 0.02868876552746235),\n", " (1594, 0.02859992596060751),\n", " (476, 0.028583097523751475),\n", " (901, 0.028583097523751475),\n", " (1023, 0.028583097523751475),\n", " (1422, 0.028452639985087996),\n", " (784, 0.028444006199428717),\n", " (1668, 0.028444006199428717),\n", " (679, 0.028375217691958287),\n", " (1432, 0.028375217691958287),\n", " (86, 0.028306925853614898),\n", " (1681, 0.02825264598420043),\n", " (1251, 0.02822225044448678),\n", " (814, 0.028171808490950558),\n", " (868, 0.028171808490950558),\n", " (204, 0.0280717307022173),\n", " (1219, 0.028038607704602213),\n", " (1172, 0.02799736837105387),\n", " (1022, 0.027923593886113034),\n", " (1632, 0.027923593886113034),\n", " (27, 0.02790727860929738),\n", " (47, 0.027777777777777783),\n", " (464, 0.027777777777777783),\n", " (586, 0.027777777777777783),\n", " (592, 0.027777777777777783),\n", " (957, 0.027777777777777783),\n", " (1683, 0.027777777777777783),\n", " (1666, 0.027705721164112757),\n", " (1088, 0.02756327435315744),\n", " (1171, 0.02756327435315744),\n", " (48, 0.02749286996141075),\n", " (1494, 0.02749286996141075),\n", " (306, 0.02742300232632424),\n", " (504, 0.02742300232632424),\n", " (887, 0.02742300232632424),\n", " (707, 0.02739983121755955),\n", " (913, 0.02739983121755955),\n", " (439, 0.02721655269759087),\n", " (1441, 0.027194805548534904),\n", " (1439, 0.02718636239135184),\n", " (4, 0.02715627232967842),\n", " (240, 0.02715627232967842),\n", " (190, 0.027081482123412916),\n", " (969, 0.027036903521793762),\n", " (1599, 0.027036903521793762),\n", " (264, 0.026948402781814772),\n", " (721, 0.026948402781814772),\n", " (1237, 0.026948402781814772),\n", " (1469, 0.026948402781814772),\n", " (391, 0.026919095102908276),\n", " (605, 0.026919095102908276),\n", " (412, 0.026882594618183726),\n", " (793, 0.026688025634181187),\n", " (1542, 0.026574700172636696),\n", " (839, 0.02646280620124816),\n", " (85, 0.026435052857271473),\n", " (271, 0.026372925737641186),\n", " (585, 0.026352313834736497),\n", " (1291, 0.026352313834736497),\n", " (1651, 0.026352313834736497),\n", " (234, 0.026243194054073903),\n", " (971, 0.026243194054073903),\n", " (387, 0.026189140043946204),\n", " (764, 0.02602896031476768),\n", " (1278, 0.02602896031476768),\n", " (1436, 0.025949964805384102),\n", " (494, 0.02592379236826063),\n", " (1309, 0.02592379236826063),\n", " (597, 0.025819888974716113),\n", " (1430, 0.025774822434235307),\n", " (1670, 0.025774822434235307),\n", " (278, 0.025717224993681988),\n", " (1081, 0.025615775978928),\n", " (1519, 0.02560317908808562),\n", " (754, 0.02551551815399144),\n", " (1228, 0.025490637096729087),\n", " (421, 0.025434919790550525),\n", " (1643, 0.025434919790550525),\n", " (998, 0.025392818139000215),\n", " (1620, 0.025342864042323523),\n", " (1640, 0.02532457254658621),\n", " (875, 0.025269934785963444),\n", " (1609, 0.02521564914907446),\n", " (690, 0.025125945381480302),\n", " (1346, 0.025108119238490316),\n", " (1348, 0.025108119238490316),\n", " (174, 0.0250548676000943),\n", " (381, 0.0250548676000943),\n", " (396, 0.0250548676000943),\n", " (266, 0.02503130871608794),\n", " (716, 0.02503130871608794),\n", " (67, 0.024845199749997667),\n", " (66, 0.024845199749997663),\n", " (495, 0.024845199749997663),\n", " (262, 0.024753688574416854),\n", " (1359, 0.024753688574416854),\n", " (933, 0.0246631811833495),\n", " (1515, 0.024640709031819096),\n", " (1526, 0.024540339127743675),\n", " (301, 0.024441185836892216),\n", " (1202, 0.024392057775486746),\n", " (81, 0.024246432248443594),\n", " (895, 0.02414022747926338),\n", " (904, 0.024056261216234408),\n", " (866, 0.024056261216234404),\n", " (750, 0.023973165074269213),\n", " (589, 0.023916533444472508),\n", " (806, 0.02389092412837483),\n", " (76, 0.023870495801314433),\n", " (313, 0.023870495801314433),\n", " (813, 0.023870495801314433),\n", " (831, 0.023870495801314433),\n", " (1154, 0.023733961816963477),\n", " (506, 0.023728949893812475),\n", " (989, 0.023728949893812475),\n", " (1157, 0.023688968483956713),\n", " (979, 0.02364918849202011),\n", " (947, 0.023570226039551584),\n", " (1343, 0.02349204928674698),\n", " (311, 0.023262105259961773),\n", " (451, 0.023262105259961773),\n", " (512, 0.023262105259961773),\n", " (1004, 0.023262105259961773),\n", " (980, 0.023168268228563865),\n", " (1162, 0.023112508176051212),\n", " (1049, 0.023084778702148128),\n", " (938, 0.02303878387920457),\n", " (1287, 0.02303878387920457),\n", " (1061, 0.023002185311411807),\n", " (1114, 0.023002185311411807),\n", " (1443, 0.023002185311411807),\n", " (1402, 0.022893427324781505),\n", " (267, 0.02283962366091772),\n", " (355, 0.02283962366091772),\n", " (214, 0.022821773229381923),\n", " (1113, 0.022799518935225645),\n", " (180, 0.022680460581325727),\n", " (97, 0.022680460581325723),\n", " (736, 0.022680460581325723),\n", " (840, 0.022680460581325723),\n", " (1173, 0.022680460581325723),\n", " (1281, 0.022680460581325723),\n", " (1684, 0.02261078158230673),\n", " (801, 0.02254174086668581),\n", " (1212, 0.02254174086668581),\n", " (514, 0.022524579279982866),\n", " (1132, 0.022524579279982866),\n", " (832, 0.02240553574741104),\n", " ...]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(list(enumerate(similarity[0])), reverse=True, key=lambda x: x[1])" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "#### Creating Recommendations :" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleyear_of_releasegenresimdb_ratingimdb_votessummarytags
145https://upload.wikimedia.org/wikipedia/en/thum...Andhadhun2018[Crime, Thriller]8.451615.0[A, series, of, mysterious, events, changes, t...crime thriller akash a blind pianist is prepar...
\n", "
" ], "text/plain": [ " poster_path original_title \\\n", "145 https://upload.wikimedia.org/wikipedia/en/thum... Andhadhun \n", "\n", " year_of_release genres imdb_rating imdb_votes \\\n", "145 2018 [Crime, Thriller] 8.4 51615.0 \n", "\n", " summary \\\n", "145 [A, series, of, mysterious, events, changes, t... \n", "\n", " tags \n", "145 crime thriller akash a blind pianist is prepar... " ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[movies[\"original_title\"] == \"Andhadhun\"] # Returns Row" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "145" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[movies[\"original_title\"] == \"Andhadhun\"].index[0] # Return Index" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Raag Desh'" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.iloc[218].original_title" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "def recommended_movies(mov):\n", " idx = movies[movies[\"original_title\"] == mov].index[0]\n", " corr = similarity[idx]\n", " rec = sorted(list(enumerate(corr)), reverse=True, key=lambda x: x[1])[1:6]\n", "\n", " for i in rec:\n", " print(movies.iloc[i[0]].original_title)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://upload.wikimedia.org/wikipedia/en/thumb/3/3b/URI_-_New_poster.jpg/220px-URI_-_New_poster.jpg'" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies.iloc[0].poster_path" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Armaan\n", "Haider\n", "Chehraa\n", "Rukh\n", "London Dreams\n" ] } ], "source": [ "recommended_movies(\"Andhadhun\")" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
poster_pathoriginal_titleis_adultyear_of_releasegenresimdb_ratingimdb_votesstorysummaryactorstitletags
5https://upload.wikimedia.org/wikipedia/en/thum...Soni02018[Drama]7.21595.0[Soni, a, young, policewoman, in, Delhi, and, ...[While, fighting, crimes, against, women, in, ...[GeetikaVidyaOhlyan, SaloniBatra, VikasShukla,...[Soni][Drama, Soni, a, young, policewoman, in, Delhi...
\n", "
" ], "text/plain": [ " poster_path original_title is_adult \\\n", "5 https://upload.wikimedia.org/wikipedia/en/thum... Soni 0 \n", "\n", " year_of_release genres imdb_rating imdb_votes \\\n", "5 2018 [Drama] 7.2 1595.0 \n", "\n", " story \\\n", "5 [Soni, a, young, policewoman, in, Delhi, and, ... \n", "\n", " summary \\\n", "5 [While, fighting, crimes, against, women, in, ... \n", "\n", " actors title \\\n", "5 [GeetikaVidyaOhlyan, SaloniBatra, VikasShukla,... [Soni] \n", "\n", " tags \n", "5 [Drama, Soni, a, young, policewoman, in, Delhi... " ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[data[\"original_title\"]==\"Soni\"]" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "from pickle import dump" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "dump(movies.to_dict(), open(\"movie_info.pkl\", 'wb'))" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Uri: The Surgical Strike', 'Battalion 609',\n", " 'The Accidental Prime Minister', ..., 'Sabse Bada Sukh', 'Daaka',\n", " 'Humsafar'], dtype=object)" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[\"original_title\"].values" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "dump(similarity, open(\"similarity.pkl\", 'wb'))" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Series([], Name: poster_path, dtype: object)" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[data[\"original_title\"]==\"Smuggler\"].poster_path" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'A series of mysterious events changes the life of a blind pianist who must now report a crime that he should technically know nothing of.'" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "idx = movies[movies[\"original_title\"] == \"Andhadhun\"].index[0]\n", "\" \".join(movies.iloc[idx].summary)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "706654849fe4d07e215a38f448ee8e5d780794e2be3793e11d37ab3169b306ae" } } }, "nbformat": 4, "nbformat_minor": 2 }