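# NOTE(review): the next line looks like web-page residue rather than code;
# it is kept as a comment so the file parses.
# Spaces: Runtime error / Runtime error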
import ast

import pandas as pd

from recommendation_model import Model
""" | |
The dataset is obtained from TMDB 5000 Movie Dataset | |
https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata | |
""" | |
def get_name(x, limit=5):
    """Return the first ``limit`` ``name`` values of a serialized list, lower-cased.

    Args:
        x: String holding a Python/JSON-style literal list of dicts, each
            with a ``'name'`` key (parsed with ``ast.literal_eval``).
        limit: Maximum number of names to keep, counted from the start of
            the list. Defaults to 5.

    Returns:
        The kept names, lower-cased and joined with ``', '``; an empty
        string when the list is empty.

    Raises:
        ValueError, SyntaxError: If ``x`` is not a valid literal.
        KeyError: If one of the kept entries has no ``'name'`` key.
    """
    entries = ast.literal_eval(x)
    # Slice before lower-casing so only the entries that are kept get processed.
    return ', '.join(entry['name'].lower() for entry in entries[:limit])
def get_director(x):
    """Return the lower-cased names of all crew entries whose job is director.

    Args:
        x: String holding a Python/JSON-style literal list of dicts, each
            with ``'name'`` and ``'job'`` keys (parsed with
            ``ast.literal_eval``).

    Returns:
        The matching names, lower-cased and joined with ``', '``; an empty
        string when no entry has the job ``director``.

    Raises:
        ValueError, SyntaxError: If ``x`` is not a valid literal.
        KeyError: If an entry lacks the ``'job'`` key, or a matching entry
            lacks ``'name'``.
    """
    crew = ast.literal_eval(x)
    # The job label is compared case-insensitively.
    directors = [
        member['name'].lower()
        for member in crew
        if member['job'].lower() == 'director'
    ]
    return ', '.join(directors)
def get_year(x) -> str:
    """Return the first four characters of ``str(x)``.

    For a date string that starts with a four-digit year (for example
    ``'2009-12-10'``) this is the year. The value is not validated: a float
    NaN, which is what pandas uses for a missing cell, comes back as ``'nan'``.

    Args:
        x: Any value; it is converted with ``str`` before slicing.

    Returns:
        At most four leading characters of the string form of ``x``.
    """
    return str(x)[:4]
def normalize_data(x):
    """Min-max scale ``x`` into the range [0, 1].

    Computes ``(x - min) / (max - min)`` using ``x.min()`` and ``x.max()``.

    Args:
        x: Object exposing ``min()``/``max()`` and element-wise arithmetic;
            this file passes pandas Series.

    Returns:
        The rescaled values. When every value is identical (scalar span of
        zero) the result is all zeros rather than the NaN that 0/0 would
        produce.
    """
    lowest = x.min()
    span = x.max() - lowest
    # A constant input has a zero span; dividing by 1 instead turns the
    # would-be 0/0 into 0. Only applied when the span is a scalar, so inputs
    # whose min()/max() are not scalars behave exactly as before.
    if pd.api.types.is_scalar(span) and span == 0:
        span = 1
    return (x - lowest) / span
# --- Load ------------------------------------------------------------------
# Read both CSV files and join them on the movie id. The credits file calls
# that column 'movie_id', so it is renamed to match before merging.
raw1 = pd.read_csv('tmdb_5000_movies.csv')
raw2 = pd.read_csv('tmdb_5000_credits.csv')
raw2 = raw2.rename(columns={'movie_id': 'id'})
df = pd.merge(raw1, raw2, on='id')

# --- Drop columns that are not used below -----------------------------------
df = df.drop(columns=[
    'budget',
    'homepage',
    'overview',
    'tagline',
    'status',
    'production_companies',
    'production_countries',
    'revenue',
    'spoken_languages',
    'title_x',
    'title_y',
    'vote_count',
])

# --- Flatten the serialized list columns into comma-separated strings -------
df['genres'] = df['genres'].map(get_name)
df['keywords'] = df['keywords'].map(get_name)
df['cast'] = df['cast'].map(get_name)
df['crew'] = df['crew'].map(get_director)
df['release_date'] = df['release_date'].map(get_year)

# Replace the original ids with consecutive row numbers. pd.merge returns a
# fresh 0..n-1 index, so one vectorised assignment gives the same values as
# writing df.loc[i, 'id'] = i row by row.
df['id'] = range(len(df))

df = df.rename(columns={
    'original_language': 'language',
    'original_title': 'title',
    'release_date': 'year',
    'vote_average': 'rating',
    'crew': 'director',
})

# Keep only the columns handed to the model, in this order.
df = df[[
    'id',
    'title',
    'genres',
    'keywords',
    'director',
    'cast',
    'year',
    'language',
    'runtime',
    'popularity',
    'rating',
]]

# Store id and year as strings.
df['id'] = df['id'].map(str)
df['year'] = df['year'].map(str)

# Scale the numeric columns to [0, 1].
df['runtime'] = normalize_data(df['runtime'])
df['popularity'] = normalize_data(df['popularity'])
df['rating'] = normalize_data(df['rating'])

# NOTE(review): df_trim is not referenced again in this script - confirm
# whether anything else relies on it before removing.
df_trim = df[['title', 'genres', 'keywords', 'director', 'cast']]

# --- Train -----------------------------------------------------------------
# Model comes from recommendation_model; fit is called with save=True.
model = Model(df)
model.fit(save=True)