"""Item-based movie recommender served through a Gradio text interface.

Reads ``ratings.csv`` (columns ``userId``, ``movieId``, ``rating``) and
``movies.csv`` (columns ``movieId``, ``title``) from the working directory,
builds a sparse movie x user rating matrix, and recommends the movies whose
rating vectors are nearest to the one the user typed in.
"""
import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors


def create_matrix(df):
    """Build a sparse movie x user rating matrix and its id/index mappings.

    Args:
        df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        A 5-tuple ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where ``X`` is a ``csr_matrix`` of shape
        ``(number of distinct movies, number of distinct users)``, the
        ``*_mapper`` dicts map an id to its row/column index and the
        ``*_inv_mapper`` dicts map an index back to the id.
    """
    # np.unique returns the sorted distinct ids and, with return_inverse,
    # the position of every row's id in that sorted array -- i.e. exactly
    # the matrix index of each rating, without a Python-level lookup loop.
    user_ids, user_index = np.unique(df["userId"], return_inverse=True)
    movie_ids, movie_index = np.unique(df["movieId"], return_inverse=True)
    num_users = len(user_ids)
    num_movies = len(movie_ids)

    # Map ids to indices
    user_mapper = dict(zip(user_ids, range(num_users)))
    movie_mapper = dict(zip(movie_ids, range(num_movies)))

    # Map indices to ids
    user_inv_mapper = dict(zip(range(num_users), user_ids))
    movie_inv_mapper = dict(zip(range(num_movies), movie_ids))

    X = csr_matrix(
        (df["rating"], (movie_index, user_index)),
        shape=(num_movies, num_users),
    )
    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper


def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    """Return the ``k`` movies whose rating vectors are closest to ``movie_id``.

    Relies on the module-level ``movie_mapper`` and ``movie_inv_mapper``
    produced by ``create_matrix``.

    Args:
        movie_id: Id of the query movie; must be a key of ``movie_mapper``.
        X: Movie x user rating matrix (one row per movie).
        k: Number of neighbours wanted. Fewer are returned when ``X`` does
            not contain ``k`` other movies.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        show_distance: When true, return ``(movie_id, distance)`` pairs
            instead of bare ids.

    Returns:
        A list of movie ids ordered from nearest to farthest, or a list of
        ``(movie_id, distance)`` tuples when ``show_distance`` is true.

    Raises:
        KeyError: If ``movie_id`` has no row in the rating matrix.
    """
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind].reshape(1, -1)

    # The query movie is its own nearest neighbour, so ask for one extra
    # result; cap at the number of rows because kneighbors rejects requests
    # for more neighbours than there are fitted samples.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(movie_vec, return_distance=True)

    # Drop the query by index rather than by position: a movie with an
    # identical rating vector can tie at distance 0 and be listed first.
    neighbours = [
        (movie_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != movie_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]


def recommend_movies(movie_name):
    """Return newline-separated titles of movies similar to ``movie_name``.

    The query is matched case-insensitively as a substring of the titles in
    the module-level ``movie_titles``; the first matching movie that has a
    row in the rating matrix is used as the seed.

    Args:
        movie_name: Free-text title (or part of one) typed by the user.

    Returns:
        The recommended titles joined with newlines, or a human-readable
        message when no usable movie matches the query.
    """
    not_found = "Movie not found. Please check the spelling and try again"

    # An empty query is a substring of every title, so reject it up front
    # instead of silently recommending for an arbitrary movie.
    query = movie_name.strip().lower() if movie_name else ""
    if not query:
        return not_found

    matches = [mid for mid, title in movie_titles.items() if query in title.lower()]
    if not matches:
        return not_found

    # A title listed in movies.csv may have no ratings at all, in which case
    # it has no row in X and cannot seed a neighbour search.
    movie_id = next((mid for mid in matches if mid in movie_mapper), None)
    if movie_id is None:
        return "No ratings available for this movie, so no recommendations can be made"

    similar_ids = find_similar_movies(movie_id, X, k=10)
    # Skip ids that were rated but have no title entry.
    return "\n".join(movie_titles[i] for i in similar_ids if i in movie_titles)


# Load data
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# Exploratory statistics; none of these feed the recommender below.
n_ratings = len(ratings)
n_movies = len(ratings['movieId'].unique())
n_users = len(ratings['userId'].unique())

user_freq = ratings[['userId', 'movieId']].groupby('userId').count().reset_index()
user_freq.columns = ['userId', 'n_ratings']

mean_rating = ratings.groupby('movieId')[['rating']].mean()
lowest_rated = mean_rating['rating'].idxmin()
highest_rated = mean_rating['rating'].idxmax()

movie_stats = ratings.groupby('movieId')[['rating']].agg(['count', 'mean'])
movie_stats.columns = movie_stats.columns.droplevel()

# Globals consumed by find_similar_movies / recommend_movies.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)
movie_titles = dict(zip(movies['movieId'], movies['title']))

# Set up Gradio interface
# gr.Textbox replaces the legacy gr.inputs / gr.outputs namespaces, which
# are deprecated and not available alongside gr.themes in newer releases.
movie_name = gr.Textbox(label="Movie Name")
outputs = gr.Textbox(label="Recommended Movies")

iface = gr.Interface(
    fn=recommend_movies,
    inputs=movie_name,
    outputs=outputs,
    theme=gr.themes.Default(primary_hue="slate"),
)
iface.launch()