"""
Movie Recommendation System - HuggingFace Gradio Interface

This application provides movie recommendations using three different algorithms:
1. Item-Based Collaborative Filtering
2. SVD (Singular Value Decomposition)
3. NMF (Non-negative Matrix Factorization)

Author: Movie Recommendation System
Date: 2025
"""

import gradio as gr
import pandas as pd
import pickle
import numpy as np
from pathlib import Path
from typing import List, Dict
import warnings
from huggingface_hub import hf_hub_download

warnings.filterwarnings('ignore')


class MovieRecommender:
    """
    Unified movie recommendation system supporting multiple algorithms.

    Models are downloaded lazily (on first use) and cached in ``self.models``
    keyed by model type. Movie metadata is read once from ``movies.csv`` at
    construction time.
    """

    # Maps each supported model type to its pickle file in the model repo.
    MODEL_FILES = {
        'item_cf': 'surprise_item_cf.pkl',
        'svd': 'funk_svd.pkl',
        'nmf': 'nmf_model.pkl'
    }

    # HuggingFace repository that hosts the pickled models.
    MODEL_REPO_ID = "MdzBeast/movie-recommendation-models"

    def __init__(self):
        """Initialize the recommender system."""
        self.models = {}
        self.movie_info = None
        self.load_movie_data()

    def load_movie_data(self):
        """
        Load movie metadata from CSV file.

        Loads movie titles and genres for display in recommendations.
        On any failure the error is printed and ``self.movie_info`` is set to
        an empty dict, so lookups keep working (titles fall back to
        'Unknown' or are skipped, depending on the algorithm).
        """
        try:
            movies_df = pd.read_csv('movies.csv')
            self.movie_info = (
                movies_df.set_index('movieId')[['title', 'genres']].to_dict('index')
            )
            print(f"Loaded {len(self.movie_info)} movies")
        except Exception as e:
            # Best-effort: the app still starts without metadata.
            print(f"Error loading movies: {e}")
            self.movie_info = {}

    def load_model(self, model_type: str) -> bool:
        """
        Load a trained recommendation model from HuggingFace Models repository.

        Already-loaded models are served from the in-memory cache.

        Args:
            model_type: Type of model ('item_cf', 'svd', 'nmf')

        Returns:
            bool: True if loaded successfully, False otherwise
        """
        if model_type in self.models:
            return True

        # Reject unknown types up front instead of relying on a KeyError
        # being swallowed by the broad handler below.
        if model_type not in self.MODEL_FILES:
            print(f"Error loading {model_type} model: unknown model type")
            return False

        try:
            print(f"Downloading {model_type} model from HuggingFace...")
            model_path = hf_hub_download(
                repo_id=self.MODEL_REPO_ID,
                filename=self.MODEL_FILES[model_type],
                repo_type="model"
            )
            # NOTE(security): pickle.load executes arbitrary code embedded in
            # the file. This is only acceptable while the model repository is
            # fully trusted; consider a safer serialization format otherwise.
            with open(model_path, 'rb') as f:
                model_data = pickle.load(f)
            self.models[model_type] = model_data
            print(f"Loaded {model_type} model successfully")
            return True
        except Exception as e:
            print(f"Error loading {model_type} model: {e}")
            return False

    def get_user_rated_items(self, user_id: int, trainset) -> set:
        """
        Get all items rated by a specific user.

        Args:
            user_id: User ID
            trainset: Surprise trainset object

        Returns:
            set: Set of movie IDs rated by the user (empty if the trainset
            raises ValueError for this user ID)
        """
        try:
            inner_uid = trainset.to_inner_uid(user_id)
        except ValueError:
            return set()
        return {trainset.to_raw_iid(iid) for iid, _rating in trainset.ur[inner_uid]}

    def _attach_movie_metadata(self, items: List[Dict]) -> List[Dict]:
        """Add 'title' and 'genres' to each item in place ('Unknown' if missing)."""
        for item in items:
            info = self.movie_info.get(item['movieId'])
            if info is not None:
                item['title'] = info['title']
                item['genres'] = info['genres']
            else:
                item['title'] = 'Unknown'
                item['genres'] = 'Unknown'
        return items

    def _recommend_with_predict(self, model_key: str, user_id: int, n: int) -> List[Dict]:
        """
        Rank all unrated items for a user via ``model.predict``.

        Shared implementation for the 'item_cf' and 'svd' models, which are
        queried identically.

        Args:
            model_key: Key of the loaded model in ``self.models``
            user_id: User ID to generate recommendations for
            n: Number of recommendations to return

        Returns:
            List of recommendation dicts (movieId, score, title, genres),
            an empty list if the model is not loaded or ``n`` is not
            positive, or a single ``{"error": ...}`` dict if the user is
            not in the training data.
        """
        if model_key not in self.models:
            return []

        model_data = self.models[model_key]
        model = model_data['model']
        trainset = model_data['trainset']

        # Only used to verify that the user exists in the training data.
        try:
            trainset.to_inner_uid(user_id)
        except ValueError:
            return [{"error": f"User {user_id} not found in training data"}]

        n = int(n)
        if n <= 0:
            # A negative slice bound would otherwise return almost everything.
            return []

        rated_items = self.get_user_rated_items(user_id, trainset)

        predictions = []
        for inner_iid in trainset.all_items():
            raw_iid = trainset.to_raw_iid(inner_iid)
            if raw_iid in rated_items:
                continue
            pred = model.predict(user_id, raw_iid)
            predictions.append({'movieId': raw_iid, 'score': pred.est})

        predictions.sort(key=lambda x: x['score'], reverse=True)
        return self._attach_movie_metadata(predictions[:n])

    def recommend_item_cf(self, user_id: int, n: int) -> List[Dict]:
        """
        Generate recommendations using Item-Based Collaborative Filtering.

        Args:
            user_id: User ID to generate recommendations for
            n: Number of recommendations to return

        Returns:
            List of dictionaries containing movie recommendations
        """
        return self._recommend_with_predict('item_cf', user_id, n)

    def recommend_svd(self, user_id: int, n: int) -> List[Dict]:
        """
        Generate recommendations using SVD (Singular Value Decomposition).

        Args:
            user_id: User ID to generate recommendations for
            n: Number of recommendations to return

        Returns:
            List of dictionaries containing movie recommendations
        """
        return self._recommend_with_predict('svd', user_id, n)

    def recommend_nmf(self, user_id: int, n: int) -> List[Dict]:
        """
        Generate recommendations using NMF (Non-negative Matrix Factorization).

        Scores are the dot product of the item factors (``model.qi``) with the
        user's factors (``model.pu``), min-max rescaled to the 0.5-5.0 range.

        Args:
            user_id: User ID to generate recommendations for
            n: Number of recommendations to return

        Returns:
            List of dictionaries containing movie recommendations, an empty
            list if the model is not loaded or ``n`` is not positive, or a
            single ``{"error": ...}`` dict if the user is not in the
            training data.
        """
        if 'nmf' not in self.models:
            return []

        model_data = self.models['nmf']
        model = model_data['model']
        trainset = model_data['trainset']

        try:
            inner_uid = trainset.to_inner_uid(user_id)
        except ValueError:
            return [{"error": f"User {user_id} not found in training data"}]

        n = int(n)
        if n <= 0:
            return []

        user_factors = model.pu[inner_uid]
        all_item_factors = model.qi
        scores = np.dot(all_item_factors, user_factors)
        if scores.size == 0:
            # No items: min()/max() would raise on an empty array.
            return []

        min_score = scores.min()
        max_score = scores.max()
        score_range = max_score - min_score
        if score_range > 0:
            scores_normalized = 0.5 + (scores - min_score) / score_range * 4.5
        else:
            # All raw scores are equal: avoid 0/0 (NaN) and place every item
            # at the midpoint of the 0.5-5.0 scale.
            scores_normalized = np.full(scores.shape, 2.75, dtype=float)

        raw_item_ids = []
        valid_scores = []
        for inner_iid, score in enumerate(scores_normalized):
            try:
                raw_iid = trainset.to_raw_iid(inner_iid)
            except ValueError:
                # Factor row with no matching raw item ID in the trainset.
                continue
            raw_item_ids.append(raw_iid)
            valid_scores.append(score)

        sorted_indices = np.argsort(valid_scores)[::-1]
        rated_items = self.get_user_rated_items(user_id, trainset)

        recommendations = []
        for idx in sorted_indices:
            movie_id = raw_item_ids[idx]
            if movie_id in rated_items:
                continue
            # Unlike the predict-based recommenders, movies without metadata
            # are skipped here rather than labelled 'Unknown'.
            info = self.movie_info.get(movie_id)
            if info is None:
                continue
            recommendations.append({
                'movieId': int(movie_id),
                'title': info['title'],
                'genres': info['genres'],
                'score': float(valid_scores[idx])
            })
            if len(recommendations) >= n:
                break

        return recommendations

    def recommend_movies(self, user_id: int, N: int, model_type: str = 'svd') -> List[Dict]:
        """
        Generate movie recommendations for a given user.

        This is the main recommendation function as required by the specification.

        Args:
            user_id: User ID to generate recommendations for
            N: Number of recommendations to return
            model_type: Algorithm to use ('item_cf', 'svd', 'nmf').
                Default is 'svd'

        Returns:
            List of dictionaries containing movie recommendations with keys:
            - movieId: Movie ID
            - title: Movie title
            - genres: Movie genres
            - score: Predicted rating score

            On failure, a single-element list holding an ``{"error": ...}``
            dict is returned instead.
        """
        dispatch = {
            'item_cf': self.recommend_item_cf,
            'svd': self.recommend_svd,
            'nmf': self.recommend_nmf,
        }

        # Validate before attempting to load, so an unknown type reports the
        # "invalid model type" error rather than a misleading load failure.
        if model_type not in dispatch:
            return [{"error": "Invalid model type. Choose from: item_cf, svd, nmf"}]

        if not self.load_model(model_type):
            return [{"error": f"Could not load {model_type} model"}]

        return dispatch[model_type](user_id, int(N))

    def recommend(self, user_id: int, n: int, model_type: str) -> str:
        """
        Generate recommendations and format as string for Gradio display.

        Args:
            user_id: User ID to generate recommendations for
            n: Number of recommendations to return
            model_type: Algorithm to use ('item_cf', 'svd', 'nmf')

        Returns:
            Formatted string containing recommendations, or a human-readable
            message when the inputs are invalid or nothing can be recommended
        """
        # UI widgets may deliver None (cleared field) or floats; normalise
        # here so downstream slicing and ID lookups get plain ints.
        try:
            user_id = int(user_id)
            n = int(n)
        except (TypeError, ValueError):
            return "Please enter a valid User ID and number of recommendations"

        recommendations = self.recommend_movies(user_id, n, model_type)

        if not recommendations:
            return f"No recommendations available for User {user_id}"

        if isinstance(recommendations[0], dict) and 'error' in recommendations[0]:
            return recommendations[0]['error']

        parts = [
            f"Top-{n} Movie Recommendations for User {user_id} using {model_type.upper()}\n",
            "=" * 80 + "\n\n",
        ]
        for idx, rec in enumerate(recommendations, 1):
            parts.append(f"{idx}. {rec['title'][:50]}\n")
            parts.append(f" Score: {rec['score']:.4f}\n")
            parts.append(f" Genres: {rec['genres']}\n")
            parts.append(f" Movie ID: {rec['movieId']}\n\n")

        return "".join(parts)


def create_gradio_interface():
    """
    Create and configure the Gradio interface.

    Builds a ``MovieRecommender`` and wires its ``recommend`` method to the
    submit button.

    Returns:
        Gradio Blocks interface
    """
    recommender = MovieRecommender()

    with gr.Blocks(title="Movie Recommendation System", theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            """
            # Movie Recommendation System

            Get personalized movie recommendations using state-of-the-art collaborative filtering algorithms.

            **Available Models:**
            - **Item-Based CF**: Recommends movies similar to ones you've liked
            - **SVD**: Matrix factorization approach for latent feature discovery
            - **NMF**: Non-negative matrix factorization for interpretable recommendations
            """
        )

        with gr.Row():
            with gr.Column():
                user_id_input = gr.Number(
                    label="User ID",
                    value=5,
                    precision=0,
                    info="Enter a user ID from the dataset"
                )
                n_recommendations = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=10,
                    step=1,
                    label="Number of Recommendations",
                    info="How many movies to recommend"
                )
                model_choice = gr.Radio(
                    choices=['item_cf', 'svd', 'nmf'],
                    value='svd',
                    label="Recommendation Algorithm",
                    info="Choose the algorithm for generating recommendations"
                )
                submit_btn = gr.Button("Get Recommendations", variant="primary")

            with gr.Column():
                output_text = gr.Textbox(
                    label="Recommendations",
                    lines=20,
                    max_lines=30,
                    show_copy_button=True
                )

        submit_btn.click(
            fn=recommender.recommend,
            inputs=[user_id_input, n_recommendations, model_choice],
            outputs=output_text
        )

        gr.Markdown(
            """
            ### Model Descriptions

            **Item-Based Collaborative Filtering (Item-CF)**
            - Finds movies similar to ones the user has rated highly
            - Uses cosine similarity between movie rating patterns
            - Fast and interpretable

            **Singular Value Decomposition (SVD)**
            - Matrix factorization technique that learns latent features
            - Captures complex user-movie interaction patterns
            - Often provides the best accuracy

            **Non-negative Matrix Factorization (NMF)**
            - Factorizes rating matrix with non-negativity constraints
            - Produces interpretable latent factors
            - Good for sparse datasets

            ### Dataset
            - MovieLens dataset with user ratings and movie metadata
            - Ratings scale: 0.5 to 5.0
            """
        )

    return interface


if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()