"""Gradio demo: recommend movies whose text embeddings are closest to a given title.

Startup loads the movie dataset (for titles) and a precomputed embedding
matrix, builds an exact L2 FAISS index over the embeddings, and serves a
single-textbox Gradio interface.
"""

import pickle  # noqa: F401  (file-level import retained; titles.pkl is no longer read)
import warnings

import faiss
import gradio as gr
import numpy as np
from datasets import load_dataset

# Number of recommendations shown to the user.
NUM_RECOMMENDATIONS = 5
NOT_FOUND_MESSAGE = "Movie not found. Please check the title and try again."

dataset = load_dataset("asfilcnx3/clean-embedding-movies")

# Load the precomputed embeddings. FAISS expects a C-contiguous float32
# matrix, and np.load returns whatever dtype/layout the file was saved with.
embeddings = np.ascontiguousarray(np.load("embeddings.npy"), dtype=np.float32)

# Exact (brute-force) L2 index over all embeddings.
dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(embeddings)

# Lower-cased titles, used for case-insensitive lookup. The script previously
# also read titles.pkl into this same name and immediately overwrote it here;
# that dead load was removed and the dataset stays the effective source.
titles_list = [title.lower() for title in dataset["train"]["title"]]

# Row i of `embeddings` is treated as the embedding of titles_list[i].
# NOTE(review): that alignment is assumed, not verified beyond the row count.
if len(titles_list) != embeddings.shape[0]:
    warnings.warn(
        f"titles ({len(titles_list)}) and embeddings ({embeddings.shape[0]}) "
        "differ in length; recommendations may be misaligned.",
        stacklevel=1,
    )


def get_index_from_title(title_query, titles_list):
    """Return the position of the first exact match of *title_query*.

    Args:
        title_query: Title to look up; compared as-is, so callers are
            expected to normalise it the same way as *titles_list*.
        titles_list: Sequence of titles to search.

    Returns:
        The index of the first matching entry, or ``None`` if there is none.
    """
    try:
        return titles_list.index(title_query)
    except ValueError:
        return None


def recommend_by_title(title_query):
    """Return up to five titles most similar to *title_query*, one per line.

    Args:
        title_query: Movie title typed by the user. Matching is
            case-insensitive and ignores surrounding whitespace; ``None`` is
            treated as an empty string.

    Returns:
        A newline-separated string of recommended (lower-cased) titles, or a
        "not found" message when the title has no usable match.
    """
    title_query = (title_query or "").lower().strip()
    idx = get_index_from_title(title_query, titles_list)

    # A title without a corresponding embedding row cannot be queried.
    if idx is None or idx >= faiss_index.ntotal:
        return NOT_FOUND_MESSAGE

    # Ask for one extra neighbour because the query movie is normally its own
    # nearest neighbour. Never request more than the index holds: FAISS pads
    # missing results with -1, which would otherwise index the last title.
    k = min(NUM_RECOMMENDATIONS + 1, faiss_index.ntotal)
    _, indices = faiss_index.search(embeddings[idx:idx + 1], k)

    similar_titles = [
        titles_list[i]
        for i in indices[0]
        if i != idx and 0 <= i < len(titles_list)
    ]
    return "\n".join(similar_titles[:NUM_RECOMMENDATIONS])


# Gradio interface
demo = gr.Interface(
    fn=recommend_by_title,
    inputs=gr.Textbox(label="Enter a movie title"),
    outputs=gr.Textbox(label="Recommended Movies"),
    title="Movie Recommender",
    description="Type the title of a movie and get 5 similar recommendations based on text embeddings.",
    examples=[["Interstellar"], ["The Dark Knight"], ["Alien"]],
)

demo.launch()