import faiss
import numpy as np
import pickle
import gradio as gr
from datasets import load_dataset

# Movie dataset; only the "title" column of the "train" split is read below.
dataset = load_dataset("asfilcnx3/clean-embedding-movies")

# Load the precomputed embeddings.
# FAISS operates on C-contiguous float32 matrices, so normalise dtype and
# layout once here; this is a no-op when the file already matches.
embeddings = np.ascontiguousarray(np.load("embeddings.npy"), dtype=np.float32)

# NOTE(review): "titles.pkl" used to be unpickled into titles_list here, but
# that value was overwritten by the dataset-derived list below before any
# use. The dead load (and the pickle deserialisation it implied) was removed.

# FAISS index: flat (exhaustive) L2-distance index over all embedding rows.
dimension = embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(embeddings)

# Title list, lower-cased so user queries can be matched case-insensitively.
# Assumes row i of embeddings.npy corresponds to title i of the "train"
# split -- TODO confirm against the preprocessing step.
titles_list = [title.lower() for title in dataset["train"]["title"]]

# Helper that locates a title inside the title list
def get_index_from_title(title_query, titles_list):
    """Return the position of the first entry equal to ``title_query``.

    The match is exact (no case folding or trimming is done here).

    Args:
        title_query: Title to look for.
        titles_list: Sequence of titles to search.

    Returns:
        The zero-based index of the first match, or ``None`` when
        ``titles_list`` contains no such entry.
    """
    try:
        position = titles_list.index(title_query)
    except ValueError:
        # .index() reports "absent" by raising; callers expect None instead.
        return None
    else:
        return position

# Recommendation function
def recommend_by_title(title_query):
    """Return up to five titles whose embeddings are nearest to a movie's.

    The query is lower-cased and stripped, then matched exactly against
    ``titles_list``. The matching row of ``embeddings`` is used as the
    query vector for ``faiss_index``.

    Args:
        title_query: Movie title typed by the user. ``None`` is treated as
            an empty string.

    Returns:
        A newline-separated string with up to five neighbouring titles, or
        a "Movie not found" message when no title matches the query.
    """
    title_query = (title_query or "").lower().strip()
    idx = get_index_from_title(title_query, titles_list)
    if idx is None:
        return "Movie not found. Please check the title and try again."

    # Ask for one neighbour more than needed: the query movie is normally
    # returned as its own closest match and is dropped below.
    _, indices = faiss_index.search(embeddings[idx:idx+1], 6)

    # FAISS pads the result with -1 when the index holds fewer vectors than
    # requested. Without the i >= 0 check, titles_list[-1] would silently
    # return the last title as a bogus recommendation.
    similar_titles = [
        titles_list[i] for i in indices[0] if i >= 0 and i != idx
    ]

    # The slice covers the case where the query movie was not among the
    # returned neighbours and all six results survived the filter.
    return "\n".join(similar_titles[:5])

# Gradio interface: one text box in, one text box out, wired to
# recommend_by_title.
demo = gr.Interface(
    fn=recommend_by_title,
    inputs=gr.Textbox(label="Enter a movie title"),
    outputs=gr.Textbox(label="Recommended Movies"),
    title="Movie Recommender",
    description="Type the title of a movie and get 5 similar recommendations based on text embeddings.",
    # Sample queries; each inner list holds the value for the single input.
    examples=[["Interstellar"], ["The Dark Knight"], ["Alien"]]
)

# Called at module top level (no __main__ guard), so it runs whenever this
# module is executed or imported.
demo.launch()