# Spaces status (page-scrape residue): Runtime error
# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install -U sentence-transformers
# !pip install gradio chromadb
import pandas as pd | |
from sentence_transformers import SentenceTransformer, util | |
import ast | |
from ast import literal_eval | |
import chromadb | |
from chromadb.utils import embedding_functions | |
import gdown | |
# Download the IMDb dataset CSV from Google Drive, then load it into `df`.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'
output = '25k IMDb movie Dataset.csv'
gdown.download(url=f'{url}{file_id}', output=output, quiet=False)
df = pd.read_csv(output)
def concatenar_lista(lista):
    """Join the items of a stringified Python list into one string.

    Args:
        lista: Text holding a Python list literal of strings, e.g.
            ``"['spy', 'heist']"``.

    Returns:
        The list items separated by single spaces. A blank or
        whitespace-only string yields ``''``.
    """
    # Missing cells are replaced by ' ' (fillna) before this function is
    # applied; literal_eval raises SyntaxError on blank text, so treat it as
    # "no items" instead of crashing the whole column conversion.
    if isinstance(lista, str) and not lista.strip():
        return ''
    items = literal_eval(lista)
    return ' '.join(items)
def string_to_list(lista):
    """Parse a stringified Python literal (expected: a list) into an object.

    Args:
        lista: Text holding a Python literal, e.g. ``"['Action', 'Drama']"``.

    Returns:
        The evaluated literal. A blank or whitespace-only string yields an
        empty list.
    """
    # Missing cells are replaced by ' ' (fillna) before this function is
    # applied; literal_eval raises SyntaxError on blank text, so map it to an
    # empty list rather than aborting.
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
# --- Clean the raw columns ---------------------------------------------------
# Missing cells become a single space so the string concatenation below works.
df = df.fillna(' ')
df['Keywords'] = df['Plot Kyeword'].apply(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].apply(concatenar_lista)
df['Generes'] = df['Generes'].apply(string_to_list)
# Unparseable ratings are coerced to NaN and then stored as 0.0.
df['Rating'] = pd.to_numeric(df['Rating'], errors="coerce").fillna(0).astype("float")
# dropna(): a row with an empty genre list explodes to NaN, which is not a genre.
unique_generes = df['Generes'].explode().dropna().unique()
df.drop(['Plot Kyeword', 'Top 5 Casts'], axis=1, inplace=True)

# --- Build the text that gets embedded ---------------------------------------
# Vectorised concatenation of overview, keywords and cast (one string per row).
df['text'] = df['Overview'].astype(str) + ' ' + df['Keywords'] + ' ' + df['Stars']

model = SentenceTransformer('all-MiniLM-L6-v2')
# Pass a plain list: encode() indexes its input positionally.
embeddings = model.encode(df['text'].tolist(), batch_size=64, show_progress_bar=True)
df['embeddings'] = embeddings.tolist()
df['ids'] = df.index
df['ids'] = df['ids'].astype('str')

# --- Persistent vector store -------------------------------------------------
client_persistent = chromadb.PersistentClient(path='data_embeddings')
# The store lives on disk, so on a restart the collection may already exist;
# get_or_create_collection avoids the "already exists" failure of
# create_collection.
db = client_persistent.get_or_create_collection(name='movies_db')
# Metadata values must be scalars, so genres are stored as one
# comma-separated string per movie.
df['Generes'] = df['Generes'].apply(', '.join)
from torch import embedding  # NOTE(review): appears unused in this file

_ids = df['ids'].tolist()
# Only (re)populate when the collection does not yet hold every row.
if db.count() < len(_ids):
    _vectors = df['embeddings'].tolist()
    _records = df.drop(['ids', 'embeddings', 'text'], axis=1).to_dict('records')
    # Chroma limits how many records a single call may carry (the exact cap
    # depends on the installation), so write in conservative batches. upsert
    # keeps this idempotent if a previous run was interrupted half-way.
    _BATCH_SIZE = 5000
    for _start in range(0, len(_ids), _BATCH_SIZE):
        _stop = _start + _BATCH_SIZE
        db.upsert(
            ids=_ids[_start:_stop],
            embeddings=_vectors[_start:_stop],
            metadatas=_records[_start:_stop],
        )
from chromadb.api.types import Metadatas  # NOTE(review): appears unused in this file
def search(query, genre, rating, num):
    """Return the movies most similar to *query*, filtered by genre and rating.

    Args:
        query: Free-text description of the movie being looked for.
        genre: Genre the movie must belong to; a falsy value means any genre.
        rating: Minimum rating (inclusive); a falsy value means no minimum.
        num: Number of results wanted; coerced to an ``int`` of at least 1.

    Returns:
        A ``pandas.DataFrame`` with the columns Title, Overview, Director,
        Stars, Genre, year and Rating, holding at most ``num`` rows ordered
        from most to least similar. The frame is empty (but keeps its
        columns) when nothing matches.
    """
    columns = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']
    num = max(int(num or 1), 1)
    # Ratings are stored as floats, so compare against a float threshold.
    min_rating = float(rating) if rating else 0.0
    conditions = {"Rating": {"$gte": min_rating}}

    total = db.count()
    if total == 0:
        return pd.DataFrame(columns=columns)

    # Embed the query with the same model that produced the stored vectors.
    query_vector = model.encode([query]).tolist()

    # Genres are stored as one comma-separated string per movie, so an exact
    # metadata match on a single genre would miss every multi-genre movie.
    # Instead, filter by genre here and widen the search until enough
    # matching movies are found or the candidates run out.
    fetch = min(num, total)
    while True:
        responses = db.query(
            query_embeddings=query_vector,
            n_results=fetch,
            where=conditions,
            include=['metadatas'],
        )
        candidates = responses['metadatas'][0]
        if genre:
            matches = [
                metadata for metadata in candidates
                if genre in [g.strip() for g in str(metadata.get('Generes', '')).split(',')]
            ]
        else:
            matches = candidates
        exhausted = len(candidates) < fetch or fetch >= total
        if len(matches) >= num or exhausted:
            break
        fetch = min(fetch * 4, total)

    response_data = [
        {
            'Title': metadata['movie title'],
            'Overview': metadata['Overview'],
            'Director': metadata['Director'],
            'Stars': metadata['Stars'],
            'Genre': metadata['Generes'],
            'year': metadata['year'],
            'Rating': metadata['Rating'],
        }
        for metadata in matches[:num]
    ]
    return pd.DataFrame(response_data, columns=columns)
import gradio as gr

# Dropdown options: every distinct genre found in the dataset.
genres = unique_generes.tolist()

# Web form: free-text query, genre, minimum rating and result count go in;
# the table of matching movies returned by search() comes out.
iface = gr.Interface(
    title="Buscador de películas",
    description="Introduce tu consulta (en INGLES), selecciona un género y define una puntuación mínima para buscar películas.",
    fn=search,
    inputs=[
        gr.Textbox(label="Consulta", lines=5, placeholder="Escribe aquí tu consulta..."),
        gr.Dropdown(label="Género de la película", choices=genres),
        gr.Slider(label="Puntuación mínima", minimum=1, maximum=10, value=5),
        gr.Number(label="Número de resultados", minimum=1, maximum=10, value=3),
    ],
    outputs=gr.Dataframe(label="Resultados", type="pandas"),
)
iface.launch(share=False)