Spaces:
Runtime error
Runtime error
| # Commented out IPython magic to ensure Python compatibility. | |
| # %%capture | |
| # !pip install -U sentence-transformers | |
| # !pip install gradio chromadb | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
| import ast | |
| from ast import literal_eval | |
| import chromadb | |
| from chromadb.utils import embedding_functions | |
| import gdown | |
# Location of the dataset on Google Drive: base download URL plus file id.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'

# Local file name the CSV is saved under and then read back from.
output = '25k IMDb movie Dataset.csv'

# Fetch the file (progress output enabled) and load it into a DataFrame.
gdown.download(url=f"{url}{file_id}", output=output, quiet=False)
df = pd.read_csv(output)
def concatenar_lista(lista):
    """Parse a stringified Python list and join its items with spaces.

    Args:
        lista: Text containing a Python list literal of strings, for
            example ``"['hero', 'villain']"``. Blank or whitespace-only
            text is treated as an empty list.

    Returns:
        The list items joined with a single space; ``''`` when the list is
        empty or the input is blank.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid literal.
    """
    # The caller replaces missing cells with ' ' before applying this
    # function; literal_eval cannot parse blank text, so short-circuit it.
    if isinstance(lista, str) and not lista.strip():
        return ''
    return ' '.join(literal_eval(lista))
def string_to_list(lista):
    """Convert a stringified Python literal into the object it describes.

    Args:
        lista: Text containing a Python literal, for example
            ``"['Action', 'Drama']"``. Blank or whitespace-only text is
            treated as an empty list.

    Returns:
        The evaluated literal; ``[]`` when the input is blank.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid literal.
    """
    # The caller replaces missing cells with ' ' before applying this
    # function; literal_eval cannot parse blank text, so short-circuit it.
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
# Replace missing cells with a single space so every column holds a value.
df = df.fillna(' ')

# Flatten the stringified keyword and cast lists into space-separated text.
df['Keywords'] = df['Plot Kyeword'].apply(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].apply(concatenar_lista)

# Genres stay as real lists for now so they can be exploded below.
df['Generes'] = df['Generes'].apply(string_to_list)

# Non-numeric ratings become 0 so the column is uniformly float.
df['Rating'] = pd.to_numeric(df['Rating'], errors="coerce").fillna(0).astype("float")

# Distinct genres across all movies. explode() yields NaN for rows whose
# genre list is empty, so drop those before collecting the unique values.
unique_generes = df['Generes'].explode().dropna().unique()

# The raw list columns are no longer needed once flattened.
df.drop(columns=['Plot Kyeword', 'Top 5 Casts'], inplace=True)

# Text that gets embedded: overview + keywords + cast, built column-wise
# instead of calling a Python lambda once per row.
df['text'] = df['Overview'].astype(str) + ' ' + df['Keywords'] + ' ' + df['Stars']
# Sentence-embedding model used to vectorise each movie's combined text.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Hand encode() a plain list of strings rather than a pandas Series so that
# any positional indexing inside the encoder does not depend on df's index.
embeddings = model.encode(df['text'].tolist(), batch_size=64, show_progress_bar=True)
df['embeddings'] = embeddings.tolist()

# Record ids are the DataFrame index rendered as strings.
df['ids'] = df.index.astype(str)

# On-disk client: the collection survives restarts, so fetch it when it is
# already there instead of failing on a second create_collection call.
client_persistent = chromadb.PersistentClient(path='data_embeddings')
db = client_persistent.get_or_create_collection(name='movies_db')

# Store genres as one comma-separated string per movie.
df['Generes'] = df['Generes'].apply(', '.join)
| from torch import embedding | |
# Every column except the ids, vectors and embedded text is stored as
# per-record metadata.
_metadata_frame = df.drop(['ids', 'embeddings', 'text'], axis=1)

# Insert in fixed-size chunks: a single add() call with the whole dataset
# can exceed the maximum batch size the Chroma backend accepts.
_ADD_BATCH_SIZE = 5000
for _start in range(0, len(df), _ADD_BATCH_SIZE):
    _rows = slice(_start, _start + _ADD_BATCH_SIZE)
    db.add(
        ids=df['ids'].iloc[_rows].tolist(),
        embeddings=df['embeddings'].iloc[_rows].tolist(),
        metadatas=_metadata_frame.iloc[_rows].to_dict('records'),
    )
| from chromadb.api.types import Metadatas | |
def search(query, genre, rating, num):
    """Query the ``db`` collection and return the matches as a DataFrame.

    Args:
        query: Free-text query, sent to ``db.query`` as the single entry of
            ``query_texts``. ``None`` is treated as an empty string.
        genre: Value compared for equality against the ``Generes`` metadata
            field; a falsy value disables the genre filter.
        rating: Minimum ``Rating`` (inclusive); a falsy value means 0.
        num: Requested number of results. Converted with ``int``; a value
            that cannot be converted falls back to 3, and anything below 1
            is raised to 1.

    Returns:
        A pandas DataFrame with the columns Title, Overview, Director,
        Stars, Genre, year and Rating, one row per returned record. The
        columns are present even when nothing matched.
    """
    result_columns = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']
    default_results = 3

    # The number widget can hand over None (cleared field) or a
    # non-integral value; never let that crash the request.
    try:
        limit = int(num)
    except (TypeError, ValueError, OverflowError):
        limit = default_results
    limit = max(limit, 1)

    # Ratings are stored as floats; keep the comparison operand a float too
    # so a type-sensitive metadata filter compares like with like.
    min_rating = float(rating) if rating else 0.0
    rating_filter = {"Rating": {"$gte": min_rating}}

    if genre:
        # NOTE(review): 'Generes' is stored as a ', '-joined string, so this
        # equality only matches records whose whole genre string equals
        # `genre` — confirm that is the intended behaviour.
        conditions = {"$and": [{"Generes": genre}, rating_filter]}
    else:
        conditions = rating_filter

    # NOTE(review): the query text is embedded by the collection itself,
    # while the stored vectors were produced by `model` — confirm both use
    # the same embedding model.
    responses = db.query(
        query_texts=[query or ''],
        n_results=limit,
        where=conditions,
        include=['metadatas'],
    )

    # 'metadatas' holds one list of records per query text.
    rows = [
        {
            'Title': metadata['movie title'],
            'Overview': metadata['Overview'],
            'Director': metadata['Director'],
            'Stars': metadata['Stars'],
            'Genre': metadata['Generes'],
            'year': metadata['year'],
            'Rating': metadata['Rating'],
        }
        for batch in (responses['metadatas'] or [])
        for metadata in batch
    ]
    return pd.DataFrame(rows, columns=result_columns)
| import gradio as gr | |
# Choices for the genre dropdown: every distinct genre found in the dataset.
genres = unique_generes.tolist()

# Input widgets, in the same order as the parameters of search().
query_input = gr.Textbox(lines=5, placeholder="Escribe aquí tu consulta...", label="Consulta")
genre_input = gr.Dropdown(choices=genres, label="Género de la película")
rating_input = gr.Slider(minimum=1, maximum=10, value=5, label="Puntuación mínima")
count_input = gr.Number(minimum=1, maximum=10, value=3, label="Número de resultados")

# Results are rendered as a table fed by the DataFrame search() returns.
results_output = gr.Dataframe(type="pandas", label="Resultados")

iface = gr.Interface(
    fn=search,
    inputs=[query_input, genre_input, rating_input, count_input],
    outputs=results_output,
    title="Buscador de películas",
    description="Introduce tu consulta (en INGLES), selecciona un género y define una puntuación mínima para buscar películas.",
)

# Start the web app without creating a public share link.
iface.launch(share=False)