# Search_Movie / app.py
# rriverar75's picture
# Update app.py
# b3365e8
# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install -U sentence-transformers
# !pip install gradio chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import ast
from ast import literal_eval
import chromadb
from chromadb.utils import embedding_functions
import gdown
# Fetch the IMDb dataset CSV from Google Drive (saved under the relative
# path in `output`) and load it into a DataFrame.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'
output = '25k IMDb movie Dataset.csv'

gdown.download(f'{url}{file_id}', output, quiet=False)
df = pd.read_csv(output)
def concatenar_lista(lista):
    """Join the items of a stringified Python list into one string.

    Args:
        lista: Text holding a Python list literal, e.g. ``"['a', 'b']"``.

    Returns:
        The list items joined by single spaces, or ``''`` when ``lista`` is a
        blank string (empty or whitespace only).

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``lista`` is non-blank but not a valid Python literal.
    """
    # Missing cells are replaced with ' ' before this helper is applied to the
    # DataFrame, and literal_eval cannot parse a blank string, so a blank is
    # treated as "no items" instead of crashing the whole load.
    if isinstance(lista, str) and not lista.strip():
        return ''
    return ' '.join(literal_eval(lista))
def string_to_list(lista):
    """Parse a stringified Python list back into a real list.

    Args:
        lista: Text holding a Python list literal, e.g. ``"['Action', 'Drama']"``.

    Returns:
        The parsed value, or an empty list when ``lista`` is a blank string
        (empty or whitespace only).

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            ``lista`` is non-blank but not a valid Python literal.
    """
    # Missing cells are replaced with ' ' before this helper is applied to the
    # DataFrame, and literal_eval cannot parse a blank string, so a blank is
    # treated as an empty list instead of raising.
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
# --- Data preparation --------------------------------------------------------
# Replace missing cells with a blank placeholder so the string operations
# below never see NaN.
df = df.fillna(' ')

# 'Plot Kyeword', 'Top 5 Casts' and 'Generes' hold stringified Python lists.
# The column names are spelled exactly as this script reads them; keep them
# verbatim.
df['Keywords'] = df['Plot Kyeword'].apply(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].apply(concatenar_lista)
df['Generes'] = df['Generes'].apply(string_to_list)

# Non-numeric ratings become 0 so the column is always a usable float.
df['Rating'] = pd.to_numeric(df['Rating'], errors="coerce").fillna(0).astype("float")

# Distinct genre names across all movies. explode() turns an empty genre list
# into NaN, so drop those before collecting the unique values.
unique_generes = df['Generes'].explode().dropna().unique()

df = df.drop(columns=['Plot Kyeword', 'Top 5 Casts'])

# Text that gets embedded: overview + keywords + cast. Column-wise
# concatenation gives the same strings as a row-by-row apply, without the
# per-row Python call.
df['text'] = df['Overview'].astype(str) + ' ' + df['Keywords'] + ' ' + df['Stars']

# --- Embeddings --------------------------------------------------------------
model = SentenceTransformer('all-MiniLM-L6-v2')
# Pass a plain list of strings rather than a pandas Series.
embeddings = model.encode(df['text'].tolist(), batch_size=64, show_progress_bar=True)
df['embeddings'] = embeddings.tolist()
df['ids'] = df.index.astype(str)

# --- Vector store ------------------------------------------------------------
client_persistent = chromadb.PersistentClient(path='data_embeddings')
# The client persists to disk, so on any run after the first the collection
# already exists; create_collection would raise in that case.
db = client_persistent.get_or_create_collection(name='movies_db')

# Flatten the genre lists to a single comma-separated string per movie.
df['Generes'] = df['Generes'].apply(', '.join)
from torch import embedding
# Store one record per movie: its id, its precomputed embedding, and every
# remaining DataFrame column as metadata.
db.add(
    ids=df['ids'].to_list(),
    embeddings=df['embeddings'].to_list(),
    metadatas=df.drop(columns=['ids', 'embeddings', 'text']).to_dict(orient='records'),
)
from chromadb.api.types import Metadatas
def search(query, genre, rating, num):
    """Query the movie collection and return the matches as a DataFrame.

    Args:
        query: Free-text description to search for.
        genre: A single genre name the movie must have, or a falsy value to
            accept any genre.
        rating: Minimum rating; a falsy value means no minimum.
        num: Maximum number of rows to return. Coerced with ``int`` and
            raised to at least 1.

    Returns:
        A pandas DataFrame with the columns Title, Overview, Director, Stars,
        Genre, year and Rating, holding at most ``num`` rows in the order the
        collection query returned them. The columns are present even when
        nothing matches.
    """
    columns = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']

    # The collection query rejects a result count below 1.
    num = max(1, int(num))

    # Rating metadata is cast to float before it is stored, so keep the
    # comparison operand a float as well.
    # NOTE(review): Chroma's metadata filters may compare by value type;
    # confirm against the Chroma version in use.
    min_rating = float(rating) if rating else 0.0
    where = {"Rating": {"$gte": min_rating}}

    def run_query(n_results):
        """Return the metadata dicts of the ``n_results`` nearest movies."""
        result = db.query(
            query_texts=[query],
            n_results=n_results,
            where=where,
            include=['metadatas'],
        )
        return [meta for group in result['metadatas'] for meta in group]

    if not genre:
        hits = run_query(num)
    else:
        # 'Generes' is stored as one comma-separated string per movie
        # ("Action, Drama"), so an equality filter on it would only match
        # movies whose sole genre is `genre`. Filter on the split list here
        # instead, widening the fetch until enough rows match or the
        # collection is exhausted.
        total = db.count()
        fetch = max(1, min(total, num * 10))
        while True:
            candidates = run_query(fetch)
            hits = [
                meta for meta in candidates
                if genre in str(meta.get('Generes', '')).split(', ')
            ]
            exhausted = fetch >= total or len(candidates) < fetch
            if len(hits) >= num or exhausted:
                break
            fetch = min(total, fetch * 4)
        hits = hits[:num]

    rows = [
        {
            'Title': meta['movie title'],
            'Overview': meta['Overview'],
            'Director': meta['Director'],
            'Stars': meta['Stars'],
            'Genre': meta['Generes'],
            'year': meta['year'],
            'Rating': meta['Rating'],
        }
        for meta in hits
    ]
    return pd.DataFrame(rows, columns=columns)
import gradio as gr
# --- Gradio UI ---------------------------------------------------------------
# Dropdown options: every distinct genre collected from the dataset.
genres = unique_generes.tolist()

# Widgets are listed in the order of search()'s positional parameters:
# query, genre, rating, num.
search_inputs = [
    gr.Textbox(lines=5, placeholder="Escribe aquí tu consulta...", label="Consulta"),
    gr.Dropdown(choices=genres, label="Género de la película"),
    gr.Slider(minimum=1, maximum=10, value=5, label="Puntuación mínima"),
    gr.Number(minimum=1, maximum=10, value=3, label="Número de resultados"),
]
results_table = gr.Dataframe(type="pandas", label="Resultados")

iface = gr.Interface(
    fn=search,
    inputs=search_inputs,
    outputs=results_table,
    title="Buscador de películas",
    description="Introduce tu consulta (en INGLES), selecciona un género y define una puntuación mínima para buscar películas.",
)

# Serve locally only; no public share link.
iface.launch(share=False)