Spaces:

IoriU
/

anime_space_predictor

Runtime error

hk-bt-rnd

Update output gradio

d27c7e7 over 1 year ago

4.22 kB

	import gradio as gr
	import numpy as np
	from PIL import Image
	from matplotlib import cm
	import torch
	from transformers import AutoTokenizer, AutoModel
	from model import ImageModel, TextModel
	import torch.nn.functional as F
	import torchvision.transforms.v2 as transforms

	# Module-level setup: tokenizer, both trained heads and the image
	# preprocessing pipeline are built once at import time and shared by every
	# predictor function below.
	MODEL_NAME = "distilbert/distilroberta-base"
	class_names = ['Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy', 'Romance', 'Sci-Fi']
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

	# NOTE(security): torch.load unpickles the file, so "model_only.pt" must come
	# from a trusted source. If the checkpoint holds nothing but tensors, passing
	# weights_only=True would be safer - TODO confirm against the saved file.
	cp = torch.load(r"model_only.pt", map_location="cpu")

	model_img = ImageModel(len(class_names))
	model_img.load_state_dict(cp['w_i'])
	# Switch to inference mode: torch.no_grad() alone does not disable dropout
	# or make batch-norm use its running statistics.
	model_img.eval()

	model_text = TextModel(MODEL_NAME, len(class_names))
	model_text.load_state_dict(cp['w_t'])
	model_text.eval()

	# Resize to 224x224, convert to a tensor, then normalize each of the three
	# channels with mean 0.5 / std 0.5.
	image_transforms = transforms.Compose([
		transforms.Resize((224, 224)),
		transforms.ToTensor(),
		transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
	])
	def text_predictor(title, synopsis):
		"""Predict score, award flag and genres from a title and a synopsis.

		The title and synopsis are joined with a ``</s>`` separator, tokenized to
		a fixed length of 128 tokens (padded / truncated) and fed to
		``model_text``, whose three outputs are unpacked as score, award and
		genres.

		Args:
			title: Title text entered by the user.
			synopsis: Synopsis text entered by the user.

		Returns:
			A tuple ``(score, is_award, {"genres": [...]})`` where ``score`` is
			the model's first output rounded to two decimals, ``is_award`` is
			True when the sigmoid of the second output is at least 0.5, and the
			genre list holds every entry of ``class_names`` whose sigmoid
			probability is at least 0.5.
		"""
		prompt = f"{title} </s> {synopsis}"
		batch = tokenizer(
			prompt,
			add_special_tokens=True,
			max_length=128,
			padding="max_length",
			truncation=True,
			return_tensors='pt',
		)
		model_inputs = (batch['input_ids'], batch['attention_mask'])

		with torch.no_grad():
			raw_score, raw_award, raw_genres = model_text(model_inputs)
			# Drop the batch dimension (batch size is 1) before post-processing.
			score = raw_score.squeeze(0)
			is_award = F.sigmoid(raw_award.squeeze(0)) >= 0.5
			genre_probs = F.sigmoid(raw_genres.squeeze(0))

		# class_names is positionally aligned with the genre output.
		selected = [name for p, name in zip(genre_probs, class_names) if p >= 0.5]
		return round(score.item(), 2), is_award.item(), {"genres": selected}

	def img_predictor(img):
		"""Predict score, award flag and genres from an image.

		Args:
			img: Image as a NumPy array supplied by the Gradio image input, or
				None when nothing was uploaded.

		Returns:
			A tuple ``(score, is_award, {"genres": [...]})`` where ``score`` is
			the model's first output rounded to two decimals, ``is_award`` is
			True when the sigmoid of the second output is at least 0.5, and the
			genre list holds every entry of ``class_names`` whose sigmoid
			probability is at least 0.5.

		Raises:
			gr.Error: If no image was provided.
		"""
		if img is None:
			# Without this guard a submit with an empty image box fails with an
			# opaque AttributeError on ``None.astype``.
			raise gr.Error("Please upload an image before submitting.")

		# Convert the NumPy array to a PIL image, apply the shared transforms
		# and add a batch dimension.
		pil_img = Image.fromarray(img.astype('uint8'), 'RGB')
		batch = image_transforms(pil_img).unsqueeze(0)

		with torch.no_grad():
			output = model_img(batch)
			score = output[0].squeeze(0)
			is_award = F.sigmoid(output[1].squeeze(0)) >= 0.5
			genre_probs = F.sigmoid(output[2].squeeze(0))

		# class_names is positionally aligned with the genre output.
		selected = [name for p, name in zip(genre_probs, class_names) if p >= 0.5]

		return round(score.item(), 2), is_award.item(), {"genres": selected}


	def combine_predictor(title, synopsis, img):
		"""Predict score, award flag and genres from text and image together.

		Both models are run independently and their three raw outputs are
		averaged element-wise. The averaged award and genre values are then
		passed through a sigmoid and thresholded at 0.5.

		Args:
			title: Title text entered by the user.
			synopsis: Synopsis text entered by the user.
			img: Image as a NumPy array supplied by the Gradio image input, or
				None when nothing was uploaded.

		Returns:
			A tuple ``(score, is_award, {"genres": [...]})`` where ``score`` is
			the averaged first output rounded to two decimals, ``is_award`` is a
			bool, and the genre list holds every entry of ``class_names`` whose
			probability is at least 0.5.

		Raises:
			gr.Error: If no image was provided.
		"""
		if img is None:
			# Fail early with a readable message instead of an AttributeError
			# on ``None.astype`` after the text has already been tokenized.
			raise gr.Error("Please upload an image before submitting.")

		encoded_synopsis = tokenizer(
			f"{title} </s> {synopsis}",
			add_special_tokens=True,
			max_length=128,
			padding="max_length",
			truncation=True,
			return_tensors='pt',
		)

		# Convert the NumPy array to a PIL image, apply the shared transforms
		# and add a batch dimension.
		pil_img = Image.fromarray(img.astype('uint8'), 'RGB')
		img_batch = image_transforms(pil_img).unsqueeze(0)

		with torch.no_grad():
			output_text = model_text((encoded_synopsis['input_ids'], encoded_synopsis['attention_mask']))
			output_img = model_img(img_batch)

		# Average the raw outputs of both models before applying the sigmoid.
		score = (output_img[0].squeeze(0) + output_text[0].squeeze(0)) / 2
		is_award = F.sigmoid((output_img[1].squeeze(0) + output_text[1].squeeze(0)) / 2) >= 0.5
		genre_probs = F.sigmoid((output_img[2].squeeze(0) + output_text[2].squeeze(0)) / 2)

		# class_names is positionally aligned with the genre output.
		selected = [name for p, name in zip(genre_probs, class_names) if p >= 0.5]

		return round(score.item(), 2), is_award.item(), {"genres": selected}

	# Three Gradio interfaces (text only, image only, text + image), each with
	# its own freshly built input and output components, grouped into tabs.
	iface_1 = gr.Interface(
		fn=text_predictor,
		inputs=[
			gr.Text(placeholder="Input title here"),
			gr.Text(placeholder="Input synopsis here"),
		],
		outputs=[
			gr.Label(label='Score'),
			gr.Label(label='Is Winning Award?'),
			"json",
		],
	)

	iface_2 = gr.Interface(
		fn=img_predictor,
		inputs=gr.Image(height=224, width=224),
		outputs=[
			gr.Label(label='Score'),
			gr.Label(label='Is Winning Award?'),
			"json",
		],
	)

	iface_3 = gr.Interface(
		fn=combine_predictor,
		inputs=[
			gr.Text(placeholder="Input title here"),
			gr.Text(placeholder="Input synopsis here"),
			gr.Image(height=224, width=224),
		],
		outputs=[
			gr.Label(label='Score'),
			gr.Label(label='Is Winning Award?'),
			"json",
		],
	)

	tab_titles = ["From Text", "From Image", "From Text and Image"]
	demo = gr.TabbedInterface([iface_1, iface_2, iface_3], tab_titles)
	demo.launch()  # Starts the web app.