# Hugging Face Space (status when captured: Sleeping)
from typing import List, Union

import gradio as gr
import torch
from PIL import Image
from torchvision.transforms import functional as F
from transformers import CLIPModel, CLIPProcessor
# Load the pre-trained scoring model.
# map_location="cpu" lets a checkpoint that was saved from a GPU session load
# on a CPU-only host.
# NOTE(review): torch.load unpickles the file; `model.eval()` below assumes the
# file holds a whole nn.Module (not a state_dict) and that its class is
# importable here -- confirm against how the checkpoint was saved.
model_path = "1024_MLP_best-MSE4.1636_ep75.pth"
model = torch.load(model_path, map_location="cpu")
model.eval()

# Load the CLIP model and processor.
# "ViT-L/14" is the model name used by the OpenAI `clip` package; the
# transformers `from_pretrained` loaders need the Hugging Face Hub id of the
# same architecture instead.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
# Define the prediction function
def predict(
    images: Union[Image.Image, List[Image.Image]],
) -> Union[float, List[float]]:
    """Score one image, or a batch of images, with the aesthetic model.

    Each image is preprocessed by ``clip_processor``, embedded with
    ``clip_model`` and the embedding is fed to ``model``; the result is
    clamped to the range [0, 10].

    Args:
        images: A single image (as delivered by the Gradio ``"image"`` input)
            or a list/tuple of images. Anything ``clip_processor`` accepts as
            an image works.

    Returns:
        A single score when one image was passed, otherwise a list with the
        flattened scores for the batch (an empty list for an empty batch).

    Raises:
        ValueError: If ``images`` is ``None`` (no image was supplied).
    """
    if images is None:
        raise ValueError("No image provided")

    # The Gradio interface passes one image, not a list; wrap it so the rest
    # of the function always works on a batch.
    single = not isinstance(images, (list, tuple))
    batch = [images] if single else list(images)
    if not batch:
        return []

    # Hand the raw images to the processor: it performs its own resizing,
    # rescaling and normalisation, so pre-converting with to_tensor would
    # scale the pixel values twice.
    inputs = clip_processor(images=batch, return_tensors="pt")
    with torch.no_grad():
        embeddings = clip_model.get_image_features(
            pixel_values=inputs.pixel_values
        )
        # NOTE(review): assumes the MLP was trained on raw CLIP image
        # embeddings; if training used L2-normalised embeddings, normalise
        # here as well -- confirm against the training code.
        outputs = model(embeddings)
    scores = outputs.clamp(0, 10).cpu().numpy().reshape(-1).tolist()
    # NOTE(review): assumes the model emits one score per image, so the first
    # element is the score of a lone input image.
    return scores[0] if single else scores
# Describe the Gradio UI: `predict` is wired to one image input and one
# numeric output, with the title and description shown on the page.
_interface_options = {
    "fn": predict,
    "inputs": "image",
    "outputs": "number",
    "title": "Kemono Aesthetic Scorer",
    "description": "Predict the score of a kemono based on aesthetic features.",
}
iface = gr.Interface(**_interface_options)

# Start serving the interface.
iface.launch()