MUTED64 committed on
Commit
88b279a
1 Parent(s): 806212d

Add app and pth

Browse files
Files changed (2) hide show
  1. 1024_MLP_best-MSE4.1636_ep75.pth +3 -0
  2. app.py +36 -0
1024_MLP_best-MSE4.1636_ep75.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617a1c3fe8cfdcfb79fa0df3d46d4673497ca47a8ee43ddd6d6a5027a478ec64
3
+ size 3716120
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from torchvision.transforms import functional as F
5
+ from typing import List
6
+ from transformers import CLIPModel, CLIPProcessor
7
+
8
# Load the pre-trained scoring head.
# map_location="cpu" lets a checkpoint that was saved from a GPU session load
# on CPU-only hosts; without it torch.load tries to restore CUDA tensors.
# SECURITY: torch.load unpickles arbitrary Python objects, so only load
# checkpoints that come from a trusted source.
# NOTE(review): the .eval() call below requires the file to contain a pickled
# nn.Module. If it actually stores a state_dict, the MLP class has to be
# defined here and load_state_dict() used instead -- confirm against the
# training code. Recent PyTorch releases also default torch.load to
# weights_only=True, which rejects pickled modules -- TODO confirm the
# PyTorch version this app is deployed with.
model_path = "1024_MLP_best-MSE4.1636_ep75.pth"
model = torch.load(model_path, map_location="cpu")
model.eval()

# Load the CLIP model and processor.
# "ViT-L/14" is the model name used by the OpenAI `clip` package; it is not a
# Hugging Face Hub repository id, so from_pretrained() cannot resolve it. The
# Hub repository for the same architecture is "openai/clip-vit-large-patch14".
clip_model_id = "openai/clip-vit-large-patch14"
clip_model = CLIPModel.from_pretrained(clip_model_id)
clip_processor = CLIPProcessor.from_pretrained(clip_model_id)
16
+
17
# Define the prediction function
def predict(images: List[Image.Image]) -> float:
    """Score one image, or a list of images, with the aesthetic model.

    Args:
        images: Either a single image (the Gradio ``"image"`` input passes one
            image per call, as a NumPy array by default) or a list/tuple of
            images. Each image may be anything the CLIP processor accepts,
            e.g. a PIL image or an ``H x W x C`` array.

    Returns:
        A single float when one image is passed (what the ``"number"`` output
        needs), or a list of floats when a list/tuple is passed. Every score
        is clamped to the range [0, 10].

    Raises:
        ValueError: If ``images`` is ``None`` (nothing was uploaded).
    """
    if images is None:
        raise ValueError("No image provided.")

    # Gradio hands over one bare image; normalise to a batch so both call
    # styles share the same code path.
    single = not isinstance(images, (list, tuple))
    batch = [images] if single else list(images)
    if not batch:
        return []

    # Pass the raw images straight to the processor. It performs the resize,
    # rescale and normalisation itself; converting with to_tensor() first
    # would scale the pixel values to [0, 1] a second time.
    inputs = clip_processor(images=batch, return_tensors="pt")

    with torch.no_grad():
        # The scoring head is fed CLIP image embeddings rather than raw
        # pixels, which is why the CLIP model is loaded alongside it.
        # NOTE(review): if the head was trained on L2-normalised embeddings,
        # normalise here as well -- confirm against the training code.
        embeddings = clip_model.get_image_features(
            pixel_values=inputs.pixel_values
        )
        outputs = model(embeddings)

    scores = outputs.clamp(0, 10).cpu().numpy().reshape(-1).tolist()
    return scores[0] if single else scores
25
+
26
# Build the web UI: `predict` receives the "image" input and its result is
# shown in a "number" output.
iface = gr.Interface(
    predict,
    "image",
    "number",
    title="Kemono Aesthetic Scorer",
    description="Predict the score of a kemono based on aesthetic features.",
)

# Start serving the interface.
iface.launch()