import gradio as gr
import os
import torch
from torchvision import transforms
from PIL import Image
from torchvision.datasets import CIFAR100
from transformers import CLIPProcessor, CLIPModel

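# Load the pretrained CLIP model and its matching processor; the processor
# tokenizes text and resizes/normalizes images to what the model expects.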
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

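# CIFAR-100's class names serve as the candidate labels for zero-shot
# classification in send_inputs below.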
cifar100 = CIFAR100(root=os.path.expanduser("~/.cache"), download=True, train=False)

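# Manual torchvision preprocessing (resize + ToTensor). The Gradio path below
# does not use it, since CLIPProcessor performs its own preprocessing; it is
# kept as a reference implementation.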
IMG_SIZE = 32  # CIFAR-100 images are 32x32
COMPOSED_TRANSFORMERS = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

# Standard ImageNet normalization statistics. Note that CLIP itself was
# trained with different mean/std values, which CLIPProcessor applies
# automatically.
NORMALIZE_TENSOR = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)


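# Convert a numpy image (the format Gradio hands to the callback) into a
# batched float tensor on the requested device.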
def np_array_to_tensor_image(img, width=IMG_SIZE, height=IMG_SIZE, device='cpu'):
    image = Image.fromarray(img).convert('RGB').resize((width, height))
    image = COMPOSED_TRANSFORMERS(image).unsqueeze(0)
    return image.to(device, torch.float)


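# Apply the ImageNet normalization above to an image tensor.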
def normalize_tensor(tensor: torch.Tensor) -> torch.Tensor:
    return NORMALIZE_TENSOR(tensor)


def send_inputs(img):
    # CLIP is a zero-shot classifier: it needs candidate texts alongside the
    # image to produce logits_per_image, so build prompts from the CIFAR-100
    # class names. The processor resizes and normalizes the raw image itself,
    # which is why the manual helpers above are not needed on this path.
    labels = cifar100.classes
    prompts = [f"a photo of a {label}" for label in labels]
    inputs = processor(text=prompts, images=img, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits_per_image.softmax(dim=1)  # shape: (1, 100)
    top = probs[0].topk(5)
    return "\n".join(
        f"{labels[i]}: {p:.3f}"
        for p, i in zip(top.values.tolist(), top.indices.tolist())
    )


if __name__ == "__main__":
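    # Single image input; the model's top-5 predictions are rendered as text.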
    gr.Interface(fn=send_inputs, inputs=["image"], outputs="text").launch()