File size: 1,576 Bytes
be954f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch
from PIL import Image
import open_clip
from gradio import Interface, inputs, outputs

def start():
    model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
    tokenizer = open_clip.get_tokenizer('ViT-B-32')
    return model, preprocess, tokenizer

def process(model, preprocess, tokenizer, image_path, text):
    if isinstance(image_path, str):
        image = Image.open(image_path)
        image = preprocess(image.resize((512, 512))).unsqueeze(0)
    else:
        image = preprocess(image_path.resize((512, 512))).unsqueeze(0)

    text = tokenizer([text])
    with torch.no_grad(), torch.cuda.amp.autocast():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        similarity = (image_features @ text_features.T) * 100
    return similarity

def predict(image, text):
    model, preprocess, tokenizer = start()
    similarity = process(model, preprocess, tokenizer, image, text)
    return similarity.item()

inputs = [
    inputs.Image(type="pil", label="Image"),
    inputs.Textbox(label="Text")
]

outputs = outputs.Textbox(label="Similarity")

# If you want to run it locally, you can use the following code :(
Interface(fn=predict, inputs=inputs, outputs=outputs).launch()
# If you want to share it online, you can use the following code :)
# Interface(fn=predict, inputs=inputs, outputs=outputs).launch(share=True)