Spaces:
Sleeping
Sleeping
File size: 1,576 Bytes
be954f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import torch
from PIL import Image
import open_clip
from gradio import Interface, inputs, outputs
# Filled in by the first start() call so the pretrained weights are only
# loaded once per process instead of once per request.
_COMPONENTS = None


def start():
    """Load the CLIP model, its image transform and its tokenizer.

    Creates the ``ViT-B-32`` architecture with the ``laion2b_s34b_b79k``
    pretrained tag through ``open_clip``.  The first call does the actual
    loading; later calls return the same cached objects.

    Returns:
        tuple: ``(model, preprocess, tokenizer)``.
    """
    global _COMPONENTS
    if _COMPONENTS is None:
        model, _, preprocess = open_clip.create_model_and_transforms(
            'ViT-B-32', pretrained='laion2b_s34b_b79k'
        )
        # The open_clip usage example switches the model to inference mode
        # before encoding (train-mode layers would otherwise stay active).
        model.eval()
        tokenizer = open_clip.get_tokenizer('ViT-B-32')
        _COMPONENTS = (model, preprocess, tokenizer)
    return _COMPONENTS
def process(model, preprocess, tokenizer, image_path, text):
    """Score how well ``text`` matches an image using a CLIP-style model.

    Args:
        model: Object exposing ``encode_image`` and ``encode_text``.
        preprocess: Callable turning an image into a tensor; a leading batch
            dimension is added to its result here.
        tokenizer: Callable turning a list of strings into a token tensor.
        image_path: Either a filesystem path (``str``) that is opened with
            PIL, or an already loaded image object exposing ``resize``.
        text: The caption to compare against the image.

    Returns:
        torch.Tensor: the dot product of the L2-normalised image and text
        features, multiplied by 100.

    Raises:
        ValueError: If ``image_path`` is ``None`` (no image supplied).
    """
    import contextlib  # local import: only needed for the no-op context below

    if image_path is None:
        raise ValueError("No image was provided.")

    # NOTE(review): the fixed 512x512 resize is kept from the original code;
    # it does not preserve aspect ratio -- confirm it is intentional.
    if isinstance(image_path, str):
        # The context manager closes the file handle; resize() returns a new,
        # fully loaded image so it stays usable after the file is closed.
        with Image.open(image_path) as opened:
            resized = opened.resize((512, 512))
    else:
        resized = image_path.resize((512, 512))

    # Run the inputs on whatever device the model lives on (CPU if the model
    # exposes no parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    image = preprocess(resized).unsqueeze(0).to(device)
    tokens = tokenizer([text]).to(device)

    # Mixed precision only makes sense on CUDA; elsewhere use a no-op context
    # instead of requesting CUDA autocast on a CPU-only host.
    if device.type == "cuda":
        amp = torch.autocast(device_type="cuda")
    else:
        amp = contextlib.nullcontext()

    with torch.no_grad(), amp:
        image_features = model.encode_image(image)
        text_features = model.encode_text(tokens)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        similarity = (image_features @ text_features.T) * 100
    return similarity
def predict(image, text):
    """Gradio callback: return the image/text similarity as a Python number.

    Obtains the model, transform and tokenizer from ``start()``, hands them
    to ``process()`` together with the user's image and text, and converts
    the resulting single-element tensor with ``.item()``.
    """
    clip_model, transform, tokenize = start()
    score = process(clip_model, transform, tokenize, image, text)
    return score.item()
# Input widgets: the image (delivered to predict() as a PIL image) and the
# caption text.  Note that these assignments rebind the imported `inputs`
# and `outputs` names to the widget objects.
inputs = [inputs.Image(type="pil", label="Image"), inputs.Textbox(label="Text")]

# Output widget showing the similarity score.
outputs = outputs.Textbox(label="Similarity")

# Build the interface and serve it locally.
demo = Interface(fn=predict, inputs=inputs, outputs=outputs)
demo.launch()

# To share the app through a public link instead, launch with share=True:
# demo.launch(share=True)