import torch
from PIL import Image
import open_clip
import gradio as gr


def start():
    # Load the OpenCLIP ViT-B-32 model, its image preprocessing transform, and tokenizer.
    model, _, preprocess = open_clip.create_model_and_transforms(
        "ViT-B-32", pretrained="laion2b_s34b_b79k"
    )
    tokenizer = open_clip.get_tokenizer("ViT-B-32")
    return model, preprocess, tokenizer


def process(model, preprocess, tokenizer, image_path, text):
    # Accept either a file path or a PIL image (as passed in by Gradio).
    if isinstance(image_path, str):
        image = Image.open(image_path)
    else:
        image = image_path
    image = preprocess(image.resize((512, 512))).unsqueeze(0)
    text = tokenizer([text])

    with torch.no_grad(), torch.cuda.amp.autocast():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)
        # Normalize the embeddings so the dot product below is a cosine similarity.
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        similarity = (image_features @ text_features.T) * 100

    return similarity


def predict(image, text):
    # model, preprocess, and tokenizer are loaded in the __main__ block below.
    similarity = process(model, preprocess, tokenizer, image, text)
    return similarity.item()


gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Image(label="Select the picture", type="pil"),
        gr.Textbox(label="Enter the text"),
    ],
    outputs=gr.Textbox(label="Similarity"),
    title="You draw & AI guess",
)

if __name__ == "__main__":
    print("Start loading model...")
    model, preprocess, tokenizer = start()
    # If you want to run it locally, use the following line:
    gradio_app.launch()
    # If you want to share it online, use this instead:
    # gradio_app.launch(share=True)