"""Gradio demo: retrieve the gallery image that best matches a text prompt.

The prompt is embedded with ImageBind and compared against image embeddings
that were computed ahead of time and are loaded from ``./assets``.
"""

import os
import pickle
from collections import OrderedDict

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import skimage
import torch
import torch.nn as nn
from PIL import Image

from imagebind import data
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType

# Inference is pinned to the CPU. To use a GPU when one is present, replace
# this with: "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

# NOTE(security): pickle can execute arbitrary code while loading. These files
# are assumed to be trusted assets shipped with the app; never point these
# paths at user-supplied data.
with open("./assets/image_features_norm_2.pkl", "rb") as features_file:
    image_features = pickle.load(features_file)
with open("./assets/image_paths.pkl", "rb") as paths_file:
    image_paths = pickle.load(paths_file)

# The gallery embeddings never change, so convert and transpose them once
# instead of on every request.
# NOTE(review): the file name suggests the features are already normalised,
# which is what makes the dot product below a cosine similarity — confirm.
_IMAGE_FEATURES_T = image_features.cpu().numpy().T


def generate_image(text: str) -> Image.Image:
    """Return the gallery image whose embedding is closest to *text*.

    Args:
        text: Free-form prompt typed by the user.

    Returns:
        The best-matching image from ``./assets/images``, converted to RGB.
    """
    inputs = {
        ModalityType.TEXT: data.load_and_transform_text([text], device),
    }

    with torch.no_grad():
        embeddings = model(inputs)

    text_features = embeddings[ModalityType.TEXT]
    # Scale the text embedding to unit length before comparing.
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # One prompt is embedded per call, so the flat argmax over the similarity
    # matrix is the index of the best-matching gallery image.
    similarity = text_features.cpu().numpy() @ _IMAGE_FEATURES_T
    index_img = int(np.argmax(similarity))

    # Only the file name of the stored path is used; the image itself is
    # always read from the local assets directory.
    img_name = os.path.basename(image_paths[index_img])
    with Image.open(f"./assets/images/{img_name}") as matched:
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as it returns.
        return matched.convert("RGB")


iface = gr.Interface(
    fn=generate_image,
    inputs="text",
    outputs="image",
    # Each row is [prompt, image path]. Only the prompt feeds the single text
    # input; the second column is kept as it was in the original demo.
    examples=[
        ["a page of text about segmentation", "assets/images/page.png"],
        ["a facial photo of a tabby cat", "assets/images/chelsea.png"],
        ["a portrait of an astronaut with the American flag", "assets/images/astronaut.png"],
        ["a rocket standing on a launchpad", "assets/images/rocket.png"],
        ["a red motorcycle standing in a garage", "assets/images/motorcycle_right.png"],
        ["a person looking at a camera on a tripod", "assets/images/camera.png"],
        ["a black-and-white silhouette of a horse", "assets/images/horse.png"],
        ["a cup of coffee on a saucer", "assets/images/coffee.png"],
    ],
    title="Find the image most similar to the given text",
    description='''

Welcome to a straightforward demonstration of ImageBind. This simple demo is designed to find the image most similar to a given text using cosine similarity. For a comprehensive understanding of its capabilities, we encourage you to explore the original research paper and visit the repository for more in-depth information.

''',
)

iface.launch()