Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import skimage | |
import matplotlib.pyplot as plt | |
from PIL import Image | |
import numpy as np | |
from collections import OrderedDict | |
import torch | |
from imagebind import data | |
from imagebind.models import imagebind_model | |
from imagebind.models.imagebind_model import ModalityType | |
import torch.nn as nn | |
import pickle | |
# Inference runs on the CPU. The GPU auto-selection expression
# ("cuda:0" if torch.cuda.is_available() else "cpu") is kept here for reference.
device = "cpu"

# Build the pretrained ImageBind "huge" model once at start-up, switch it to
# evaluation mode, and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

# NOTE(security): pickle can execute arbitrary code while loading, so these
# files must only ever be trusted assets shipped with the application.
# The files are opened with `with` so the handles are closed deterministically
# instead of being left to the garbage collector.
# NOTE(review): presumably unit-normalised image embeddings, one row per
# image (the file name suggests so) -- confirm against the script that
# produced the pickle.
with open("./assets/image_features_norm_2.pkl", "rb") as features_file:
    image_features = pickle.load(features_file)

# NOTE(review): presumably the image paths in the same order as the rows of
# `image_features` -- confirm against the script that produced the pickle.
with open("./assets/image_paths.pkl", "rb") as paths_file:
    image_paths = pickle.load(paths_file)
def generate_image(text):
    """Return the asset image whose stored embedding best matches *text*.

    The prompt is embedded with the module-level ImageBind ``model``, scaled
    to unit length, and scored against ``image_features`` with a dot product.
    The best-scoring entry of ``image_paths`` is reduced to its file name,
    opened from ``./assets/images`` and returned as an RGB PIL image.
    """
    prompt_batch = data.load_and_transform_text([text], device)

    # No gradients are needed for a pure forward pass.
    with torch.no_grad():
        outputs = model({ModalityType.TEXT: prompt_batch})

    query = outputs[ModalityType.TEXT]
    unit_query = query / query.norm(dim=-1, keepdim=True)

    gallery = image_features.cpu().numpy()
    scores = np.matmul(unit_query.cpu().numpy(), gallery.T)

    # argmax works on the flattened score array; with the single prompt passed
    # above, the flat index is used directly as the index into image_paths.
    best = np.argmax(scores)

    # Only the file name of the stored path is used; the image itself is
    # always read from the local assets directory.
    file_name = os.path.basename(image_paths[best])
    return Image.open(f"./assets/images/{file_name}").convert("RGB")
# Gradio UI: one text box in, one image out, served by generate_image.
# NOTE(review): every example row carries an image path in addition to the
# prompt, although only a single text input is declared -- presumably the
# expected result for that prompt; confirm the installed Gradio version
# ignores the extra column.
iface = gr.Interface(
    fn=generate_image,
    inputs="text",
    outputs="image",
    examples=[
        ["a page of text about segmentation", "assets/images/page.png"],
        ["a facial photo of a tabby cat", "assets/images/chelsea.png"],
        ["a portrait of an astronaut with the American flag", "assets/images/astronaut.png"],
        ["a rocket standing on a launchpad", "assets/images/rocket.png"],
        ["a red motorcycle standing in a garage", "assets/images/motorcycle_right.png"],
        ["a person looking at a camera on a tripod", "assets/images/camera.png"],
        ["a black-and-white silhouette of a horse", "assets/images/horse.png"],
        ["a cup of coffee on a saucer", "assets/images/coffee.png"]
    ],
    title="Find the image most similar to the given text",
    # The description is HTML; the paragraph is now terminated with </p>
    # (the original opened a second <p> instead of closing the first).
    description='''<p>
Welcome to a straightforward demonstration of ImageBind.
This simple demo is designed to find the image most similar to a given text
using cosine similarity. For a comprehensive
understanding of its capabilities, we encourage you to explore the original research <a href='https://arxiv.org/abs/2305.05665' target='_blank'>paper</a>
and visit the <a href='https://github.com/facebookresearch/ImageBind' target='_blank'>repository</a>
for more in-depth information.</p>
'''
)

# Start serving the interface as soon as the script is executed.
iface.launch()