"""Gradio demo for CaSED, the method from "Vocabulary-free Image Classification".

Running this module loads the CaSED model, builds a ``gr.Interface`` around
``vic`` and launches the web app.
"""

from typing import Optional

import gradio as gr
import torch

from src.nn import CaSED

PAPER_TITLE = "Vocabulary-free Image Classification"
PAPER_DESCRIPTION = """
Vocabulary-free Image Classification aims to assign a class to an image *without* prior knowledge on the list of class names, thus operating on the semantic class space that contains all the possible concepts. Our proposed method CaSED finds the best matching category within the unconstrained semantic space by multimodal data from large vision-language databases. We first retrieve the semantically most similar captions from a database, from which we extract a set of candidate categories by applying text parsing and filtering techniques. We further score the candidates using the multimodal aligned representation of the large pre-trained VLM, *i.e.* CLIP, to obtain the best-matching category, using *alpha* as a hyperparameter to control the trade-off between the visual and textual similarity.
"""
PAPER_URL = "https://arxiv.org/abs/2306.00917"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model is created once at import time and shared by every request.
model = CaSED().to(DEVICE).eval()


def vic(filename: str, alpha: Optional[float] = None) -> dict:
    """Classify the image at ``filename`` and return per-category scores.

    Args:
        filename: Path of the image file, as provided by the
            ``gr.Image(type="filepath")`` input component.
        alpha: Value forwarded unchanged to the model as ``alpha``; ``None``
            is passed through as-is.

    Returns:
        A dict pairing each entry of the vocabulary returned by the model
        with the score at the same position.
    """
    # NOTE(review): the model is assumed to return two equally long sequences
    # (category names, scores) -- confirm against ``src.nn.CaSED``.
    vocabulary, scores = model(filename, alpha=alpha)
    return dict(zip(vocabulary, scores))


def resize_image(image, max_size: int = 256):
    """Resize ``image`` so that its shorter side equals ``max_size``.

    The aspect ratio is kept, which means the longer side is scaled by the
    same factor and may end up larger than ``max_size``. Square images become
    ``max_size`` x ``max_size``.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method (presumably a PIL image --
            TODO confirm with callers).
        max_size: Target length, in pixels, of the shorter side.

    Returns:
        Whatever ``image.resize`` returns for the computed integer size.
    """
    width, height = image.size
    if width > height:
        # Landscape: height is the shorter side and is pinned to max_size.
        new_size = (int(max_size * (width / height)), int(max_size))
    else:
        # Portrait or square: width is pinned to max_size.
        new_size = (int(max_size), int(max_size * (height / width)))
    return image.resize(new_size)


demo = gr.Interface(
    fn=vic,
    inputs=[
        gr.Image(type="filepath", label="input"),
        gr.Slider(0.0, 1.0, value=0.5, label="alpha"),
    ],
    outputs=[gr.Label(num_top_classes=5, label="output")],
    title=PAPER_TITLE,
    description=PAPER_DESCRIPTION,
    # Link the paper explicitly: the text invites the reader to check it out,
    # so it has to point somewhere.
    article=f'Check out <a href="{PAPER_URL}">the original paper</a> for more information.',
    examples="./artifacts/examples/",
    allow_flagging='never',
    theme=gr.themes.Soft(),
)

# Launched at module level on purpose so that running the file starts the app.
demo.launch(share=False)