Spaces:

OFA-Sys
/

chinese-clip-zero-shot-image-classification

Running on CPU Upgrade

chinese-clip-zero-shot-image-classification

File size: 3,636 Bytes

23cf698
 
 
 
 
 
 
29a378b
e4fced7
 
 
 
29a378b
ceb927d
33640e7
 
 
 
ceb927d
e4fced7
 
 
 
33640e7
 
 
 
ceb927d
23cf698
 
33640e7
494fb65
ceb927d
5119d09
33640e7
23cf698
ceb927d
23cf698
520d4cb
 
33640e7
 
 
520d4cb
 
b725b48
33640e7
83dac04
520d4cb
23cf698

from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image


# Hub checkpoint behind each model size offered in the UI, keyed by the
# display name shown in the model selector.
_CHECKPOINTS = {
    "ViT/B-16": "OFA-Sys/chinese-clip-vit-base-patch16",
    "ViT/L-14": "OFA-Sys/chinese-clip-vit-large-patch14",
    "ViT/L-14@336px": "OFA-Sys/chinese-clip-vit-large-patch14-336px",
    "ViT/H-14": "OFA-Sys/chinese-clip-vit-huge-patch14",
}

# One zero-shot image-classification pipeline per checkpoint. Every pipeline
# is constructed here, at import time, in the order listed above.
pipes = {
    display_name: pipeline("zero-shot-image-classification", model=checkpoint)
    for display_name, checkpoint in _CHECKPOINTS.items()
}

# NOTE(review): `gr.inputs.*` and the `default=` keyword look like the legacy
# Gradio component API -- confirm the pinned Gradio version before upgrading.
_image_input = gr.inputs.Image(type='pil', label="Image 输入图片")
_labels_input = gr.inputs.Textbox(lines=1, label="Candidate Labels 候选分类标签")
_model_input = gr.inputs.Radio(
    choices=list(_CHECKPOINTS),
    type="value",
    default="ViT/B-16",
    label="Model 模型规模",
)
_template_input = gr.inputs.Textbox(
    lines=1,
    label="Prompt Template Prompt模板 ({}指代候选标签)",
    default="一张{}的图片。",
)

# Input components, in the positional order of the handler's parameters:
# image, candidate labels, model name, prompt template.
inputs = [_image_input, _labels_input, _model_input, _template_input]

# Not referenced anywhere else in the visible code; kept as-is.
images = "festival.jpg"

def shot(image, labels_text, model_name, hypothesis_template):
    """Classify ``image`` against the user-supplied candidate labels.

    Args:
        image: Image to classify; forwarded unchanged to the pipeline as
            its ``images`` argument.
        labels_text: Candidate labels in a single string, separated by
            commas. Both the ASCII comma (",") and the full-width comma
            ("，") are accepted. Whitespace around each label is removed,
            empty entries are ignored and repeated labels are collapsed.
        model_name: Key into the module-level ``pipes`` mapping selecting
            which pipeline to run.
        hypothesis_template: Prompt template forwarded to the pipeline as
            ``hypothesis_template``.

    Returns:
        A dict mapping the ``"label"`` of every pipeline result entry to
        that entry's ``"score"``.

    Raises:
        ValueError: If ``labels_text`` contains no non-empty label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    # Chinese input methods type the full-width comma by default; treat it
    # as a separator too, otherwise the whole input becomes one giant label.
    pieces = (labels_text or "").replace("，", ",").split(",")

    # Strip all surrounding whitespace (not just ASCII spaces) and drop empty
    # entries caused by trailing or doubled commas. dict.fromkeys removes
    # duplicates while keeping first-seen order: a repeated label would
    # otherwise be scored twice and then collide in the returned dict.
    stripped = (piece.strip() for piece in pieces)
    labels = list(dict.fromkeys(label for label in stripped if label))

    if not labels:
        raise ValueError(
            "No candidate labels given; enter at least one label, "
            "separating multiple labels with commas."
        )

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {dic["label"]: dic["score"] for dic in res}

# HTML blurb rendered by the interface: project links, usage instructions
# (English and Chinese) and a "Duplicate Space" badge.
_DESCRIPTION = """<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
            Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
            Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
            To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片，并输入多个分类标签，用英文逗号分隔。可点击页面最下方示例参考。<br>
            You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>"""

# Example rows shown in the UI. Every row uses the same model and prompt
# template, so only the image file and the label string are listed here.
_EXAMPLE_MODEL = "ViT/B-16"
_EXAMPLE_TEMPLATE = "一张{}的图片。"
_EXAMPLE_CASES = (
    ("festival.jpg", "灯笼, 鞭炮, 对联"),
    ("cat-dog-music.png", "音乐表演, 体育运动"),
    ("football-match.jpg", "梅西, C罗, 马奎尔"),
)

# Wire the handler, the input components and a "label" output into one
# Gradio interface.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    examples=[
        [image_file, label_text, _EXAMPLE_MODEL, _EXAMPLE_TEMPLATE]
        for image_file, label_text in _EXAMPLE_CASES
    ],
    description=_DESCRIPTION,
    title="Zero-shot Image Classification (中文零样本图像分类)",
)

# Start serving the demo as soon as the module is executed.
iface.launch()