Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 3,636 Bytes
23cf698 29a378b e4fced7 29a378b ceb927d 33640e7 ceb927d e4fced7 33640e7 ceb927d 23cf698 33640e7 494fb65 ceb927d 5119d09 33640e7 23cf698 ceb927d 23cf698 520d4cb 33640e7 520d4cb b725b48 33640e7 83dac04 520d4cb 23cf698 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image
# Zero-shot image-classification pipelines, keyed by the model-size name that
# the UI radio button offers. Every checkpoint is instantiated right here at
# module import, in the order listed.
pipes = {
    size: pipeline("zero-shot-image-classification", model=checkpoint)
    for size, checkpoint in (
        ("ViT/B-16", "OFA-Sys/chinese-clip-vit-base-patch16"),
        ("ViT/L-14", "OFA-Sys/chinese-clip-vit-large-patch14"),
        ("ViT/L-14@336px", "OFA-Sys/chinese-clip-vit-large-patch14-336px"),
        ("ViT/H-14", "OFA-Sys/chinese-clip-vit-huge-patch14"),
    )
}
# Input widgets for the demo. Their order matches the positional parameters
# of `shot`: image, label text, model name, prompt template.
inputs = [
    # Picture to classify, requested from Gradio as a PIL image.
    gr.inputs.Image(label="Image 输入图片", type='pil'),
    # Single-line box holding the candidate labels.
    gr.inputs.Textbox(label="Candidate Labels 候选分类标签", lines=1),
    # Model size selector; the choices mirror the keys of `pipes`.
    gr.inputs.Radio(
        label="Model 模型规模",
        default="ViT/B-16",
        type="value",
        choices=[
            "ViT/B-16",
            "ViT/L-14",
            "ViT/L-14@336px",
            "ViT/H-14",
        ],
    ),
    # Prompt template; per the widget label, "{}" stands for a candidate label.
    gr.inputs.Textbox(
        label="Prompt Template Prompt模板 ({}指代候选标签)",
        default="一张{}的图片。",
        lines=1,
    ),
]

# NOTE(review): this name is not read anywhere else in the visible file —
# looks like a leftover; confirm before removing.
images = "festival.jpg"
def shot(image, labels_text, model_name, hypothesis_template):
    """Classify ``image`` against a comma-separated list of candidate labels.

    Args:
        image: Picture to classify; forwarded unchanged as the pipeline's
            ``images`` argument.
        labels_text: Candidate labels separated by ASCII commas. Whitespace
            around each label is ignored, and empty entries (for example
            from a trailing or doubled comma) are dropped.
        model_name: Key selecting one entry of the module-level ``pipes``.
        hypothesis_template: Prompt template forwarded to the pipeline as
            ``hypothesis_template``.

    Returns:
        A dict mapping each label returned by the pipeline to its score.

    Raises:
        ValueError: If no image was supplied, or ``labels_text`` contains no
            non-empty label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    # Validate before touching the model so the user gets a readable message
    # instead of a failure from deep inside the pipeline.
    if image is None:
        raise ValueError("Please provide an image. 请先上传图片。")

    stripped = (label.strip() for label in (labels_text or "").split(","))
    # Blank entries would otherwise be scored as an empty-string label.
    labels = [label for label in stripped if label]
    if not labels:
        raise ValueError(
            "Please enter at least one candidate label. 请至少输入一个候选分类标签。"
        )

    predictions = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {entry["label"]: entry["score"] for entry in predictions}
# Assemble the demo: `shot` is the callback, `inputs` supplies its four
# arguments, and the "label" output renders the returned label -> score dict.
# Each example row follows the order of `inputs`:
# image path, candidate labels, model name, prompt template.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    examples=[
        ["festival.jpg", "灯笼, 鞭炮, 对联", "ViT/B-16", "一张{}的图片。"],
        ["cat-dog-music.png", "音乐表演, 体育运动", "ViT/B-16", "一张{}的图片。"],
        ["football-match.jpg", "梅西, C罗, 马奎尔", "ViT/B-16", "一张{}的图片。"],
    ],
    description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片,并输入多个分类标签,用英文逗号分隔。可点击页面最下方示例参考。<br>
You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14' alt='Duplicate Space'></a></p>""",
    title="Zero-shot Image Classification (中文零样本图像分类)",
)

# Start serving the interface.
iface.launch()