merve (HF staff) committed
Commit ae97e32
1 Parent(s): 1436f3b

Update app.py

Files changed (1)
  1. app.py +5 -1
app.py CHANGED
@@ -82,7 +82,11 @@ def infer(image, labels):
 
 with gr.Blocks() as demo:
     gr.Markdown("# Compare Multilingual Zero-shot Image Classification")
-    gr.Markdown("Compare the performance of SigLIP and othe rmodels on zero-shot classification in this Space. Three models are compared: CLIP-ViT, NLLB-CLIP and SigLIP-Multilingual. Note that SigLIP outputs are normalized for visualization purposes.")
+    gr.Markdown("Compare the performance of SigLIP and other models on zero-shot classification in this Space.")
+    gr.Markdown("Three models are compared: CLIP-ViT, NLLB-CLIP and SigLIP. Note that SigLIP outputs are normalized for visualization purposes.")
+    gr.Markdown("NLLB-CLIP is a multilingual vision-language model that combines [NLLB](https://ai.meta.com/research/no-language-left-behind/) with [CLIP](https://openai.com/research/clip) to extend CLIP to 200+ languages.")
+    gr.Markdown("CLIP-ViT is a CLIP model extended to other languages using [multilingual knowledge distillation](https://arxiv.org/abs/2004.09813).")
+    gr.Markdown("Finally, SigLIP is a state-of-the-art vision-language model released by Google. The multilingual checkpoint is pre-trained by Google.")
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")