altndrr committed on
Commit a0fbf80
1 Parent(s): 26b1205

Move alpha definition in slider

Files changed (1): app.py +8 -3
app.py CHANGED
@@ -32,8 +32,7 @@ unconstrained semantic space by multimodal data from large vision-language datab
 retrieve the semantically most similar captions from a database, from which we extract a set of
 candidate categories by applying text parsing and filtering techniques. We further score the
 candidates using the multimodal aligned representation of the large pre-trained VLM, *i.e.* CLIP,
-to obtain the best-matching category, using *alpha* as a hyperparameter to control the trade-off
-between the visual and textual similarity.
+to obtain the best-matching category.
 """
 PAPER_URL = "https://arxiv.org/abs/2306.00917"
 
@@ -67,7 +66,13 @@ demo = gr.Interface(
     fn=vic,
     inputs=[
         gr.Image(type="filepath", label="input"),
-        gr.Slider(0.0, 1.0, value=0.5, label="alpha"),
+        gr.Slider(
+            0.0,
+            1.0,
+            value=0.5,
+            label="alpha",
+            info="trade-off between the text (left) and image (right) modality",
+        ),
     ],
     outputs=[gr.Label(num_top_classes=5, label="output")],
     title=PAPER_TITLE,
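
For context, *alpha* is the hyperparameter the removed description text referred to: it balances the textual and visual similarity scores when ranking candidate categories, with 0 favoring the text modality and 1 favoring the image modality (matching the slider's "text (left) and image (right)" hint). Below is a minimal sketch of such a blend; the function name, the candidate scores, and the convex-combination formula are illustrative assumptions, not code from this repository or the paper.

    # Hypothetical alpha-weighted blend of per-candidate similarity scores.
    # `text_sim` / `image_sim` stand in for CLIP similarities of each candidate
    # category name to the retrieved captions and to the input image.
    def blended_scores(
        text_sim: dict[str, float],
        image_sim: dict[str, float],
        alpha: float = 0.5,
    ) -> dict[str, float]:
        """alpha=0.0 uses text similarity only; alpha=1.0 uses image similarity only."""
        return {
            name: (1 - alpha) * text_sim[name] + alpha * image_sim[name]
            for name in text_sim
        }

    # With the slider default alpha=0.5, both modalities count equally.
    scores = blended_scores(
        {"tiger": 0.31, "cat": 0.27},  # illustrative text-side similarities
        {"tiger": 0.29, "cat": 0.35},  # illustrative image-side similarities
        alpha=0.5,
    )
    print(max(scores, key=scores.get))  # -> "cat" (0.31 vs. tiger's 0.30)

Moving this explanation into the slider's `info` argument keeps the behavior of alpha documented right where the user adjusts it, which is why the commit shortens the paper description accordingly.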