altndrr committed on
Commit
54a3362
1 Parent(s): a0fbf80

Update thumbnail and description

Browse files
Files changed (2) hide show
  1. app.py +9 -8
  2. assets/thumbnail.png +0 -0
app.py CHANGED
@@ -7,8 +7,6 @@ from src.nn import CaSED
7
 
8
  PAPER_TITLE = "Vocabulary-free Image Classification"
9
  PAPER_DESCRIPTION = """
10
-
11
-
12
  <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 1rem;">
13
  <a href="https://github.com/altndrr/vic" style="margin-right: 0.5rem;">
14
  <img src="https://img.shields.io/badge/code-github.altndrr%2Fvic-blue.svg"/>
@@ -28,11 +26,14 @@ PAPER_DESCRIPTION = """
28
  Vocabulary-free Image Classification aims to assign a class to an image *without* prior knowledge
29
  on the list of class names, thus operating on the semantic class space that contains all the
30
  possible concepts. Our proposed method CaSED finds the best matching category within the
31
- unconstrained semantic space by multimodal data from large vision-language databases. We first
32
- retrieve the semantically most similar captions from a database, from which we extract a set of
33
- candidate categories by applying text parsing and filtering techniques. We further score the
34
- candidates using the multimodal aligned representation of the large pre-trained VLM, *i.e.* CLIP,
35
- to obtain the best-matching category.
 
 
 
36
  """
37
  PAPER_URL = "https://arxiv.org/abs/2306.00917"
38
 
@@ -81,7 +82,7 @@ demo = gr.Interface(
81
  examples="./artifacts/examples/",
82
  allow_flagging='never',
83
  theme=gr.themes.Soft(),
84
- thumbnail="./assets/thumbnail.png",
85
  )
86
 
87
  demo.launch(share=False)
 
7
 
8
  PAPER_TITLE = "Vocabulary-free Image Classification"
9
  PAPER_DESCRIPTION = """
 
 
10
  <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 1rem;">
11
  <a href="https://github.com/altndrr/vic" style="margin-right: 0.5rem;">
12
  <img src="https://img.shields.io/badge/code-github.altndrr%2Fvic-blue.svg"/>
 
26
  Vocabulary-free Image Classification aims to assign a class to an image *without* prior knowledge
27
  on the list of class names, thus operating on the semantic class space that contains all the
28
  possible concepts. Our proposed method CaSED finds the best matching category within the
29
+ unconstrained semantic space by multimodal data from large vision-language databases.
30
+
31
+ To assign a label to an image, we:
32
+ 1. extract the image features using a pre-trained Vision-Language Model (VLM);
33
+ 2. retrieve the semantically most similar captions from a textual database;
34
+ 3. extract from the captions a set of candidate categories by applying text parsing and filtering;
35
+ 4. score the candidates using the multimodal aligned representation of the pre-trained VLM to
36
+ obtain the best-matching category.
37
  """
38
  PAPER_URL = "https://arxiv.org/abs/2306.00917"
39
 
 
82
  examples="./artifacts/examples/",
83
  allow_flagging='never',
84
  theme=gr.themes.Soft(),
85
+ thumbnail="https://altndrr.github.io/vic/assets/images/method.png",
86
  )
87
 
88
  demo.launch(share=False)
assets/thumbnail.png DELETED
Binary file (785 kB)