Spaces:
Runtime error
Runtime error
Update thumbnail and description
Browse files- app.py +9 -8
- assets/thumbnail.png +0 -0
app.py
CHANGED
@@ -7,8 +7,6 @@ from src.nn import CaSED
|
|
7 |
|
8 |
PAPER_TITLE = "Vocabulary-free Image Classification"
|
9 |
PAPER_DESCRIPTION = """
|
10 |
-
|
11 |
-
|
12 |
<div style="display: flex; align-items: center; justify-content: center; margin-bottom: 1rem;">
|
13 |
<a href="https://github.com/altndrr/vic" style="margin-right: 0.5rem;">
|
14 |
<img src="https://img.shields.io/badge/code-github.altndrr%2Fvic-blue.svg"/>
|
@@ -28,11 +26,14 @@ PAPER_DESCRIPTION = """
|
|
28 |
Vocabulary-free Image Classification aims to assign a class to an image *without* prior knowledge
|
29 |
on the list of class names, thus operating on the semantic class space that contains all the
|
30 |
possible concepts. Our proposed method CaSED finds the best matching category within the
|
31 |
-
unconstrained semantic space by multimodal data from large vision-language databases.
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
36 |
"""
|
37 |
PAPER_URL = "https://arxiv.org/abs/2306.00917"
|
38 |
|
@@ -81,7 +82,7 @@ demo = gr.Interface(
|
|
81 |
examples="./artifacts/examples/",
|
82 |
allow_flagging='never',
|
83 |
theme=gr.themes.Soft(),
|
84 |
-
thumbnail="
|
85 |
)
|
86 |
|
87 |
demo.launch(share=False)
|
|
|
7 |
|
8 |
PAPER_TITLE = "Vocabulary-free Image Classification"
|
9 |
PAPER_DESCRIPTION = """
|
|
|
|
|
10 |
<div style="display: flex; align-items: center; justify-content: center; margin-bottom: 1rem;">
|
11 |
<a href="https://github.com/altndrr/vic" style="margin-right: 0.5rem;">
|
12 |
<img src="https://img.shields.io/badge/code-github.altndrr%2Fvic-blue.svg"/>
|
|
|
26 |
Vocabulary-free Image Classification aims to assign a class to an image *without* prior knowledge
|
27 |
on the list of class names, thus operating on the semantic class space that contains all the
|
28 |
possible concepts. Our proposed method CaSED finds the best matching category within the
|
29 |
+
unconstrained semantic space by multimodal data from large vision-language databases.
|
30 |
+
|
31 |
+
To assign a label to an image, we:
|
32 |
+
1. extract the image features using a pre-trained Vision-Language Model (VLM);
|
33 |
+
2. retrieve the semantically most similar captions from a textual database;
|
34 |
+
3. extract from the captions a set of candidate categories by applying text parsing and filtering;
|
35 |
+
4. score the candidates using the multimodal aligned representation of the pre-trained VLM to
|
36 |
+
obtain the best-matching category.
|
37 |
"""
|
38 |
PAPER_URL = "https://arxiv.org/abs/2306.00917"
|
39 |
|
|
|
82 |
examples="./artifacts/examples/",
|
83 |
allow_flagging='never',
|
84 |
theme=gr.themes.Soft(),
|
85 |
+
thumbnail="https://altndrr.github.io/vic/assets/images/method.png",
|
86 |
)
|
87 |
|
88 |
demo.launch(share=False)
|
assets/thumbnail.png
DELETED
Binary file (785 kB)
|
|