File size: 12,683 Bytes
44c11f2
 
c399665
ba57ea8
53ddc87
f54892c
b3c9da2
b929de0
919efff
cd12ada
b3c9da2
919efff
b3c9da2
919efff
b3c9da2
919efff
 
b3c9da2
919efff
b3c9da2
919efff
 
29e00f0
44c11f2
b3c9da2
44c11f2
8527e35
919efff
8527e35
919efff
8527e35
b929de0
63672a5
919efff
 
b3c9da2
8527e35
919efff
8527e35
919efff
8527e35
b929de0
63672a5
919efff
b3c9da2
44c11f2
919efff
b3c9da2
287b7cd
919efff
b3c9da2
 
53ddc87
b3c9da2
53ddc87
b3c9da2
53ddc87
919efff
b3c9da2
44c11f2
919efff
b3c9da2
53ddc87
b3c9da2
53ddc87
919efff
b3c9da2
919efff
546443e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80225b5
 
 
 
 
919efff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import streamlit as st
from apps.utils import read_markdown
from .streamlit_tensorboard import st_tensorboard, kill_tensorboard
from .utils import Toc
def _make_columns(spec):
    """Create a row of Streamlit columns across old and new Streamlit releases.

    ``st.beta_columns`` was renamed to ``st.columns`` and the ``beta_`` alias
    was removed afterwards, so the stable name is preferred and the beta name
    is only used when the installed Streamlit does not provide ``st.columns``.

    Args:
        spec: Column specification forwarded unchanged (here a list of
            relative widths such as ``[1, 1, 1]``).

    Returns:
        Whatever the underlying Streamlit call returns (one container per
        column).
    """
    factory = getattr(st, "columns", None)
    if factory is None:
        factory = st.beta_columns
    return factory(spec)


def _show_example(column, image, *lines):
    """Render one example into ``column``.

    Args:
        column: Column container to draw into.
        image: Path of the image to show first, or ``None`` for a text-only
            entry.
        *lines: Markdown strings, each emitted with its own ``column.write``
            call, in order.
    """
    if image is not None:
        column.image(image, use_column_width="auto", width=300)
    for line in lines:
        column.write(line)


def app(state=None):
    """Render the article page of the Multilingual-VQA demo.

    The page consists of an info banner, a table of contents, a series of
    markdown sections loaded through ``read_markdown``, two embedded
    TensorBoard views (one per log directory, each on its own port) and a
    gallery of example questions with the model's predicted answers.

    Headings are registered on a ``Toc`` instance while the page is built and
    ``toc.generate()`` is called last, after every heading has been emitted.

    Args:
        state: Accepted for interface compatibility with the caller; it is
            not read anywhere in this function.
    """
    toc = Toc()
    st.info("Welcome to our Multilingual-VQA demo. Please use the navigation sidebar to move to our demo, or scroll below to read all about our project. 🤗 In case the sidebar isn't properly rendered, please change to a smaller window size and back to full screen.")

    st.header("Table of contents")
    toc.placeholder()

    toc.header("Introduction and Motivation")
    st.write(read_markdown("intro/intro.md"))
    toc.subheader("Novel Contributions")
    st.write(read_markdown("intro/contributions.md"))

    toc.header("Methodology")

    toc.subheader("Pre-training")
    st.write(read_markdown("pretraining/intro.md"))
    st.image(
        "./misc/article/Multilingual-VQA.png",
        caption="Masked LM model for Image-text Pre-training.",
    )
    toc.subsubheader("MLM Dataset")
    st.write(read_markdown("pretraining/data.md"))
    toc.subsubheader("MLM Model")
    st.write(read_markdown("pretraining/model.md"))
    toc.subsubheader("MLM Training Logs")
    st.info("In case the TensorBoard logs are not displayed, please visit this link: https://huggingface.co/flax-community/multilingual-vqa-pt-ckpts/tensorboard")
    st_tensorboard(logdir='./logs/pretrain_logs', port=6006)

    toc.subheader("Finetuning")
    toc.subsubheader("VQA Dataset")
    st.write(read_markdown("finetuning/data.md"))
    toc.subsubheader("VQA Model")
    st.write(read_markdown("finetuning/model.md"))
    toc.subsubheader("VQA Training Logs")
    st.info("In case the TensorBoard logs are not displayed, please visit this link: https://huggingface.co/flax-community/multilingual-vqa-pt-60k-ft/tensorboard")
    # Different port from the pre-training view so both embeds can coexist.
    st_tensorboard(logdir='./logs/finetune_logs', port=6007)

    toc.header("Challenges and Technical Difficulties")
    st.write(read_markdown("challenges.md"))

    toc.header("Limitations")
    st.write(read_markdown("limitations.md"))

    toc.header("Conclusion, Future Work, and Social Impact")
    toc.subheader("Conclusion")
    st.write(read_markdown("conclusion_future_work/conclusion.md"))
    toc.subheader("Future Work")
    st.write(read_markdown("conclusion_future_work/future_work.md"))
    toc.subheader("Social Impact")
    st.write(read_markdown("conclusion_future_work/social_impact.md"))

    toc.header("References")
    st.write(read_markdown("references.md"))

    toc.header("Checkpoints")
    st.write(read_markdown("checkpoints/checkpoints.md"))
    toc.subheader("Other Checkpoints")
    st.write(read_markdown("checkpoints/other_checkpoints.md"))

    toc.header("Acknowledgements")
    st.write(read_markdown("acknowledgements.md"))

    toc.header("VQA Examples")
    toc.subheader("Color Questions")
    col1, col2, col3 = _make_columns([1, 1, 1])

    _show_example(
        col1,
        "./sections/examples/men_riding_horses.jpeg",
        "**Custom Question**: What color are the horses?",
        "**Predicted Answer**: brown✅",
    )
    _show_example(
        col2,
        "./sections/examples/cat_color.jpeg",
        "**Custom Question**: What color is the cat?",
        "**Predicted Answer**: white✅",
    )
    _show_example(
        col3,
        "./sections/examples/men_happy.jpeg",
        "**Custom Question**: What color is the man's jacket?",
        "**Predicted Answer**: black⚫",
    )
    _show_example(
        col1,
        "./sections/examples/car_color.jpeg",
        "**Actual Question**: What color is the car?",
        "**Predicted Answer**: blue❎",
    )
    _show_example(
        col2,
        "./sections/examples/coat_color.jpeg",
        "**Actual Question**: What color is this person's coat?",
        "**Predicted Answer**: blue✅",
    )

    toc.subheader("Counting Questions")
    col1, col2, col3 = _make_columns([1, 1, 1])

    _show_example(
        col1,
        "./sections/examples/giraffe_zebra.jpeg",
        "**Actual Question**: How many zebras are there?",
        "**Predicted Answer**: 0❎",
    )
    _show_example(
        col2,
        "./sections/examples/giraffe_zebra.jpeg",
        "**Custom Question**: How many giraffes are there?",
        "**Predicted Answer**: 2❎",
    )
    _show_example(
        col3,
        "./sections/examples/teddy.jpeg",
        "**Custom Question**: How many teddy bears are present in the image?",
        "**Predicted Answer**: 3✅",
    )
    _show_example(
        col1,
        "./sections/examples/candle_count.jpeg",
        "**Actual Question**: ¿Cuantas velas hay en el cupcake?",
        "**English Translation**: How many candles are in the cupcake?",
        "**Predicted Answer**: 0❎",
    )
    # NOTE(review): this example also lands in col1, unlike the other
    # galleries which continue into col2 - confirm whether that is intended.
    _show_example(
        col1,
        "./sections/examples/people_picture.jpeg",
        "**Actual Question**: ¿A cuánta gente le están tomando una foto?",
        "**English Translation**: How many people are you taking a picture of?",
        "**Predicted Answer**: 10❎",
    )

    toc.subheader("Size/Shape Questions")
    col1, col2, col3 = _make_columns([1, 1, 1])
    _show_example(
        col1,
        "./sections/examples/vase.jpeg",
        "**Actual Question**:  What shape is the vase? ",
        "**Predicted Answer**: round✅",
    )

    toc.subheader("Yes/No Questions")
    col1, col2, col3 = _make_columns([1, 1, 1])

    _show_example(
        col1,
        "./sections/examples/teddy.jpeg",
        "**Actual Question**: Sind das drei Teddybären?",
        # The German question asks about *three* teddy bears ("drei").
        "**English Translation**: Are those three teddy bears?",
        "**Predicted Answer**: Ja (yes)✅",
    )
    _show_example(
        col2,
        "./sections/examples/winter.jpeg",
        "**Actual Question**: ¿Se lo tomaron en invierno?",
        "**English Translation**: Did they take it in winter?",
        "**Predicted Answer**: si (yes)✅",
    )
    _show_example(
        col3,
        "./sections/examples/clock.jpeg",
        "**Actual Question**: Is the clock ornate? ",
        "**Predicted Answer**: yes✅",
    )
    _show_example(
        col1,
        "./sections/examples/decorated_building.jpeg",
        "**Actual Question**: Ist das Gebäude orniert?",
        "**English Translation**: Is the building decorated?",
        "**Predicted Answer**: Ja (yes)✅",
    )
    _show_example(
        col2,
        "./sections/examples/commuter_train.jpeg",
        "**Actual Question**: Ist das ein Pendler-Zug?",
        "**English Translation**: Is that a commuter train?",
        "**Predicted Answer**: Ja (yes)❎",
    )
    _show_example(
        col3,
        "./sections/examples/is_in_a_restaurant.jpeg",
        "**Actual Question**: Elle est dans un restaurant?",
        "**English Translation**: Is she in a restaurant?",
        "**Predicted Answer**: Oui (yes)❎",
    )
    _show_example(
        col1,
        "./sections/examples/giraffe_eyes.jpeg",
        "**Actual Question**: Est-ce que l'œil de la girafe est fermé?",
        "**English Translation**: Are the giraffe's eyes closed?",
        "**Predicted Answer**: Oui (yes)❎",
    )

    toc.subheader("Negatives Test")
    col1, col2, col3 = _make_columns([1, 1, 1])
    _show_example(col1, "./sections/examples/men_happy.jpeg")

    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man happy?",
        "**Predicted Answer**: Yes✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not happy?",
        "**Predicted Answer**: Yes❎",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man sad?",
        "**Predicted Answer**: No✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not sad?",
        "**Predicted Answer**: No❎",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man unhappy?",
        "**Predicted Answer**: No✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not unhappy?",
        "**Predicted Answer**: No❎",
    )

    toc.subheader("Multilinguality Test")

    toc.subsubheader("Color Question")
    col1, col2, col3 = _make_columns([1, 1, 1])
    _show_example(col1, "./sections/examples/truck_color.jpeg")

    _show_example(
        col2,
        None,
        "**Actual Question**: What color is the building?",
        "**Predicted Answer**: red✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Welche Farbe hat das Gebäude?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rot (red)✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿De qué color es el edificio?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rojo (red)✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: De quelle couleur est le bâtiment ?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rouge (red)✅",
    )

    toc.subsubheader("Counting Question")
    col1, col2, col3 = _make_columns([1, 1, 1])
    _show_example(col1, "./sections/examples/bear.jpeg")

    _show_example(
        col2,
        None,
        "**Actual Question**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Wie viele Bären siehst du?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿Cuántos osos ves?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Combien d'ours voyez-vous ?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )

    toc.subsubheader("Misc Question")
    col1, col2, col3 = _make_columns([1, 1, 1])
    _show_example(col1, "./sections/examples/bench.jpeg")

    _show_example(
        col2,
        None,
        "**Actual Question**: Where is the bench?",
        "**Predicted Answer**: field✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Où est le banc ?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: domaine (field)✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿Dónde está el banco?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: campo (field)✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Wo ist die Bank?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: Feld (field)✅",
    )

    toc.subheader("Misc Questions")
    col1, col2, col3 = _make_columns([1, 1, 1])

    _show_example(
        col1,
        "./sections/examples/tennis.jpeg",
        "**Actual Question**: ¿Qué clase de juego está viendo la multitud?",
        "**English Translation**: What kind of game is the crowd watching?",
        "**Predicted Answer**: tenis (tennis)✅",
    )
    _show_example(
        col2,
        "./sections/examples/men_body_suits.jpeg",
        "**Custom Question**: What are the men wearing?",
        "**Predicted Answer**: wetsuits✅",
    )
    _show_example(
        col3,
        "./sections/examples/bathroom.jpeg",
        "**Actual Question**: ¿A qué habitación perteneces?",
        "**English Translation**: What room do you belong to?",
        "**Predicted Answer**: bano (bathroom)✅",
    )
    _show_example(
        col1,
        "./sections/examples/men_riding_horses.jpeg",
        "**Custom Question**: What are the men riding?",
        "**Predicted Answer**: horses✅",
    )
    _show_example(
        col2,
        "./sections/examples/inside_outside.jpeg",
        "**Actual Question**: Was this taken inside or outside?",
        "**Predicted Answer**: inside✅",
    )
    _show_example(
        col3,
        "./sections/examples/dog_looking_at.jpeg",
        "**Actual Question**: Was guckt der Hund denn so?",
        "**English Translation**: What is the dog looking at?",
        "**Predicted Answer**: Frisbeescheibe (frisbee)❎",
    )

    # Called last, once every heading above has been registered on the Toc.
    toc.generate()