gchhablani committed on
Commit 0808df5
1 Parent(s): 74cb830

Update app

app.py CHANGED
@@ -66,7 +66,7 @@ st.set_page_config(
     page_title="Multilingual VQA",
     layout="wide",
     initial_sidebar_state="collapsed",
-    page_icon="./misc/mvqa-logo.png",
+    page_icon="./misc/mvqa-logo-white.png",
 )
 
 st.title("Multilingual Visual Question Answering")
@@ -74,8 +74,26 @@ st.write(
     "[Gunjan Chhablani](https://huggingface.co/gchhablani), [Bhavitvya Malik](https://huggingface.co/bhavitvyamalik)"
 )
 
+image_col, intro_col = st.beta_columns([2,8])
+image_col.image("./misc/mvqa-logo-white.png", use_column_width='always')
+intro_col.write(read_markdown('intro.md'))
 with st.beta_expander("Usage"):
-    st.markdown(read_markdown("usage.md"))
+    st.write(read_markdown("usage.md"))
+
+with st.beta_expander("Article"):
+    st.write(read_markdown("abstract.md"))
+    st.write(read_markdown("caveats.md"))
+    st.write("# Methodology")
+    st.image(
+        "./misc/Multilingual-VQA.png", caption="Masked LM model for Image-text Pretraining."
+    )
+    st.markdown(read_markdown("pretraining.md"))
+    st.markdown(read_markdown("finetuning.md"))
+    st.write(read_markdown("challenges.md"))
+    st.write(read_markdown("social_impact.md"))
+    st.write(read_markdown("references.md"))
+    st.write(read_markdown("checkpoints.md"))
+    st.write(read_markdown("acknowledgements.md"))
 
 first_index = 20
 # Init Session State
@@ -92,7 +110,7 @@ if state.image_file is None:
 
 col1, col2 = st.beta_columns([6, 4])
 
-if col2.button("Get a random example"):
+if col2.button("Get a random example", help="Get a random example from the 100 "):
     sample = dummy_data.sample(1).reset_index()
     state.image_file = sample.loc[0, "image_file"]
     state.question = sample.loc[0, "question"].strip("- ")
@@ -116,24 +134,26 @@ transformed_image = get_transformed_image(state.image)
 # Display Image
 col1.image(state.image, use_column_width="auto")
 
+new_col1, new_col2 = st.beta_columns([5,5])
 # Display Question
-question = col2.text_input(label="Question", value=state.question)
-col2.markdown(
+question = new_col1.text_input(label="Question", value=state.question)
+new_col1.markdown(
     f"""**English Translation**: {question if state.question_lang_id == "en" else translate(question, 'en')}"""
 )
 
-col2.markdown("**Actual Answer in English**: " + answer_reverse_mapping[str(state.answer_label)])
-
 question_inputs = get_text_attributes(question)
 
 # Select Language
 options = ["en", "de", "es", "fr"]
-state.answer_lang_id = col2.selectbox(
+state.answer_lang_id = new_col2.selectbox(
     "Answer Language",
     index=options.index(state.answer_lang_id),
     options=options,
     format_func=lambda x: code_to_name[x],
 )
+
+new_col2.markdown("**Actual Answer in English**: " + answer_reverse_mapping[str(state.answer_label)])
+
 # Display Top-5 Predictions
 with st.spinner("Loading model..."):
     model = load_model(checkpoints[0])
@@ -144,18 +164,3 @@ labels, values = get_top_5_predictions(logits, answer_reverse_mapping)
 translated_labels = translate_labels(labels, state.answer_lang_id)
 fig = plotly_express_horizontal_bar_plot(values, translated_labels)
 st.plotly_chart(fig, use_container_width=True)
-
-
-st.write(read_markdown("abstract.md"))
-st.write(read_markdown("caveats.md"))
-st.write("# Methodology")
-st.image(
-    "./misc/Multilingual-VQA.png", caption="Masked LM model for Image-text Pretraining."
-)
-st.markdown(read_markdown("pretraining.md"))
-st.markdown(read_markdown("finetuning.md"))
-st.write(read_markdown("challenges.md"))
-st.write(read_markdown("social_impact.md"))
-st.write(read_markdown("references.md"))
-st.write(read_markdown("checkpoints.md"))
-st.write(read_markdown("acknowledgements.md"))

hf_logo.png DELETED
Binary file (5.64 kB)
misc/mvqa-logo-2.png ADDED
misc/mvqa-logo-white.png ADDED
misc/mvqa-logo.png CHANGED
sections/acknowledgements.md CHANGED
@@ -1,4 +1,6 @@
 # Acknowledgements
 We thank [Nilakshan Kunananthaseelan](https://huggingface.co/knilakshan20) for helping us whenever he could get a chance. We also thank [Abheesht Sharma](https://huggingface.co/abheesht) for helping in the discussions in the initial phases. [Luke Melas](https://github.com/lukemelas) helped us get the CC-12M data on our TPU-VMs and we are very grateful to him.
 
-This project would not be possible without the help of [Patrick](https://huggingface.co/patrickvonplaten) and [Suraj](https://huggingface.co/valhalla) who met with us and helped review our approach and guided us throughout the project.
+This project would not be possible without the help of [Patrick](https://huggingface.co/patrickvonplaten) and [Suraj](https://huggingface.co/valhalla) who met with us and helped review our approach and guided us throughout the project.
+
+Lastly, we thank the Google Team for helping answer our queries on the Slack channel, and for providing us TPU-VMs.
sections/intro.md ADDED
@@ -0,0 +1,5 @@
+This demo uses a [ViTBert model checkpoint](https://huggingface.co/flax-community/multilingual-vqa-pt-60k-ft/tree/main/ckpt-5999) fine-tuned on a [MarianMT](https://huggingface.co/transformers/model_doc/marian.html)-translated version of the [VQA v2 dataset](https://visualqa.org/challenge.html). The fine-tuning is performed after pre-training with a text-only Masked LM objective on approximately 10 million image-text pairs taken from the [Conceptual 12M dataset](https://github.com/google-research-datasets/conceptual-12m) translated using [MBart](https://huggingface.co/transformers/model_doc/mbart.html). The translations are performed in the following four languages: English, French, German and Spanish.
+
+The model predicts one out of 3129 classes in English, which can be found [here](https://huggingface.co/spaces/flax-community/Multilingual-VQA/blob/main/answer_reverse_mapping.json); the translated versions are then provided based on the language chosen as `Answer Language`. The question can be written in any of the following languages: English, French, German and Spanish.
+
+For more details, click on `Usage` or `Article` 🤗 below.
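As a rough illustration of the prediction flow described in intro.md, here is a minimal sketch — not code from this commit — that assumes a local copy of `answer_reverse_mapping.json` and calls `mtranslate` on the fly in place of the app's saved translation dictionary:

```python
# Hypothetical sketch only: this helper is not part of the repository.
import json

import numpy as np
from mtranslate import translate

# answer_reverse_mapping.json maps class indices (as strings) to English answers.
with open("answer_reverse_mapping.json") as f:
    answer_reverse_mapping = json.load(f)

def predicted_answer(logits, answer_lang_id="fr"):
    """Map the best of the 3129 logits to an answer in the chosen language."""
    label = int(np.argmax(np.asarray(logits)))
    english_answer = answer_reverse_mapping[str(label)]
    if answer_lang_id == "en":
        return english_answer
    # The app itself looks answers up in a dictionary pre-built with mtranslate;
    # translating on the fly here is an assumption for illustration.
    return translate(english_answer, answer_lang_id, "en")
```

In the deployed app, `translate_labels` reads from that saved dictionary rather than issuing a translation request per prediction.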
sections/usage.md CHANGED
@@ -8,8 +8,4 @@
 
 - Lastly, one can choose the `Answer Language`, which also uses a saved dictionary created with the `mtranslate` library for the 3129 answer options.
 
-The top-5 predictions are displayed below and their respective confidence scores are shown in form of a bar plot.
-
-For more info, scroll to the end of this app.
-
-
+- The top-5 predictions are displayed below and their respective confidence scores are shown in the form of a bar plot.
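The top-5 display mentioned above relies on the `get_top_5_predictions` and `plotly_express_horizontal_bar_plot` helpers imported in app.py; their implementations are not part of this commit, so the bodies below are only a hedged sketch of how they might work:

```python
# Assumed sketch of the two display helpers; only the names come from app.py.
import numpy as np
import plotly.express as px

def get_top_5_predictions(logits, answer_reverse_mapping):
    """Return the 5 most likely answers and their softmax confidence scores."""
    logits = np.asarray(logits).flatten()
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    top5 = np.argsort(probs)[-5:][::-1]  # indices of the 5 highest-probability classes
    labels = [answer_reverse_mapping[str(i)] for i in top5]
    values = [float(probs[i]) for i in top5]
    return labels, values

def plotly_express_horizontal_bar_plot(values, labels):
    """Horizontal bar chart of the confidence scores."""
    return px.bar(x=values, y=labels, orientation="h",
                  labels={"x": "Confidence", "y": "Answer"})
```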