Commit 405f2d4: "Add MLM task"
committed by gchhablani
Parent(s): 61c3dfa

Note: this view is limited to 50 files because the commit contains too many changes.

Files changed:
- app.py +42 -173
- apps/mlm.py +109 -0
- {model → apps/model}/__init__.py +0 -0
- {model → apps/model}/flax_clip_vision_bert/__init__.py +0 -0
- {model → apps/model}/flax_clip_vision_bert/configuration_clip_vision_bert.py +0 -0
- {model → apps/model}/flax_clip_vision_bert/modeling_clip_vision_bert.py +0 -0
- utils.py → apps/utils.py +6 -5
- apps/vqa.py +131 -0
- cc12m_data/.DS_Store +0 -0
- cc12m_data/images_vqa/.DS_Store +0 -0
- cc12m_data/images_vqa/00212055---Wax_cylinder_in_Dictaphone.jpg +0 -0
- cc12m_data/images_vqa/00315853---041bdd212f5b5d3d30cbc4ccf523f1a3.jpg +0 -0
- cc12m_data/images_vqa/00328633---Metal+chips+fly+in+a+high+speed+turning+operation+performed+on+a+computer+numerical+control+turning+center+%28photo+courtesy+of+Cincinnati+Milacron%29..jpg +0 -0
- cc12m_data/images_vqa/00491934---I6FTIDWLJRFPHAK4ZSZH4RQGDA.jpg +0 -0
- cc12m_data/images_vqa/00507360---MushroomRisotto1.jpg +0 -0
- cc12m_data/images_vqa/00602376---%20essay-example-writing-comparison-compare-contrast-how-to-write-poem-examples-of%20-1024x768.jpg +0 -0
- cc12m_data/images_vqa/00606341---dog-coloring-book-detailed-dogs-page2.jpg +0 -0
- cc12m_data/images_vqa/00697411---dream-house-swimming-pool-large-133359636.jpg +0 -0
- cc12m_data/images_vqa/00923733---white-commercial-van-road-motion-blurred-d-illustration-custom-designed-brandless-87900010.jpg +0 -0
- cc12m_data/images_vqa/01023838---fundraising-photo.jpg +0 -0
- cc12m_data/images_vqa/01053356---522a16b60d3f226fff652671cdde6011.jpg +0 -0
- cc12m_data/images_vqa/01157077---female-fruit-picker-worker-basket-woodcut-illustration-wearing-bandana-holding-viewed-side-set-white-61675986.jpg +0 -0
- cc12m_data/images_vqa/01275377---Young-the-Giant.jpg +0 -0
- cc12m_data/images_vqa/01327794---40250345161_452dc56b11_z.jpg +0 -0
- cc12m_data/images_vqa/01648721---170420062908YDYA.jpg +0 -0
- cc12m_data/images_vqa/01760795---The-Size-of-the-buildings-in-Shekou-are-in-direct-relation-to-the-time-it-takes-to-accomplish-tasks.jpg +0 -0
- cc12m_data/images_vqa/01761366---fresh-salad-flying-vegetables-ingredients-isolated-white-background-48747892.jpg +0 -0
- cc12m_data/images_vqa/01772764---business-woman-winner-standing-first-600w-254762824.jpg +0 -0
- cc12m_data/images_vqa/01813337---cd4df5cb43d087533e89b12c9805409e.jpg +0 -0
- cc12m_data/images_vqa/02034916---XKC6GGK5NDECNBAD5WAQUWOO5U.jpg +0 -0
- cc12m_data/images_vqa/02175876---DL2-4i4.jpg +0 -0
- cc12m_data/images_vqa/02217469---mount-macedon-victoria-australia-macedon-regional-park-region-photographed-by-karen-robinson-_march-29-2020_042-1.jpg +0 -0
- cc12m_data/images_vqa/02243845---heritage-heritage-matte-stainless-steel-sink-undermount-5_2048x.jpg +0 -0
- cc12m_data/images_vqa/02335328---margaret-and-alexander-potters-houses-1948.jpg +0 -0
- cc12m_data/images_vqa/02520451---Gower-1.jpg +0 -0
- cc12m_data/images_vqa/02912250---a-black-panther-has-been-spotted-in-weald-park-brentwood-essex-britain-shutterstock-editorial-618335e.jpg +0 -0
- cc12m_data/images_vqa/03257347---looking-farther-afield-article-size.jpg +0 -0
- cc12m_data/images_vqa/03271226---beneath-the-borealis-092517-a-very-bear-y-summer-kennicott-valley-virga.jpg +0 -0
- cc12m_data/images_vqa/03307717---tumblr_m9d4xkRM5n1rypkpio1_1280.jpg +0 -0
- cc12m_data/images_vqa/03360735---Warm-Bacon-Dip-EasyLowCarb-2.jpg +0 -0
- cc12m_data/images_vqa/03394023---m_5e36e15f2169682519441e34.jpg +0 -0
- cc12m_data/images_vqa/03401066---160328-capitol-police-mn-1530_bd68b01f1d7f1c3ab99eafa503930569.fit-760w.jpg +0 -0
- cc12m_data/images_vqa/03598306---20400805522_fba017bc51_b.jpg +0 -0
- cc12m_data/images_vqa/03618296---A+pink+and+grey+woven+baskets+sits+on+top+of+a+clear+side+table.jpg +0 -0
- cc12m_data/images_vqa/04331097---108_1504859395_24.jpg +0 -0
- cc12m_data/images_vqa/04334412---Pants-All-match-Professional-Harlan-Women-s-Loose-Skinny-High-Waist-New-2019-Suit-Summer-Leisure-Pants-2077.jpg +0 -0
- cc12m_data/images_vqa/04358571---41-Travelex.jpg +0 -0
- cc12m_data/images_vqa/04361362---square-stone-benches-around-fire-pit-outside-residential-building-sunny-day-pathways-plants-can-also-be-seen-homes-171086572.jpg +0 -0
- cc12m_data/images_vqa/04530023---49305383277_29d4a34f37_h.jpg +0 -0
- cc12m_data/images_vqa/04749808---thinkstockphotos-1858212351.jpg +0 -0
app.py
CHANGED
@@ -1,183 +1,52 @@
-import
+from apps import mlm, vqa
 import os
-from io import BytesIO
-
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
 import streamlit as st
-from
-from PIL import Image
-from streamlit.elements import markdown
-
-from model.flax_clip_vision_bert.modeling_clip_vision_bert import (
-    FlaxCLIPVisionBertForSequenceClassification,
-)
-from session import _get_state
-from utils import (
-    get_text_attributes,
-    get_top_5_predictions,
-    get_transformed_image,
-    plotly_express_horizontal_bar_plot,
-    translate_labels,
-)
-
-state = _get_state()
-
-
-@st.cache(persist=True)
-def load_model(ckpt):
-    return FlaxCLIPVisionBertForSequenceClassification.from_pretrained(ckpt)
-
-
-@st.cache(persist=True)
-def predict(transformed_image, question_inputs):
-    return np.array(model(pixel_values=transformed_image, **question_inputs)[0][0])
-
-
-def softmax(logits):
-    return np.exp(logits) / np.sum(np.exp(logits), axis=0)
-
+from multiapp import MultiApp
 
 def read_markdown(path, parent="./sections/"):
     with open(os.path.join(parent, path)) as f:
         return f.read()
 
-
-
-
-
-
-
-    "es": "Spanish",
-}
-
-with open("answer_reverse_mapping.json") as f:
-    answer_reverse_mapping = json.load(f)
-
-
-st.set_page_config(
-    page_title="Multilingual VQA",
-    layout="wide",
-    initial_sidebar_state="collapsed",
-    page_icon="./misc/mvqa-logo-3-white.png",
-)
-
-st.title("Multilingual Visual Question Answering")
-st.write(
-    "[Gunjan Chhablani](https://huggingface.co/gchhablani), [Bhavitvya Malik](https://huggingface.co/bhavitvyamalik)"
-)
-
-image_col, intro_col = st.beta_columns([3, 8])
-image_col.image("./misc/mvqa-logo-3-white.png", use_column_width="always")
-intro_col.write(read_markdown("intro.md"))
-with st.beta_expander("Usage"):
-    st.write(read_markdown("usage.md"))
-
-with st.beta_expander("Article"):
-    st.write(read_markdown("abstract.md"))
-    st.write(read_markdown("caveats.md"))
-    st.write("## Methodology")
-    col1, col2 = st.beta_columns([1,1])
-    col1.image(
-        "./misc/article/resized/Multilingual-VQA.png",
-        caption="Masked LM model for Image-text Pretraining.",
-    )
-    col2.markdown(read_markdown("pretraining.md"))
-    st.markdown(read_markdown("finetuning.md"))
-    st.write(read_markdown("challenges.md"))
-    st.write(read_markdown("social_impact.md"))
-    st.write(read_markdown("references.md"))
-    st.write(read_markdown("checkpoints.md"))
-    st.write(read_markdown("acknowledgements.md"))
-
-
-
-
-
-    state.question = dummy_data.loc[first_index, "question"].strip("- ")
-    state.answer_label = dummy_data.loc[first_index, "answer_label"]
-    state.question_lang_id = dummy_data.loc[first_index, "lang_id"]
-    state.answer_lang_id = dummy_data.loc[first_index, "lang_id"]
-
-    image_path = os.path.join("resized_images", state.image_file)
-    image = plt.imread(image_path)
-    state.image = image
-
-# col1, col2, col3 = st.beta_columns([3,3,3])
-
-if st.button(
-    "Get a random example",
-    help="Get a random example from the 100 `seeded` image-text pairs.",
-):
-    sample = dummy_data.sample(1).reset_index()
-    state.image_file = sample.loc[0, "image_file"]
-    state.question = sample.loc[0, "question"].strip("- ")
-    state.answer_label = sample.loc[0, "answer_label"]
-    state.question_lang_id = sample.loc[0, "lang_id"]
-    state.answer_lang_id = sample.loc[0, "lang_id"]
-
-    image_path = os.path.join("resized_images", state.image_file)
-    image = plt.imread(image_path)
-    state.image = image
-
-# col2.write("OR")
-
-# uploaded_file = col2.file_uploader(
-#     "Upload your image",
-#     type=["png", "jpg", "jpeg"],
-#     help="Upload a file of your choosing.",
-# )
-# if uploaded_file is not None:
-#     state.image_file = os.path.join("images/val2014", uploaded_file.name)
-#     state.image = np.array(Image.open(uploaded_file))
-
-transformed_image = get_transformed_image(state.image)
-
-new_col1, new_col2 = st.beta_columns([5, 5])
-
-# Display Image
-new_col1.image(state.image, use_column_width="always")
-
-
-# Display Question
-question = new_col2.text_input(
-    label="Question",
-    value=state.question,
-    help="Type your question regarding the image above in one of the four languages.",
-)
-new_col2.markdown(
-    f"""**English Translation**: {question if state.question_lang_id == "en" else translate(question, 'en')}"""
-)
-
-question_inputs = get_text_attributes(question)
-
-# Select Language
-options = ["en", "de", "es", "fr"]
-state.answer_lang_id = new_col2.selectbox(
-    "Answer Language",
-    index=options.index(state.answer_lang_id),
-    options=options,
-    format_func=lambda x: code_to_name[x],
-    help="The language to be used to show the top-5 labels.",
-)
-
-actual_answer = answer_reverse_mapping[str(state.answer_label)]
-new_col2.markdown(
-    "**Actual Answer**: "
-    + translate_labels([actual_answer], state.answer_lang_id)[0]
-    + " ("
-    + actual_answer
-    + ")"
-)
-
-
-
-with st.
-
-
-
-
-
-st.
+def main():
+    st.set_page_config(
+        page_title="Multilingual VQA",
+        layout="wide",
+        initial_sidebar_state="collapsed",
+        page_icon="./misc/mvqa-logo-3-white.png",
+    )
+
+    st.title("Multilingual Visual Question Answering")
+    st.write(
+        "[Gunjan Chhablani](https://huggingface.co/gchhablani), [Bhavitvya Malik](https://huggingface.co/bhavitvyamalik)"
+    )
+
+    image_col, intro_col = st.beta_columns([3, 8])
+    image_col.image("./misc/mvqa-logo-3-white.png", use_column_width="always")
+    intro_col.write(read_markdown("intro.md"))
+    with st.beta_expander("Usage"):
+        st.write(read_markdown("usage.md"))
+
+    with st.beta_expander("Article"):
+        st.write(read_markdown("abstract.md"))
+        st.write(read_markdown("caveats.md"))
+        st.write("## Methodology")
+        col1, col2 = st.beta_columns([1,1])
+        col1.image(
+            "./misc/article/Multilingual-VQA.png",
+            caption="Masked LM model for Image-text Pretraining.",
+        )
+        col2.markdown(read_markdown("pretraining.md"))
+        st.markdown(read_markdown("finetuning.md"))
+        st.write(read_markdown("challenges.md"))
+        st.write(read_markdown("social_impact.md"))
+        st.write(read_markdown("references.md"))
+        st.write(read_markdown("checkpoints.md"))
+        st.write(read_markdown("acknowledgements.md"))
+
+    app = MultiApp()
+    app.add_app("Visual Question Answering", vqa.app)
+    app.add_app("Mask Filling", mlm.app)
+    app.run()
+
+if __name__ == "__main__":
+    main()
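The refactored app.py delegates page routing to `MultiApp` from `multiapp`, a module that is not among the 50 files shown in this view. Below is a minimal sketch of what such a wrapper could look like, assuming only the `add_app(title, func)` / `run()` interface that app.py relies on; the sidebar-radio pattern is an assumption, not the repository's actual implementation.

```python
# multiapp.py (hypothetical sketch; the real file is not part of this diff view)
import streamlit as st


class MultiApp:
    def __init__(self):
        self.apps = []  # registered pages: {"title": str, "function": callable}

    def add_app(self, title, func):
        # Register a page callable (e.g. vqa.app or mlm.app) under a display title.
        self.apps.append({"title": title, "function": func})

    def run(self):
        # Let the user pick a page from the sidebar and render it.
        chosen = st.sidebar.radio("Task", self.apps, format_func=lambda a: a["title"])
        chosen["function"]()
```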
apps/mlm.py
ADDED
@@ -0,0 +1,109 @@
+
+from .utils import (
+    get_text_attributes,
+    get_top_5_predictions,
+    get_transformed_image,
+    plotly_express_horizontal_bar_plot,
+    translate_labels,
+    bert_tokenizer
+)
+
+import streamlit as st
+import numpy as np
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+
+from session import _get_state
+
+
+from .model.flax_clip_vision_bert.modeling_clip_vision_bert import (
+    FlaxCLIPVisionBertForMaskedLM,
+)
+
+def softmax(logits):
+    return np.exp(logits) / np.sum(np.exp(logits), axis=0)
+
+def app():
+    state = _get_state()
+
+    @st.cache(persist=False)
+    def predict(transformed_image, caption_inputs):
+        outputs = state.model(pixel_values=transformed_image, **caption_inputs)
+        indices = np.where(caption_inputs['input_ids']==bert_tokenizer.mask_token_id)
+        preds = outputs.logits[indices][0]
+        sorted_indices = np.argsort(preds)[::-1]  # Get reverse sorted scores
+        top_5_indices = sorted_indices[:5]
+        top_5_tokens = bert_tokenizer.convert_ids_to_tokens(top_5_indices)
+        top_5_scores = np.array(preds[top_5_indices])
+        return top_5_tokens, top_5_scores
+
+
+    @st.cache(persist=False)
+    def load_model(ckpt):
+        return FlaxCLIPVisionBertForMaskedLM.from_pretrained(ckpt)
+
+    mlm_checkpoints = ['flax-community/clip-vision-bert-cc12m-70k']
+    dummy_data = pd.read_csv("cc12m_data/vqa_val.tsv", sep="\t")
+
+    first_index = 20
+    # Init Session State
+    if state.image_file is None:
+        state.image_file = dummy_data.loc[first_index, "image_file"]
+        caption = dummy_data.loc[first_index, "caption"].strip("- ")
+        ids = bert_tokenizer(caption)
+        ids[np.random.randint(0, len(ids))] = bert_tokenizer.mask_token_id
+        state.caption = bert_tokenizer.decode(ids)
+        state.caption_lang_id = dummy_data.loc[first_index, "lang_id"]
+
+        image_path = os.path.join("cc12m_data/images_vqa", state.image_file)
+        image = plt.imread(image_path)
+        state.image = image
+
+    if state.model is None:
+        # Display Top-5 Predictions
+        with st.spinner("Loading model..."):
+            state.model = load_model(mlm_checkpoints[0])
+
+    if st.button(
+        "Get a random example",
+        help="Get a random example from the 100 `seeded` image-text pairs.",
+    ):
+        sample = dummy_data.sample(1).reset_index()
+        state.image_file = sample.loc[0, "image_file"]
+        caption = sample.loc[0, "caption"].strip("- ")
+        ids = bert_tokenizer(caption)
+        ids[np.random.randint(0, len(ids))] = bert_tokenizer.mask_token_id
+        state.caption = bert_tokenizer.decode(ids)
+        state.caption_lang_id = sample.loc[0, "lang_id"]
+
+        image_path = os.path.join("cc12m_data/images_vqa", state.image_file)
+        image = plt.imread(image_path)
+        state.image = image
+
+    transformed_image = get_transformed_image(state.image)
+
+    new_col1, new_col2 = st.beta_columns([5, 5])
+
+    # Display Image
+    new_col1.image(state.image, use_column_width="always")
+
+
+    # Display caption
+    new_col2.write("Write your text with exactly one [MASK] token.")
+    caption = new_col2.text_input(
+        label="Text",
+        value=state.caption,
+        help="Type your masked caption regarding the image above in one of the four languages.",
+    )
+
+    caption_inputs = get_text_attributes(caption)
+
+    # Display Top-5 Predictions
+
+    with st.spinner("Predicting..."):
+        logits = predict(transformed_image, dict(caption_inputs))
+        logits = softmax(logits)
+        labels, values = get_top_5_predictions(logits)
+        fig = plotly_express_horizontal_bar_plot(values, labels)
+        st.plotly_chart(fig, use_container_width=True)
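The `predict` helper in apps/mlm.py locates the `[MASK]` position in the tokenized caption and ranks the vocabulary logits at that position. A self-contained sketch of the same flow follows, with random logits standing in for the `FlaxCLIPVisionBertForMaskedLM` output; the caption string and shapes are illustrative assumptions, not repository data.

```python
import numpy as np
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-uncased")
caption = "a dog playing in the [MASK]"  # illustrative input
inputs = tokenizer([caption], return_token_type_ids=True, return_tensors="np")

# Find where the [MASK] token sits in the input ids.
mask_positions = np.where(inputs["input_ids"] == tokenizer.mask_token_id)

# In the app, logits come from state.model(pixel_values=..., **inputs).logits;
# random values of the right shape (batch, seq_len, vocab) stand in here.
logits = np.random.randn(*inputs["input_ids"].shape, tokenizer.vocab_size)
preds = logits[mask_positions][0]

top_5 = np.argsort(preds)[::-1][:5]  # highest-scoring vocabulary ids first
print(tokenizer.convert_ids_to_tokens(top_5.tolist()))
```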
{model → apps/model}/__init__.py
RENAMED
File without changes

{model → apps/model}/flax_clip_vision_bert/__init__.py
RENAMED
File without changes

{model → apps/model}/flax_clip_vision_bert/configuration_clip_vision_bert.py
RENAMED
File without changes

{model → apps/model}/flax_clip_vision_bert/modeling_clip_vision_bert.py
RENAMED
File without changes
utils.py → apps/utils.py
RENAMED
@@ -3,8 +3,7 @@ import json
 import numpy as np
 import plotly.express as px
 import torch
-from
-from torchvision.io import ImageReadMode, read_image
+from torchvision.io import read_image
 from torchvision.transforms import CenterCrop, ConvertImageDtype, Normalize, Resize
 from torchvision.transforms.functional import InterpolationMode
 from transformers import BertTokenizerFast
@@ -41,15 +40,17 @@ def get_transformed_image(image):
 
 bert_tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-uncased")
 
-
 def get_text_attributes(text):
     return bert_tokenizer([text], return_token_type_ids=True, return_tensors="np")
 
 
-def get_top_5_predictions(logits, answer_reverse_mapping):
+def get_top_5_predictions(logits, answer_reverse_mapping=None):
     indices = np.argsort(logits)[-5:]
     values = logits[indices]
-
+    if answer_reverse_mapping is not None:
+        labels = [answer_reverse_mapping[str(i)] for i in indices]
+    else:
+        labels = bert_tokenizer.convert_ids_to_tokens(indices)
     return labels, values
 
 
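With `answer_reverse_mapping` now optional, the same helper serves both pages: the VQA app passes its answer-label mapping, while the mask-filling app falls back to tokenizer wordpieces. A hypothetical usage sketch, with fake scores and a fake mapping standing in for real model outputs:

```python
import numpy as np
from apps.utils import get_top_5_predictions

logits = np.random.rand(1000)  # fake scores; the real app passes vocabulary- or label-sized logits
tokens, scores = get_top_5_predictions(logits)  # MLM path: wordpiece labels

fake_mapping = {str(i): f"answer_{i}" for i in range(len(logits))}  # illustrative mapping
answers, scores = get_top_5_predictions(logits, fake_mapping)  # VQA path: answer labels
```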
apps/vqa.py
ADDED
@@ -0,0 +1,131 @@
+
+from .utils import (
+    get_text_attributes,
+    get_top_5_predictions,
+    get_transformed_image,
+    plotly_express_horizontal_bar_plot,
+    translate_labels,
+)
+
+import streamlit as st
+import numpy as np
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+import json
+
+from mtranslate import translate
+from session import _get_state
+
+
+from .model.flax_clip_vision_bert.modeling_clip_vision_bert import (
+    FlaxCLIPVisionBertForSequenceClassification,
+)
+
+def softmax(logits):
+    return np.exp(logits) / np.sum(np.exp(logits), axis=0)
+
+def app():
+    state = _get_state()
+
+    @st.cache(persist=True)
+    def predict(transformed_image, question_inputs):
+        return np.array(state.model(pixel_values=transformed_image, **question_inputs)[0][0])
+
+
+    @st.cache(persist=True)
+    def load_model(ckpt):
+        return FlaxCLIPVisionBertForSequenceClassification.from_pretrained(ckpt)
+
+    vqa_checkpoints = ["flax-community/clip-vision-bert-vqa-ft-6k"]  # TODO: Maybe add more checkpoints?
+    dummy_data = pd.read_csv("dummy_vqa_multilingual.tsv", sep="\t")
+    code_to_name = {
+        "en": "English",
+        "fr": "French",
+        "de": "German",
+        "es": "Spanish",
+    }
+
+
+    with open("answer_reverse_mapping.json") as f:
+        answer_reverse_mapping = json.load(f)
+
+    first_index = 20
+    # Init Session State
+    if state.image_file is None:
+        state.image_file = dummy_data.loc[first_index, "image_file"]
+        state.question = dummy_data.loc[first_index, "question"].strip("- ")
+        state.answer_label = dummy_data.loc[first_index, "answer_label"]
+        state.question_lang_id = dummy_data.loc[first_index, "lang_id"]
+        state.answer_lang_id = dummy_data.loc[first_index, "lang_id"]
+
+        image_path = os.path.join("resized_images", state.image_file)
+        image = plt.imread(image_path)
+        state.image = image
+
+    if state.model is None:
+        # Display Top-5 Predictions
+        with st.spinner("Loading model..."):
+            state.model = load_model(vqa_checkpoints[0])
+
+    if st.button(
+        "Get a random example",
+        help="Get a random example from the 100 `seeded` image-text pairs.",
+    ):
+        sample = dummy_data.sample(1).reset_index()
+        state.image_file = sample.loc[0, "image_file"]
+        state.question = sample.loc[0, "question"].strip("- ")
+        state.answer_label = sample.loc[0, "answer_label"]
+        state.question_lang_id = sample.loc[0, "lang_id"]
+        state.answer_lang_id = sample.loc[0, "lang_id"]
+
+        image_path = os.path.join("resized_images", state.image_file)
+        image = plt.imread(image_path)
+        state.image = image
+
+    transformed_image = get_transformed_image(state.image)
+
+    new_col1, new_col2 = st.beta_columns([5, 5])
+
+    # Display Image
+    new_col1.image(state.image, use_column_width="always")
+
+
+    # Display Question
+    question = new_col2.text_input(
+        label="Question",
+        value=state.question,
+        help="Type your question regarding the image above in one of the four languages.",
+    )
+    new_col2.markdown(
+        f"""**English Translation**: {question if state.question_lang_id == "en" else translate(question, 'en')}"""
+    )
+
+    question_inputs = get_text_attributes(question)
+
+    # Select Language
+    options = ["en", "de", "es", "fr"]
+    state.answer_lang_id = new_col2.selectbox(
+        "Answer Language",
+        index=options.index(state.answer_lang_id),
+        options=options,
+        format_func=lambda x: code_to_name[x],
+        help="The language to be used to show the top-5 labels.",
+    )
+
+    actual_answer = answer_reverse_mapping[str(state.answer_label)]
+    new_col2.markdown(
+        "**Actual Answer**: "
+        + translate_labels([actual_answer], state.answer_lang_id)[0]
+        + " ("
+        + actual_answer
+        + ")"
+    )
+
+    with st.spinner("Predicting..."):
+        logits = predict(transformed_image, dict(question_inputs))
+        logits = softmax(logits)
+        labels, values = get_top_5_predictions(logits, answer_reverse_mapping)
+        translated_labels = translate_labels(labels, state.answer_lang_id)
+        fig = plotly_express_horizontal_bar_plot(values, translated_labels)
+        st.plotly_chart(fig, use_container_width=True)
cc12m_data/.DS_Store
ADDED
Binary file (6.15 kB)

cc12m_data/images_vqa/.DS_Store
ADDED
Binary file (6.15 kB)
cc12m_data/images_vqa/00212055---Wax_cylinder_in_Dictaphone.jpg
ADDED
cc12m_data/images_vqa/00315853---041bdd212f5b5d3d30cbc4ccf523f1a3.jpg
ADDED
cc12m_data/images_vqa/00328633---Metal+chips+fly+in+a+high+speed+turning+operation+performed+on+a+computer+numerical+control+turning+center+%28photo+courtesy+of+Cincinnati+Milacron%29..jpg
ADDED
cc12m_data/images_vqa/00491934---I6FTIDWLJRFPHAK4ZSZH4RQGDA.jpg
ADDED
cc12m_data/images_vqa/00507360---MushroomRisotto1.jpg
ADDED
cc12m_data/images_vqa/00602376---%20essay-example-writing-comparison-compare-contrast-how-to-write-poem-examples-of%20-1024x768.jpg
ADDED
cc12m_data/images_vqa/00606341---dog-coloring-book-detailed-dogs-page2.jpg
ADDED
cc12m_data/images_vqa/00697411---dream-house-swimming-pool-large-133359636.jpg
ADDED
cc12m_data/images_vqa/00923733---white-commercial-van-road-motion-blurred-d-illustration-custom-designed-brandless-87900010.jpg
ADDED
cc12m_data/images_vqa/01023838---fundraising-photo.jpg
ADDED
cc12m_data/images_vqa/01053356---522a16b60d3f226fff652671cdde6011.jpg
ADDED
cc12m_data/images_vqa/01157077---female-fruit-picker-worker-basket-woodcut-illustration-wearing-bandana-holding-viewed-side-set-white-61675986.jpg
ADDED
cc12m_data/images_vqa/01275377---Young-the-Giant.jpg
ADDED
cc12m_data/images_vqa/01327794---40250345161_452dc56b11_z.jpg
ADDED
cc12m_data/images_vqa/01648721---170420062908YDYA.jpg
ADDED
cc12m_data/images_vqa/01760795---The-Size-of-the-buildings-in-Shekou-are-in-direct-relation-to-the-time-it-takes-to-accomplish-tasks.jpg
ADDED
cc12m_data/images_vqa/01761366---fresh-salad-flying-vegetables-ingredients-isolated-white-background-48747892.jpg
ADDED
cc12m_data/images_vqa/01772764---business-woman-winner-standing-first-600w-254762824.jpg
ADDED
cc12m_data/images_vqa/01813337---cd4df5cb43d087533e89b12c9805409e.jpg
ADDED
cc12m_data/images_vqa/02034916---XKC6GGK5NDECNBAD5WAQUWOO5U.jpg
ADDED
cc12m_data/images_vqa/02175876---DL2-4i4.jpg
ADDED
cc12m_data/images_vqa/02217469---mount-macedon-victoria-australia-macedon-regional-park-region-photographed-by-karen-robinson-_march-29-2020_042-1.jpg
ADDED
cc12m_data/images_vqa/02243845---heritage-heritage-matte-stainless-steel-sink-undermount-5_2048x.jpg
ADDED
cc12m_data/images_vqa/02335328---margaret-and-alexander-potters-houses-1948.jpg
ADDED
cc12m_data/images_vqa/02520451---Gower-1.jpg
ADDED
cc12m_data/images_vqa/02912250---a-black-panther-has-been-spotted-in-weald-park-brentwood-essex-britain-shutterstock-editorial-618335e.jpg
ADDED
cc12m_data/images_vqa/03257347---looking-farther-afield-article-size.jpg
ADDED
cc12m_data/images_vqa/03271226---beneath-the-borealis-092517-a-very-bear-y-summer-kennicott-valley-virga.jpg
ADDED
cc12m_data/images_vqa/03307717---tumblr_m9d4xkRM5n1rypkpio1_1280.jpg
ADDED
cc12m_data/images_vqa/03360735---Warm-Bacon-Dip-EasyLowCarb-2.jpg
ADDED
cc12m_data/images_vqa/03394023---m_5e36e15f2169682519441e34.jpg
ADDED
cc12m_data/images_vqa/03401066---160328-capitol-police-mn-1530_bd68b01f1d7f1c3ab99eafa503930569.fit-760w.jpg
ADDED
cc12m_data/images_vqa/03598306---20400805522_fba017bc51_b.jpg
ADDED
cc12m_data/images_vqa/03618296---A+pink+and+grey+woven+baskets+sits+on+top+of+a+clear+side+table.jpg
ADDED
cc12m_data/images_vqa/04331097---108_1504859395_24.jpg
ADDED
cc12m_data/images_vqa/04334412---Pants-All-match-Professional-Harlan-Women-s-Loose-Skinny-High-Waist-New-2019-Suit-Summer-Leisure-Pants-2077.jpg
ADDED
cc12m_data/images_vqa/04358571---41-Travelex.jpg
ADDED
cc12m_data/images_vqa/04361362---square-stone-benches-around-fire-pit-outside-residential-building-sunny-day-pathways-plants-can-also-be-seen-homes-171086572.jpg
ADDED
cc12m_data/images_vqa/04530023---49305383277_29d4a34f37_h.jpg
ADDED
cc12m_data/images_vqa/04749808---thinkstockphotos-1858212351.jpg
ADDED