Spaces:

flax-community
/

Multilingual-VQA

Runtime error

File size: 12,683 Bytes

44c11f2
 
c399665
ba57ea8
53ddc87
f54892c
b3c9da2
b929de0
919efff
cd12ada
b3c9da2
919efff
b3c9da2
919efff
b3c9da2
919efff
 
b3c9da2
919efff
b3c9da2
919efff
 
29e00f0
44c11f2
b3c9da2
44c11f2
8527e35
919efff
8527e35
919efff
8527e35
b929de0
63672a5
919efff
 
b3c9da2
8527e35
919efff
8527e35
919efff
8527e35
b929de0
63672a5
919efff
b3c9da2
44c11f2
919efff
b3c9da2
287b7cd
919efff
b3c9da2
 
53ddc87
b3c9da2
53ddc87
b3c9da2
53ddc87
919efff
b3c9da2
44c11f2
919efff
b3c9da2
53ddc87
b3c9da2
53ddc87
919efff
b3c9da2
919efff
546443e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80225b5
 
 
 
 
919efff

import streamlit as st
from apps.utils import read_markdown
from .streamlit_tensorboard import st_tensorboard, kill_tensorboard
from .utils import Toc
def _three_columns():
    """Create three equal-width columns on both old and new Streamlit.

    ``st.beta_columns`` was renamed to ``st.columns`` and the beta alias was
    removed in later releases, so the stable name is preferred and the beta
    one is only used when the stable name does not exist.
    """
    make_columns = getattr(st, "columns", None) or st.beta_columns
    return make_columns([1, 1, 1])


def _show_example(column, image, *lines):
    """Render one VQA example into ``column``.

    Args:
        column: Streamlit column (or any container exposing ``image`` and
            ``write``) to draw into.
        image: File name inside ``./sections/examples/``, or ``None`` to
            write only text (used when the picture is shown in another
            column).
        *lines: Markdown strings written to the column, in the given order.
    """
    if image is not None:
        column.image(
            "./sections/examples/" + image,
            use_column_width="auto",
            width=300,
        )
    for line in lines:
        column.write(line)


def app(state=None):
    """Render the project write-up page of the Multilingual-VQA demo.

    The page consists of a table of contents, the article sections loaded
    through ``read_markdown``, two embedded TensorBoard views (pre-training
    logs on port 6006, fine-tuning logs on port 6007) and a gallery of
    hand-picked VQA examples.

    Args:
        state: Accepted for interface compatibility with the caller; it is
            not read by this function.
    """
    toc = Toc()
    st.info("Welcome to our Multilingual-VQA demo. Please use the navigation sidebar to move to our demo, or scroll below to read all about our project. 🤗 In case the sidebar isn't properly rendered, please change to a smaller window size and back to full screen.")

    st.header("Table of contents")
    # Reserve the spot now; it is filled by toc.generate() at the very end,
    # after all headers have been registered.
    toc.placeholder()

    # ------------------------------------------------------------------
    # Article
    # ------------------------------------------------------------------
    toc.header("Introduction and Motivation")
    st.write(read_markdown("intro/intro.md"))
    toc.subheader("Novel Contributions")
    st.write(read_markdown("intro/contributions.md"))

    toc.header("Methodology")

    toc.subheader("Pre-training")
    st.write(read_markdown("pretraining/intro.md"))
    st.image(
        "./misc/article/Multilingual-VQA.png",
        caption="Masked LM model for Image-text Pre-training.",
    )
    toc.subsubheader("MLM Dataset")
    st.write(read_markdown("pretraining/data.md"))
    toc.subsubheader("MLM Model")
    st.write(read_markdown("pretraining/model.md"))
    toc.subsubheader("MLM Training Logs")
    st.info("In case the TensorBoard logs are not displayed, please visit this link: https://huggingface.co/flax-community/multilingual-vqa-pt-ckpts/tensorboard")
    st_tensorboard(logdir='./logs/pretrain_logs', port=6006)

    toc.subheader("Finetuning")
    toc.subsubheader("VQA Dataset")
    st.write(read_markdown("finetuning/data.md"))
    toc.subsubheader("VQA Model")
    st.write(read_markdown("finetuning/model.md"))
    toc.subsubheader("VQA Training Logs")
    st.info("In case the TensorBoard logs are not displayed, please visit this link: https://huggingface.co/flax-community/multilingual-vqa-pt-60k-ft/tensorboard")
    # Uses a different port from the pre-training TensorBoard call above.
    st_tensorboard(logdir='./logs/finetune_logs', port=6007)

    toc.header("Challenges and Technical Difficulties")
    st.write(read_markdown("challenges.md"))

    toc.header("Limitations")
    st.write(read_markdown("limitations.md"))

    toc.header("Conclusion, Future Work, and Social Impact")
    toc.subheader("Conclusion")
    st.write(read_markdown("conclusion_future_work/conclusion.md"))
    toc.subheader("Future Work")
    st.write(read_markdown("conclusion_future_work/future_work.md"))
    toc.subheader("Social Impact")
    st.write(read_markdown("conclusion_future_work/social_impact.md"))

    toc.header("References")
    st.write(read_markdown("references.md"))

    toc.header("Checkpoints")
    st.write(read_markdown("checkpoints/checkpoints.md"))
    toc.subheader("Other Checkpoints")
    st.write(read_markdown("checkpoints/other_checkpoints.md"))

    toc.header("Acknowledgements")
    st.write(read_markdown("acknowledgements.md"))

    # ------------------------------------------------------------------
    # Example gallery
    # ------------------------------------------------------------------
    toc.header("VQA Examples")

    toc.subheader("Color Questions")
    col1, col2, col3 = _three_columns()
    _show_example(
        col1,
        "men_riding_horses.jpeg",
        "**Custom Question**: What color are the horses?",
        "**Predicted Answer**: brown✅",
    )
    _show_example(
        col2,
        "cat_color.jpeg",
        "**Custom Question**: What color is the cat?",
        "**Predicted Answer**: white✅",
    )
    _show_example(
        col3,
        "men_happy.jpeg",
        "**Custom Question**: What color is the man's jacket?",
        "**Predicted Answer**: black⚫",
    )
    _show_example(
        col1,
        "car_color.jpeg",
        "**Actual Question**: What color is the car?",
        "**Predicted Answer**: blue❎",
    )
    _show_example(
        col2,
        "coat_color.jpeg",
        "**Actual Question**: What color is this person's coat?",
        "**Predicted Answer**: blue✅",
    )

    toc.subheader("Counting Questions")
    col1, col2, col3 = _three_columns()
    _show_example(
        col1,
        "giraffe_zebra.jpeg",
        "**Actual Question**: How many zebras are there?",
        "**Predicted Answer**: 0❎",
    )
    _show_example(
        col2,
        "giraffe_zebra.jpeg",
        "**Custom Question**: How many giraffes are there?",
        "**Predicted Answer**: 2❎",
    )
    _show_example(
        col3,
        "teddy.jpeg",
        "**Custom Question**: How many teddy bears are present in the image?",
        "**Predicted Answer**: 3✅",
    )
    _show_example(
        col1,
        "candle_count.jpeg",
        "**Actual Question**: ¿Cuantas velas hay en el cupcake?",
        "**English Translation**: How many candles are in the cupcake?",
        "**Predicted Answer**: 0❎",
    )
    _show_example(
        col1,
        "people_picture.jpeg",
        "**Actual Question**: ¿A cuánta gente le están tomando una foto?",
        "**English Translation**: How many people are you taking a picture of?",
        "**Predicted Answer**: 10❎",
    )

    toc.subheader("Size/Shape Questions")
    # Only the first column is populated; the other two keep the image at
    # the same width as in the neighbouring sections.
    col1, _, _ = _three_columns()
    _show_example(
        col1,
        "vase.jpeg",
        "**Actual Question**:  What shape is the vase? ",
        "**Predicted Answer**: round✅",
    )

    toc.subheader("Yes/No Questions")
    col1, col2, col3 = _three_columns()
    _show_example(
        col1,
        "teddy.jpeg",
        "**Actual Question**: Sind das drei Teddybären?",
        # The German question asks about *three* teddy bears.
        "**English Translation**: Are those three teddy bears?",
        "**Predicted Answer**: Ja (yes)✅",
    )
    _show_example(
        col2,
        "winter.jpeg",
        "**Actual Question**: ¿Se lo tomaron en invierno?",
        "**English Translation**: Did they take it in winter?",
        "**Predicted Answer**: si (yes)✅",
    )
    _show_example(
        col3,
        "clock.jpeg",
        "**Actual Question**: Is the clock ornate? ",
        "**Predicted Answer**: yes✅",
    )
    _show_example(
        col1,
        "decorated_building.jpeg",
        "**Actual Question**: Ist das Gebäude orniert?",
        "**English Translation**: Is the building decorated?",
        "**Predicted Answer**: Ja (yes)✅",
    )
    _show_example(
        col2,
        "commuter_train.jpeg",
        "**Actual Question**: Ist das ein Pendler-Zug?",
        "**English Translation**: Is that a commuter train?",
        "**Predicted Answer**: Ja (yes)❎",
    )
    _show_example(
        col3,
        "is_in_a_restaurant.jpeg",
        "**Actual Question**: Elle est dans un restaurant?",
        "**English Translation**: Is she in a restaurant?",
        "**Predicted Answer**: Oui (yes)❎",
    )
    _show_example(
        col1,
        "giraffe_eyes.jpeg",
        "**Actual Question**: Est-ce que l'œil de la girafe est fermé?",
        "**English Translation**: Are the giraffe's eyes closed?",
        "**Predicted Answer**: Oui (yes)❎",
    )

    toc.subheader("Negatives Test")
    # One picture on the left; the question variants go in the two columns
    # to its right.
    col1, col2, col3 = _three_columns()
    _show_example(col1, "men_happy.jpeg")
    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man happy?",
        "**Predicted Answer**: Yes✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not happy?",
        "**Predicted Answer**: Yes❎",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man sad?",
        "**Predicted Answer**: No✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not sad?",
        "**Predicted Answer**: No❎",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: Is the man unhappy?",
        "**Predicted Answer**: No✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Is the man not unhappy?",
        "**Predicted Answer**: No❎",
    )

    toc.subheader("Multilinguality Test")

    toc.subsubheader("Color Question")
    col1, col2, col3 = _three_columns()
    _show_example(col1, "truck_color.jpeg")
    _show_example(
        col2,
        None,
        "**Actual Question**: What color is the building?",
        "**Predicted Answer**: red✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Welche Farbe hat das Gebäude?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rot (red)✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿De qué color es el edificio?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rojo (red)✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: De quelle couleur est le bâtiment ?",
        "**English Translation**: What color is the building?",
        "**Predicted Answer**: rouge (red)✅",
    )

    toc.subsubheader("Counting Question")
    col1, col2, col3 = _three_columns()
    _show_example(col1, "bear.jpeg")
    _show_example(
        col2,
        None,
        "**Actual Question**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Wie viele Bären siehst du?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿Cuántos osos ves?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Combien d'ours voyez-vous ?",
        "**English Translation**: How many bears do you see?",
        "**Predicted Answer**: 1✅",
    )

    toc.subsubheader("Misc Question")
    col1, col2, col3 = _three_columns()
    _show_example(col1, "bench.jpeg")
    _show_example(
        col2,
        None,
        "**Actual Question**: Where is the bench?",
        "**Predicted Answer**: field✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Où est le banc ?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: domaine (field)✅",
    )
    _show_example(
        col2,
        None,
        "**Actual Question**: ¿Dónde está el banco?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: campo (field)✅",
    )
    _show_example(
        col3,
        None,
        "**Actual Question**: Wo ist die Bank?",
        "**English Translation**: Where is the bench?",
        "**Predicted Answer**: Feld (field)✅",
    )

    toc.subheader("Misc Questions")
    col1, col2, col3 = _three_columns()
    _show_example(
        col1,
        "tennis.jpeg",
        "**Actual Question**: ¿Qué clase de juego está viendo la multitud?",
        "**English Translation**: What kind of game is the crowd watching?",
        "**Predicted Answer**: tenis (tennis)✅",
    )
    _show_example(
        col2,
        "men_body_suits.jpeg",
        "**Custom Question**: What are the men wearing?",
        "**Predicted Answer**: wetsuits✅",
    )
    _show_example(
        col3,
        "bathroom.jpeg",
        "**Actual Question**: ¿A qué habitación perteneces?",
        "**English Translation**: What room do you belong to?",
        "**Predicted Answer**: bano (bathroom)✅",
    )
    _show_example(
        col1,
        "men_riding_horses.jpeg",
        "**Custom Question**: What are the men riding?",
        "**Predicted Answer**: horses✅",
    )
    _show_example(
        col2,
        "inside_outside.jpeg",
        "**Actual Question**: Was this taken inside or outside?",
        "**Predicted Answer**: inside✅",
    )
    _show_example(
        col3,
        "dog_looking_at.jpeg",
        "**Actual Question**: Was guckt der Hund denn so?",
        "**English Translation**: What is the dog looking at?",
        "**Predicted Answer**: Frisbeescheibe (frisbee)❎",
    )

    # Fill the placeholder reserved at the top with the collected headers.
    toc.generate()