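"""Streamlit entry point for the Multilingual VQA demo.

Run with `streamlit run app.py` (assuming this file is saved as `app.py`).
"""
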
import streamlit as st

from apps import article, examples, mlm, vqa
from multiapp import MultiApp
from session import _get_state
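
# Local helpers (interfaces inferred from their use below):
# - session._get_state() returns a per-session state object; state.sync() at
#   the end of a run persists it across Streamlit reruns (the custom
#   session-state pattern that predates st.session_state).
# - multiapp.MultiApp routes between sub-apps registered via
#   add_app(title, render_fn); run() renders the one picked in the sidebar.
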

def main():
    state = _get_state()
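    # Streamlit requires set_page_config to run before any other Streamlit
    # command in the script.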
    st.set_page_config(
        page_title="Multilingual VQA",
        layout="wide",
        initial_sidebar_state="auto",
        page_icon="./misc/mvqa-logo-3-white.png",
    )

    st.title("Multilingual Visual Question Answering")
    st.write(
        "[Gunjan Chhablani](https://huggingface.co/gchhablani), [Bhavitvya Malik](https://huggingface.co/bhavitvyamalik)"
    )

    st.sidebar.title("Multilingual VQA")
    st.sidebar.image("./misc/mvqa-logo-3-white.png")
    st.sidebar.write(
        "Multilingual VQA addresses the challenge of visual question answering "
        "in a multilingual setting. Here, we fuse the CLIP vision transformer "
        "into BERT and pre-train and fine-tune the model on translated versions "
        "of the Conceptual-12M and VQAv2 datasets. Please use the radio buttons "
        "below to navigate."
    )
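
    # Register the sub-apps; MultiApp renders whichever one is selected.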
    app = MultiApp(state)
    app.add_app("Article", article.app)
    app.add_app("Visual Question Answering", vqa.app)
    app.add_app("Mask Filling", mlm.app)
    app.add_app("Examples", examples.app)
    app.run()
    state.sync()  # persist the session state so it survives the next rerun

if __name__ == "__main__":
    main()