# NOTE: web file-viewer residue removed here (not part of the program): "File size: 1,797 Bytes", revision hashes 94680d6 / 8f8acb9 / 881c5af, and a line-number gutter 1-78.
import os

import streamlit as st


# Directory the plot images are read from. Visualization.display_plot builds
# the image path as "{PATH_PLOTS}/{language code}_{filter name}.png".
PATH_PLOTS = "./plots"

# Maps the language name offered in the sidebar select box to the short code
# used as the prefix of the plot file name. The select box lists the names in
# this insertion order.
# NOTE(review): the codes look like ISO 639-1 codes -- confirm if that matters.
LANGUAGES = {
    "Arabic": "ar",
    "Basque": "eu",
    "Bengali": "bn",
    "Catalan": "ca",
    "Chinese": "zh",
    "English": "en",
    "French": "fr",
    "Hindi": "hi",
    "Indonesian": "id",
    "Portuguese": "pt",
    "Spanish": "es",
    "Urdu": "ur",
    "Vietnamese": "vi",
}

# Filter names offered in the sidebar select box, in display order. The
# selected name has its spaces replaced by underscores before it is used in
# the plot file name.
FILTERS = [
    "number of words",
    "character repetition ratio",
    "word repetition ratio",
    "special character ratio",
    "closed class word ratio",
    "flagged word ratio",
    "perplexity score",
]


class Visualization:
    """Streamlit page that shows a pre-rendered plot for a language and filter.

    ``choose_language`` and ``choose_filter`` store the sidebar selections on
    the instance; ``display_plot`` reads them to build the image path under
    ``PATH_PLOTS``. ``visualization`` runs all steps in the required order.
    """

    # Language preselected in the sidebar. It is looked up by name so the
    # default does not silently change when LANGUAGES is reordered or extended.
    DEFAULT_LANGUAGE = "English"

    # Set by choose_language(): the LANGUAGES value for the selected name.
    chosen_language: str
    # Set by choose_filter(): the selected filter name with "_" for spaces.
    chosen_filter: str

    def __init__(self) -> None:
        """Create the page object; selections are set by the ``choose_*`` methods."""

    def set_title(self) -> None:
        """Render the page title."""
        st.title("Visualization of the distributions of the filter values for the BigScience Corpus")

    def choose_language(self) -> None:
        """Show the sidebar language select box and store the selected code.

        The box lists the keys of ``LANGUAGES`` and preselects
        ``DEFAULT_LANGUAGE``; if that name is not a key, the first entry is
        preselected instead.
        """
        language_names = list(LANGUAGES)
        try:
            default_index = language_names.index(self.DEFAULT_LANGUAGE)
        except ValueError:
            default_index = 0

        chosen_language = st.sidebar.selectbox(
            "Language",
            options=language_names,
            index=default_index,
        )
        self.chosen_language = LANGUAGES[chosen_language]

    def choose_filter(self) -> None:
        """Show the sidebar filter select box and store the selected filter.

        The stored value has spaces replaced by underscores so it can be used
        directly in the plot file name.
        """
        chosen_filter = st.sidebar.selectbox(
            "Filter on the",
            options=FILTERS,
            index=0,
        )
        self.chosen_filter = chosen_filter.replace(" ", "_")

    def display_plot(self) -> None:
        """Display the plot image for the current language and filter.

        Must be called after ``choose_language`` and ``choose_filter``. If the
        expected image file does not exist, a warning is shown in place of the
        image instead of letting ``st.image`` fail.
        """
        path_image = f"{PATH_PLOTS}/{self.chosen_language}_{self.chosen_filter}.png"

        # The two outer columns are left empty; they only act as margins that
        # centre the image in the middle column.
        _, col_center, _ = st.columns([1, 6, 1])
        with col_center:
            if os.path.isfile(path_image):
                st.image(path_image)
            else:
                st.warning(f"No plot available for this selection (expected file: {path_image}).")

    def visualization(self) -> None:
        """Render the whole page: title, sidebar selections, then the plot."""
        self.set_title()
        self.choose_language()
        self.choose_filter()
        self.display_plot()


if __name__ == "__main__":
    # Script entry point: configure the page with the "wide" layout, then
    # build the page object and render it.
    st.set_page_config(layout="wide")
    app = Visualization()
    app.visualization()