HugoLaurencon's picture
first commit
2ee1fd2
raw
history blame
1.8 kB
import os

import streamlit as st
# Directory holding the pre-rendered plot images, one PNG per
# (language code, filter) pair.
PATH_PLOTS = "./plots"

# Display name shown in the sidebar -> language code used in plot file names.
# Insertion order is the order in which the options appear in the selectbox.
LANGUAGES = {
    "Arabic": "ar",
    "Basque": "eu",
    "Bengali": "bn",
    "Catalan": "ca",
    "Chinese": "zh",
    "English": "en",
    "French": "fr",
    "Hindi": "hi",
    "Indonesian": "id",
    "Portuguese": "pt",
    "Spanish": "es",
    "Urdu": "ur",
    "Vietnamese": "vi",
}

# Human-readable filter names offered in the sidebar. Spaces are replaced by
# underscores when a name is turned into part of a plot file name.
FILTERS = [
    "number of words",
    "character repetition ratio",
    "word repetition ratio",
    "special character ratio",
    "closed class word ratio",
    "flagged word ratio",
    "perplexity score",
]
class Visualization:
    """Streamlit page showing the plot for a chosen language and filter.

    The user picks a language and a filter in the sidebar; the page then
    displays the image ``{PATH_PLOTS}/{language_code}_{filter_name}.png``.

    Attributes:
        chosen_language: Language code (a value of ``LANGUAGES``) picked in
            the sidebar, or ``None`` before ``choose_language`` has run.
        chosen_filter: Filter name with spaces replaced by underscores, or
            ``None`` before ``choose_filter`` has run.
    """

    # Display name preselected in the language selectbox.
    DEFAULT_LANGUAGE = "English"

    def __init__(self):
        # Filled in by choose_language() / choose_filter(); defined here so
        # the attributes always exist on an instance.
        self.chosen_language = None
        self.chosen_filter = None

    def set_title(self):
        """Render the page title."""
        st.title("Visualization of the distributions of the filter values for the BigScience Corpus")

    def choose_language(self):
        """Show the language selectbox and store the selected language code."""
        language_names = list(LANGUAGES)
        # Look the default up by name instead of hard-coding its position, so
        # reordering or extending LANGUAGES cannot change the preselection.
        if self.DEFAULT_LANGUAGE in language_names:
            default_index = language_names.index(self.DEFAULT_LANGUAGE)
        else:
            default_index = 0
        chosen_language = st.sidebar.selectbox(
            "Language",
            options=language_names,
            index=default_index,
        )
        self.chosen_language = LANGUAGES[chosen_language]

    def choose_filter(self):
        """Show the filter selectbox and store the selected filter name."""
        chosen_filter = st.sidebar.selectbox(
            "Filter on the",
            options=FILTERS,
            index=0,
        )
        # Plot file names use underscores where the display names use spaces.
        self.chosen_filter = chosen_filter.replace(" ", "_")

    def display_plot(self):
        """Display the plot for the current selection in the middle column.

        If no image file exists for the selected language/filter pair, a
        warning is shown instead of letting ``st.image`` fail on the path.
        """
        path_image = f"{PATH_PLOTS}/{self.chosen_language}_{self.chosen_filter}.png"
        # Narrow outer columns act as margins around the wide middle one.
        left, middle, right = st.columns([1, 6, 1])
        with left:
            st.write("")
        with middle:
            if os.path.isfile(path_image):
                st.image(path_image)
            else:
                st.warning(f"No plot found for this language and filter (expected file: {path_image}).")
        with right:
            st.write("")

    def visualization(self):
        """Build the whole page: title, sidebar selectors, then the plot."""
        self.set_title()
        self.choose_language()
        self.choose_filter()
        self.display_plot()
if __name__ == "__main__":
    # Configure the wide page layout first, then build and render the page.
    st.set_page_config(layout="wide")
    visualization = Visualization()
    visualization.visualization()