import streamlit as st
from datasets import load_dataset
import pandas as pd
import plotly.graph_objects as go
from transformers import pipeline

@st.cache_data
def fetch_counts():
    """Return row counts for the ``atlasia/darija-translation`` train split.

    The split is loaded and converted to a pandas DataFrame; the result is
    memoized by ``st.cache_data``.

    Returns:
        A dict with three integer entries:
        ``"n_eng"`` - rows whose ``en`` column is not null,
        ``"n_fr"`` - rows whose ``fr`` column is not null,
        ``"n"`` - total number of rows.
    """
    frame = pd.DataFrame(
        load_dataset("atlasia/darija-translation", split="train")
    )
    # notna().sum() counts the same rows as len(col.dropna()); int() keeps
    # the values plain Python ints, as len() would return.
    english_rows = int(frame["en"].notna().sum())
    french_rows = int(frame["fr"].notna().sum())
    return {"n_eng": english_rows, "n_fr": french_rows, "n": len(frame)}

@st.cache_resource
def _load_terjman_pipeline():
    """Build the ``atlasia/Terjman-Large`` text2text-generation pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object is constructed
    once and reused, instead of being rebuilt on every translation request.
    """
    return pipeline("text2text-generation", model="atlasia/Terjman-Large")


def terjman(input_text: str) -> str:
    """Translate ``input_text`` with the ``atlasia/Terjman-Large`` model.

    Args:
        input_text: The sentence to translate (the UI labels this field as
            English input and the result as Darija).

    Returns:
        The ``generated_text`` of the first pipeline result, or an empty
        string when ``input_text`` is empty or only whitespace.
    """
    # Nothing to translate: skip the model call instead of generating
    # output for an empty prompt.
    if not input_text.strip():
        return ""
    outputs = _load_terjman_pipeline()(input_text, max_length=512)
    return outputs[0]["generated_text"]

@st.cache_resource
def _load_transliteration_pipeline():
    """Build the ``atlasia/Transliteration-Moroccan-Darija`` pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object is constructed
    once and reused, instead of being rebuilt on every call.
    """
    return pipeline(
        "text2text-generation",
        model="atlasia/Transliteration-Moroccan-Darija",
    )


def transliterate(input_text: str) -> str:
    """Run ``input_text`` through the Moroccan Darija transliteration model.

    Args:
        input_text: The word or letters to transliterate.

    Returns:
        The ``generated_text`` of the first pipeline result (the call is
        made with ``max_length=50``), or an empty string when
        ``input_text`` is empty or only whitespace.
    """
    # Nothing to transliterate: skip the model call for a blank prompt.
    if not input_text.strip():
        return ""
    outputs = _load_transliteration_pipeline()(input_text, max_length=50)
    return outputs[0]["generated_text"]

if __name__ == "__main__":
    # Page layout, rendered in order: contribution banner, translation
    # widget, then dataset statistics (progress gauge + language pie chart).
    counts = fetch_counts()
    n_goal = 100000  # upper bound of the gauge axis
    total_submissions = counts["n"]

    st.text("")  # empty text element; presumably a spacer above the banner
    # NOTE(review): unsafe_allow_html is set but the strings below contain no
    # markup (and "Contribute here:" has no link) - confirm whether HTML was
    # lost from these literals.
    st.markdown(
        """

Contribute now to help build a better Darija dataset for all Moroccans Contribute here:

""",
        unsafe_allow_html=True,
    )
    st.divider()

    # Disabled transliteration widget, kept for reference:
    # with st.container() as c:
    #     # add a block where users can input text and get a translation
    #     st.markdown(
    #         """
    #
    # 🔠Keyboard: Transliterate Letters from Latin to Arabic
    #
    #         """,
    #         unsafe_allow_html=True,
    #     )
    #     col1, col2 = st.columns(2)
    #     with col1:
    #         input_text = st.text_area(":grey[Enter a word/letter in English ⬇]", "")
    #         button = st.button("Transliterate")
    #     with col2:
    #         if button:
    #             with st.spinner('Transliterating...'):
    #                 translation = transliterate(input_text)
    #             st.text_area(
    #                 ":grey[Transliteration]",
    #                 translation,
    #             )
    #     st.divider()

    with st.container():
        # add a block where users can input text and get a translation
        st.markdown(
            """

💬Terjman: Translate to Darija

""",
            unsafe_allow_html=True,
        )
        st.caption(
            """

This model has been developed thanks to your contributions. While it's not perfect yet, your continued input is key for making it better.

""",
            unsafe_allow_html=True,
        )
        col1, col2 = st.columns(2)
        with col1:
            input_text = st.text_area(":grey[Enter a sentence in English ⬇]", "")
            button = st.button("Translate")
        with col2:
            # Only run the model on the rerun triggered by the button click.
            if button:
                with st.spinner('Translating...'):
                    translation = terjman(input_text)
                st.text_area(
                    ":grey[Translation in Darija]",
                    translation,
                )

    # add a separator
    st.divider()
    st.markdown(
        """

📊 Data statistics

""",
        unsafe_allow_html=True,
    )

    # make progress chart
    fig = go.Figure(
        go.Indicator(
            domain={"x": [0, 1], "y": [0, 1]},
            value=total_submissions,
            mode="gauge+number+delta",
            title={"text": "Number of translations"},
            # Delta is shown against a fixed baseline of 42000 -
            # presumably an earlier dataset size; TODO confirm.
            delta={"reference": 42000},
            gauge={
                "axis": {"range": [0, n_goal]},
                "steps": [
                    {"range": [0, total_submissions], "color": "gray"},
                ],
                # Green marker line at half of the goal.
                "threshold": {
                    "line": {"color": "green", "width": 4},
                    "thickness": 0.75,
                    "value": n_goal / 2,
                },
            },
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    labels = ["English", "French"]
    values = [counts["n_eng"], counts["n_fr"]]
    # change color to blue and white
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.2, 0])])
    fig.update_traces(marker=dict(colors=["#46607b", "#FFFFFF"]))
    st.plotly_chart(fig, use_container_width=True)