# imomayiz's picture
# removed transliteration
# b6e02e5 verified
import streamlit as st
from datasets import load_dataset
import pandas as pd
import plotly.graph_objects as go
from transformers import pipeline
@st.cache_data
def fetch_counts():
    """Return row counts for the ``atlasia/darija-translation`` train split.

    The split is loaded and converted to a pandas DataFrame, then summarised
    as a dict with three integer entries:

    * ``"n_eng"`` -- rows whose ``"en"`` column is not null
    * ``"n_fr"``  -- rows whose ``"fr"`` column is not null
    * ``"n"``     -- total number of rows

    The function is wrapped in ``st.cache_data``, so Streamlit can reuse the
    result instead of re-running the body.
    """
    frame = pd.DataFrame(
        load_dataset("atlasia/darija-translation", split="train")
    )
    # Series.count() counts non-null entries, i.e. the length after dropna().
    return {
        "n_eng": int(frame["en"].count()),
        "n_fr": int(frame["fr"].count()),
        "n": len(frame),
    }
@st.cache_resource
def _load_terjman_pipeline():
    """Build the ``atlasia/Terjman-Large`` text2text pipeline.

    Wrapped in ``st.cache_resource`` so the model is loaded once and the same
    pipeline object is reused afterwards, instead of being rebuilt on every
    translation request.
    """
    return pipeline("text2text-generation", model="atlasia/Terjman-Large")


def terjman(input_text: str) -> str:
    """Translate ``input_text`` with the ``atlasia/Terjman-Large`` model.

    Args:
        input_text: Text to translate (the UI feeds it an English sentence).

    Returns:
        The ``"generated_text"`` of the first candidate produced by the
        pipeline; generation is capped at ``max_length=512``.
    """
    # Reuse the cached pipeline: loading the model per call is by far the
    # most expensive part of a request.
    pipe = _load_terjman_pipeline()
    result = pipe(input_text, max_length=512)
    return result[0]["generated_text"]
@st.cache_resource
def _load_transliteration_pipeline():
    """Build the ``atlasia/Transliteration-Moroccan-Darija`` pipeline.

    Wrapped in ``st.cache_resource`` so the model is loaded once and the same
    pipeline object is reused afterwards, instead of being rebuilt on every
    transliteration request.
    """
    return pipeline(
        "text2text-generation",
        model="atlasia/Transliteration-Moroccan-Darija",
    )


def transliterate(input_text: str) -> str:
    """Run ``input_text`` through the Moroccan Darija transliteration model.

    Args:
        input_text: Text to transliterate.
            NOTE(review): the disabled UI in this file describes it as a
            Latin-script word/letter to be rendered in Arabic script --
            confirm against the model card.

    Returns:
        The ``"generated_text"`` of the first candidate produced by the
        pipeline; generation is capped at ``max_length=50``.
    """
    # Reuse the cached pipeline rather than reloading the model per call.
    pipe = _load_transliteration_pipeline()
    result = pipe(input_text, max_length=50)
    return result[0]["generated_text"]
# Number of translations the project is aiming for; upper bound of the gauge.
TRANSLATION_GOAL = 100000

# Reference value the gauge's delta is computed against.
# NOTE(review): presumably an earlier dataset size -- confirm with the authors.
DELTA_REFERENCE = 42000


def _section_title(title: str) -> None:
    """Render ``title`` as a horizontally centred ``<h3>`` heading."""
    st.markdown(
        f"""
        <div style='text-align: center;'>
        <h3>{title}</h3
        >
        </div>
        """,
        unsafe_allow_html=True,
    )


def _render_translator() -> None:
    """Render the translation widget: input box, button and result box."""
    with st.container():
        _section_title("💬Terjman: Translate to Darija")
        st.caption(
            """
            <div style='text-align: center;'>
            <h3>This model has been developed thanks to your contributions.
            While it's not perfect yet, your continued input is key for making it better.</h3>
            </div>
            """,
            unsafe_allow_html=True,
        )

        input_col, output_col = st.columns(2)
        with input_col:
            input_text = st.text_area(":grey[Enter a sentence in English ⬇]", "")
            clicked = st.button("Translate")

        with output_col:
            if not clicked:
                return
            # Do not spend a model run on blank input; tell the user instead.
            if not input_text.strip():
                st.warning("Please enter a sentence to translate.")
                return
            with st.spinner('Translating...'):
                translation = terjman(input_text)
            st.text_area(
                ":grey[Translation in Darija]",
                translation,
            )


def _render_statistics(counts: dict) -> None:
    """Render the progress gauge and the English/French pie chart.

    Args:
        counts: Mapping with the keys ``"n"``, ``"n_eng"`` and ``"n_fr"``,
            as returned by ``fetch_counts()``.
    """
    _section_title("📊 Data statistics")

    total_submissions = counts["n"]

    # Progress gauge: current number of translations against the goal.
    gauge = go.Figure(
        go.Indicator(
            domain={"x": [0, 1], "y": [0, 1]},
            value=total_submissions,
            mode="gauge+number+delta",
            title={"text": "Number of translations"},
            delta={"reference": DELTA_REFERENCE},
            gauge={
                "axis": {"range": [0, TRANSLATION_GOAL]},
                "steps": [
                    {"range": [0, total_submissions], "color": "gray"},
                ],
                # Green marker line drawn at half of the goal.
                "threshold": {
                    "line": {"color": "green", "width": 4},
                    "thickness": 0.75,
                    "value": TRANSLATION_GOAL / 2,
                },
            },
        )
    )
    st.plotly_chart(gauge, use_container_width=True)

    # Pie chart of non-null English vs. French entries (blue and white).
    pie = go.Figure(
        data=[
            go.Pie(
                labels=["English", "French"],
                values=[counts["n_eng"], counts["n_fr"]],
                pull=[0.2, 0],
            )
        ]
    )
    pie.update_traces(marker=dict(colors=["#46607b", "#FFFFFF"]))
    st.plotly_chart(pie, use_container_width=True)


def main() -> None:
    """Lay out the page: banner, call to action, translator, statistics."""
    st.image("atlasia_white_wtext_nobg.png")
    counts = fetch_counts()

    st.text("")

    # Centred call-to-action heading.
    st.markdown(
        """
        <h1 style='text-align: center; font-size: 20px;'>
        Contribute now to help build a better Darija dataset for all Moroccans
        Contribute here: <a href="https://atlasia.ma" target="_blank">https://atlasia.ma</a>
        </h1>
        """,
        unsafe_allow_html=True,
    )

    st.divider()

    # The transliteration widget is currently disabled; the block below is
    # kept (commented out) so it can be switched back on.
    # with st.container():
    #     _section_title("🔠Keyboard: Transliterate Letters from Latin to Arabic")
    #     col1, col2 = st.columns(2)
    #     with col1:
    #         input_text = st.text_area(":grey[Enter a word/letter in English ⬇]", "")
    #         button = st.button("Transliterate")
    #     with col2:
    #         if button:
    #             with st.spinner('Transliterating...'):
    #                 translation = transliterate(input_text)
    #             st.text_area(
    #                 ":grey[Transliteration]",
    #                 translation,
    #             )
    # st.divider()

    _render_translator()

    # Separator between the translator and the statistics section.
    st.divider()

    _render_statistics(counts)


if __name__ == "__main__":
    main()