"""Streamlit mini-app for collecting translations of darija sentences.

A random sentence is drawn from the ``darija-english`` dataset, the user types
a translation, and the submission is appended to a CSV file inside a local
clone of the dataset repository, which is then pushed to the Hugging Face Hub.
"""

import streamlit as st
from datasets import load_dataset
import csv
import datetime as dt
import random
import os

from huggingface_hub import Repository


HF_API_KEY = os.environ.get("HF_TOKEN", None)

REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"
DATA_FILE = os.path.join("submissions", "submissions.csv")

# Local clone of the dataset repo; DATA_FILE lives inside this clone.
submissions_repo = Repository(
    local_dir="submissions",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_API_KEY,
)


def load_data(repo_id):
    """Load the ``sentences`` config/split of the dataset ``repo_id``."""
    dataset = load_dataset(f"{repo_id}", name="sentences", split="sentences")
    return dataset


def fetch_sentence(dataset, column_name="darija_ar"):
    """Return the ``column_name`` value of a uniformly random dataset row."""
    random_sentence_index = random.randrange(len(dataset))
    return dataset[random_sentence_index][column_name]


def _show_new_sentence(dataset):
    """Button callback: replace the displayed sentence with a new random one."""
    st.session_state.sentence = fetch_sentence(dataset)


def store_submission(sentence: str, translation: str, translation_fr: str):
    """Append one submission to DATA_FILE and push the clone to the Hub.

    Nothing is written or pushed unless ``sentence`` is non-empty and at least
    one of ``translation`` / ``translation_fr`` is non-empty.

    Args:
        sentence: The sentence that was shown (stored as ``darija_ar``).
        translation: English translation (stored as ``eng``).
        translation_fr: Darija in latin characters (stored as ``darija``).
    """
    if not sentence or not (translation or translation_fr):
        return

    # Explicit UTF-8 because rows contain Arabic text; newline="" is what the
    # csv module expects so it can control line endings itself.
    with open(DATA_FILE, "a", encoding="utf-8", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=["darija", "eng", "darija_ar", "time"]
        )
        writer.writerow(
            {
                "darija_ar": sentence,
                "eng": translation,
                "darija": translation_fr,
                "time": str(dt.datetime.now()),
            }
        )

    # Push only after the file is closed so the new row is flushed to disk.
    # Repository.push_to_hub takes no `token` argument; the credentials were
    # already supplied when the repository was cloned.
    commit_url = submissions_repo.push_to_hub(
        commit_message="Add new submission"
    )
    print(commit_url)


# Load the dataset
dataset = load_data(REPO_ID)


def main():
    """Render the translation page and handle a submission."""
    if "sentence" not in st.session_state:
        st.session_state.sentence = fetch_sentence(dataset)
    if "translation_input" not in st.session_state:
        st.session_state.translation_input = ""
    if "translation_input_fr" not in st.session_state:
        st.session_state.translation_input_fr = ""
    if "display_new" not in st.session_state:
        st.session_state.display_new = False

    st.title("Translate From Arabic to English")

    st.markdown(
        "This mini-app allows you to contribute to the **darija-english** "
        "dataset as part of [DODa](https://darija-open-dataset.github.io/) "
        "project. To contribute, simply translate the given sentence from "
        "Arabic to English. \n"
        "The translated sentence will be submitted to the dataset "
        "[here](https://huggingface.co/datasets/imomayiz/darija-english)."
    )

    st.text("")

    st.write(f"""

{st.session_state.sentence}.

""", unsafe_allow_html=True)

    # Display new sentence button. The callback must write the new sentence
    # into session state itself: a callback's return value is discarded.
    st.session_state.display_new = st.button(
        "New Sentence", on_click=_show_new_sentence, args=(dataset,)
    )

    # Input field for the English translation
    translation_input_placeholder = st.empty()
    with translation_input_placeholder.container():
        translation_input = st.text_input(
            "Enter translation to english: ",
            st.session_state.translation_input,
        )
        st.session_state.translation_input = translation_input

    # Input field for the darija (latin characters) transliteration
    translation_input_placeholder_fr = st.empty()
    with translation_input_placeholder_fr.container():
        translation_input_fr = st.text_input(
            "Enter translation to darija in latin characters: ",
            st.session_state.translation_input_fr,
        )
        st.session_state.translation_input_fr = translation_input_fr

    # Submit button
    if st.button("Submit Translation"):
        if translation_input or translation_input_fr:
            # Store first so success is only reported once the write and push
            # have completed without raising.
            store_submission(
                st.session_state.sentence,
                st.session_state.translation_input,
                st.session_state.translation_input_fr,
            )
            st.success("Translation submitted successfully!")
        else:
            st.warning("Please enter a translation before submitting.")


if __name__ == "__main__":
    main()