"""Streamlit helpers for collecting Darija/English translation submissions.

Sentences are sampled from the `imomayiz/darija-english` dataset; user
translations are appended to a JSON-Lines file that a Hugging Face
`CommitScheduler` periodically commits back to the dataset repository.
"""
import streamlit as st
import datetime as dt
import random
import json
import os

from huggingface_hub import CommitScheduler
from datasets import load_dataset

# Timestamp baked into the submissions filename at import time, so each
# app launch appends to its own per-launch file.
today = dt.datetime.now().strftime("%Y%m%d_%H%M")

REPO_ID = "imomayiz/darija-english"
DATASET_REPO_URL = f"https://huggingface.co/datasets/{REPO_ID}"

submissions_folder = "submissions"
submissions_file = os.path.join(submissions_folder, f"submissions_{today}.json")


@st.cache_data
def load_data(repo_id, column_name="darija_ar"):
    """Load the `sentences` split of *repo_id*, keeping only *column_name*.

    Cached by Streamlit so the dataset download happens once per session.
    """
    dataset = load_dataset(f'{repo_id}', name='sentences', split='sentences')
    dataset = dataset.select_columns(column_name)
    return dataset


def fetch_sentence(dataset, column_name="darija_ar"):
    """Pick a random sentence from *dataset* and reset the translation inputs.

    Stores the chosen sentence in ``st.session_state.sentence`` and clears
    both translation text inputs. Returns the chosen sentence.

    Raises:
        ValueError: if *dataset* is empty.
    """
    # Explicit guard: random.randint(0, -1) on an empty dataset would raise
    # an opaque ValueError; fail with a clear message instead.
    if len(dataset) == 0:
        raise ValueError("Dataset is empty; cannot fetch a sentence.")
    random_sentence = dataset[random.randrange(len(dataset))][column_name]
    st.session_state.sentence = random_sentence
    st.session_state.translation_input = ""
    st.session_state.translation_input_fr = ""
    return random_sentence


def store_submission(
    scheduler: CommitScheduler,
    sentence: str,
    translation: str,
    translation_fr: str,
):
    """
    Append input/outputs and user feedback to a JSON Lines file
    using a thread lock to avoid concurrent writes from different users.
    """
    ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    # The submissions folder may not exist on a fresh deployment; create it
    # before appending so open(..., "a") cannot fail with FileNotFoundError.
    os.makedirs(submissions_folder, exist_ok=True)
    with scheduler.lock:
        with open(submissions_file, "a", encoding='utf-8') as f:
            # NOTE(review): "darija" holds translation_fr while "eng" holds
            # translation — this mirrors the original mapping; confirm the
            # field assignment is intentional against the dataset schema.
            f.write(json.dumps({
                "darija": translation_fr,
                "eng": translation,
                "darija_ar": sentence,
                "timestamp": ts}, ensure_ascii=False
            ))
            f.write("\n")
    st.success(
        f"""Translation submitted successfully.
            You will see your commit in a few minutes at
            {DATASET_REPO_URL}/tree/main/{submissions_folder}.
            You can submit another translation or check the dataset."""
    )