"""Streamlit app for collecting and correcting a Darija translation corpus.

The app reads a CSV from ``DATA_PATH`` and uses its ``sentence``,
``translation``, ``translated``, ``corrected`` and ``correction`` columns.
The "Translation" tab asks for a translation of a random row whose
``translated`` flag is False; the "Correction" tab shows a random row that is
translated but not yet corrected and asks for a corrected translation.
Every save rewrites the whole CSV.
"""

import html
import os
import random
import time

import pandas as pd
import streamlit as st

# File Path
DATA_FILENAME = "Dr-En-space-test.csv"
DATA_PATH = os.path.join("data", DATA_FILENAME)

# Placeholder texts displayed when the corresponding queue is empty.
NO_TRANSLATION_LEFT = "No more sentences to translate"
NO_CORRECTION_LEFT = "No more sentences to be corrected"

st.set_page_config(layout="wide")


def load_data():
    """Read the corpus CSV at ``DATA_PATH`` and return it as a DataFrame."""
    return pd.read_csv(DATA_PATH)


def save_data(data):
    """Write ``data`` back to ``DATA_PATH`` as CSV, without the index."""
    data.to_csv(DATA_PATH, index=False)


def _pick_untranslated():
    """Store a random untranslated sentence (or the placeholder) in session state."""
    data = st.session_state.data
    # Element-wise comparison on a pandas column; `is False` would not work here.
    candidates = data[data["translated"] == False]["sentence"].tolist()
    if candidates:
        st.session_state.sentence = random.choice(candidates)
    else:
        st.session_state.sentence = NO_TRANSLATION_LEFT


def _pick_correction_pair():
    """Store a random translated-but-uncorrected pair in session state.

    Sets both ``orig_sentence`` and ``orig_translation``. The translation is
    stored as a scalar (first matching row), never as a pandas Series, so it
    renders as plain text. When no pair is left, both keys receive the
    placeholder text.
    """
    data = st.session_state.data
    pending = data[(data.translated == True) & (data.corrected == False)]
    candidates = pending["sentence"].tolist()
    if candidates:
        chosen = random.choice(candidates)
        st.session_state.orig_sentence = chosen
        st.session_state.orig_translation = data.loc[
            data.sentence == chosen, "translation"
        ].values[0]
    else:
        st.session_state.orig_sentence = NO_CORRECTION_LEFT
        st.session_state.orig_translation = NO_CORRECTION_LEFT


def skip_correction():
    """Button callback: move the Correction tab on to another pending pair."""
    _pick_correction_pair()


def _show_text(text):
    """Render ``text`` as a newline-padded block.

    The text comes from the CSV or from user input and is rendered with
    ``unsafe_allow_html=True``, so it is HTML-escaped first to keep stored
    markup from being injected into the page.
    NOTE(review): the template is only newline padding here; if a styled HTML
    wrapper is intended, put it around the escaped text.
    """
    safe_text = html.escape(str(text), quote=False)
    st.write("\n{}\n".format(safe_text), unsafe_allow_html=True)


st.title("Darija Translation Corpus Collection")

# --- Session state initialisation -------------------------------------------
if "data" not in st.session_state:
    st.session_state.data = load_data()

if "sentence" not in st.session_state:
    _pick_untranslated()

if "orig_translation" not in st.session_state:
    _pick_correction_pair()

if "user_translation" not in st.session_state:
    st.session_state.user_translation = ""

# Bumped after every saved correction; it is part of the correction text
# area's key so that the next run gets a fresh, empty input.
if "correction_round" not in st.session_state:
    st.session_state.correction_round = 0

with st.sidebar:
    st.subheader("About")
    st.markdown("""This app is designed to collect a Darija translation corpus.""")

tab1, tab2 = st.tabs(["Translation", "Correction"])

with tab1:
    with st.container():
        st.subheader("Original Text:")
        _show_text(st.session_state.sentence)

    st.subheader("Translation:")
    st.session_state.user_translation = st.text_area(
        "Enter your translation here:",
        value=st.session_state.user_translation,
    )

    if st.button("💾 Save"):
        if st.session_state.user_translation:
            data = st.session_state.data
            current_row = data["sentence"] == st.session_state.sentence
            data.loc[current_row, "translation"] = st.session_state.user_translation
            data.loc[current_row, "translated"] = True
            save_data(data)

            st.session_state.user_translation = ""  # Reset the input value after saving
            st.success("Saved!")

            # Update the sentence for the next iteration.
            _pick_untranslated()

            # Leave the success message visible briefly, then rerun the app.
            time.sleep(0.5)
            st.rerun()

with tab2:
    with st.container():
        st.subheader("Original Darija Text:")
        _show_text(st.session_state.orig_sentence)

    with st.container():
        st.subheader("Original English Translation:")
        _show_text(st.session_state.orig_translation)

    st.subheader("Corrected Darija Translation:")
    corrected_translation = st.text_area(
        "Enter the corrected Darija translation here:",
        key=f"correction_input_{st.session_state.correction_round}",
    )

    if st.button("💾 Save Translation"):
        if corrected_translation:
            data = st.session_state.data
            current_row = data["sentence"] == st.session_state.orig_sentence
            data.loc[current_row, "translation"] = corrected_translation
            data.loc[current_row, "correction"] = corrected_translation
            data.loc[current_row, "corrected"] = True
            save_data(data)
            st.success("Saved!")

            # Update the pair for the next iteration and reset the input.
            _pick_correction_pair()
            st.session_state.correction_round += 1

            # The pair above was rendered before this handler ran, so rerun
            # to display the newly selected pair (same pattern as tab1).
            time.sleep(0.5)
            st.rerun()

    st.button("⏩ Skip to the Next Pair", key="skip_button", on_click=skip_correction)