Aiyagh committed on
Commit
fc1a8b2
1 Parent(s): 6323399

add changes

Browse files
Files changed (2) hide show
  1. app.py +131 -0
  2. data/Dr-En-space-test.csv +0 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import time
4
+ import random
5
+ import os
6
+
7
+
8
# Corpus location: a CSV file inside the relative "data" directory.
DATA_FILENAME = "Dr-En-space-test.csv"
DATA_PATH = os.path.join("data", DATA_FILENAME)

# Use the wide page layout for the whole app.
st.set_page_config(layout="wide")
13
+
14
+ # Load data
15
def load_data():
    """Read the corpus CSV located at ``DATA_PATH`` and return it as a DataFrame."""
    corpus = pd.read_csv(DATA_PATH)
    return corpus
17
+
18
def save_data(data):
    """Write ``data`` to ``DATA_PATH`` as CSV, without the index column.

    Args:
        data: the DataFrame to persist; the file at ``DATA_PATH`` is overwritten.
    """
    data.to_csv(path_or_buf=DATA_PATH, index=False)
20
+
21
def skip_correction():
    """Load another random sentence pair into the Correction tab's state.

    Candidate rows are those of ``st.session_state.data`` whose ``translated``
    column is True and whose ``corrected`` column is False. The chosen row's
    ``sentence`` and ``translation`` values are stored in
    ``st.session_state.orig_sentence`` and ``st.session_state.orig_translation``.
    When there is no candidate row, both are set to a placeholder message.
    """
    data = st.session_state.data
    # The explicit ``== True`` / ``== False`` comparisons are kept on purpose:
    # the column dtypes are not visible here, and ``&``/``~`` applied to the
    # raw columns could behave differently on non-boolean or missing values.
    pending = data[(data.translated == True) & (data.corrected == False)]  # noqa: E712

    if pending.empty:
        st.session_state.orig_sentence = "No more sentences to be corrected"
        st.session_state.orig_translation = "No more sentences to be corrected"
        return

    # Pick one whole row so the sentence and its translation always come from
    # the same record, and store plain scalar values. Previously a pandas
    # Series was stored as the translation, so the page displayed the Series
    # representation instead of the translation text.
    row = pending.iloc[random.randrange(len(pending))]
    st.session_state.orig_sentence = row["sentence"]
    st.session_state.orig_translation = row["translation"]
29
+
30
# ---------------------------------------------------------------------------
# Streamlit page body.
#
# Two tabs share one DataFrame kept in ``st.session_state.data``:
#   * "Translation": shows a sentence whose ``translated`` flag is False and
#     stores the text entered by the user in the ``translation`` column.
#   * "Correction": shows a translated-but-uncorrected pair and stores the
#     text entered by the user in the ``translation`` and ``correction``
#     columns, marking the row as corrected.
# Every save writes the whole DataFrame back through ``save_data``.
# ---------------------------------------------------------------------------
import html  # kept local to this section; used to escape text placed in raw HTML

# Placeholders shown when a work queue is empty.
_NO_TRANSLATION_LEFT = "No more sentences to translate"
_NO_CORRECTION_LEFT = "No more sentences to be corrected"

# Fixed-height, scrollable box used to display sentences.
_BOX_TEMPLATE = (
    '<div style="height: 150px; overflow: auto; border: 2px solid #ddd; '
    'padding: 10px; border-radius: 5px;">{}</div>'
)


def _pick_untranslated(data):
    """Return a random sentence whose ``translated`` flag is False, or a placeholder."""
    candidates = data[data["translated"] == False]["sentence"].tolist()  # noqa: E712
    return random.choice(candidates) if candidates else _NO_TRANSLATION_LEFT


def _pick_uncorrected(data):
    """Return ``(sentence, translation)`` of a random translated, uncorrected row.

    Both elements are the placeholder message when no such row exists. The
    values are taken from a single row, so they are scalars that belong
    together even if the same sentence text occurs in several rows.
    """
    pending = data[(data.translated == True) & (data.corrected == False)]  # noqa: E712
    if pending.empty:
        return _NO_CORRECTION_LEFT, _NO_CORRECTION_LEFT
    row = pending.iloc[random.randrange(len(pending))]
    return row["sentence"], row["translation"]


def _show_box(text):
    """Render ``text`` inside the scrollable box.

    The text comes from the CSV file or from user input and is inserted into
    raw HTML, so it is escaped first to keep markup in the data from being
    interpreted by the browser.
    """
    st.write(_BOX_TEMPLATE.format(html.escape(str(text))), unsafe_allow_html=True)


st.title("Darija Translation Corpus Collection")

# --- Session state initialisation -----------------------------------------
if "data" not in st.session_state:
    st.session_state.data = load_data()

if "sentence" not in st.session_state:
    st.session_state.sentence = _pick_untranslated(st.session_state.data)

if "orig_translation" not in st.session_state:
    (
        st.session_state.orig_sentence,
        st.session_state.orig_translation,
    ) = _pick_uncorrected(st.session_state.data)

if "user_translation" not in st.session_state:
    st.session_state.user_translation = ""

# Counter used in the correction text area's key. Increasing it after a save
# makes Streamlit create a fresh, empty widget for the next pair.
if "correction_round" not in st.session_state:
    st.session_state.correction_round = 0

with st.sidebar:
    st.subheader("About")
    st.markdown("""This is app is designed to collect Darija translation corpus.""")

tab1, tab2 = st.tabs(["Translation", "Correction"])

# --- Translation tab --------------------------------------------------------
with tab1:
    with st.container():
        st.subheader("Original Text:")
        _show_box(st.session_state.sentence)

    st.subheader("Translation:")
    st.session_state.user_translation = st.text_area(
        "Enter your translation here:",
        value=st.session_state.user_translation,
    )

    if st.button("💾 Save"):
        if st.session_state.user_translation:
            data = st.session_state.data
            is_current = data["sentence"] == st.session_state.sentence
            data.loc[is_current, "translation"] = st.session_state.user_translation
            data.loc[is_current, "translated"] = True
            save_data(data)

            # Reset the input value after saving.
            st.session_state.user_translation = ""

            st.success("Saved!")

            # Queue the next sentence, then rerun so the page displays it.
            st.session_state.sentence = _pick_untranslated(data)

            # Short pause so the success message is visible before the rerun.
            time.sleep(0.5)
            st.rerun()

# --- Correction tab ---------------------------------------------------------
with tab2:
    with st.container():
        st.subheader("Original Darija Text:")
        _show_box(st.session_state.orig_sentence)

    with st.container():
        st.subheader("Original English Translation:")
        _show_box(st.session_state.orig_translation)

    st.subheader("Corrected Darija Translation:")
    corrected_translation = st.text_area(
        "Enter the corrected Darija translation here:",
        key=f"corrected_translation_{st.session_state.correction_round}",
    )

    if st.button("💾 Save Translation"):
        if corrected_translation:
            data = st.session_state.data
            is_current = data["sentence"] == st.session_state.orig_sentence
            data.loc[is_current, "translation"] = corrected_translation
            data.loc[is_current, "correction"] = corrected_translation
            data.loc[is_current, "corrected"] = True
            save_data(data)

            st.success("Saved!")

            # Queue the next pair. Both values are always updated together so
            # the displayed sentence and translation cannot get out of sync.
            (
                st.session_state.orig_sentence,
                st.session_state.orig_translation,
            ) = _pick_uncorrected(data)

            # A new widget key gives an empty text area for the next pair.
            st.session_state.correction_round += 1

            # Rerun (as the Translation tab does) so the page shows the pair
            # that session state now points to; without it the old pair stays
            # on screen and a further save would be written to the wrong row.
            time.sleep(0.5)
            st.rerun()

    st.button("⏩ Skip to the Next Pair", key="skip_button", on_click=skip_correction)
data/Dr-En-space-test.csv ADDED
The diff for this file is too large to render. See raw diff