Spaces:
Build error
Build error
emails and country included
Browse files- app.py +49 -27
- app3.py +0 -39
- article.py +4 -5
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from ctypes.wintypes import LANGID
|
2 |
from email.policy import default
|
|
|
3 |
import os
|
4 |
import csv
|
5 |
import random
|
@@ -18,8 +19,10 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
18 |
NUMBER_DIR = './number'
|
19 |
number_files = [f.name for f in os.scandir(NUMBER_DIR)]
|
20 |
|
|
|
21 |
|
22 |
DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"
|
|
|
23 |
REPOSITORY_DIR = "data"
|
24 |
LOCAL_DIR = 'data_local'
|
25 |
os.makedirs(LOCAL_DIR,exist_ok=True)
|
@@ -48,7 +51,7 @@ with open('app.css','r') as f:
|
|
48 |
|
49 |
|
50 |
|
51 |
-
def save_record(language,text,record,number,age,gender,accent,number_history,current_number,done_recording):
|
52 |
number_history = number_history or [0]
|
53 |
|
54 |
# Save text and its corresponding record to flag
|
@@ -56,7 +59,6 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
|
|
56 |
speaker_metadata['gender'] = gender if gender!=GENDER[0] else ''
|
57 |
speaker_metadata['age'] = age if age !='' else ''
|
58 |
speaker_metadata['accent'] = accent if accent!='' else ''
|
59 |
-
import pdb;pdb.set_trace()
|
60 |
default_record = None
|
61 |
if not done_recording:
|
62 |
if language!=None and language!='Choose language' and record is not None and number is not None:
|
@@ -77,7 +79,8 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
|
|
77 |
'language_name':language,'language_id':lang_id,
|
78 |
'number':current_number, 'text':text,'frequency':record[0],
|
79 |
'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
|
80 |
-
'accent': speaker_metadata['accent']
|
|
|
81 |
}
|
82 |
|
83 |
dump_json(metadata,json_file_path)
|
@@ -102,7 +105,7 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
|
|
102 |
token=HF_TOKEN
|
103 |
)
|
104 |
|
105 |
-
output = f'Recording successfully saved!'
|
106 |
|
107 |
# Choose the next number
|
108 |
number_history.append(current_number)
|
@@ -112,9 +115,34 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
|
|
112 |
|
113 |
next_number_image = f'number/{next_number}.jpg'
|
114 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
done_recording=True
|
116 |
next_number = 0 # the default number
|
117 |
-
next_number_image = f'number/best.gif'
|
|
|
118 |
output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
|
119 |
return output_string,next_number_image,number_history,next_number,done_recording,default_record
|
120 |
|
@@ -129,6 +157,7 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
|
|
129 |
# return output_string, previous image and state
|
130 |
return output_string, number,number_history,current_number,done_recording,default_record
|
131 |
else:
|
|
|
132 |
# Stop submitting recording (best.gif is displaying)
|
133 |
output = '🙌 You have finished all recording! Thank You. You can reload to start again (maybe in another language).'
|
134 |
output_string = "<div class='finished'>"+output+"</div>"
|
@@ -187,40 +216,33 @@ markdown="""
|
|
187 |
|
188 |
> Record numbers 0-9 in your African language.
|
189 |
|
190 |
-
1.
|
191 |
-
2.
|
192 |
-
3.
|
193 |
-
4.
|
194 |
-
5.
|
195 |
-
6. Click
|
196 |
-
7.
|
197 |
-
8.
|
198 |
-
|
199 |
-
SORTED_LANGUAGES = sorted([lang_.title() for lang_ in list(DEFAULT_LANGS.keys())])
|
200 |
-
LANGAUGES_CHOOSE = """
|
201 |
-
<label for="langs"> Choose your language </label>
|
202 |
-
<input type="text" id="langs" name="AfricanLanguages" list="languagesList">
|
203 |
-
|
204 |
-
<datalist id='languagesList'>
|
205 |
"""
|
206 |
-
for lang in SORTED_LANGUAGES:
|
207 |
-
LANGAUGES_CHOOSE+= f"<option> {lang} </option> \n"
|
208 |
-
LANGAUGES_CHOOSE+="</datalist>"
|
209 |
|
210 |
|
211 |
# Interface design begins
|
212 |
block = gr.Blocks(css=BLOCK_CSS)
|
213 |
with block:
|
214 |
gr.Markdown(markdown)
|
|
|
215 |
with gr.Tabs():
|
216 |
|
217 |
with gr.TabItem('Record'):
|
218 |
with gr.Row():
|
219 |
-
|
220 |
-
|
221 |
age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
|
222 |
gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
|
223 |
-
accent = gr.inputs.Textbox(label="Accent (optional)",default='')
|
|
|
224 |
|
225 |
number = gr.Image('number/0.jpg',image_mode="L")
|
226 |
text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="How is the number called in your language (optional)")
|
@@ -233,7 +255,7 @@ with block:
|
|
233 |
save = gr.Button("Submit")
|
234 |
|
235 |
|
236 |
-
save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
|
237 |
|
238 |
with gr.TabItem('Listen') as listen_tab:
|
239 |
gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
|
|
|
1 |
from ctypes.wintypes import LANGID
|
2 |
from email.policy import default
|
3 |
+
import pycountry
|
4 |
import os
|
5 |
import csv
|
6 |
import random
|
|
|
19 |
NUMBER_DIR = './number'
|
20 |
number_files = [f.name for f in os.scandir(NUMBER_DIR)]
|
21 |
|
22 |
+
DEFAULT_LIST_OF_COUNTRIES = [country.name for country in pycountry.countries]
|
23 |
|
24 |
DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"
|
25 |
+
EMAILS_REPO_URL="https://huggingface.co/datasets/chrisjay/african-digits-recording-sprint-email"
|
26 |
REPOSITORY_DIR = "data"
|
27 |
LOCAL_DIR = 'data_local'
|
28 |
os.makedirs(LOCAL_DIR,exist_ok=True)
|
|
|
51 |
|
52 |
|
53 |
|
54 |
+
def save_record(language,text,record,number,age,gender,accent,number_history,current_number,country,email,done_recording):
|
55 |
number_history = number_history or [0]
|
56 |
|
57 |
# Save text and its corresponding record to flag
|
|
|
59 |
speaker_metadata['gender'] = gender if gender!=GENDER[0] else ''
|
60 |
speaker_metadata['age'] = age if age !='' else ''
|
61 |
speaker_metadata['accent'] = accent if accent!='' else ''
|
|
|
62 |
default_record = None
|
63 |
if not done_recording:
|
64 |
if language!=None and language!='Choose language' and record is not None and number is not None:
|
|
|
79 |
'language_name':language,'language_id':lang_id,
|
80 |
'number':current_number, 'text':text,'frequency':record[0],
|
81 |
'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
|
82 |
+
'accent': speaker_metadata['accent'],
|
83 |
+
'country':country
|
84 |
}
|
85 |
|
86 |
dump_json(metadata,json_file_path)
|
|
|
105 |
token=HF_TOKEN
|
106 |
)
|
107 |
|
108 |
+
output = f'Recording successfully saved! On to the next one...'
|
109 |
|
110 |
# Choose the next number
|
111 |
number_history.append(current_number)
|
|
|
115 |
|
116 |
next_number_image = f'number/{next_number}.jpg'
|
117 |
else:
|
118 |
+
email_metadata_name = get_unique_name()
|
119 |
+
EMAIL_SAVE_FILE = os.path.join(LOCAL_DIR,f"{email_metadata_name}.json")
|
120 |
+
# Write metadata.json to file
|
121 |
+
email_metadata = {'id':email_metadata_name,'email':email,
|
122 |
+
'language_name':language,'language_id':lang_id,
|
123 |
+
'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
|
124 |
+
'accent': speaker_metadata['accent'],
|
125 |
+
'country':country
|
126 |
+
}
|
127 |
+
|
128 |
+
dump_json(email_metadata,EMAIL_SAVE_FILE)
|
129 |
+
|
130 |
+
# Upload the metadata
|
131 |
+
repo_json_path = os.path.join('emails',f"{email_metadata_name}.json")
|
132 |
+
_ = upload_file(path_or_fileobj = EMAIL_SAVE_FILE,
|
133 |
+
path_in_repo =repo_json_path,
|
134 |
+
repo_id='chrisjay/african-digits-recording-sprint-email',
|
135 |
+
repo_type='dataset',
|
136 |
+
token=HF_TOKEN
|
137 |
+
)
|
138 |
+
# Delete the email from local repo
|
139 |
+
if os.path.exists(EMAIL_SAVE_FILE):
|
140 |
+
os.remove(EMAIL_SAVE_FILE)
|
141 |
+
#-------------------
|
142 |
done_recording=True
|
143 |
next_number = 0 # the default number
|
144 |
+
next_number_image = f'number/best.gif'
|
145 |
+
output = "You have finished all recording! You can reload to start again."
|
146 |
output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
|
147 |
return output_string,next_number_image,number_history,next_number,done_recording,default_record
|
148 |
|
|
|
157 |
# return output_string, previous image and state
|
158 |
return output_string, number,number_history,current_number,done_recording,default_record
|
159 |
else:
|
160 |
+
|
161 |
# Stop submitting recording (best.gif is displaying)
|
162 |
output = '🙌 You have finished all recording! Thank You. You can reload to start again (maybe in another language).'
|
163 |
output_string = "<div class='finished'>"+output+"</div>"
|
|
|
216 |
|
217 |
> Record numbers 0-9 in your African language.
|
218 |
|
219 |
+
1. Fill in your email. This is completely optional. We need this to track your progress for the prize.
|
220 |
+
2. Choose your African language
|
221 |
+
3. Fill in the speaker metadata (age, gender, accent). This is optional but important to build better speech models.
|
222 |
+
4. You will see the image of a number __(this is the number you will record)__.
|
223 |
+
5. Fill in the word of that number (optional)
|
224 |
+
6. Click record and say the number in your African language.
|
225 |
+
7. Click ‘Submit’. It will save your record and go to the next number.
|
226 |
+
8. Repeat 4-7
|
227 |
+
9. Leave a ❤ in the Space, if you found it fun.
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
"""
|
|
|
|
|
|
|
229 |
|
230 |
|
231 |
# Interface design begins
|
232 |
block = gr.Blocks(css=BLOCK_CSS)
|
233 |
with block:
|
234 |
gr.Markdown(markdown)
|
235 |
+
email = gr.inputs.Textbox(placeholder='your email',label="Email (if you want join the sprint)",default='')
|
236 |
with gr.Tabs():
|
237 |
|
238 |
with gr.TabItem('Record'):
|
239 |
with gr.Row():
|
240 |
+
|
241 |
+
language = gr.inputs.Dropdown(choices = sorted([lang_.title() for lang_ in list(DEFAULT_LANGS.keys())]),label="Choose language",default="Choose language")
|
242 |
age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
|
243 |
gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
|
244 |
+
accent = gr.inputs.Textbox(label="Accent (optional)",default='')
|
245 |
+
country = gr.Dropdown(choices=[''] + sorted(DEFAULT_LIST_OF_COUNTRIES),type='value',default=None,label="Country you are recording from (optional)")
|
246 |
|
247 |
number = gr.Image('number/0.jpg',image_mode="L")
|
248 |
text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="How is the number called in your language (optional)")
|
|
|
255 |
save = gr.Button("Submit")
|
256 |
|
257 |
|
258 |
+
save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
|
259 |
|
260 |
with gr.TabItem('Listen') as listen_tab:
|
261 |
gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
|
app3.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import gradio as gr
|
3 |
-
|
4 |
-
|
5 |
-
#HF_TOKEN = os.environ.get("HF_TOKEN")
|
6 |
-
#print("is none?", HF_TOKEN is None)
|
7 |
-
|
8 |
-
def get_record(language,text,record):
|
9 |
-
# Save text and its corresponding record to flag
|
10 |
-
|
11 |
-
text =text.strip()
|
12 |
-
|
13 |
-
#output_string = "<html> <body> <div class='output'>"+f'Record for text {text} successfully saved to dataset! Thank You.'+"</div> </body> </html>"
|
14 |
-
output_string = f'Record for text - {text} - successfully saved to dataset! Thank You.'
|
15 |
-
return output_string
|
16 |
-
|
17 |
-
title = 'African Crowdsource Speech'
|
18 |
-
description = 'A platform to contribute to your African language by recording your voice'
|
19 |
-
|
20 |
-
markdown = """# African Crowdsource Speech
|
21 |
-
|
22 |
-
A platform to contribute to your African language by recording your voice
|
23 |
-
"""
|
24 |
-
|
25 |
-
# Get a dropdown of all African languages
|
26 |
-
|
27 |
-
# Interface design begins
|
28 |
-
#import pdb; pdb.set_trace()
|
29 |
-
iface = gr.Interface(fn=get_record,
|
30 |
-
inputs=[gr.inputs.Textbox(placeholder='Choose your language'),
|
31 |
-
gr.inputs.Textbox(placeholder='Write your text'),
|
32 |
-
gr.inputs.Audio(source="microphone",label='Record your voice')
|
33 |
-
],
|
34 |
-
outputs = "text",
|
35 |
-
title=title,
|
36 |
-
description=description,
|
37 |
-
theme='huggingface'
|
38 |
-
)
|
39 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
article.py
CHANGED
@@ -14,11 +14,10 @@ This dataset will boost speech technologies (like speech-to-text, text-to-speech
|
|
14 |
|
15 |
**About the dataset**
|
16 |
|
17 |
-
The data (metadat,text, and audio recording) are uploaded to [a public Hugging Face dataset](https://huggingface.co/datasets/chrisjay/crowd-speech-africa).
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
If for some reason you want to remove your entry, please reach out by email.
|
22 |
|
23 |
**Contact**
|
24 |
|
|
|
14 |
|
15 |
**About the dataset**
|
16 |
|
17 |
+
- The data (metadata, text, and audio recording) are uploaded to [a public Hugging Face dataset](https://huggingface.co/datasets/chrisjay/crowd-speech-africa).
|
18 |
+
- We do not collect your name, address or other sensitive information.
|
19 |
+
- If for some reason you want to remove your entry, please reach out by email.
|
20 |
+
- Your email, if given, is used only to keep track of your progress in order to give the prizes to the top scorers. It is temporarily stored in [this private dataset](https://huggingface.co/datasets/chrisjay/african-digits-recording-sprint-email) and will be deleted immediately after the sprint.
|
|
|
21 |
|
22 |
**Contact**
|
23 |
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
pandas
|
2 |
-
scipy
|
|
|
|
1 |
pandas
|
2 |
+
scipy
|
3 |
+
pycountry
|