Spaces:

chrisjay
/

afro-speech

Build error

App Files Files Community

chrisjay committed on May 16, 2022

Commit

73257d5

•

1 Parent(s): af6c493

emails and country included

Browse files

Files changed (4) hide show

app.py +49 -27
app3.py +0 -39
article.py +4 -5
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from ctypes.wintypes import LANGID
 from email.policy import default
 import os
 import csv
 import random
@@ -18,8 +19,10 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 NUMBER_DIR = './number'
 number_files = [f.name for f in os.scandir(NUMBER_DIR)]
 DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"
 REPOSITORY_DIR = "data"
 LOCAL_DIR = 'data_local'
 os.makedirs(LOCAL_DIR,exist_ok=True)
@@ -48,7 +51,7 @@ with open('app.css','r') as f:
-def save_record(language,text,record,number,age,gender,accent,number_history,current_number,done_recording):
     number_history = number_history or [0]
     # Save text and its corresponding record to flag
@@ -56,7 +59,6 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
     speaker_metadata['gender'] = gender if gender!=GENDER[0] else ''
     speaker_metadata['age'] = age if age !='' else ''
     speaker_metadata['accent'] = accent if accent!='' else ''
-    import pdb;pdb.set_trace()
     default_record = None
     if not done_recording:
         if language!=None and language!='Choose language' and record is not None and number is not None:
@@ -77,7 +79,8 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
                         'language_name':language,'language_id':lang_id,
                         'number':current_number, 'text':text,'frequency':record[0],
                         'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
-                        'accent': speaker_metadata['accent']
                         }
             dump_json(metadata,json_file_path)
@@ -102,7 +105,7 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
                         token=HF_TOKEN
                     )
-            output = f'Recording successfully saved!'
             # Choose the next number
             number_history.append(current_number)
@@ -112,9 +115,34 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
                 next_number_image = f'number/{next_number}.jpg'
             else:
                 done_recording=True
                 next_number = 0 # the default number
-                next_number_image = f'number/best.gif'
             output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
             return output_string,next_number_image,number_history,next_number,done_recording,default_record
@@ -129,6 +157,7 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
         # return output_string, previous image and state
         return output_string, number,number_history,current_number,done_recording,default_record
     else:
         # Stop submitting recording (best.gif is displaying)
         output = '🙌 You have finished all recording! Thank You. You can reload to start again (maybe in another language).'
         output_string = "<div class='finished'>"+output+"</div>"
@@ -187,40 +216,33 @@ markdown="""
 > Record numbers 0-9 in your African language.
-1. Choose your African language
-2. Fill in the speaker metadata (age, gender, accent). This is optional but important to build better speech models.
-3. You will see the image of a number __(this is the number you will record)__.
-4. Fill in the word of that number (optional)
-5. Click record and say the number in your African language.
-6. Click ‘Submit’. It will save your record and go to the next number.
-7. Repeat 4-7
-8. Leave a ❤ in the Space, if you found it fun.
-"""
-SORTED_LANGUAGES = sorted([lang_.title() for lang_ in list(DEFAULT_LANGS.keys())])
-LANGAUGES_CHOOSE = """
-<label for="langs"> Choose your language </label>
-<input type="text" id="langs" name="AfricanLanguages" list="languagesList">
-<datalist id='languagesList'>
 """
-for lang in SORTED_LANGUAGES:
-    LANGAUGES_CHOOSE+= f"<option> {lang} </option> \n"
-LANGAUGES_CHOOSE+="</datalist>"
 # Interface design begins
 block = gr.Blocks(css=BLOCK_CSS)
 with block:
     gr.Markdown(markdown)
     with gr.Tabs():
         with gr.TabItem('Record'):
             with gr.Row():
-                language = gr.HTML(LANGAUGES_CHOOSE)
-                #language = gr.inputs.Dropdown(choices = sorted([lang_.title() for lang_ in list(DEFAULT_LANGS.keys())]),label="Choose language",default="Choose language")
                 age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
                 gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
-                accent = gr.inputs.Textbox(label="Accent (optional)",default='')
             number = gr.Image('number/0.jpg',image_mode="L")
             text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="How is the number called in your language (optional)")
@@ -233,7 +255,7 @@ with block:
             save = gr.Button("Submit")
-            save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
         with gr.TabItem('Listen') as listen_tab:
             gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")

 from ctypes.wintypes import LANGID
 from email.policy import default
+import pycountry
 import os
 import csv
 import random
 NUMBER_DIR = './number'
 number_files = [f.name for f in os.scandir(NUMBER_DIR)]
+DEFAULT_LIST_OF_COUNTRIES = [country.name for country in pycountry.countries]
 DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"
+EMAILS_REPO_URL="https://huggingface.co/datasets/chrisjay/african-digits-recording-sprint-email"
 REPOSITORY_DIR = "data"
 LOCAL_DIR = 'data_local'
 os.makedirs(LOCAL_DIR,exist_ok=True)
+def save_record(language,text,record,number,age,gender,accent,number_history,current_number,country,email,done_recording):
     number_history = number_history or [0]
     # Save text and its corresponding record to flag
     speaker_metadata['gender'] = gender if gender!=GENDER[0] else ''
     speaker_metadata['age'] = age if age !='' else ''
     speaker_metadata['accent'] = accent if accent!='' else ''
     default_record = None
     if not done_recording:
         if language!=None and language!='Choose language' and record is not None and number is not None:
                         'language_name':language,'language_id':lang_id,
                         'number':current_number, 'text':text,'frequency':record[0],
                         'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
+                        'accent': speaker_metadata['accent'],
+                        'country':country
                         }
             dump_json(metadata,json_file_path)
                         token=HF_TOKEN
                     )
+            output = f'Recording successfully saved! On to the next one...'
             # Choose the next number
             number_history.append(current_number)
                 next_number_image = f'number/{next_number}.jpg'
             else:
+                email_metadata_name = get_unique_name()
+                EMAIL_SAVE_FILE = os.path.join(LOCAL_DIR,f"{email_metadata_name}.json")
+                # Write metadata.json to file
+                email_metadata = {'id':email_metadata_name,'email':email,
+                            'language_name':language,'language_id':lang_id,
+                            'age': speaker_metadata['age'],'gender': speaker_metadata['gender'],
+                            'accent': speaker_metadata['accent'],
+                            'country':country
+                            }
+                dump_json(email_metadata,EMAIL_SAVE_FILE)
+                # Upload the metadata
+                repo_json_path = os.path.join('emails',f"{email_metadata_name}.json")
+                _ = upload_file(path_or_fileobj = EMAIL_SAVE_FILE,
+                            path_in_repo =repo_json_path,
+                            repo_id='chrisjay/african-digits-recording-sprint-email',
+                            repo_type='dataset',
+                            token=HF_TOKEN
+                        )
+                # Delete the email from local repo
+                if os.path.exists(EMAIL_SAVE_FILE):
+                    os.remove(EMAIL_SAVE_FILE)
+                #-------------------
                 done_recording=True
                 next_number = 0 # the default number
+                next_number_image = f'number/best.gif'
+                output = "You have finished all recording! You can reload to start again."
             output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
             return output_string,next_number_image,number_history,next_number,done_recording,default_record
         # return output_string, previous image and state
         return output_string, number,number_history,current_number,done_recording,default_record
     else:
         # Stop submitting recording (best.gif is displaying)
         output = '🙌 You have finished all recording! Thank You. You can reload to start again (maybe in another language).'
         output_string = "<div class='finished'>"+output+"</div>"
 > Record numbers 0-9 in your African language.
+1. Fill in your email. This is completely optional. We need this to track your progress for the prize.
+2. Choose your African language
+3. Fill in the speaker metadata (age, gender, accent). This is optional but important to build better speech models.
+4. You will see the image of a number __(this is the number you will record)__.
+5. Fill in the word of that number (optional)
+6. Click record and say the number in your African language.
+7. Click ‘Submit’. It will save your record and go to the next number.
+8. Repeat 4-7
+9. Leave a ❤ in the Space, if you found it fun.
 """
 # Interface design begins
 block = gr.Blocks(css=BLOCK_CSS)
 with block:
     gr.Markdown(markdown)
+    email = gr.inputs.Textbox(placeholder='your email',label="Email (if you want join the sprint)",default='')
     with gr.Tabs():
         with gr.TabItem('Record'):
             with gr.Row():
+                language = gr.inputs.Dropdown(choices = sorted([lang_.title() for lang_ in list(DEFAULT_LANGS.keys())]),label="Choose language",default="Choose language")
                 age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
                 gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
+                accent = gr.inputs.Textbox(label="Accent (optional)",default='')
+                country = gr.Dropdown(choices=[''] + sorted(DEFAULT_LIST_OF_COUNTRIES),type='value',default=None,label="Country you are recording from (optional)")
             number = gr.Image('number/0.jpg',image_mode="L")
             text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="How is the number called in your language (optional)")
             save = gr.Button("Submit")
+            save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
         with gr.TabItem('Listen') as listen_tab:
             gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")

app3.py DELETED Viewed

@@ -1,39 +0,0 @@
-import os
-import gradio as gr
-#HF_TOKEN = os.environ.get("HF_TOKEN")
-#print("is none?", HF_TOKEN is None)
-def get_record(language,text,record):
-    # Save text and its corresponding record to flag
-    text =text.strip()
-    #output_string = "<html> <body> <div class='output'>"+f'Record for text {text} successfully saved to dataset! Thank You.'+"</div> </body> </html>"
-    output_string = f'Record for text - {text} - successfully saved to dataset! Thank You.'
-    return output_string
-title = 'African Crowdsource Speech'
-description = 'A platform to contribute to your African language by recording your voice'
-markdown = """# African Crowdsource Speech
-A platform to contribute to your African language by recording your voice
-"""
-# Get a dropdown of all African languages
-# Interface design begins
-#import pdb; pdb.set_trace()
-iface = gr.Interface(fn=get_record,
-            inputs=[gr.inputs.Textbox(placeholder='Choose your language'),
-            gr.inputs.Textbox(placeholder='Write your text'),
-                gr.inputs.Audio(source="microphone",label='Record your voice')
-                    ],
-            outputs = "text",
-            title=title,
-            description=description,
-            theme='huggingface'
-                    )
-iface.launch()

article.py CHANGED Viewed

@@ -14,11 +14,10 @@ This dataset will boost speech technologies (like speech-to-text, text-to-speech
 **About the dataset**
-The data (metadat,text, and audio recording) are uploaded to [a public Hugging Face dataset](https://huggingface.co/datasets/chrisjay/crowd-speech-africa).
-We do not collect your name, address or other sensitive information.
-If for some reason you want to remove your entry, please reach out by email.
 **Contact**

 **About the dataset**
+- The data (metadata, text, and audio recording) are uploaded to [a public Hugging Face dataset](https://huggingface.co/datasets/chrisjay/crowd-speech-africa).
+- We do not collect your name, address or other sensitive information.
+- If for some reason you want to remove your entry, please reach out by email.
+- Your email, if given, is used only to keep track of your progress in order to give the prizes to the top scorers. Emails are temporarily stored in [this private dataset](https://huggingface.co/datasets/chrisjay/african-digits-recording-sprint-email) and deleted immediately after the sprint.
 **Contact**

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 pandas
-scipy

 pandas
+scipy
+pycountry