Spaces:
Build error
Build error
File size: 4,636 Bytes
45585f3 391decb 45585f3 391decb 45585f3 391decb 45585f3 391decb 45585f3 b829e6a 391decb b1ea4b4 391decb 45585f3 391decb b829e6a 391decb b829e6a 63ddac0 391decb b829e6a ef70edd 45585f3 391decb b1ea4b4 391decb 45585f3 391decb 45585f3 fb86baf 391decb b1ea4b4 391decb fb86baf 45585f3 fb86baf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import os
import csv
import pandas as pd
import gradio as gr
import huggingface_hub
import scipy.io.wavfile as wavf
from huggingface_hub import Repository
from IPython.display import Audio
from IPython.core.display import display
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset repository that stores the crowd-sourced recordings.
DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"

# Locations inside the local clone of the dataset repository.
DATA_FILENAME = "data.csv"
AUDIO_PATH = os.path.join("data", "wav")
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Languages offered in the dropdown, mapped to the code written to the CSV.
DEFAULT_LANGS = {
    "Igbo": "ibo",
    "Yoruba": "yor",
    "Hausa": "hau",
}

# Clone the dataset repository into ./data and bring the clone up to date.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
repo.git_pull()

# Make sure the folder that receives the wav files exists.
os.makedirs(AUDIO_PATH, exist_ok=True)
def push_record():
    """Push the local dataset clone to the Hub and return an HTML status message.

    Returns:
        str: HTML snippet confirming that the recordings were pushed.
    """
    # The returned commit URL is not needed; only the push itself matters.
    repo.push_to_hub()

    message = 'Recordings successfully pushed!'
    opening = "<html> <body> <div class='output' style='color:green; font-size:13px'>"
    closing = "</div> </body> </html>"
    return opening + message + closing
def _status_html(message):
    """Wrap *message* in the HTML snippet shown in the result panel."""
    return (
        "<html> <body> <div class='output' style='color:green; font-size:13px'>"
        + message
        + "</div> </body> </html>"
    )


def save_record(language, text, record):
    """Save one recording and its transcript into the local dataset clone.

    The audio is written as a wav file under AUDIO_PATH and a row
    (language code, wav path, transcript) is appended to DATA_FILE.

    Args:
        language: Name chosen in the language dropdown (a key of
            DEFAULT_LANGS), or None / 'Choose language' when nothing
            was selected.
        text: The transcript typed by the user; None is treated as empty.
        record: Value of the microphone widget. ``record[0]`` and
            ``record[1]`` are handed to ``scipy.io.wavfile.write`` as the
            sample rate and the sample data. None when nothing was recorded.

    Returns:
        str: HTML snippet describing the outcome.
    """
    if language is None or language == 'Choose language':
        return _status_html('Language must be specified!')
    if record is None:
        # Nothing was recorded; indexing None below would raise TypeError.
        return _status_html('Recording must be provided!')

    lang_id = DEFAULT_LANGS[language]
    text = (text or '').strip()

    # Start from the file count, but never overwrite an existing recording
    # (the count can point at a taken name when files were removed or pulled).
    index = len(os.listdir(AUDIO_PATH))
    audio_output_filename = os.path.join(AUDIO_PATH, f'{index}.wav')
    while os.path.exists(audio_output_filename):
        index += 1
        audio_output_filename = os.path.join(AUDIO_PATH, f'{index}.wav')

    # Write audio to file
    wavf.write(audio_output_filename, record[0], record[1])

    # The header is required by the listen tab, which reads columns by name.
    needs_header = (
        not os.path.exists(DATA_FILE) or os.path.getsize(DATA_FILE) == 0
    )
    # newline="" is what the csv module expects; utf-8 keeps transcripts with
    # diacritics independent of the machine's locale.
    with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["language", "audio", "text"])
        if needs_header:
            writer.writeheader()
        writer.writerow(
            {"language": lang_id, "audio": audio_output_filename, "text": text}
        )

    return _status_html(
        'Recording successfully saved! Click `Push` when you are done to send your recordings to the repo.'
    )
def display_records():
    """Render every saved recording as an HTML table.

    Reads DATA_FILE and builds a table with one row per recording:
    language code, an audio player pointing at the wav path, and the
    transcript. When DATA_FILE is missing or empty the table contains
    only the header row.

    Returns:
        str: HTML markup for the table.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from html import escape

    try:
        # dtype=str / keep_default_na=False keep the stored values verbatim:
        # no numeric coercion and no "nan" for an empty transcript.
        df = pd.read_csv(DATA_FILE, dtype=str, keep_default_na=False)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing has been saved yet.
        rows = []
    else:
        rows = zip(df['language'].values, df['audio'].values, df['text'].values)

    # NOTE(review): `border=1` is not a valid CSS declaration; it is kept
    # unchanged so the rendering stays as it was.
    parts = [
        '<div>\n'
        '<table style="width:100%;border=1;text-align: center">\n'
        '<tr>\n'
        '<th>language</th>\n'
        '<th>audio</th>\n'
        '<th>text</th>\n'
        '</tr>'
    ]
    for lang, audio, text in rows:
        # Transcripts are user-supplied: escape everything placed into markup.
        parts.append(
            '<tr>\n'
            f'<td>{escape(lang)}</td>\n'
            f'<td><audio controls><source src="{escape(audio)}" type="audio/wav"> </audio></td>\n'
            f'<td>{escape(text)}</td>\n'
            '</tr>'
        )
    parts.append("</table></div>")
    return "".join(parts)
title = 'African Crowdsource Speech'
description = 'A platform to contribute to your African language by recording your voice'
markdown = (
    "# Africa Crowdsource Speech\n"
    "### a platform to contribute to your African language by recording your voice"
)

# Interface design begins
block = gr.Blocks()
with block:
    gr.Markdown(markdown)
    with gr.Tabs():
        # "Record" tab: choose a language, type the sentence, record it,
        # then save locally and push to the dataset repository.
        with gr.TabItem('Record'):
            language = gr.inputs.Dropdown(
                choices=list(DEFAULT_LANGS),
                label="language",
                default="Choose language",
            )
            text = gr.inputs.Textbox(
                placeholder='Write your text',
                label="text to record",
            )
            record = gr.inputs.Audio(
                source="microphone",
                label='Record your voice',
            )
            output_result = gr.outputs.HTML()
            save = gr.Button("Save")
            push = gr.Button('Push')
            save.click(
                save_record,
                inputs=[language, text, record],
                outputs=output_result,
            )
            push.click(push_record, inputs=[], outputs=output_result)

        # "Listen" tab: show the table of saved recordings on demand.
        with gr.TabItem('Listen') as listen_tab:
            gr.Markdown("Listen to the recordings contributed. You can find them [here](https://huggingface.co/datasets/chrisjay/crowd-speech-africa).")
            listen = gr.Button("Listen")
            records_view = gr.outputs.HTML()
            listen.click(display_records, inputs=[], outputs=records_view)

block.launch(enable_queue=True)