# Hugging Face Space: chrisjay/afro-speech
# Space status when this copy was captured: Build error. File size: 7,570 bytes.
# (The per-line commit-hash column of the web blame view is not part of the program.)

import os
import csv
import pandas as pd
import gradio as gr
from utils import *
import scipy.io.wavfile as wavf
from huggingface_hub import Repository, upload_file




# Hugging Face Hub access token, taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Dataset repository that collects the contributed recordings.
DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"

# Name of the data folder: used as the path prefix when uploading files and
# when listing the recordings for playback.
REPOSITORY_DIR = "data"

# Scratch folder where each submission is written before it is uploaded.
# It is created at import time so later writes can assume it exists.
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# Choices of the gender dropdown; the first entry is the "nothing chosen" placeholder.
GENDER = [
    'Choose Gender',
    'Male',
    'Female',
    'Other',
    'Prefer not to say',
]

# The integers 0..20. This name is rebound further down the module once the
# number images have been scanned.
NUMBERS = list(range(21))


#------------------Work on Languages--------------------
# Language records read from clean_languages.json. Only the 'full' (display
# name) and 'id' fields of each record are used here.
languages = read_json_lines('clean_languages.json')
languages_lower = list(languages)

# Lower-cased full language name -> lower-cased language id.
# Alternate names (each record's 'others' field) are not registered; an
# earlier attempt to map them as well is disabled.
DEFAULT_LANGS = {
    entry['full'].lower(): entry['id'].lower()
    for entry in languages_lower
}

#------------------Work on Languages--------------------

# Local working copy of the dataset repository, refreshed once at start-up.
# The clone location comes from REPOSITORY_DIR (rather than a second "data"
# literal) because display_records() looks for the recordings under that
# same constant; keeping one source of truth prevents the two from drifting.
repo = Repository(
    local_dir=REPOSITORY_DIR,
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
repo.git_pull()




def _status_html(message):
    """Wrap a status message in the HTML snippet displayed under the form."""
    return "<html> <body> <div class='output' style='color:green; font-size:13px'>"+message+"</div> </body> </html>"


def save_record(language,text,record,number,age,gender,accent,number_history):
    """Validate one submission, store it locally and upload it to the dataset repo.

    Args:
        language: Language picked in the dropdown (any casing). ``None`` or
            ``'Choose language'`` means nothing was picked.
        text: The number written out in the chosen language; surrounding
            whitespace is removed. A missing value is stored as ``''``.
        record: Pair whose first item is the sample rate and whose second item
            is the sample data (the order ``scipy.io.wavfile.write`` expects),
            or ``None`` when nothing was recorded.
        number: Value of the number component, or ``None``.
        age: Optional speaker age; stored as ``''`` when empty.
        gender: Optional speaker gender; the placeholder ``GENDER[0]`` and
            empty values are stored as ``''``.
        accent: Optional speaker accent; stored as ``''`` when empty.
        number_history: Session state passed in by the UI. Accepted to keep
            the signature stable; not used by this function.

    Returns:
        str: HTML snippet carrying either the success message or the first
        applicable validation message.
    """
    # Validate before anything is written or uploaded. The order keeps the
    # precedence of the messages: language, then recording, then number.
    if language is None or language == 'Choose language':
        return _status_html('Language must be specified!')
    if record is None:
        return _status_html("No recording found!")
    if number is None:
        return _status_html("Number must be specified!")

    language = language.lower()
    lang_id = DEFAULT_LANGS.get(language)
    if lang_id is None:
        # Not one of the known languages: report it instead of raising KeyError.
        return _status_html('Language is not supported!')

    # Guard against a missing text value before stripping it.
    text = (text or '').strip()

    speaker_metadata = {
        'gender': gender if gender and gender != GENDER[0] else '',
        'age': age or '',
        'accent': accent or '',
    }

    # Write audio to file (one freshly named folder per submission).
    audio_name = get_unique_name()
    save_file_dir = os.path.join(LOCAL_DIR, audio_name)
    os.makedirs(save_file_dir, exist_ok=True)
    audio_output_filename = os.path.join(save_file_dir, 'audio.wav')
    wavf.write(audio_output_filename, record[0], record[1])

    # Write metadata.jsonl next to the audio.
    # NOTE(review): `number` is stored exactly as received. In this file the
    # UI wires it to an image component, so confirm dump_json can serialize it.
    json_file_path = os.path.join(save_file_dir, 'metadata.jsonl')
    metadata = {
        'id': audio_name,
        'file_name': 'audio.wav',
        'language_name': language,
        'language_id': lang_id,
        'number': number,
        'text': text,
        'frequency': record[0],
        'age': speaker_metadata['age'],
        'gender': speaker_metadata['gender'],
        'accent': speaker_metadata['accent'],
    }
    dump_json(metadata, json_file_path)

    # Upload the audio first, then its metadata, straight to the hub.
    # Paths inside the repository always use '/', whatever the local OS is.
    uploads = (
        (audio_output_filename, 'audio.wav'),
        (json_file_path, 'metadata.jsonl'),
    )
    for local_path, file_name in uploads:
        upload_file(
            path_or_fileobj=local_path,
            path_in_repo='/'.join((REPOSITORY_DIR, audio_name, file_name)),
            repo_id='chrisjay/crowd-speech-africa',
            repo_type='dataset',
            token=HF_TOKEN,
        )

    return _status_html('Recording successfully saved!')


def display_records():
    """Build an HTML table of the recordings found in the local dataset clone.

    Pulls the latest state of the dataset repository, then lists every
    recording folder below ``REPOSITORY_DIR/data`` that has a non-empty
    ``metadata.jsonl``. Folders without usable metadata are skipped: audio and
    metadata are uploaded as two separate files, so a folder can be incomplete.

    Returns:
        str: HTML with one table row per recording (language, audio player
        pointing at the file on the hub, text). Only the header row is
        present when there are no recordings.
    """
    # Imported locally so the block does not rely on a new top-level import.
    from html import escape

    repo.git_pull()
    recordings_dir = os.path.join(REPOSITORY_DIR, 'data')

    rows = []
    if os.path.isdir(recordings_dir):
        with os.scandir(recordings_dir) as found:
            # scandir yields entries in arbitrary order; sort for a stable table.
            entries = sorted(found, key=lambda entry: entry.name)

        for entry in entries:
            metadata_path = os.path.join(entry.path, 'metadata.jsonl')
            if not os.path.isfile(metadata_path):
                continue
            records = read_json_lines(metadata_path)
            if not records:
                continue
            meta = records[0]

            # Build the hub URL from the folder name instead of rewriting a
            # local path, which would depend on the OS path separator.
            audio = f"{DATASET_REPO_URL}/resolve/main/data/{entry.name}/audio.wav"

            # The metadata is contributed by users, so every value is escaped
            # before it is placed into the markup.
            lang = escape(str(meta.get('language_name', '')))
            text = escape(str(meta.get('text', '')))
            rows.append(f"""<tr>
                    <td>{lang}</td>
                    <td><audio controls><source src="{escape(audio)}" type="audio/wav"> </audio></td>
                    <td>{text}</td>
                </tr>""")

    table_head = """<div>
                <table style="width:100%; text-align:center">
                    <tr>
                        <th>language</th>
                        <th>audio</th>
                        <th>text</th>
                    </tr>"""
    return table_head + "".join(rows) + "</table></div>"
 
# Folder holding one image per number; the part of the file name before the
# first '.' is the number itself (e.g. 'number/0.jpg').
NUMBER_DIR = './number'

# Keep only regular files whose stem is a whole number, so stray files such as
# '.DS_Store' or a README no longer crash the int() conversion below, and sort
# numerically because os.scandir returns entries in arbitrary order.
number_files = sorted(
    (
        f.name
        for f in os.scandir(NUMBER_DIR)
        if f.is_file() and f.name.split('.')[0].isdecimal()
    ),
    key=lambda name: int(name.split('.')[0]),
)

# One record per image: path of the image and the number it shows.
NUMBERS = [{'image':os.path.join(NUMBER_DIR,f),'number':int(f.split('.')[0])} for f in number_files]



# Heading and one-line description rendered at the top of the page.
markdown = (
    '<div style="text-align: center">'
    '<p style="font-size: 40px"> Africa Crowdsource Speech </p> <br>\n'
    'This is a platform to contribute to your African language by recording your voice </div>'
)


# Interface design begins
# Two tabs: 'Record' collects a submission and hands it to save_record,
# 'Listen' renders the table produced by display_records.
block = gr.Blocks()
with block:
    gr.Markdown(markdown)
    with gr.Tabs():

        with gr.TabItem('Record'):
            with gr.Row():
                language = gr.inputs.Dropdown(choices = sorted(lang_.title() for lang_ in DEFAULT_LANGS),label="Choose language",default="Choose language")
                age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
                gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
                accent = gr.inputs.Textbox(label="Accent (optional)",default='')

            # Image of the number the contributor is asked to say.
            number = gr.Image('number/0.jpg',image_mode="L")
            text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="Number in your language")
            record = gr.inputs.Audio(source="microphone",label='Record your voice')

            output_result = gr.outputs.HTML()
            state = gr.inputs.State()

            save = gr.Button("Submit")

            # save_record returns exactly one value (the status HTML), so
            # exactly one output component is wired. Listing `number` and
            # `state` as outputs as well does not match the single return value.
            save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state],outputs=[output_result])

        with gr.TabItem('Listen') as listen_tab:
            gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
            listen = gr.Button("Listen")
            listen.click(display_records,inputs=[],outputs=gr.outputs.HTML() )


block.launch()