File size: 7,570 Bytes
45585f3
391decb
 
45585f3
694e002
391decb
694e002
 
 
 
45585f3
391decb
45585f3
391decb
 
694e002
 
 
 
 
b829e6a
694e002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b829e6a
391decb
 
 
b1ea4b4
391decb
 
 
 
694e002
 
391decb
45585f3
694e002
 
 
 
 
 
 
 
b829e6a
 
391decb
 
694e002
 
 
 
391decb
b829e6a
694e002
 
 
 
 
 
 
 
391decb
694e002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391decb
694e002
 
52064f2
 
 
b829e6a
 
694e002
 
ef70edd
45585f3
391decb
 
694e002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1ea4b4
 
694e002
b1ea4b4
 
 
 
 
 
 
 
 
 
 
 
 
893eec1
694e002
 
391decb
694e002
391decb
 
 
694e002
 
45585f3
 
 
fb86baf
 
 
391decb
 
 
52064f2
694e002
 
 
 
 
 
 
 
 
52064f2
 
694e002
 
 
 
391decb
694e002
391decb
 
b5ee8ce
391decb
b1ea4b4
391decb
45585f3
52064f2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import os
import csv
import pandas as pd
import gradio as gr
from utils import *
import scipy.io.wavfile as wavf
from huggingface_hub import Repository, upload_file




# Hugging Face access token taken from the environment (None when the variable is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Dataset repository that receives the contributed recordings.
DATASET_REPO_URL = "https://huggingface.co/datasets/chrisjay/crowd-speech-africa"
#DATA_FILENAME = "data.csv"
#DATA_FILE = os.path.join("data", DATA_FILENAME)

# Directory name used for the dataset repository clone and as the upload prefix.
REPOSITORY_DIR = "data"

# Local scratch directory for recordings; created at import time if it is missing.
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# Get a dropdown of all African languages
#DEFAULT_LANGS = {'Igbo':'ibo','Yoruba':'yor','Hausa':'hau'}

# Choices for the gender dropdown; the first entry is the "nothing selected" placeholder.
GENDER = [
    'Choose Gender',
    'Male',
    'Female',
    'Other',
    'Prefer not to say',
]
# The integers 0 through 20.
NUMBERS = list(range(21))


#------------------Work on Languages--------------------
# Records loaded from clean_languages.json; each one is indexed below by its
# 'full' (language name) and 'id' (language code) keys.
languages = read_json_lines('clean_languages.json')
# NOTE: despite the name, this is a plain copy of `languages`; nothing is
# lower-cased here. The name is kept so existing references keep working.
languages_lower = list(languages)

# Lower-cased full language name -> lower-cased language id.
# Built with a dict comprehension instead of a list comprehension that was
# only run for its `dict.update` side effect; when two records share a name
# the later record still wins.
DEFAULT_LANGS = {lang['full'].lower(): lang['id'].lower() for lang in languages_lower}
#_ = [DEFAULT_LANGS.update({l_other.lower():[l['id'].lower()]}) for l in languages_lower for l_other in l['others'] if l_other.lower()!=l['full'].lower()]

#------------------Work on Languages--------------------

# Clone the dataset repository into REPOSITORY_DIR and bring it up to date.
# The clone directory uses the REPOSITORY_DIR constant (value "data") rather
# than a second hard-coded "data" literal, because display_records() locates
# the clone through REPOSITORY_DIR and the two must never drift apart.
repo = Repository(
    local_dir=REPOSITORY_DIR,
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
repo.git_pull()




def save_record(language,text,record,number,age,gender,accent,number_history):
    """Store one contributed recording and return an HTML status message.

    On valid input the audio is written to ``<LOCAL_DIR>/<id>/audio.wav``, a
    ``metadata.jsonl`` file is written next to it, and both files are uploaded
    to the ``chrisjay/crowd-speech-africa`` dataset under
    ``<REPOSITORY_DIR>/<id>/``.

    Args:
        language: Selected language name; ``None`` or ``'Choose language'``
            means nothing was selected. It is lower-cased and looked up in
            ``DEFAULT_LANGS``.
        text: The number written in the contributor's language; ``None`` is
            treated as an empty string.
        record: Indexable pair whose first item is passed to
            ``scipy.io.wavfile.write`` as the sample rate and whose second
            item is passed as the sample data; ``None`` when nothing was
            recorded.
        number: Value identifying the number being recorded; stored as-is in
            the metadata. ``None`` is rejected.
        age: Free-text age, stored as-is.
        gender: Selected gender; the placeholder ``GENDER[0]`` is stored as ''.
        accent: Free-text accent, stored as-is.
        number_history: Accepted for interface compatibility; currently unused.

    Returns:
        str: An HTML snippet carrying either the success message or the
        first applicable validation error (language, then recording, then
        number).
    """
    def render(message):
        # Wrap a status message in the HTML snippet shown to the user.
        return "<html> <body> <div class='output' style='color:green; font-size:13px'>"+message+"</div> </body> </html>"

    # Validate before touching the disk or the Hub. The order mirrors the
    # precedence of the messages: language first, then recording, then number.
    if language is None or language=='Choose language':
        return render('Language must be specified!')
    if record is None:
        return render("No recording found!")
    if number is None:
        return render("Number must be specified!")

    language = language.lower()
    lang_id = DEFAULT_LANGS.get(language)
    if lang_id is None:
        # Report an unknown language instead of failing with a KeyError.
        return render('Language not supported!')

    # A missing text value must not abort the submission.
    text = (text or '').strip()
    sample_rate, samples = record[0], record[1]

    # Write audio to file
    audio_name = get_unique_name()
    save_file_dir = os.path.join(LOCAL_DIR,audio_name)
    os.makedirs(save_file_dir,exist_ok=True)
    audio_output_filename = os.path.join(save_file_dir,'audio.wav')
    wavf.write(audio_output_filename,sample_rate,samples)

    # Write metadata.jsonl to file
    json_file_path = os.path.join(save_file_dir,'metadata.jsonl')
    metadata = {
        'id': audio_name,
        'file_name': 'audio.wav',
        'language_name': language,
        'language_id': lang_id,
        'number': number,
        'text': text,
        'frequency': sample_rate,
        'age': age,
        # The dropdown placeholder means "not provided".
        'gender': gender if gender!=GENDER[0] else '',
        'accent': accent,
    }
    dump_json(metadata,json_file_path)

    # Upload the audio and its metadata with the hub's upload_file.
    # Repository paths always use '/', so they are not built with
    # os.path.join, whose separator depends on the host platform.
    uploads = (
        (audio_output_filename,'audio.wav'),
        (json_file_path,'metadata.jsonl'),
    )
    for local_path, file_name in uploads:
        upload_file(path_or_fileobj=local_path,
                    path_in_repo=f"{REPOSITORY_DIR}/{audio_name}/{file_name}",
                    repo_id='chrisjay/crowd-speech-africa',
                    repo_type='dataset',
                    token=HF_TOKEN
                    )

    return render('Recording successfully saved!')


def display_records():
    """Build an HTML table listing the recordings found in the dataset clone.

    Pulls the latest state of the cloned dataset repository, then looks at
    every entry of ``<REPOSITORY_DIR>/data``. For each entry that contains a
    ``metadata.jsonl`` file, the first record of that file supplies the
    ``language_name`` and ``text`` columns, and the audio column points at the
    entry's ``audio.wav`` on the Hugging Face Hub.

    Returns:
        str: The table markup; only the header row is present when the data
        directory is missing or holds no recordings.
    """
    # Local import so the block stays self-contained. Metadata is contributed
    # by the public, so every value is escaped before it reaches the page.
    from html import escape

    repo.git_pull()
    repository_data_dir = os.path.join(REPOSITORY_DIR,'data')
    if os.path.isdir(repository_data_dir):
        recording_dirs = [os.path.join(repository_data_dir,entry.name) for entry in os.scandir(repository_data_dir)]
    else:
        recording_dirs = []

    parts = ["""<div>
                <table style="width:100%; text-align:center">
                    <tr>
                        <th>language</th>
                        <th>audio</th>
                        <th>text</th>
                    </tr>"""]
    for recording_dir in recording_dirs:
        metadata_path = os.path.join(recording_dir,'metadata.jsonl')
        if not os.path.isfile(metadata_path):
            # Stray files or incomplete uploads must not break the whole listing.
            continue
        metadata = read_json_lines(metadata_path)[0]

        # Turn the path inside the local clone into the public download URL.
        audio = os.path.join(recording_dir,'audio.wav').replace('data/data/','https://huggingface.co/datasets/chrisjay/crowd-speech-africa/resolve/main/data/')
        lang = escape(str(metadata['language_name']))
        text = escape(str(metadata['text']))
        audio = escape(audio)

        parts.append(f"""<tr>
                    <td>{lang}</td>
                    <td><audio controls><source src="{audio}" type="audio/wav"> </audio></td>
                    <td>{text}</td>
                </tr>""")
    parts.append("</table></div>")
    return "".join(parts)
 
# Directory of number images. The text before the first '.' of every file
# name is parsed as the integer the image stands for (e.g. "0.jpg" -> 0).
NUMBER_DIR = './number'
number_files = [entry.name for entry in os.scandir(NUMBER_DIR)]

# One {'image': <path>, 'number': <int>} record per image file.
# Note: this rebinds the module-level NUMBERS name.
NUMBERS = [
    {
        'image': os.path.join(NUMBER_DIR, file_name),
        'number': int(file_name.partition('.')[0]),
    }
    for file_name in number_files
]



# Banner markup shown at the top of the app: a large centred title followed by
# a one-line description of the platform.
markdown = (
    '<div style="text-align: center">'
    '<p style="font-size: 40px"> Africa Crowdsource Speech </p> <br>\n'
    'This is a platform to contribute to your African language by recording your voice </div>'
)


# Interface design begins
# Two tabs: 'Record' collects a recording plus optional speaker details and
# submits them through save_record; 'Listen' renders the table produced by
# display_records.
block = gr.Blocks()
with block:
    gr.Markdown(markdown)
    with gr.Tabs():

        with gr.TabItem('Record'):
            with gr.Row():
                # Language names are stored lower-cased and shown title-cased.
                language = gr.inputs.Dropdown(choices = sorted(lang_.title() for lang_ in DEFAULT_LANGS),label="Choose language",default="Choose language")
                age = gr.inputs.Textbox(placeholder='e.g. 21',label="Your age (optional)",default='')
                gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
                accent = gr.inputs.Textbox(label="Accent (optional)",default='')

            # NOTE(review): `number` is an image component, so save_record
            # receives this component's value as its `number` argument.
            number = gr.Image('number/0.jpg',image_mode="L")
            #number = gr.inputs.Radio(choices=NUMBERS, type="value", default=None, label="Choose your number")
            text = gr.inputs.Textbox(placeholder='e.g. `one` is `otu` in Igbo or `ọkan` in Yoruba',label="Number in your language")
            record = gr.inputs.Audio(source="microphone",label='Record your voice')

            output_result = gr.outputs.HTML()
            state = gr.inputs.State()

            save = gr.Button("Submit")

            # save_record returns a single HTML string, so exactly one output
            # component is wired up. Listing the image and state components as
            # outputs as well would not match the one value returned.
            save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state],outputs=[output_result])

        with gr.TabItem('Listen') as listen_tab:
            gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
            listen = gr.Button("Listen")
            listen.click(display_records,inputs=[],outputs=gr.outputs.HTML() )


block.launch()