Spaces:

meyabase
/

oshiwambo-speech-greetings

Runtime error

App Files Community

jpandeinge committed on Apr 16, 2023

Commit

2598cef

1 Parent(s): 2f4793a

initial commit

Browse files

Files changed (12) hide show

app.css +38 -0
app.py +248 -0
greetings/.DS_Store +0 -0
greetings/best.gif +0 -0
greetings/oshikwanyama/0.png +0 -0
greetings/oshikwanyama/1.png +0 -0
greetings/oshikwanyama/2.png +0 -0
greetings/oshindonga/0.png +0 -0
greetings/oshindonga/1.png +0 -0
greetings/oshindonga/2.png +0 -0
requirements.txt +11 -0
utils.py +43 -0

app.css ADDED Viewed

	@@ -0,0 +1,38 @@

+/* Heading of the collected-samples summary. */
+.infoPoint h1 {
+    font-size: 30px;
+    /* `bold` is a font-weight value; it is not a valid text-decoration value. */
+    font-weight: bold;
+}
+/* Links. */
+a {
+    text-decoration: underline;
+    color: #1f3b54;
+}
+/* Status message shown once every greeting has been recorded. */
+.finished {
+    color: rgb(9, 102, 169);
+    font-size: 13px;
+}
+/* Fixed-width table (min-width and max-width are both 400px). */
+table {
+    margin: 25px 0;
+    font-size: 0.9em;
+    font-family: sans-serif;
+    min-width: 400px;
+    max-width: 400px;
+    box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
+}
+table th,
+table td {
+    padding: 12px 15px;
+}
+tr {
+    text-align: left;
+}
+thead tr {
+    text-align: left;
+}

app.py ADDED Viewed

	@@ -0,0 +1,248 @@

+import os
+import csv
+import random
+import pandas as pd
+import numpy as np
+import gradio as gr
+from collections import Counter
+from utils import *
+import matplotlib.pyplot as plt
+import scipy.io.wavfile as wavf
+from huggingface_hub import Repository, upload_file
+HF_TOKEN = os.environ.get("HF_TOKEN")
+GREETINGS_DIR = './greetings'
+greeting_files = [f.name for f in os.scandir(GREETINGS_DIR)]
+DATASET_REPO_URL = "https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings"
+REPOSITORY_DIR = "data"
+LOCAL_DIR = 'data_local'
+os.makedirs(LOCAL_DIR,exist_ok=True)
+GENDER = ['Choose Gender','Male','Female','Other','Prefer not to say']
+#------------------Work on Languages--------------------
+languages = ["oshindonga", "oshikwanyama"]
+language_id = ["ng","kj"]
+#------------------Work on Languages--------------------
+repo = Repository(
+    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
+)
+repo.git_pull()
+with open('app.css','r') as f:
+    BLOCK_CSS = f.read()
+def save_record(language,record,greeting,gender,accent,greeting_history,current_greeting,done_recording):
+    # set default
+    greeting_history = greeting_history if greeting_history is not None else [0]
+    current_greeting = current_greeting if current_greeting is not None else 0 # 0 is the default greeting
+    done_recording = done_recording if done_recording is not None else False
+    #----
+    # Save text and its corresponding record to flag
+    speaker_metadata={}
+    speaker_metadata['gender'] = gender if gender!=GENDER[0] else ''
+    speaker_metadata['accent'] = accent if accent!='' else ''
+    default_record = None
+    if not done_recording:
+        if language!=None and language!='Choose language' and record is not None and greeting is not None:  #
+            language = language.lower()
+            lang_id = language_id[languages.index(language)]
+            # Write audio to file
+            audio_name = get_unique_name()
+            SAVE_FILE_DIR = os.path.join(LOCAL_DIR,audio_name)
+            os.makedirs(SAVE_FILE_DIR,exist_ok=True)
+            audio_output_filename = os.path.join(SAVE_FILE_DIR,'audio.wav')
+            wavf.write(audio_output_filename,record[0],record[1])
+            # Write metadata.json to file
+            json_file_path = os.path.join(SAVE_FILE_DIR,'metadata.jsonl')
+            metadata= {
+                        'id':audio_name,
+                        'file_name':'audio.wav',
+                        'language_name':language,
+                        'language_id':lang_id,
+                        'greeting':current_greeting,
+                        'frequency':record[0],
+                        'gender': speaker_metadata['gender'],
+                        'accent': speaker_metadata['accent'],
+                    }
+            dump_json(metadata,json_file_path)
+            # Upload the audio
+            repo_audio_path = os.path.join(REPOSITORY_DIR,os.path.join(audio_name,'audio.wav'))
+            _ = upload_file(path_or_fileobj = audio_output_filename,
+                        path_in_repo =repo_audio_path,
+                        repo_id='meyabase/crowd-oshiwambo-speech-greetings',
+                        repo_type='dataset',
+                        token=HF_TOKEN
+                    )
+            # Upload the metadata
+            repo_json_path = os.path.join(REPOSITORY_DIR,os.path.join(audio_name,'metadata.jsonl'))
+            _ = upload_file(path_or_fileobj = json_file_path,
+                        path_in_repo =repo_json_path,
+                        repo_id='meyabase/crowd-oshiwambo-speech-greetings',
+                        repo_type='dataset',
+                        token=HF_TOKEN
+                    )
+            output = f'Recording successfully saved! On to the next one...'
+            # Choose the next greeting
+            greeting_history.append(current_greeting)
+            # check the language selected and choose the next greeting based on the images available
+            if language=='oshindonga':
+                greeting_choices = [greet for greet in [i for i in range(3)] if greet not in greeting_history]
+                if greeting_choices!=[]:
+                    next_greeting = random.choice(greeting_choices)
+                    next_greeting_image = f'greetings/{language}/{next_greeting}.png'
+                else:
+                    done_recording=True
+                    next_greeting = 0
+                    next_greeting_image = 'greetings/best.gif'
+                    output = "You have finished all recording! You can reload to start again."
+            elif language=='oshikwanyama':
+                greeting_choices = [greet for greet in [i for i in range(3)] if greet not in greeting_history]
+                if greeting_choices!=[]:
+                    next_greeting = random.choice(greeting_choices)
+                    next_greeting_image = f'greetings/{language}/{next_greeting}.png'
+                else:
+                    done_recording=True
+                    next_greeting = 0
+                    next_greeting_image = 'greetings/best.gif'
+                    output = "You have finished all recording! You can reload to start again."
+            output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
+            return output_string,next_greeting_image,greeting_history,next_greeting,done_recording,default_record
+        if greeting is None:
+            output = "greeting must be specified!"
+        if record is None:
+            output="No recording found!"
+        if language is None or language=='Choose language':
+            output = 'Language must be specified!'
+        output_string = "<html> <body> <div class='output' style='color:green; font-size:13px'>"+output+"</div> </body> </html>"
+        # return output_string, previous image and state
+        return output_string, greeting,greeting_history,current_greeting,done_recording,default_record
+    else:
+        # Stop submitting recording (best.gif is displaying)
+        output = '🙌 You have finished all recording! Thank You. You can reload to start again.'
+        output_string = "<div class='finished'>"+output+"</div>"
+        next_greeting = 0 # the default greeting
+        next_greeting_image = 'greetings/best.gif'
+        return output_string,next_greeting_image,greeting_history,next_greeting,done_recording,default_record
+def get_metadata_json(path):
+    try:
+        return read_json_lines(path)[0]
+    except Exception:
+        return []
+def get_metadata_of_dataset():
+    repo.git_pull()
+    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
+    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR,f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []
+    audio_repo = [os.path.join(f,'audio.wav') for f in repo_recordings]
+    audio_repo = [a.replace('data/data/','https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings/resolve/main/data/') for a in audio_repo]
+    metadata_all = [get_metadata_json(os.path.join(f,'metadata.jsonl')) for f in repo_recordings]
+    metadata_all = [m for m in metadata_all if m!=[]]
+    return metadata_all
+def display_records():
+    repo.git_pull()
+    REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
+    repo_recordings = [os.path.join(REPOSITORY_DATA_DIR,f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []
+    audio_repo = [os.path.join(f,'audio.wav') for f in repo_recordings]
+    audio_repo = [a.replace('data/data/','https://huggingface.co/datasets/meyabase/crowd-oshiwambo-speech-greetings/resolve/main/data/') for a in audio_repo]
+    metadata_repo = [read_json_lines(os.path.join(f,'metadata.jsonl'))[0] for f in repo_recordings]
+    audios_all =  audio_repo
+    metadata_all = metadata_repo
+    langs=[m['language_name'] for m in metadata_all]
+    audios = [a for a in audios_all]
+    texts = [m['text'] for m in metadata_all]
+    greetings = [m['greeting'] for m in metadata_all]
+    html = f"""<div class="infoPoint">
+                <h1> Hooray! We have collected {len(metadata_all)} samples!</h1>
+                <table style="width:100%; text-align:center">
+                    <tr>
+                        <th>language</th>
+                        <th>audio</th>
+                        <th>greeting</th>
+                        <th>text</th>
+                    </tr>"""
+    for lang, audio, text,greet_ in zip(langs,audios,texts,greetings):
+        html+= f"""<tr>
+                    <td>{lang}</td>
+                    <td><audio controls><source src="{audio}" type="audio/wav"> </audio></td>
+                    <td>{greet_}</td>
+                    <td>{text}</td>
+                </tr>"""
+    html+="</table></div>"
+    return html
+markdown = """<div style="text-align: center"><p style="font-size: 40px"> 🔊 <b>Oshiwambo Speech Greetings</b></p> <br>
+This is a platform to contribute to your Oshiwambo greeting for the speech recognition task. <br> </div>"""
+record_markdown = """
+<br> Record greetings in your language and help us build a dataset for speech recognition in Oshiwambo. <br>
+"""
+# # Interface design begins
+block = gr.Blocks(css=BLOCK_CSS)
+with block:
+    gr.Markdown(markdown)
+    with gr.Tabs():
+        with gr.TabItem('Record'):
+            gr.Markdown(record_markdown)
+            with gr.Row():
+                language = gr.inputs.Dropdown(choices = sorted([lang_.title() for lang_ in list(languages)]), label="Choose language", default=languages[0].title())
+                gender = gr.inputs.Dropdown(choices=GENDER, type="value", default=None, label="Gender (optional)")
+                accent = gr.inputs.Textbox(label="Accent (optional)", default='', placeholder="e.g. oshikwanyama, oshindonga, oshimbadja, oshingadjera, etc.")
+            # define a default greeting first for each language
+            greeting = gr.Image(f'greetings/{languages[0].lower()}/0.png', image_mode="L")
+            greeting_history = gr.Variable() # stores the history of greetings
+            record = gr.Audio(source="microphone", label='Record your voice')
+            output_result = gr.outputs.HTML()
+            state = gr.Variable()
+            current_greeting = gr.Variable()
+            done_recording = gr.Variable() # Signifies when to stop submitting records even if `submit`` is clicked
+            save = gr.Button("Submit")
+            save.click(save_record, inputs=[language,record,greeting,gender,accent,state,current_greeting,done_recording],outputs=[output_result,greeting,state,current_greeting,done_recording,record])
+block.launch()

greetings/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

greetings/best.gif ADDED Viewed

greetings/oshikwanyama/0.png ADDED Viewed

greetings/oshikwanyama/1.png ADDED Viewed

greetings/oshikwanyama/2.png ADDED Viewed

greetings/oshindonga/0.png ADDED Viewed

greetings/oshindonga/1.png ADDED Viewed

greetings/oshindonga/2.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+pandas
+scipy
+pycountry
+numpy
+matplotlib
+datasets
+transformers
+librosa
+torch
+huggingface-hub>=0.9.1
+torchaudio

utils.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import json
+import hashlib
+import random
+import string
+def get_unique_name():
+    return ''.join([random.choice(string.ascii_letters
+            + string.digits) for n in range(32)])
+def read_json_lines(file):
+    with open(file,'r',encoding="utf8") as f:
+        lines = f.readlines()
+        data=[]
+        for l in lines:
+            data.append(json.loads(l))
+        return data
+def json_dump(thing):
+    return json.dumps(thing,
+                        ensure_ascii=False,
+                        sort_keys=True,
+                        indent=None,
+                        separators=(',', ':'))
+def get_hash(thing): # stable-hashing
+    return str(hashlib.md5(json_dump(thing).encode('utf-8')).hexdigest())
+def dump_json(thing,file):
+    with open(file,'w+',encoding="utf8") as f:
+        json.dump(thing,f)
+def read_json_lines(file):
+    with open(file,'r',encoding="utf8") as f:
+        lines = f.readlines()
+        data=[]
+        for l in lines:
+            data.append(json.loads(l))
+        return data