|
import gradio as gr |
|
import subprocess,os |
|
from datasets import load_dataset, Audio |
|
import datas,ctcalign,graph |
|
from numpy import random |
|
|
|
|
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def setup():
    """Download and build Google's REAPER pitch tracker under ``./REAPER``.

    Fetches the REAPER master archive from GitHub with ``wget``, unpacks it,
    renames it to ``./REAPER`` and runs ``cmake ..`` followed by ``make`` in
    ``./REAPER/build``. Output of each step is printed for the app log.

    The download is skipped when ``./REAPER`` already exists, so a repeated
    call does not move a second copy of the sources inside the first one.
    Build commands run with ``cwd=`` instead of ``os.chdir`` so the working
    directory of the process is never left changed when a step fails.

    Raises:
        FileNotFoundError: if the REAPER sources are missing after the
            download/unpack step, or if one of the external tools
            (wget, unzip, cmake, make, ...) is not installed.
    """
    repo_dir = "REAPER"
    build_dir = os.path.join(repo_dir, "build")

    r0 = subprocess.run(["pwd"], capture_output=True, text=True)
    print('PWD::', r0.stdout)

    if not os.path.isdir(repo_dir):
        r1 = subprocess.run(
            ["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"],
            capture_output=True,
            text=True,
        )
        print(r1.stdout)
        if r1.returncode != 0:
            # stderr is captured above, so surface it explicitly on failure.
            print(r1.stderr)
        subprocess.run(["unzip", "./master.zip"])
        subprocess.run(["mv", "REAPER-master", repo_dir])
        subprocess.run(["rm", "./master.zip"])

        if not os.path.isdir(repo_dir):
            raise FileNotFoundError(
                "REAPER sources not found after download/unpack; "
                "see the wget/unzip output above"
            )

    os.makedirs(build_dir, exist_ok=True)

    # Same two build steps as before, each executed inside REAPER/build.
    for command in (["cmake", ".."], ["make"]):
        result = subprocess.run(command, cwd=build_dir, capture_output=True, text=True)
        print(result.stdout)
        if result.returncode != 0:
            print(result.stderr)

    r9 = subprocess.run(["ls", "-la"], capture_output=True, text=True)
    print('LS::', r9.stdout)
|
|
|
|
|
|
|
# Fetch and build REAPER when this module is loaded, before the UI below is
# constructed.
setup()
|
|
|
def load_lang(langname):
    """Return a preview table and the aligner for the selected language.

    Args:
        langname: "Icelandic" or "Faroese" (the dropdown choices).

    Returns:
        A ``(preview, aligner)`` tuple. ``preview`` is the first 15 rows of
        the corpus converted with ``.data.to_pandas()``, with the 'audio',
        'speaker_id' and 'duration' columns dropped; ``aligner`` is the
        matching aligner object from ``datas``. For any other ``langname``
        (including ``None``) the result is ``(None, None)``.
    """
    if langname == "Icelandic":
        corpus = datas.ds_i
        lang_aligner = datas.a_i
    elif langname == "Faroese":
        corpus = datas.ds_f
        lang_aligner = datas.a_f
    else:
        # No (or an unknown) language selected: nothing to show. Without this
        # branch the names below would be unbound and the callback would crash.
        return (None, None)

    table = corpus.data.to_pandas()
    table = table.drop(columns=['audio', 'speaker_id', 'duration'])
    return (table[:15], lang_aligner)
|
|
|
|
|
def f1(langname, lang_aligner):
    """Pick a random sentence of the selected corpus and graph its prosody.

    Args:
        langname: "Icelandic" or "Faroese" (the dropdown choices).
        lang_aligner: aligner object for that language, passed through
            unchanged to ``graph.align_and_graph``.

    Returns:
        A ``(figure, sound_path)`` tuple: the result of
        ``graph.align_and_graph(sound_path, transcript, lang_aligner)`` and
        the path of the chosen audio file. ``(None, None)`` when no known
        language is selected or the corpus is empty.
    """
    if langname == "Icelandic":
        ds = datas.ds_i
    elif langname == "Faroese":
        ds = datas.ds_f
    else:
        # Button pressed before a language was chosen (or unknown value).
        return (None, None)

    maxdat = len(ds)
    if maxdat == 0:
        return (None, None)

    # numpy's randint(n) draws from [0, n), so pass the full length: using
    # maxdat-1 would never pick the last row and fails for a 1-row corpus.
    ds = ds.select([random.randint(maxdat)])

    sound_path = ds['audio'][0]['path']
    transcript = ds['normalized_text'][0]

    return (graph.align_and_graph(sound_path, transcript, lang_aligner), sound_path)
|
|
|
|
|
# Top-level Gradio app. Components appear in the page in the order they are
# created inside the context managers below.
bl = gr.Blocks()

with bl:

    # Language selector; changing it triggers load_lang (wired further down).
    lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language")

    # Holds the aligner returned by load_lang so f1 can receive it as input.
    align_func = gr.State()

    with gr.Row():

        # Read-only preview of corpus rows, filled by load_lang.
        # NOTE(review): max_rows / overflow_row_behaviour are not accepted by
        # every Gradio release - confirm against the pinned gradio version.
        databrowser = gr.DataFrame(wrap=True, max_rows=50, interactive=False, overflow_row_behaviour='paginate')

    with gr.Row():
        btn1 = gr.Button(value="The random prosody button")
        # NOTE(review): component .style() was removed in later Gradio
        # releases - confirm against the pinned gradio version.
        btn1.style(full_width=False, size="sm")
        audio1 = gr.Audio(interactive=False)

    pl1 = gr.Plot()

    # f1 returns (figure, sound_path), shown in the plot and audio player.
    btn1.click(f1, [lloadr,align_func], [pl1,audio1])

    # load_lang returns (preview table, aligner) for the chosen language.
    lloadr.change(load_lang,lloadr,[databrowser,align_func])

    gr.Markdown(
    """
    # ABOUT
    This is a work-in-progress demo.

    Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

    After you select a language, a few example sentences from the corpus are displayed.

    Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.

    [ABOUT REAPER PITCH TRACKING - TODO]

    [ABOUT RMSE INTENSITY - TODO]

    [ABOUT CTC ALIGNMENT - TODO]

    caitlinr@ru.is / https://github.com/catiR/
    """
    )


# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    bl.launch()