"""Gradio demo: time-aligned prosody plots for random Icelandic/Faroese utterances.

On import this script downloads and builds the REAPER pitch tracker (see
``setup``), then defines a small Gradio Blocks UI. Selecting a language shows
a few corpus rows; the button plots prosody information for one random
utterance of the selected language.
"""

import gradio as gr
import subprocess
import os
from datasets import load_dataset, Audio
import datas
import ctcalign
import graph
from numpy import random
import matplotlib
matplotlib.use('Agg')  # must be selected before pyplot is imported
import matplotlib.pyplot as plt


REAPER_ZIP_URL = "https://github.com/google/REAPER/archive/refs/heads/master.zip"
REAPER_DIR = "REAPER"
REAPER_BUILD_DIR = os.path.join(REAPER_DIR, "build")

ABOUT_MD = """
# ABOUT

This is a work-in-progress demo. Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

After you select a language, a few example sentences from the corpus are displayed.
Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.

[ABOUT REAPER PITCH TRACKING - TODO]

[ABOUT RMSE INTENSITY - TODO]

[ABOUT CTC ALIGNMENT - TODO]

caitlinr@ru.is / https://github.com/catiR/
"""


def _run(cmd, cwd=None, label=None):
    """Run *cmd*, echo its stdout, and report (not raise) a non-zero exit.

    Returns True when the command exited with status 0.
    """
    result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    if label is None:
        print(result.stdout)
    else:
        print(label, result.stdout)
    if result.returncode != 0:
        # Previously stderr was captured and discarded, hiding build errors.
        print('FAILED::', ' '.join(cmd), result.stderr)
    return result.returncode == 0


def setup():
    """Download and build the REAPER pitch tracker into ``./REAPER/build``.

    The working directory of the process is left untouched: build commands
    are run with ``cwd=`` instead of ``os.chdir``. Steps that have already
    been completed by an earlier start are skipped, so running this twice
    does not re-download or nest ``REAPER-master`` inside ``REAPER``.
    Failures are printed and stop the remaining steps; nothing is raised
    for a failing command.
    """
    print('PWD::', os.getcwd())

    # NOTE(review): assumes the build produces an executable named "reaper"
    # in the build directory - confirm against graph.py. If the name differs
    # this check never matches and cmake/make simply run again.
    if os.path.isfile(os.path.join(REAPER_BUILD_DIR, "reaper")):
        print('REAPER already built, skipping setup')
        return

    if not os.path.isdir(REAPER_DIR):
        fetch_steps = (
            ["wget", REAPER_ZIP_URL],
            ["unzip", "./master.zip"],
            ["mv", "REAPER-master", REAPER_DIR],
            ["rm", "./master.zip"],
        )
        for cmd in fetch_steps:
            if not _run(cmd):
                print('REAPER setup aborted while fetching sources')
                return

    os.makedirs(REAPER_BUILD_DIR, exist_ok=True)
    for cmd in (["cmake", ".."], ["make"]):
        if not _run(cmd, cwd=REAPER_BUILD_DIR):
            print('REAPER setup aborted while building')
            return

    _run(["ls", "-la"], label='LS::')


setup()


def _language_resources(langname):
    """Return ``(dataset, aligner)`` from ``datas`` for *langname*.

    Raises ``gr.Error`` for any value other than "Icelandic" or "Faroese"
    (including ``None``, which is what the dropdown holds before a choice).
    """
    if langname == "Icelandic":
        return datas.ds_i, datas.a_i
    if langname == "Faroese":
        return datas.ds_f, datas.a_f
    raise gr.Error("Please select a language first.")


def load_lang(langname):
    """Return a preview table and the aligner for the selected language.

    Returns:
        A tuple ``(preview, lang_aligner)``: the first 15 rows of the corpus
        as a pandas DataFrame without the 'audio', 'speaker_id' and
        'duration' columns, and the matching aligner object from ``datas``.

    Raises:
        gr.Error: if *langname* is not a supported language.
    """
    ds, lang_aligner = _language_resources(langname)
    table = ds.data.to_pandas()
    table = table.drop(columns=['audio', 'speaker_id', 'duration'])
    return (table[:15], lang_aligner)


def f1(langname, lang_aligner):
    """Pick one random utterance of *langname* and graph its alignment.

    Args:
        langname: "Icelandic" or "Faroese".
        lang_aligner: aligner held in the UI state; when it is ``None`` the
            aligner that ``datas`` provides for *langname* is used instead.

    Returns:
        A tuple ``(plot, sound_path)`` where ``plot`` is whatever
        ``graph.align_and_graph`` returns and ``sound_path`` is the path of
        the chosen audio file.

    Raises:
        gr.Error: if no supported language is selected or the corpus is empty.
    """
    ds, default_aligner = _language_resources(langname)
    if lang_aligner is None:
        lang_aligner = default_aligner

    n_rows = len(ds)
    if n_rows == 0:
        raise gr.Error("The selected corpus is empty.")

    # numpy's randint(high) draws from [0, high); the former randint(n - 1)
    # could never select the last row and failed for a one-row dataset.
    pick = int(random.randint(n_rows))
    sample = ds.select([pick])

    sound_path = sample['audio'][0]['path']  # audio 0 array is the audio data itself
    transcript = sample['normalized_text'][0]

    return (graph.align_and_graph(sound_path, transcript, lang_aligner), sound_path)


bl = gr.Blocks()
with bl:
    lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language")
    # Holds the aligner of the currently selected language (set by load_lang).
    align_func = gr.State()

    # NOTE(review): the row nesting below was reconstructed from a source
    # whose indentation was lost - confirm against the intended layout.
    with gr.Row():
        databrowser = gr.DataFrame(wrap=True, max_rows=50, interactive=False, overflow_row_behaviour='paginate')

    with gr.Row():
        btn1 = gr.Button(value="The random prosody button")
        btn1.style(full_width=False, size="sm")
        audio1 = gr.Audio(interactive=False)

    pl1 = gr.Plot()

    btn1.click(f1, [lloadr, align_func], [pl1, audio1])
    lloadr.change(load_lang, lloadr, [databrowser, align_func])

    gr.Markdown(ABOUT_MD)


if __name__ == "__main__":
    bl.launch()