Update app.py
Browse files
app.py
CHANGED
@@ -15,22 +15,34 @@ processor_is = Wav2Vec2Processor.from_pretrained(MODEL_IS)
|
|
15 |
model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
|
16 |
processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
|
17 |
|
18 |
-
def
|
19 |
-
wav, sr = sf.read(
|
20 |
if len(wav.shape) == 2:
|
21 |
wav = wav.mean(1)
|
22 |
if sr != 16000:
|
23 |
wlen = int(wav.shape[0] / sr * 16000)
|
24 |
wav = signal.resample(wav, wlen)
|
25 |
-
|
|
|
|
|
|
|
26 |
with torch.inference_mode():
|
27 |
-
input_values =
|
28 |
input_values = torch.tensor(input_values, device=device).unsqueeze(0)
|
29 |
-
logits =
|
30 |
pred_ids = torch.argmax(logits, dim=-1)
|
31 |
-
xcp =
|
32 |
return xcp
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
bl = gr.Blocks()
|
36 |
with bl:
|
@@ -50,16 +62,16 @@ with bl:
|
|
50 |
with gr.Tabs():
|
51 |
with gr.TabItem("Icelandic"):
|
52 |
with gr.Row():
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
with gr.TabItem("Faroese"):
|
58 |
with gr.Row():
|
59 |
audio_file = gr.Audio(type="filepath")
|
60 |
text_output = gr.Textbox()
|
61 |
text_button = gr.Button("Recognise")
|
62 |
-
text_button.click(
|
63 |
|
64 |
bl.launch()
|
65 |
|
|
|
15 |
model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
|
16 |
processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
|
17 |
|
18 |
+
def readwav(a_f, target_sr=16000):
    """Read an audio file and return a mono float32 waveform at ``target_sr`` Hz.

    Parameters
    ----------
    a_f : str or file-like
        Path (or object) accepted by ``soundfile.read``.
    target_sr : int, optional
        Desired sample rate of the returned waveform. Defaults to 16000,
        the rate the wav2vec2 processors in this app expect.

    Returns
    -------
    numpy.ndarray
        1-D float32 waveform at ``target_sr`` Hz.
    """
    wav, sr = sf.read(a_f, dtype=np.float32)
    if len(wav.shape) == 2:
        # Down-mix multi-channel audio to mono by averaging the channel axis.
        wav = wav.mean(1)
    if sr != target_sr:
        # Number of samples the clip occupies at the target rate.
        wlen = int(wav.shape[0] / sr * target_sr)
        wav = signal.resample(wav, wlen)
    return wav
|
26 |
+
|
27 |
+
def recis(audio_file):
    """Transcribe *audio_file* with the Icelandic wav2vec2 CTC model.

    Returns the list of decoded strings produced by
    ``processor_is.batch_decode`` (one entry for the single input clip).
    """
    wav = readwav(audio_file)
    with torch.inference_mode():
        # Feature-extract at 16 kHz, then batch a single example.
        feats = processor_is(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(feats, device=device).unsqueeze(0)
        # Forward pass followed by greedy (argmax) CTC decoding.
        logits = model_is(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor_is.batch_decode(pred_ids)
    return xcp
|
36 |
|
37 |
+
def recfo(audio_file):
    """Transcribe *audio_file* with the Faroese wav2vec2 CTC model.

    Returns the list of decoded strings produced by
    ``processor_fo.batch_decode`` (one entry for the single input clip).
    """
    wav = readwav(audio_file)
    with torch.inference_mode():
        # Feature-extract at 16 kHz, then batch a single example.
        feats = processor_fo(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(feats, device=device).unsqueeze(0)
        # Forward pass followed by greedy (argmax) CTC decoding.
        logits = model_fo(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor_fo.batch_decode(pred_ids)
    return xcp
|
46 |
|
47 |
bl = gr.Blocks()
|
48 |
with bl:
|
|
|
62 |
    # UI: one tab per language; each tab pairs an audio input with a
    # transcript textbox and wires a "Recognise" button to the matching
    # recogniser function (recis / recfo). This section sits inside the
    # `with bl:` Blocks context opened earlier in the file.
    with gr.Tabs():
        with gr.TabItem("Icelandic"):
            with gr.Row():
                audio_file = gr.Audio(type="filepath")
                text_output = gr.Textbox()
            # NOTE(review): the exact nesting of the button relative to the
            # Row is not visible in this diff rendering — confirm in the file.
            text_button = gr.Button("Recognise")
            text_button.click(recis, inputs=audio_file, outputs=text_output)
        with gr.TabItem("Faroese"):
            with gr.Row():
                # These names intentionally shadow the Icelandic tab's
                # components; each click handler was already bound above,
                # so the reuse is harmless.
                audio_file = gr.Audio(type="filepath")
                text_output = gr.Textbox()
            text_button = gr.Button("Recognise")
            text_button.click(recfo, inputs=audio_file, outputs=text_output)

# Start the Gradio app (blocking call).
bl.launch()
|
77 |
|