gdnartea commited on
Commit
9bebeaf
1 Parent(s): 13c0360

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -23
app.py CHANGED
@@ -1,31 +1,16 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel
4
- from nemo.collections.asr.models import EncDecMultiTaskModel
5
 
 
6
 
7
- # load speech to text model
8
- canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
9
- canary_model.eval()
10
- canary_model.to('cpu')
 
11
 
12
- # update decode params
13
- canary_model.change_decoding_strategy(None)
14
- decode_cfg = canary_model.cfg.decoding
15
- decode_cfg.beam.beam_size = 1
16
- canary_model.change_decoding_strategy(decode_cfg)
17
 
18
-
19
-
20
- def convert_speech(speech):
21
- # Convert the speech to text
22
- transcription = canary_model.transcribe(
23
- speech,
24
- logprobs=False,
25
- )
26
-
27
- return transcription
28
-
29
- iface = gr.Interface(fn=convert_speech, inputs=gr.Audio(source="microphone"), outputs="textbox")
30
 
31
  iface.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import pipeline
 
4
 
5
# Load the speech-to-text model once at module import, via the transformers
# ASR pipeline abstraction.
# NOTE(review): nvidia/canary-1b is published as a NeMo checkpoint (the
# previous revision loaded it with nemo EncDecMultiTaskModel) — confirm it
# actually loads through `transformers.pipeline`; if not, this raises at startup.
canary_pipe = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
6
 
7
def convert_speech(audio):
    """Transcribe microphone audio to text.

    Parameters
    ----------
    audio : tuple[int, numpy.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio``
        (samples are typically int16 PCM).

    Returns
    -------
    str
        The transcription produced by the module-level ASR pipeline.
    """
    # numpy is used below but never imported at module level in this file;
    # a function-scope import keeps this block self-contained.
    import numpy as np

    sr, y = audio
    # Normalize to float32 in [-1, 1], which the ASR pipeline expects.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # guard: an all-silent clip would divide by zero
        y /= peak
    # Bug fix: the original called an undefined name `transcriber`;
    # the pipeline object is bound to `canary_pipe` at module level.
    return canary_pipe({"sampling_rate": sr, "raw": y})["text"]
12
 
 
 
 
 
 
13
 
14
# Build the web UI: microphone audio in, transcribed text out.
# Gradio 4.x documents `sources` as a list of source names; pass the
# documented list form rather than relying on bare-string leniency.
iface = gr.Interface(
    fn=convert_speech,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="textbox",
)

iface.launch()