cotxetj committed on
Commit
37b0e3a
1 Parent(s): a426fc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -2,9 +2,25 @@ import torch
2
  from transformers import pipeline, VitsModel, VitsTokenizer
3
  import numpy as np
4
  import gradio as gr
 
5
 
6
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  # Load Whisper-small
9
  pipe = pipeline("automatic-speech-recognition",
10
  model="openai/whisper-small",
@@ -20,6 +36,7 @@ tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-fra")
20
 
21
  # Define a function to translate an audio, in english here
22
  def translate(audio):
 
23
  outputs = pipe(audio, max_new_tokens=256,
24
  generate_kwargs={"task": "transcribe", "language": "english"})
25
  return outputs["text"]
 
2
  from transformers import pipeline, VitsModel, VitsTokenizer
3
  import numpy as np
4
  import gradio as gr
5
+ import whisper
6
 
7
+ model = whisper.load_model("small")
8
 
9
+ def inference(audio):
10
+ audio = whisper.load_audio(audio)
11
+ audio = whisper.pad_or_trim(audio)
12
+
13
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
14
+
15
+ _, probs = model.detect_language(mel)
16
+
17
+ options = whisper.DecodingOptions(fp16 = False)
18
+ result = whisper.decode(model, mel, options)
19
+
20
+ print(result.text)
21
+ return result.text
22
+
23
+
24
  # Load Whisper-small
25
  pipe = pipeline("automatic-speech-recognition",
26
  model="openai/whisper-small",
 
36
 
37
  # Define a function to translate an audio, in english here
38
  def translate(audio):
39
+ return inference(audio)
40
  outputs = pipe(audio, max_new_tokens=256,
41
  generate_kwargs={"task": "transcribe", "language": "english"})
42
  return outputs["text"]