Spaces:

cotxetj
/

swedish-to-speech-or-text

Runtime error

App Files Community

cotxetj committed on Dec 2, 2023

Commit

1567e3c

•

1 Parent(s): 9a63ffb

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -21

app.py CHANGED Viewed

@@ -30,21 +30,6 @@ def parse_codeblock(text):
                 lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
     return "".join(lines)
-def inference(audio):
-    audio = whisper.load_audio(audio)
-    print("loading finished")
-    audio = whisper.pad_or_trim(audio)
-    print("audio trimed")
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    print("spectro finished")
-    _, probs = model.detect_language(mel)
-    print("lang detected")
-    options = whisper.DecodingOptions(fp16 = False)
-    result = whisper.decode(model, mel, options)
-    print(result.text)
-    return result.text
 #Load Whisper-small
@@ -60,7 +45,7 @@ tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
 # Define a function to translate an audio, in english here
 def translate(audio):
-    # return inference(audio)
     outputs = pipe(audio, max_new_tokens=256,
                    generate_kwargs={"task": "translate"})
     return outputs["text"]
@@ -174,13 +159,12 @@ def predict(transType, language, audio, audio_mic = None):
         print("debug1:", audio,"debug2", audio_mic)
         if not audio and audio_mic:
             audio = audio_mic
-        st = gr.State([])
-        return "Tell me about the swedish king in 1995!?", gpt_predict("Tell me about the swedish king in 1995!?",st), None
         if transType == "Text":
-            return translate(audio), None
         if transType == "GPT answer":
             req = translate(audio)
-            return gpt_predict(req)
         if transType == "Audio":
             return speech_to_speech_translation(audio)
@@ -209,7 +193,7 @@ demo = gr.Interface(
     ],
     outputs=[
-        gr.Text(label="Text translation"),gr.Text(label="gpt answer"),gr.Audio(label="Audio translation",type = "numpy")
     ],
     title=title,
     description=description,

                 lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
     return "".join(lines)
 #Load Whisper-small
 # Define a function to translate an audio, in english here
 def translate(audio):
     outputs = pipe(audio, max_new_tokens=256,
                    generate_kwargs={"task": "translate"})
     return outputs["text"]
         print("debug1:", audio,"debug2", audio_mic)
         if not audio and audio_mic:
             audio = audio_mic
         if transType == "Text":
+            return translate(audio), None, None
         if transType == "GPT answer":
             req = translate(audio)
+            st = gr.State([])
+            return req, gpt_predict(req,st), None
         if transType == "Audio":
             return speech_to_speech_translation(audio)
     ],
     outputs=[
+        gr.Text(label="Text translation"),gr.Text(label="GPT answer"),gr.Audio(label="Audio translation",type = "numpy")
     ],
     title=title,
     description=description,