Spaces:

renatotn7
/

EspacoTeste

Runtime error

renatotn7 committed on Nov 9, 2022

Commit

dcd9326

•

1 Parent(s): 158c775

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,22 @@ import streamlit as st
 import whisper
 from transformers import pipeline
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 if 'processor' not in locals():
  processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
  model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
@@ -16,7 +32,7 @@ st.title("Hot Dog? Or Not? ")
 file_name = st.file_uploader("Upload a hot dog candidate image")
 print(file_name)
 if file_name:
- audio = whisper.load_audio(file_name )
  print('2')
  input_features = processor(audio , return_tensors="pt").input_features
  forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")

 import whisper
 from transformers import pipeline
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+SAMPLE_RATE = 16000
+def load_audio(file: "str | BinaryIO", sr: int = SAMPLE_RATE):
+    """Decode an audio source into a mono float32 waveform sampled at ``sr`` Hz.
+
+    Args:
+        file: Path/URL understood by ffmpeg, or a binary file-like object
+            exposing ``read()`` (for example the object returned by
+            ``st.file_uploader``). File-like input is read fully into memory
+            and streamed to ffmpeg through its standard input.
+        sr: Target sample rate; the audio is resampled if necessary.
+
+    Returns:
+        A 1-D ``numpy.float32`` array. Samples are decoded as signed 16-bit
+        PCM and divided by 32768, so values lie in ``[-1.0, 1.0)``.
+
+    Raises:
+        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr.
+    """
+    # Imported locally so the function works even though the visible module
+    # header never imports these names (they were used without an import).
+    # Requires the ffmpeg CLI and the `ffmpeg-python` package to be installed.
+    import ffmpeg
+    import numpy as np
+
+    if hasattr(file, "read"):
+        # File-like object: hand the raw bytes to ffmpeg via stdin.
+        # "-nostdin" is left out here because stdin carries the audio data.
+        source, payload = "pipe:0", file.read()
+        cmd = ["ffmpeg"]
+    else:
+        # Plain path/URL: same invocation as before.
+        source, payload = file, None
+        cmd = ["ffmpeg", "-nostdin"]
+
+    try:
+        # This launches a subprocess to decode audio while down-mixing (ac=1)
+        # and resampling (ar=sr) as necessary; raw s16le PCM comes back on stdout.
+        out, _ = (
+            ffmpeg.input(source, threads=0)
+            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
+            .run(cmd=cmd, capture_stdout=True, capture_stderr=True, input=payload)
+        )
+    except ffmpeg.Error as e:
+        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+    # Reinterpret the byte stream as int16 samples and scale to float32.
+    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
 if 'processor' not in locals():
  processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
  model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
 file_name = st.file_uploader("Upload a hot dog candidate image")
 print(file_name)
 if file_name:
+ audio = load_audio(file_name )
  print('2')
  input_features = processor(audio , return_tensors="pt").input_features
  forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")