Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,22 @@ import streamlit as st
|
|
2 |
import whisper
|
3 |
from transformers import pipeline
|
4 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
if 'processor' not in locals():
|
6 |
processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
|
7 |
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
|
@@ -16,7 +32,7 @@ st.title("Hot Dog? Or Not? ")
|
|
16 |
file_name = st.file_uploader("Upload a hot dog candidate image")
|
17 |
print(file_name)
|
18 |
if file_name:
|
19 |
-
audio =
|
20 |
print('2')
|
21 |
input_features = processor(audio , return_tensors="pt").input_features
|
22 |
forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")
|
|
|
2 |
import whisper
|
3 |
from transformers import pipeline
|
4 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
5 |
+
SAMPLE_RATE = 16000  # default target sample rate in Hz, passed to ffmpeg's `ar` option


def load_audio(file: "str | os.PathLike | bytes | typing.BinaryIO", sr: int = SAMPLE_RATE):
    """Decode an audio source into a mono float32 waveform resampled to ``sr``.

    Decoding is delegated to the ``ffmpeg`` CLI through the ``ffmpeg-python``
    package, which down-mixes to one channel and resamples as necessary.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``), the raw
            encoded audio as ``bytes``, or a binary file-like object exposing
            ``getvalue()`` or ``read()``. The visible caller passes the object
            returned by ``st.file_uploader`` rather than a path, so in-memory
            input is streamed to ffmpeg over stdin instead of being treated
            as a filename.
        sr: Target sample rate in Hz.

    Returns:
        A 1-D ``numpy.float32`` array; the signed 16-bit PCM samples emitted
        by ffmpeg are divided by 32768.0.

    Raises:
        TypeError: If ``file`` is none of the supported input kinds.
        RuntimeError: If the in-memory input is empty or ffmpeg fails.
    """
    import os

    # Imported locally because the visible import header of this file does
    # not bring `np` into scope.
    import numpy as np

    if isinstance(file, (str, os.PathLike)):
        source, payload = os.fspath(file), None
    else:
        if isinstance(file, (bytes, bytearray, memoryview)):
            payload = bytes(file)
        elif hasattr(file, "getvalue"):
            # Prefer getvalue(): it returns the whole buffer regardless of
            # the current read position of the stream.
            payload = file.getvalue()
        elif hasattr(file, "read"):
            payload = file.read()
        else:
            raise TypeError(
                "load_audio expects a path, bytes, or a binary file-like object, "
                f"got {type(file).__name__}"
            )
        if not payload:
            raise RuntimeError("Failed to load audio: input is empty")
        # Tell ffmpeg to read the encoded audio from its standard input.
        source = "pipe:0"

    # Deferred until after input validation so that bad input is reported
    # without needing the ffmpeg-python package; also brings `ffmpeg` into
    # scope, which the visible import header does not.
    import ffmpeg

    try:
        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
        out, _ = (
            ffmpeg.input(source, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            .run(
                cmd=["ffmpeg", "-nostdin"],
                capture_stdout=True,
                capture_stderr=True,
                input=payload,
            )
        )
    except ffmpeg.Error as e:
        # errors="replace" keeps a non-UTF-8 ffmpeg message from raising a
        # UnicodeDecodeError that would mask the real failure.
        raise RuntimeError(
            f"Failed to load audio: {e.stderr.decode(errors='replace')}"
        ) from e

    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
|
21 |
if 'processor' not in locals():
|
22 |
processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
|
23 |
model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
|
|
|
32 |
file_name = st.file_uploader("Upload a hot dog candidate image")
|
33 |
print(file_name)
|
34 |
if file_name:
|
35 |
+
audio = load_audio(file_name )
|
36 |
print('2')
|
37 |
input_features = processor(audio , return_tensors="pt").input_features
|
38 |
forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")
|