renatotn7 committed on
Commit
dcd9326
1 Parent(s): 158c775

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -2,6 +2,22 @@ import streamlit as st
2
  import whisper
3
  from transformers import pipeline
4
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  if 'processor' not in locals():
6
  processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
7
  model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
@@ -16,7 +32,7 @@ st.title("Hot Dog? Or Not? ")
16
  file_name = st.file_uploader("Upload a hot dog candidate image")
17
  print(file_name)
18
  if file_name:
19
- audio = whisper.load_audio(file_name )
20
  print('2')
21
  input_features = processor(audio , return_tensors="pt").input_features
22
  forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")
 
2
  import whisper
3
  from transformers import pipeline
4
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
5
+ SAMPLE_RATE = 16000
6
+
7
def load_audio(file: "str | bytes | typing.BinaryIO", sr: int = SAMPLE_RATE):
    """Decode an audio source into a mono float32 waveform.

    The audio is decoded by the ``ffmpeg`` command-line tool (driven through
    the ``ffmpeg-python`` package), down-mixed to one channel and resampled
    to ``sr`` Hz.

    Args:
        file: Either a path that ffmpeg can open, raw encoded audio bytes, or
            a binary file-like object exposing ``read()`` (for example the
            object returned by an upload widget). File-like objects are read
            from their current position and streamed to ffmpeg over stdin.
        sr: Target sample rate in Hz.

    Returns:
        A 1-D ``numpy.float32`` array; the signed 16-bit PCM samples produced
        by ffmpeg are divided by 32768.0.

    Raises:
        RuntimeError: If ffmpeg fails to decode the input; the message
            carries ffmpeg's stderr output.
    """
    # Imported locally so the function works even when the module-level
    # import block does not bring these names into scope.
    import ffmpeg
    import numpy as np

    if hasattr(file, "read"):
        # In-memory upload: there is no path on disk, so feed bytes via stdin.
        source, payload = "pipe:0", file.read()
    elif isinstance(file, (bytes, bytearray)):
        source, payload = "pipe:0", bytes(file)
    else:
        source, payload = file, None

    # "-nostdin" only makes sense when stdin is not the audio input itself.
    cmd = ["ffmpeg", "-nostdin"] if payload is None else ["ffmpeg"]

    try:
        # This launches a subprocess to decode audio while down-mixing and
        # resampling as necessary. Requires the ffmpeg CLI and the
        # `ffmpeg-python` package to be installed.
        out, _ = (
            ffmpeg.input(source, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            .run(cmd=cmd, capture_stdout=True, capture_stderr=True, input=payload)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # s16le output -> int16 samples -> float32 scaled by 1/32768.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
21
  if 'processor' not in locals():
22
  processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
23
  model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
 
32
  file_name = st.file_uploader("Upload a hot dog candidate image")
33
  print(file_name)
34
  if file_name:
35
+ audio = load_audio(file_name )
36
  print('2')
37
  input_features = processor(audio , return_tensors="pt").input_features
38
  forced_decoder_ids = processor.get_decoder_prompt_ids(language = "pt", task = "translate")