# app.py — Streamlit demo: Whisper-tiny speech transcription/translation plus
# a text sentiment-analysis pipeline.
# (Hugging Face Space page residue removed; origin: renatotn7/EspacoTeste,
# commit dcd9326, "Update app.py", 1.75 kB.)
import streamlit as st
import whisper
from transformers import pipeline
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
SAMPLE_RATE = 16000  # Whisper models expect 16 kHz mono input


def load_audio(file: str, sr: int = SAMPLE_RATE):
    """Decode an audio file to a mono float32 waveform normalized to [-1, 1].

    Launches the ffmpeg CLI in a subprocess to decode the input while
    down-mixing to mono and resampling to ``sr`` Hz. Requires both the
    ``ffmpeg`` binary and the ``ffmpeg-python`` package to be installed.

    Args:
        file: Path to an audio file (any format ffmpeg can read).
        sr: Target sample rate in Hz (default 16 kHz, what Whisper expects).

    Returns:
        A 1-D ``np.float32`` array of samples in [-1.0, 1.0).

    Raises:
        RuntimeError: If ffmpeg fails to decode the file (the ffmpeg stderr
            text is included in the message).
    """
    # Bug fix: `ffmpeg` and `numpy` were referenced here but never imported
    # anywhere in the original file; import locally so the function is
    # self-contained.
    import ffmpeg
    import numpy as np

    try:
        # s16le/pcm_s16le = signed 16-bit little-endian PCM; ac=1 down-mixes
        # to mono; ar=sr resamples. "-nostdin" keeps ffmpeg from swallowing
        # the parent process's stdin.
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    # 32768 = 2**15: rescale int16 PCM into the float32 range [-1.0, 1.0).
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
# Load the Whisper-tiny processor/model and a default sentiment pipeline.
# Fix: the scrape stripped all indentation, leaving a SyntaxError; the
# grouping below (all three loads under the guard) is the most plausible
# original structure — confirm against the Space history if possible.
# NOTE(review): at module scope `locals()` is `globals()`, and Streamlit
# re-executes the whole script on each interaction, so this guard does not
# reliably prevent re-loading; `st.cache_resource` would be the idiomatic
# fix. Kept as-is to preserve behavior.
if 'processor' not in locals():
    processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
    model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
    pipe = pipeline('sentiment-analysis')
# --- Streamlit UI ---
# Fix: the scrape stripped all indentation from the two `if` bodies below,
# leaving a SyntaxError; indentation restored, logic unchanged.

# Free-text input for the sentiment pipeline (label is Portuguese:
# "enter some text").
text = st.text_area('entre com algum texto')

# NOTE(review): the title and uploader labels look like leftovers from a
# hot-dog image-classifier template, although the uploaded file is treated
# as audio below — confirm the intended wording before changing it.
st.title("Hot Dog? Or Not? ")
file_name = st.file_uploader("Upload a hot dog candidate image")
print(file_name)  # debug: logs the UploadedFile object server-side

if file_name:
    # NOTE(review): load_audio forwards this to ffmpeg.input(), which expects
    # a filesystem path, but st.file_uploader returns an UploadedFile object;
    # this likely needs to be written to a temp file first — verify.
    audio = load_audio(file_name)
    print('2')  # debug progress marker
    # NOTE(review): the processor is called without sampling_rate=SAMPLE_RATE;
    # recent transformers versions warn or assume a default — confirm.
    input_features = processor(audio, return_tensors="pt").input_features
    # Force decoding with Portuguese as the source language and the
    # "translate" task (i.e. translate the speech into English).
    forced_decoder_ids = processor.get_decoder_prompt_ids(language="pt", task="translate")
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    # batch_decode returns a list of strings; st.title renders its repr.
    st.title(transcription)
    print('3')  # debug progress marker

if text:
    # Run the sentiment pipeline on the typed text and show the raw result.
    out = pipe(text)
    st.json(out)