# EspacoTeste / app.py — Hugging Face Space by renatotn7 (commit cfcc701)
# Third-party imports for the Streamlit transcription demo.
import streamlit as st
import os
# HACK: installs Whisper at app startup via a shell command instead of
# requirements.txt — this re-runs on every launch of the Space.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
from whisper import utils
import ffmpeg
import os  # NOTE(review): duplicate of the `import os` above — redundant but harmless.
from transformers import pipeline
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import numpy as np
# Whisper models expect 16 kHz mono float32 input.
SAMPLE_RATE = 16000
def load_audio(file: str, sr: int = SAMPLE_RATE):
    """Decode an audio file to a mono float32 waveform.

    Launches an ffmpeg subprocess to decode the file while down-mixing to
    mono and resampling as necessary.  Requires the ffmpeg CLI and the
    `ffmpeg-python` package to be installed.

    Parameters
    ----------
    file : str
        Path of the audio file to decode.
    sr : int
        Target sample rate; defaults to Whisper's expected 16 kHz.

    Returns
    -------
    np.ndarray
        1-D float32 array of samples normalized to [-1.0, 1.0].

    Raises
    ------
    RuntimeError
        If ffmpeg fails to decode the file (ffmpeg's stderr is included),
        matching upstream Whisper's own ``load_audio`` behavior.
    """
    try:
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # Surface ffmpeg's diagnostic instead of an opaque ffmpeg.Error.
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    # s16le PCM -> float32 in [-1, 1]; 32768 == 2**15, the int16 full scale.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
# Load the Whisper model once per session.  Streamlit re-executes this
# script on every user interaction, so module-level `locals()` never
# persists; the original guard also tested 'processor' — a name that is
# never assigned — so the model was reloaded on every rerun.  Caching in
# st.session_state makes the load happen exactly once per browser session.
if 'model' not in st.session_state:
    with st.spinner('Wait for it...'):
        st.session_state['model'] = whisper.load_model("tiny")
model = st.session_state['model']
# Audio uploader: accepts the formats ffmpeg/Whisper can decode here.
wav_up = st.file_uploader("Upload",type=['wav','ogg','mp3'])
if wav_up is not None:
    file_details = {"FileName":wav_up.name,"FileType":wav_up.type}
    st.write(file_details)
    # Persist the upload so whisper/ffmpeg can read it from disk.
    # NOTE(review): this writes under the user-supplied filename in the
    # working directory — consider tempfile to avoid collisions/overwrites.
    with open(wav_up.name,"wb") as f:
        f.write(wav_up.getbuffer())
    st.success("Saved File")
    audio = whisper.load_audio(wav_up.name)
    # Do NOT call whisper.pad_or_trim() here: model.transcribe() chunks the
    # full waveform itself; trimming first would silently discard everything
    # after the first 30 seconds of the recording.
    st.audio(wav_up.name, format="audio/wav", start_time=0)
# "Processa" button: run the transcription and render it as SRT-style text.
if st.button('Processa'):
    if wav_up is not None:
        with st.spinner('Wait for it...'):
            # transcribe() accepts the full waveform and windows it
            # internally; language is fixed to Portuguese for this demo.
            transcription=model.transcribe(
                audio,
                language = 'pt'
            )
            # transcribe() returns a dict; the per-segment data lives under
            # the 'segments' key.  Iterating the dict itself would yield its
            # string keys ('text', 'segments', ...) and crash on
            # segment['start'].
            srt_chunks = []
            for i, segment in enumerate(transcription['segments'], start=1):
                start = utils.format_timestamp(segment['start'], always_include_hours=True, decimal_marker=',')
                end = utils.format_timestamp(segment['end'], always_include_hours=True, decimal_marker=',')
                # SRT forbids '-->' inside cue text; soften it.
                text = segment['text'].strip().replace('-->', '->')
                srt_chunks.append(f"\n{str(i)}\n{start} --> {end}\n{text}\n")
            # join() instead of repeated '+=' (avoids quadratic rebuilds).
            string1 = ''.join(srt_chunks)
            st.write(string1 )
            st.success("Texto Gerado")