# Hugging Face Spaces app: Streamlit + OpenAI Whisper audio transcription demo.
import streamlit as st | |
import os | |
os.system("pip install git+https://github.com/openai/whisper.git") | |
import whisper | |
from whisper import utils | |
import ffmpeg | |
import os | |
from transformers import pipeline | |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq | |
import numpy as np | |
SAMPLE_RATE = 16000 | |
def load_audio(file: str, sr: int = SAMPLE_RATE) -> np.ndarray:
    """Decode an audio file to mono float32 PCM at the given sample rate.

    Launches an ffmpeg subprocess that down-mixes to one channel and
    resamples as necessary. Requires the ffmpeg CLI and the
    `ffmpeg-python` package to be installed.

    Args:
        file: Path of the audio file to decode.
        sr: Target sample rate in Hz (defaults to SAMPLE_RATE).

    Returns:
        A 1-D float32 array of samples normalized to [-1.0, 1.0).
    """
    out, _ = (
        ffmpeg.input(file, threads=0)
        .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
        .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
    )
    # s16le samples -> float in [-1, 1) by dividing by 2**15.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
# Load the Whisper model once per session. The original guard
# (`if 'processor' not in locals():`) never prevented reloads, because
# Streamlit re-executes the whole module on every rerun and `processor`
# was never assigned; st.session_state persists across reruns.
if "model" not in st.session_state:
    with st.spinner('Wait for it...'):
        st.session_state.model = whisper.load_model("tiny")
model = st.session_state.model
# Audio upload widget; accepted container formats are wav/ogg/mp3.
wav_up = st.file_uploader("Upload", type=['wav', 'ogg', 'mp3'])

audio = None
if wav_up is not None:
    file_details = {"FileName": wav_up.name, "FileType": wav_up.type}
    st.write(file_details)
    # Persist the upload to disk so whisper/ffmpeg can read it by path.
    with open(wav_up.name, "wb") as f:
        f.write(wav_up.getbuffer())
    st.success("Saved File")
    audio = whisper.load_audio(wav_up.name)
    # Pad/trim to the fixed-length window Whisper expects.
    audio = whisper.pad_or_trim(audio)
    st.audio(wav_up.name, format="audio/wav", start_time=0)
if st.button('Processa'):
    if wav_up is not None:
        with st.spinner('Wait for it...'):
            # model.transcribe returns a dict with 'text', 'segments' and
            # 'language' keys.
            transcription = model.transcribe(
                audio,
                language='pt',
            )
            # Build SRT-style output from the transcription.
            # BUG FIX: the original iterated over the result dict itself,
            # which yields its string keys, so segment['start'] raised
            # TypeError; the segments live under transcription['segments'].
            srt_parts = []
            for i, segment in enumerate(transcription['segments'], start=1):
                start = utils.format_timestamp(
                    segment['start'], always_include_hours=True, decimal_marker=','
                )
                end = utils.format_timestamp(
                    segment['end'], always_include_hours=True, decimal_marker=','
                )
                # '-->' inside subtitle text would corrupt the SRT cue syntax.
                text = segment['text'].strip().replace('-->', '->')
                srt_parts.append(f"\n{i}\n{start} --> {end}\n{text}\n")
            st.write("".join(srt_parts))
            st.success("Texto Gerado")