Spaces:
Runtime error
Runtime error
File size: 3,002 Bytes
ff2e3f8 f70dae2 6b0e01c 2096575 a94cdbb 488bdb6 ff2e3f8 afd9eb4 420dbaa dcd9326 48f4751 dcd9326 5132feb dcd9326 5132feb dcd9326 5132feb dcd9326 1dccde5 1647498 48f4751 760892a 48f4751 afd9eb4 0701407 71d01f7 0701407 928ff71 ca52925 75acbad 0701407 48f4751 0cc7e88 75acbad 9ed6c43 0701407 75acbad 0701407 fedbdeb 8ce97ae 48f4751 760892a 0701407 48f4751 64cb4e5 3aecd1a 2cd93d3 cfcc701 64cb4e5 3aecd1a 0701407 afd9eb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import streamlit as st
import os
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
from whisper import utils
import ffmpeg
import os
from transformers import pipeline
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
import numpy as np
SAMPLE_RATE = 16000
def load_audio(file: str, sr: int = SAMPLE_RATE) -> np.ndarray:
    """Decode an audio file into a mono float32 waveform.

    Launches an ffmpeg subprocess to decode *file*, down-mixing to one
    channel and resampling as necessary.  Requires the ffmpeg CLI and the
    `ffmpeg-python` package to be installed.

    Parameters
    ----------
    file : path of the audio file to decode.
    sr   : target sample rate in Hz (defaults to SAMPLE_RATE, 16 kHz —
           the rate Whisper models expect).

    Returns
    -------
    1-D float32 numpy array of samples normalized to [-1.0, 1.0].

    Raises
    ------
    RuntimeError
        If ffmpeg fails to decode the file (carries ffmpeg's stderr).
    """
    try:
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # Surface ffmpeg's own error text instead of an opaque ffmpeg.Error
        # traceback (matches upstream whisper.audio.load_audio behavior).
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    # s16le PCM -> float32 in [-1, 1]; 32768.0 == 2**15, int16 full scale.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
# Load the Whisper "tiny" model once per session.
#
# BUG FIX: the original guard `if 'processor' not in locals():` never
# prevents a reload — Streamlit re-executes this script from the top on
# every user interaction, so `locals()` is fresh each run and the model
# was re-downloaded/re-loaded on every rerun.  Cache it in
# st.session_state, which persists across reruns of the same session.
if 'model' not in st.session_state:
    with st.spinner('Wait for it...'):
        st.session_state['model'] = whisper.load_model("tiny")
model = st.session_state['model']
# --- File upload ------------------------------------------------------------
wav_up = st.file_uploader("Upload", type=['wav', 'ogg', 'mp3'])
if wav_up is not None:
    file_details = {"FileName": wav_up.name, "FileType": wav_up.type}
    st.write(file_details)
    # Persist the upload to disk so the ffmpeg subprocess behind
    # whisper.load_audio can read it by path.
    with open(wav_up.name, "wb") as f:
        f.write(wav_up.getbuffer())
    st.success("Saved File")
    audio = whisper.load_audio(wav_up.name)
    # NOTE(review): pad_or_trim pads/truncates to Whisper's 30-second
    # window, so uploads longer than 30 s are cut short here.
    # model.transcribe() chunks long audio itself — confirm whether the
    # trim is intentional before removing it.
    audio = whisper.pad_or_trim(audio)
    # BUG FIX: the original hard-coded format="audio/wav" even though the
    # uploader accepts ogg and mp3; use the upload's actual MIME type.
    st.audio(wav_up.name, format=wav_up.type, start_time=0)
# --- Transcription ----------------------------------------------------------
if st.button('Processa'):
    if wav_up is not None:
        with st.spinner('Wait for it...'):
            # whisper's transcribe() returns a dict with keys 'text',
            # 'segments' and 'language'.
            transcription = model.transcribe(
                audio,
                language='pt'
            )
            # BUG FIX: the original iterated over the result dict itself,
            # which yields its string keys ('text', 'segments', ...), so
            # segment['start'] raised "TypeError: string indices must be
            # integers" — the Space's "Runtime error".  Iterate the
            # 'segments' list instead, building SRT-formatted output:
            #   <index>\n<start> --> <end>\n<text>\n
            string1 = ''
            for i, segment in enumerate(transcription['segments'], start=1):
                start_ts = utils.format_timestamp(
                    segment['start'], always_include_hours=True, decimal_marker=','
                )
                end_ts = utils.format_timestamp(
                    segment['end'], always_include_hours=True, decimal_marker=','
                )
                # '-->' inside a subtitle line would corrupt SRT parsing.
                text = segment['text'].strip().replace('-->', '->')
                string1 += f"\n{i}\n{start_ts} --> {end_ts}\n{text}\n"
            st.write(string1)
            st.success("Texto Gerado")
|