# Hugging Face Spaces app: Streamlit + OpenAI Whisper audio transcription demo.
import streamlit as st | |
import os | |
os.system("pip install git+https://github.com/openai/whisper.git") | |
import whisper | |
from whisper import utils | |
import ffmpeg | |
import os | |
from transformers import pipeline | |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq | |
import numpy as np | |
SAMPLE_RATE = 16000 | |
def load_audio(file: str, sr: int = SAMPLE_RATE) -> np.ndarray:
    """Decode an audio file to mono float32 PCM at the given sample rate.

    Launches an ffmpeg subprocess that down-mixes to one channel and
    resamples as necessary. Requires the ffmpeg CLI and the
    `ffmpeg-python` package to be installed.

    Args:
        file: Path of the audio file to decode.
        sr: Target sample rate in Hz (defaults to SAMPLE_RATE).

    Returns:
        A 1-D float32 array of samples normalized to [-1.0, 1.0).
    """
    out, _ = (
        ffmpeg.input(file, threads=0)
        .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
        .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
    )
    # s16le samples -> float in [-1, 1) by dividing by 2**15.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
# Load the Whisper model once per session. The original guard
# (`if 'processor' not in locals():`) never prevented reloads, because
# Streamlit re-executes the whole module on every rerun and `processor`
# was never assigned; st.session_state persists across reruns.
if "model" not in st.session_state:
    with st.spinner('Wait for it...'):
        st.session_state.model = whisper.load_model("tiny")
model = st.session_state.model
# Audio upload widget; accepted container formats are wav/ogg/mp3.
wav_up = st.file_uploader("Upload", type=['wav', 'ogg', 'mp3'])

audio = None
if wav_up is not None:
    file_details = {"FileName": wav_up.name, "FileType": wav_up.type}
    st.write(file_details)
    # Persist the upload to disk so whisper/ffmpeg can read it by path.
    with open(wav_up.name, "wb") as f:
        f.write(wav_up.getbuffer())
    st.success("Saved File")
    audio = whisper.load_audio(wav_up.name)
    # Pad/trim to the fixed-length window Whisper expects.
    audio = whisper.pad_or_trim(audio)
    st.audio(wav_up.name, format="audio/wav", start_time=0)
if st.button('Processa'):
    if wav_up is not None:
        with st.spinner('Wait for it...'):
            # model.transcribe returns a dict with 'text', 'segments' and
            # 'language' keys.
            transcription = model.transcribe(
                audio,
                language='pt',
            )
            # Build SRT-style output from the transcription.
            # BUG FIX: the original iterated over the result dict itself,
            # which yields its string keys, so segment['start'] raised
            # TypeError; the segments live under transcription['segments'].
            srt_parts = []
            for i, segment in enumerate(transcription['segments'], start=1):
                start = utils.format_timestamp(
                    segment['start'], always_include_hours=True, decimal_marker=','
                )
                end = utils.format_timestamp(
                    segment['end'], always_include_hours=True, decimal_marker=','
                )
                # '-->' inside subtitle text would corrupt the SRT cue syntax.
                text = segment['text'].strip().replace('-->', '->')
                srt_parts.append(f"\n{i}\n{start} --> {end}\n{text}\n")
            st.write("".join(srt_parts))
            st.success("Texto Gerado")