import os

import gradio as gr
from pydub import AudioSegment


def audio_converter(audio_file: str) -> str:
    """Convert an .m4a file to .wav so the ASR pipeline can consume it."""
    audio_input = AudioSegment.from_file(audio_file, format="m4a")
    audio_input_name = os.path.splitext(audio_file)[0]
    audio_wav_filename = f"{audio_input_name}.wav"
    audio_input.export(audio_wav_filename, format="wav")
    return audio_wav_filename


def asr_transcriber(audio_file: str) -> str:
    """Transcribe an uploaded audio file with Whisper and return the text."""
    from transformers import pipeline
    import torch

    audio_file_wav = audio_converter(audio_file)
    device_id = "mps"  # Apple Silicon GPU; use "cuda:0" or "cpu" elsewhere
    flash = False      # set True if Flash Attention 2 is available

    # Initialize the ASR pipeline
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        torch_dtype=torch.float16,
        device=device_id,
    )

    if device_id == "mps":
        # Free cached MPS memory before running inference
        torch.mps.empty_cache()
    elif not flash:
        # Without Flash Attention, fall back to BetterTransformer kernels
        pipe.model = pipe.model.to_bettertransformer()

    ts = True         # return timestamps along with the text
    language = None   # let Whisper auto-detect the language
    task = "transcribe"

    json_output = pipe(
        audio_file_wav,
        chunk_length_s=30,
        batch_size=2,
        generate_kwargs={"task": task, "language": language},
        return_timestamps=ts,
    )
    return json_output["text"]


with gr.Blocks() as transcriberUI:
    gr.Markdown(
        """
        # Hello Xara & Solange!
        Click the button below to select the audio file to be transcribed.
        Test environment: it may take a while. Don't get nervous :-)
        """
    )
    inp = gr.File(label="Audio File", show_label=True, file_count="single", file_types=[".m4a"])
    transcribe = gr.Textbox(label="Transcription", show_label=True, show_copy_button=True)
    inp.upload(asr_transcriber, inp, transcribe)

transcriberUI.launch(share=True)