import whisper
import gradio as gr
import time
import os
from typing import BinaryIO, Union, Tuple, List
import numpy as np
import torch

from modules.whisper_base import WhisperBase
from modules.whisper_parameter import *


class WhisperInference(WhisperBase):
    """Transcription backend that runs models loaded through the ``whisper`` package."""

    def __init__(self):
        super().__init__(
            model_dir=os.path.join("models", "Whisper")
        )

    def transcribe(self,
                   audio: Union[str, np.ndarray, torch.Tensor],
                   progress: gr.Progress,
                   *whisper_params,
                   ) -> Tuple[List[dict], float]:
        """
        Transcribe (or translate) audio with the model loaded via the ``whisper`` package.

        The model is (re)loaded first when no model is loaded yet, or when the
        requested model size or compute type differs from the loaded one.

        Parameters
        ----------
        audio: Union[str, np.ndarray, torch.Tensor]
            Audio input, forwarded unchanged to the model's ``transcribe`` call.
        progress: gr.Progress
            Indicator to show progress directly in gradio.
        *whisper_params: tuple
            Positional values used to build a ``WhisperValues`` instance
            (imported from ``modules.whisper_parameter``).

        Returns
        ----------
        segments_result: List[dict]
            The ``"segments"`` entry of the model's transcription result.
        elapsed_time: float
            Wall-clock seconds spent in this call, including any model (re)load.
        """
        start_time = time.time()
        params = WhisperValues(*whisper_params)

        needs_reload = (
            params.model_size != self.current_model_size
            or self.model is None
            or self.current_compute_type != params.compute_type
        )
        if needs_reload:
            self.update_model(params.model_size, params.compute_type, progress)

        # The UI sentinel "Automatic Detection" is sent to the model as language=None.
        language = None if params.lang == "Automatic Detection" else params.lang

        # Translation is only requested for models listed in self.translatable_models;
        # any other model falls back to plain transcription.
        can_translate = self.current_model_size in self.translatable_models
        task = "translate" if params.is_translate and can_translate else "transcribe"

        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        # NOTE(review): `progress_callback` is passed as a keyword to model.transcribe;
        # confirm the installed `whisper` build accepts it.
        result = self.model.transcribe(
            audio=audio,
            language=language,
            verbose=False,
            beam_size=params.beam_size,
            logprob_threshold=params.log_prob_threshold,
            no_speech_threshold=params.no_speech_threshold,
            task=task,
            fp16=params.compute_type == "float16",
            best_of=params.best_of,
            patience=params.patience,
            temperature=params.temperature,
            compression_ratio_threshold=params.compression_ratio_threshold,
            progress_callback=progress_callback,
        )
        segments_result = result["segments"]
        elapsed_time = time.time() - start_time

        return segments_result, elapsed_time

    def update_model(self,
                     model_size: str,
                     compute_type: str,
                     progress: gr.Progress,
                     ):
        """
        Load a model and record it as the current model.

        Parameters
        ----------
        model_size: str
            Model name passed to ``whisper.load_model``.
        compute_type: str
            Compute type to record alongside the model. It is not passed to
            ``whisper.load_model``; ``transcribe`` compares it to detect changes
            and enables fp16 when it equals "float16".
        progress: gr.Progress
            Indicator to show progress directly in gradio.
        """
        progress(0, desc="Initializing Model..")
        loaded_model = whisper.load_model(
            name=model_size,
            device=self.device,
            download_root=self.model_dir
        )
        # Commit the new state only after the load succeeded. If load_model raises,
        # the previously loaded model and its recorded settings stay consistent, so
        # the next transcribe() call retries the load instead of reusing a stale model.
        self.model = loaded_model
        self.current_model_size = model_size
        self.current_compute_type = compute_type