Spaces:

IRISLAB
/

VideoTranscription

Running

App Files Files Community

VideoTranscription / modules /faster_whisper_inference.py

IRISLAB

Upload 20 files

785fa6f verified 4 months ago

raw

history blame

5.65 kB

	import os
	import time
	import numpy as np
	from typing import BinaryIO, Union, Tuple, List

	import faster_whisper
	from faster_whisper.vad import VadOptions
	import ctranslate2
	import whisper
	import gradio as gr

	from modules.whisper_parameter import *
	from modules.whisper_base import WhisperBase

	# Temporary workaround for https://github.com/jhj0517/Whisper-WebUI/issues/144
	# Sets KMP_DUPLICATE_LIB_OK for the whole process as soon as this module is imported.
	# NOTE(review): presumably this silences the duplicate-OpenMP-runtime abort described
	# in the linked issue - confirm against the issue before removing it.
	os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


	class FasterWhisperInference(WhisperBase):
	    """
	    Whisper inference backend implemented with faster-whisper.

	    Besides what WhisperBase provides, an instance keeps:

	    model_paths: dict
	        Maps a selectable model name to the value handed to
	        faster_whisper.WhisperModel (the name itself for stock models,
	        a local directory path for extra models found in model_dir).
	    available_models
	        Key view over model_paths (stays in sync with the dict).
	    available_compute_types
	        Compute types ctranslate2 reports as supported for the current device.
	    """

	    def __init__(self):
	        super().__init__(
	            model_dir=os.path.join("models", "Whisper", "faster-whisper")
	        )
	        self.model_paths = self.get_model_paths()
	        self.available_models = self.model_paths.keys()
	        # Anything that is not "cuda" is queried as "cpu".
	        self.available_compute_types = ctranslate2.get_supported_compute_types(
	            "cuda" if self.device == "cuda" else "cpu"
	        )

	    def transcribe(self,
	                   audio: Union[str, BinaryIO, np.ndarray],
	                   progress: gr.Progress,
	                   *whisper_params,
	                   ) -> Tuple[List[dict], float]:
	        """
	        transcribe method for faster-whisper.

	        Parameters
	        ----------
	        audio: Union[str, BinaryIO, np.ndarray]
	            Audio path or file binary or Audio numpy array
	        progress: gr.Progress
	            Indicator to show progress directly in gradio.
	        *whisper_params: tuple
	            Positional values used to build a WhisperValues instance
	            (WhisperValues is expected to come from the star import of
	            modules.whisper_parameter).

	        Returns
	        ----------
	        segments_result: List[dict]
	            list of dicts that includes start, end timestamps and transcribed text
	        elapsed_time: float
	            elapsed time for transcription, in seconds (includes model loading)

	        Raises
	        ----------
	        KeyError
	            If params.lang is neither "Automatic Detection" nor a language name
	            listed in whisper.tokenizer.LANGUAGES, or if params.model_size is
	            not a key of self.model_paths when a (re)load is needed.
	        """
	        start_time = time.time()

	        params = WhisperValues(*whisper_params)

	        # update_model() stores the *resolved* model_paths entry in
	        # current_model_size, so the requested name has to be resolved the same
	        # way before comparing. Comparing the raw name made every call reload
	        # models whose entry is a local path.
	        requested_model = self.model_paths.get(params.model_size, params.model_size)
	        needs_reload = (
	            self.model is None
	            or requested_model != self.current_model_size
	            or params.compute_type != self.current_compute_type
	        )
	        if needs_reload:
	            self.update_model(params.model_size, params.compute_type, progress)

	        if params.lang == "Automatic Detection":
	            params.lang = None
	        else:
	            # whisper.tokenizer.LANGUAGES maps code -> name; invert it to go
	            # from the selected name back to the language code.
	            language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
	            params.lang = language_code_dict[params.lang]

	        vad_options = VadOptions(
	            threshold=params.threshold,
	            min_speech_duration_ms=params.min_speech_duration_ms,
	            max_speech_duration_s=params.max_speech_duration_s,
	            min_silence_duration_ms=params.min_silence_duration_ms,
	            window_size_samples=params.window_size_samples,
	            speech_pad_ms=params.speech_pad_ms
	        )

	        # Translation is only requested for models listed as translatable.
	        can_translate = params.is_translate and self.current_model_size in self.translatable_models
	        task = "translate" if can_translate else "transcribe"

	        # Report the loading step *before* handing the audio to the model;
	        # previously this status was emitted only after the call had returned.
	        progress(0, desc="Loading audio..")
	        segments, info = self.model.transcribe(
	            audio=audio,
	            language=params.lang,
	            task=task,
	            beam_size=params.beam_size,
	            log_prob_threshold=params.log_prob_threshold,
	            no_speech_threshold=params.no_speech_threshold,
	            best_of=params.best_of,
	            patience=params.patience,
	            temperature=params.temperature,
	            compression_ratio_threshold=params.compression_ratio_threshold,
	            vad_filter=params.vad_filter,
	            vad_parameters=vad_options
	        )

	        total_duration = info.duration
	        segments_result = []
	        for segment in segments:
	            # Guard against a zero/unknown duration instead of dividing by it.
	            fraction = segment.start / total_duration if total_duration else 0
	            progress(fraction, desc="Transcribing..")
	            segments_result.append({
	                "start": segment.start,
	                "end": segment.end,
	                "text": segment.text
	            })

	        elapsed_time = time.time() - start_time
	        return segments_result, elapsed_time

	    def update_model(self,
	                     model_size: str,
	                     compute_type: str,
	                     progress: gr.Progress
	                     ) -> None:
	        """
	        Update current model setting

	        Replaces self.model with a new faster_whisper.WhisperModel and records
	        the resolved model entry and compute type it was built with.

	        Parameters
	        ----------
	        model_size: str
	            Size of whisper model; must be a key of self.model_paths
	        compute_type: str
	            Compute type for transcription.
	            see more info : https://opennmt.net/CTranslate2/quantization.html
	        progress: gr.Progress
	            Indicator to show progress directly in gradio.

	        Raises
	        ----------
	        KeyError
	            If model_size is not a key of self.model_paths.
	        """
	        progress(0, desc="Initializing Model..")
	        # Note: this stores the resolved entry (name or local path), not the key.
	        self.current_model_size = self.model_paths[model_size]
	        self.current_compute_type = compute_type
	        self.model = faster_whisper.WhisperModel(
	            device=self.device,
	            model_size_or_path=self.current_model_size,
	            download_root=self.model_dir,
	            compute_type=self.current_compute_type
	        )

	    def get_model_paths(self) -> dict:
	        """
	        Get available models from models path including fine-tuned model.

	        Returns
	        ----------
	        model_paths: dict
	            Maps model name to the value passed to faster_whisper.WhisperModel:
	            every name from whisper.available_models() maps to itself, and every
	            other entry found in self.model_dir maps to
	            <cwd>/<model_dir>/<name>.
	        """
	        stock_models = whisper.available_models()
	        model_paths = {model: model for model in stock_models}
	        # Built once so the membership test below is not a repeated library call.
	        stock_lookup = set(stock_models)

	        # Nothing extra to register when the model directory does not exist yet.
	        if not os.path.isdir(self.model_dir):
	            return model_paths

	        faster_whisper_prefix = "models--Systran--faster-whisper-"
	        wrong_dirs = {".locks"}
	        webui_dir = os.getcwd()

	        # sorted() keeps the registration order of extra models deterministic.
	        for entry in sorted(os.listdir(self.model_dir)):
	            if entry in wrong_dirs:
	                continue

	            model_name = entry
	            # Only strip the prefix when the name really starts with it; a
	            # substring test would cut unrelated characters off other names.
	            if model_name.startswith(faster_whisper_prefix):
	                model_name = model_name[len(faster_whisper_prefix):]

	            # NOTE(review): for prefixed entries the stored path uses the
	            # stripped name, not the on-disk directory name - confirm this is
	            # the intended location.
	            if model_name not in stock_lookup:
	                model_paths[model_name] = os.path.join(webui_dir, self.model_dir, model_name)
	        return model_paths