# fn.py — faster-whisper speech-to-text helpers
# Provenance: Hugging Face upload by aka7774, commit 0ae9155 ("Update fn.py"), 929 bytes.
from faster_whisper import WhisperModel
model = None
model_size = None
def load_model(_model_size):
    """Load (or reload) the global WhisperModel if the requested size changed.

    Tries GPU inference first (CUDA, float16) and falls back to CPU (int8)
    if model construction fails. No-op when _model_size is falsy or equal
    to the already-loaded size.

    Args:
        _model_size: faster-whisper model size/name, or None/"" to keep
            the current model.
    """
    global model_size, model
    if _model_size and model_size != _model_size:
        model_size = _model_size
        try:
            model = WhisperModel(model_size, device="cuda", compute_type="float16")
        except Exception:
            # Bare `except:` would also trap SystemExit/KeyboardInterrupt;
            # catch Exception so only real construction failures (e.g. no
            # CUDA) trigger the CPU fallback.
            model = WhisperModel(model_size, device="cpu", compute_type="int8")
def speech_to_text(audio_file, _model_size=None, language='ja'):
    """Transcribe an audio file with the (lazily loaded) global model.

    Args:
        audio_file: path or file-like object accepted by WhisperModel.transcribe.
        _model_size: optional model size to (re)load before transcribing.
        language: transcription language code (default 'ja', matching the
            original hard-coded behavior).

    Returns:
        A (text_only, text_with_timestamps) tuple of newline-terminated
        strings; the timestamped form is "start\\tend\\ttext" per segment
        with times formatted to two decimals.
    """
    global model_size, model
    load_model(_model_size)
    segments, info = model.transcribe(
        audio_file,
        language=language,
        beam_size=5,
        vad_filter=True,
        without_timestamps=False,
    )
    # Collect pieces and join once instead of quadratic `+=` concatenation.
    plain_parts = []
    stamped_parts = []
    for segment in segments:
        plain_parts.append(f"{segment.text}\n")
        stamped_parts.append(f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n")
    return ''.join(plain_parts), ''.join(stamped_parts)