# whisper_wrapper/lib/model.py
# Source: Hugging Face Hub — author Madhuslista, commit 8ee91c9
# ("Feature: Add time measurements", 1.57 kB)
#!/usr/bin/python
# -*- coding: utf-8 -*-
import gc
from time import time
import torch
import whisperx as wx
# -->> Tunables <<---------------------
# Run on GPU when one is visible, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
# float16 for GPU speed; int8 quantization keeps CPU inference tractable.
COMPUTE_TYPE = "float16" if DEVICE == "cuda" else "int8"
print("Using device:", DEVICE)
# -->> Definitions <<------------------
# -->> API <<--------------------------
def transcribe_audio(audio_file, audio_path, transcript_folder_path):
    """Transcribe the audio at *audio_path* with WhisperX large-v2.

    Loads the model, loads the audio, runs batched transcription, prints a
    timing report for each stage, and returns the transcript text.

    Parameters
    ----------
    audio_file : unused; kept for backward compatibility with callers.
    audio_path : path to the audio file passed to ``wx.load_audio``.
    transcript_folder_path : unused; despite the name, nothing is written to
        disk here — the transcript is only returned.  TODO confirm whether a
        caller was expected to do the saving.

    Returns
    -------
    str
        All transcribed segment texts joined with "\\n ".
    """
    print("Starting transcription...")

    print("Loading model...")
    time_1 = time()
    model = wx.load_model("large-v2", device=DEVICE, compute_type=COMPUTE_TYPE, language="en")
    time_2 = time()

    print("Loading audio...")
    time_3 = time()
    audio = wx.load_audio(audio_path)
    time_4 = time()

    print("Transcribing...")
    time_5 = time()
    result = model.transcribe(audio, batch_size=BATCH_SIZE)
    time_6 = time()
    print("Transcription complete!")

    print("\nTime Report: ")
    print("Loading model: ", round(time_2 - time_1,2), " [s]")
    print("Loading audio: ", round(time_4 - time_3,2), " [s]")
    print("Transcribing: ", round(time_6 - time_5,2), " [s]")
    print("Total: ", round(time_6 - time_1,2), " [s]")

    # Collect the transcript before releasing the model.
    text = "\n ".join([i["text"] for i in result["segments"]])

    # Free memory.  The model reference must be dropped BEFORE collecting:
    # the original order (gc.collect(); empty_cache(); del model) ran the
    # collectors while `model` was still a live reference, so neither the
    # weights nor their CUDA allocations could actually be reclaimed.
    del model
    gc.collect()
    if DEVICE == "cuda":
        # empty_cache() only makes sense with a CUDA context; skip on CPU.
        torch.cuda.empty_cache()

    return text
# -->> Execute <<----------------------
# -->> Export <<-----------------------