#!/usr/bin/python
# -*- coding: utf-8 -*-

import gc
from time import time

import torch
import whisperx as wx

# -->> Tunables <<---------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
if DEVICE == "cuda":
    COMPUTE_TYPE = "float16"
else:
    COMPUTE_TYPE = "int8"
print("Using device:", DEVICE)

# -->> Definitions <<------------------

# -->> API <<--------------------------
def transcribe_audio(audio_file, audio_path, transcript_folder_path):
    """Transcribe the audio at ``audio_path`` with WhisperX and return the transcript text.

    ``audio_file`` and ``transcript_folder_path`` are accepted for interface
    compatibility but are not used inside this function.
    """
    print("Starting transcription...")
    print("Loading model...")
    time_1 = time()
    model = wx.load_model("large-v2", device=DEVICE, compute_type=COMPUTE_TYPE, language="en")
    time_2 = time()
    print("Loading audio...")
    time_3 = time()
    audio = wx.load_audio(audio_path)
    time_4 = time()
    print("Transcribing...")
    time_5 = time()
    result = model.transcribe(audio, batch_size=BATCH_SIZE)
    time_6 = time()
    print("Transcription complete!")
    print("\nTime Report:")
    print("Loading model: ", round(time_2 - time_1, 2), " [s]")
    print("Loading audio: ", round(time_4 - time_3, 2), " [s]")
    print("Transcribing:  ", round(time_6 - time_5, 2), " [s]")
    print("Total:         ", round(time_6 - time_1, 2), " [s]")
    # Join the transcribed segments into a single transcript string
    text = "\n".join(segment["text"].strip() for segment in result["segments"])
    # Free memory: drop the model reference before collecting garbage and clearing the CUDA cache
    del model
    gc.collect()
    if DEVICE == "cuda":
        torch.cuda.empty_cache()
    return text

# -->> Execute <<----------------------
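# Example usage: a minimal sketch, not part of the original script.
# The file name and output folder below are hypothetical placeholders.
if __name__ == "__main__":
    example_path = "example.wav"  # hypothetical input audio file
    transcript = transcribe_audio(
        audio_file=example_path,
        audio_path=example_path,
        transcript_folder_path="transcripts",  # hypothetical folder (unused by the function)
    )
    print(transcript)
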
# -->> Export <<-----------------------