import os
import time

import torch
import whisper


def transcribe_segments(speakers):
    """Transcribe each speaker's audio clip with Whisper.

    Parameters
    ----------
    speakers : list[dict]
        Each dict is expected to carry at least:
        ``{'speaker': <label>, 'clipFile': <path to audio clip>}``
        (upstream also adds 'start'/'end' keys, unused here).

    Returns
    -------
    list[str]
        One ``"<speaker> : <text>"`` entry per input clip, in order.

    Side effects
    ------------
    Deletes every clip file after transcribing it, and finally removes
    the (now-empty) directory that contained the clips.
    """
    print(f"Whisper models {whisper.available_models()}")

    # Prefer GPU when available; Whisper accepts a plain device string.
    if torch.cuda.is_available():
        print("transcribe_segments Using CUDA")
        device = "cuda"
    else:
        device = "cpu"
        print("transcribe_segments Using CPU")

    # NOTE(review): "tiny.en" trades accuracy for speed; larger models
    # ("medium.en", "turbo", "large-v3-turbo") were tried previously.
    model = whisper.load_model("tiny.en", device=device)

    transcripts = []
    clip_dir = None  # directory holding the clips; learned from the first clip

    print("Transcribing ALL segments")
    total_start = time.time()

    for speaker in speakers:
        input_file = speaker['clipFile']
        # Portable replacement for the old first-'\\' split, which was
        # Windows-only and crashed on separator-free paths.
        if clip_dir is None:
            clip_dir = os.path.dirname(input_file)

        print("TRANSCRIBING " + input_file)
        start = time.time()
        transcript = model.transcribe(input_file)
        print("Elapsed " + str(time.time() - start))

        # Concatenate segment texts in one pass instead of quadratic +=.
        out_text = "".join(seg['text'] for seg in transcript["segments"])
        transcripts.append(speaker['speaker'] + " : " + out_text)

        # Clip is no longer needed once transcribed.
        os.remove(input_file)

    print("Total Elapsed " + str(time.time() - total_start))

    # Only attempt cleanup when at least one clip was processed; the old
    # code raised ValueError on an empty speaker list. rmdir still
    # (intentionally) fails loudly if the directory is not empty.
    if clip_dir:
        os.rmdir(clip_dir)

    return transcripts