File size: 779 Bytes
5a444be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import whisper

def has_intersection(t1, t2):
    if t1[1] < t2[0] or t2[1] < t1[0]:
        return False
    else:
        return True

class AudioTranslator():
    def __init__(self, model='base', device='cuda'):
        self.device = device
        self.model = whisper.load_model(model).to(device)

    def __call__(self, video_path):
        print("Extract the audio results.")
        audio_results = self.model.transcribe(video_path)["segments"]
        print("Finished.")
        return audio_results
    
    def match(self, audio_results, start, end):
        transcript = ''
        for res in audio_results:
            if has_intersection((start, end), (res["start"], res["end"])):
                transcript += res['text'] + ' '
        return transcript