File size: 2,446 Bytes
bbbf06e
 
 
 
 
052fe7e
bbbf06e
052fe7e
c0e541b
bbbf06e
 
 
 
 
 
 
052fe7e
 
bbbf06e
 
052fe7e
bbbf06e
 
 
 
 
 
 
25df8a0
 
bbbf06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
052fe7e
bbbf06e
052fe7e
bbbf06e
 
 
 
 
 
 
 
c0e541b
bbbf06e
 
 
 
 
052fe7e
 
 
bbbf06e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import unittest
import numpy as np
import sys

sys.path.append('../whisper-webui')
#print("Sys path: " + str(sys.path))

from src.whisper.abstractWhisperContainer import LambdaWhisperCallback
from src.vad import AbstractTranscription, TranscriptionConfig, VadSileroTranscription

class TestVad(unittest.TestCase):
    """Exercise ``transcribe`` on a mocked VAD transcription.

    The Whisper callback is replaced by ``transcribe_segments``, which records
    every audio segment it receives and returns a fixed dummy transcription.
    """

    def setUp(self):
        """Give each test method a fresh, empty log of callback invocations."""
        # unittest runs setUp before every test method, so no explicit
        # clearing of the log is needed inside the tests themselves.
        self.transcribe_calls = []

    def test_transcript(self):
        """Check which audio windows are transcribed and how results are merged."""
        mock = MockVadTranscription(mock_audio_length=120)
        config = TranscriptionConfig()

        # Only the first callback argument (the audio segment) is relevant to
        # this test; the remaining four arguments are ignored.
        callback = LambdaWhisperCallback(
            lambda segment, _1, _2, _3, _4: self.transcribe_segments(segment)
        )
        result = mock.transcribe("mock", callback, config)

        # MockVadTranscription.get_audio_segment encodes each requested window
        # as [start, duration], so the recorded calls show which windows were
        # requested from the audio source.
        self.assertListEqual(self.transcribe_calls, [
            [30, 30],
            [100, 100]
        ])

        # The dummy segment (10.0-20.0) is expected once per window; the
        # expected times equal the dummy times shifted by the window starts
        # (30 and 100).
        self.assertListEqual(result['segments'],
            [{'end': 50.0, 'start': 40.0, 'text': 'Hello world '},
            {'end': 120.0, 'start': 110.0, 'text': 'Hello world '}]
        )

    def transcribe_segments(self, segment):
        """Record ``segment`` and return a fixed dummy transcription result.

        ``segment`` must provide ``tolist()`` (the mock supplies a NumPy
        array); its list form is appended to ``self.transcribe_calls``.
        """
        self.transcribe_calls.append(segment.tolist())

        # Dummy text
        return {
            'text': "Hello world ",
            'segments': [
                {
                    "start": 10.0,
                    "end": 20.0,
                    "text": "Hello world "
                }
            ],
            'language': ""
        }

class MockVadTranscription(AbstractTranscription):
    """Test double for ``AbstractTranscription`` that needs no real audio.

    Audio segments are replaced by small arrays describing the requested
    window, the timestamps to transcribe are hard-coded, and the audio
    duration is whatever was passed to the constructor.
    """

    def __init__(self, mock_audio_length: float = 1000):
        """Store the duration that ``get_audio_duration`` will report."""
        super().__init__()
        self.mock_audio_length = mock_audio_length

    def get_audio_segment(self, str, start_time: str = None, duration: str = None):
        """Return ``[start, duration]`` as a float64 array instead of audio.

        ``start_time`` and ``duration`` are strings; a trailing "s" is
        stripped from each before conversion to float. The first positional
        argument is not used.
        """
        # For mocking, the requested window itself stands in for the audio.
        window = [float(value.removesuffix("s")) for value in (start_time, duration)]
        return np.array(window, dtype=np.float64)

    def get_transcribe_timestamps(self, audio: str, config: TranscriptionConfig, start_time: float, duration: float):
        """Return two fixed timestamp ranges, regardless of the arguments."""
        return [
            {'start': 30, 'end': 60},
            {'start': 100, 'end': 200},
        ]

    def get_audio_duration(self, audio: str, config: TranscriptionConfig):
        """Return the mock audio length given at construction time."""
        return self.mock_audio_length

# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()