File size: 1,688 Bytes
b5df735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
Transcriber interface definition
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, List, Optional
from dataclasses import dataclass


@dataclass
class TranscriptionSegment:
    """A single segment of transcribed text with its start/end bounds.

    ``start``, ``end`` and ``text`` are required. ``speaker`` and
    ``confidence`` are optional and default to ``None`` when omitted.
    """
    # Lower bound of the segment (presumably a time offset in seconds -- TODO confirm units).
    start: float
    # Upper bound of the segment (presumably same units as ``start`` -- TODO confirm).
    end: float
    # Text transcribed for this segment.
    text: str
    # Speaker label for the segment; None (the default) when no speaker is given.
    speaker: Optional[str] = None
    # Confidence value for the segment; None (the default) when not given.
    # NOTE(review): scale/range is not defined in this file -- confirm with implementations.
    confidence: Optional[float] = None


@dataclass
class TranscriptionResult:
    """Result record returned by a transcription run.

    The first six fields are required and must be supplied in declaration
    order when passed positionally. The diarization flag, the speaker count
    and the error message are optional and carry defaults.
    """
    # Transcribed text (presumably the full transcript, with per-segment
    # text held in ``segments`` -- TODO confirm).
    text: str
    # Segment records belonging to this result.
    segments: List[TranscriptionSegment]
    # Language of the transcription (presumably a language code -- TODO confirm format).
    language: str
    # Name/identifier of the model that was used.
    model_used: str
    # Duration of the audio (units not stated here; presumably seconds -- TODO confirm).
    audio_duration: float
    # Time spent processing (units not stated here; presumably seconds -- TODO confirm).
    processing_time: float
    # Whether speaker diarization was enabled; defaults to False.
    speaker_diarization_enabled: bool = False
    # Speaker count for the result; defaults to 0.
    global_speaker_count: int = 0
    # Error description; defaults to None.
    # NOTE(review): presumably None means the run succeeded -- confirm with producers.
    error_message: Optional[str] = None


class ITranscriber(ABC):
    """Abstract contract that every audio transcriber must fulfil.

    Concrete subclasses have to implement all three abstract methods
    before they can be instantiated.
    """

    @abstractmethod
    async def transcribe(
        self,
        audio_file_path: str,
        model_size: str = "turbo",
        language: Optional[str] = None,
        enable_speaker_diarization: bool = False,
    ) -> TranscriptionResult:
        """Transcribe the audio file at the given path.

        This is a coroutine; callers must await it.

        Args:
            audio_file_path: Path to the audio file.
            model_size: Which model size to use (defaults to "turbo").
            language: Language code, or None to auto-detect.
            enable_speaker_diarization: Whether speaker detection
                should be enabled.

        Returns:
            A TranscriptionResult object.
        """

    @abstractmethod
    def get_supported_models(self) -> List[str]:
        """Return the list of supported model sizes."""

    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Return the list of supported language codes."""