Luigi committed on
Commit
cb6b593
·
1 Parent(s): de0b3d5

add profile_diarization.py

Browse files
Files changed (1) hide show
  1. profile_diarization.py +62 -0
profile_diarization.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Memory profiling script for diarization
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
9
+
10
+ import numpy as np
11
+ import soundfile as sf
12
+ from memory_profiler import profile
13
+ from diarization import init_speaker_embedding_extractor, perform_speaker_diarization_on_utterances
14
+ from asr import transcribe_file
15
+
16
def main():
    """Run ASR followed by speaker diarization on a fixed example recording.

    The steps run in this order: read ``example.mp3`` with soundfile, bring
    the samples to 16 kHz when the file uses another rate, average the
    channels of multi-channel audio, collect the utterance list produced by
    ``transcribe_file``, create the speaker-embedding extractor, and pass
    everything to ``perform_speaker_diarization_on_utterances``.

    Progress is reported on stdout. Nothing is returned; when the extractor
    cannot be created a message is printed and the function returns early.
    """
    # NOTE(review): the module-level ``profile`` import from memory_profiler
    # is not applied anywhere in this function -- presumably the script is
    # meant to be launched under an external profiler; confirm.
    target_rate = 16000

    # Read the recording from disk.
    audio_path = "example.mp3"
    print(f"Loading audio from {audio_path}")
    audio, sample_rate = sf.read(audio_path)
    print(f"Audio loaded: {len(audio)} samples at {sample_rate}Hz")

    # Bring the signal to the target rate; scipy is only imported when a
    # conversion is actually required.
    if sample_rate != target_rate:
        from scipy import signal

        resampled_len = int(len(audio) * target_rate / sample_rate)
        audio = signal.resample(audio, resampled_len)
        sample_rate = target_rate

    # Average across channels when the array has more than one dimension.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    print(f"Audio prepared: {len(audio)} samples at {sample_rate}Hz")

    # Transcribe. Each iteration yields a pair whose second item is the
    # utterance list so far; only the value from the last iteration is kept.
    print("Starting ASR...")
    asr_options = {
        "audio_path": audio_path,
        "vad_threshold": 0.5,
        "model_name": "moonshine/tiny",
        "backend": "moonshine",
    }
    utterances = []
    for _latest, snapshot in transcribe_file(**asr_options):
        utterances = snapshot
    print(f"ASR completed: {len(utterances)} utterances")

    # Build the speaker-embedding extractor; give up if it is unavailable.
    print("Initializing embedding extractor...")
    extractor, extractor_config = init_speaker_embedding_extractor()
    if extractor is None:
        print("Failed to initialize embedding extractor")
        return

    # Diarize the prepared audio using the ASR utterances.
    print("Starting diarization...")
    diarization_result = perform_speaker_diarization_on_utterances(
        audio,
        sample_rate,
        utterances,
        extractor,
        extractor_config,
    )
    print(f"Diarization completed: {len(diarization_result)} results")


if __name__ == "__main__":
    main()