Luigi committed on
Commit
e63bfa4
·
1 Parent(s): cb6b593

add memory_profiler

Browse files
Files changed (2) hide show
  1. profile_diarization.py +0 -62
  2. requirements.txt +2 -1
profile_diarization.py DELETED
@@ -1,62 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Memory profiling script for diarization
4
- """
5
-
6
- import sys
7
- import os
8
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
9
-
10
- import numpy as np
11
- import soundfile as sf
12
- from memory_profiler import profile
13
- from diarization import init_speaker_embedding_extractor, perform_speaker_diarization_on_utterances
14
- from asr import transcribe_file
15
-
16
- def main():
17
- # Load audio
18
- audio_path = "example.mp3"
19
- print(f"Loading audio from {audio_path}")
20
- audio, sample_rate = sf.read(audio_path)
21
- print(f"Audio loaded: {len(audio)} samples at {sample_rate}Hz")
22
-
23
- # Resample to 16kHz if needed
24
- if sample_rate != 16000:
25
- from scipy.signal import resample
26
- audio = resample(audio, int(len(audio) * 16000 / sample_rate))
27
- sample_rate = 16000
28
-
29
- # Ensure mono
30
- if len(audio.shape) > 1:
31
- audio = audio.mean(axis=1)
32
-
33
- print(f"Audio prepared: {len(audio)} samples at {sample_rate}Hz")
34
-
35
- # Perform ASR to get utterances
36
- print("Starting ASR...")
37
- utterances = []
38
- for current, all_utterances in transcribe_file(
39
- audio_path=audio_path,
40
- vad_threshold=0.5,
41
- model_name="moonshine/tiny",
42
- backend="moonshine"
43
- ):
44
- utterances = all_utterances
45
- print(f"ASR completed: {len(utterances)} utterances")
46
-
47
- # Initialize embedding extractor
48
- print("Initializing embedding extractor...")
49
- embedding_extractor, config_dict = init_speaker_embedding_extractor()
50
- if embedding_extractor is None:
51
- print("Failed to initialize embedding extractor")
52
- return
53
-
54
- # Perform diarization with profiling
55
- print("Starting diarization...")
56
- diarization_result = perform_speaker_diarization_on_utterances(
57
- audio, sample_rate, utterances, embedding_extractor, config_dict
58
- )
59
- print(f"Diarization completed: {len(diarization_result)} results")
60
-
61
- if __name__ == "__main__":
62
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -16,4 +16,5 @@ ffmpeg-python
16
  feedparser
17
  sherpa_onnx
18
  huggingface_hub
19
- faiss-cpu
 
 
16
  feedparser
17
  sherpa_onnx
18
  huggingface_hub
19
+ faiss-cpu
20
+ memory_profiler