liumaolin committed on
Commit
99e8988
·
1 Parent(s): a453c72

Update `AudioCapture` to support both PyAudio and macOS native AEC+VAD libraries

Browse files

- Add PyAudio-based standard audio capture as an option.
- Enable runtime switching between PyAudio and macOS native libraries based on echo cancellation settings.
- Refactor capture methods for better modularity and maintainability.

src/voice_dialogue/services/audio/capture.py CHANGED
@@ -1,6 +1,9 @@
1
  """
2
- 回声消除音频捕获模块
3
- 使用 AEC (Acoustic Echo Cancellation) 技术的音频采集器
 
 
 
4
  """
5
 
6
  import ctypes
@@ -9,6 +12,7 @@ import time
9
  from multiprocessing import Queue
10
 
11
  import numpy as np
 
12
 
13
  from voice_dialogue.config.paths import LIBRARIES_PATH
14
  from voice_dialogue.core.base import BaseThread
@@ -17,8 +21,10 @@ from voice_dialogue.utils.logger import logger
17
 
18
  class AudioCapture(BaseThread):
19
  """
20
- 回声消除音频捕获器
21
- 使用原生 C 库进行音频捕获,支持回声消除和语音活动检测
 
 
22
  """
23
 
24
  def __init__(
@@ -26,6 +32,15 @@ class AudioCapture(BaseThread):
26
  audio_frames_queue: Queue = None,
27
  enable_echo_cancellation: bool = True,
28
  ):
 
 
 
 
 
 
 
 
 
29
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
30
 
31
  self.audio_frames_queue = audio_frames_queue
@@ -33,43 +48,104 @@ class AudioCapture(BaseThread):
33
  self._enable_echo_cancellation = enable_echo_cancellation
34
 
35
  @property
36
- def is_paused(self):
 
37
  return self._pause_event.is_set()
38
 
39
  def pause(self):
 
40
  self._pause_event.set()
41
 
42
  def resume(self):
 
43
  self._pause_event.clear()
44
 
45
  def run(self):
46
- """主运行循环,持续获取音频数据"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  audio_recorder = ctypes.CDLL(LIBRARIES_PATH / 'libAudioCapture.dylib')
48
  audio_recorder.getAudioData.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_bool)]
49
  audio_recorder.getAudioData.restype = ctypes.POINTER(ctypes.c_ubyte)
50
  audio_recorder.freeAudioData.argtypes = [ctypes.POINTER(ctypes.c_ubyte)]
51
- audio_recorder.startRecord()
52
 
 
53
  self.is_ready = True
54
 
55
  try:
56
  while not self.is_exited:
57
  size = ctypes.c_int(0)
58
  is_voice_active = ctypes.c_bool(False)
59
- # 获取音频数据
60
  data_ptr = audio_recorder.getAudioData(ctypes.byref(size), ctypes.byref(is_voice_active))
61
 
62
  if data_ptr and size.value > 0:
63
  audio_data = bytes(data_ptr[: size.value])
64
- audio_frame = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
 
 
65
 
66
  if not self.is_paused:
 
67
  self.audio_frames_queue.put((audio_frame, is_voice_active.value))
68
 
69
- # 使用完数据后释放内存
70
  audio_recorder.freeAudioData(data_ptr)
71
  else:
72
- # 无数据时等待
73
  time.sleep(0.01)
74
  except Exception as e:
75
  logger.error(f'回声消除音频捕获器运行时发生错误: {e}')
 
1
  """
2
+ 音频捕获模块
3
+
4
+ 提供两种音频采集方式:
5
+ 1. 使用 PyAudio 进行标准音频采集。
6
+ 2. 使用集成了声学回声消除(AEC)和语音活动检测(VAD)的 macOS 原生库进行音频采集。
7
  """
8
 
9
  import ctypes
 
12
  from multiprocessing import Queue
13
 
14
  import numpy as np
15
+ import pyaudio
16
 
17
  from voice_dialogue.config.paths import LIBRARIES_PATH
18
  from voice_dialogue.core.base import BaseThread
 
21
 
22
  class AudioCapture(BaseThread):
23
  """
24
+ 音频捕获器。
25
+
26
+ 根据配置选择使用 PyAudio 或带回声消除(AEC)的 macOS 原生库进行音频采集。
27
+ 作为一个后台线程运行,将捕获的音频帧放入队列中。
28
  """
29
 
30
  def __init__(
 
32
  audio_frames_queue: Queue = None,
33
  enable_echo_cancellation: bool = True,
34
  ):
35
+ """
36
+ 初始化音频捕获器。
37
+
38
+ Args:
39
+ audio_frames_queue (Queue): 用于存放捕获的音频帧的队列。
40
+ enable_echo_cancellation (bool): 是否启用回声消除功能。
41
+ 若为 True,则使用原生库进行捕获;
42
+ 否则,使用 PyAudio。
43
+ """
44
  super().__init__(group, target, name, args, kwargs, daemon=daemon)
45
 
46
  self.audio_frames_queue = audio_frames_queue
 
48
  self._enable_echo_cancellation = enable_echo_cancellation
49
 
50
  @property
51
+ def is_paused(self) -> bool:
52
+ """检查捕获器是否已暂停。"""
53
  return self._pause_event.is_set()
54
 
55
  def pause(self):
56
+ """暂停音频捕获。"""
57
  self._pause_event.set()
58
 
59
  def resume(self):
60
+ """恢复音频捕获。"""
61
  self._pause_event.clear()
62
 
63
  def run(self):
64
+ """
65
+ 线程主循环。
66
+
67
+ 根据 `_enable_echo_cancellation` 标志,分派到相应的捕获方法。
68
+ """
69
+ if self._enable_echo_cancellation:
70
+ self._run_with_aec()
71
+ else:
72
+ self._run()
73
+
74
def _run(self):
    """
    Capture audio from the default input device with PyAudio.

    Opens a mono, 16 kHz, 16-bit input stream and reads it in 512-frame
    chunks until ``self.is_exited`` becomes true. Each chunk is converted
    to a float32 array scaled by the int16 maximum and put on
    ``self.audio_frames_queue``. Chunks read while the capturer is paused
    are discarded.

    This path provides neither echo cancellation nor voice activity
    detection. Errors (including a failure to open the device) are logged
    and end the capture; the stream and the PyAudio instance are always
    released.
    """
    # NOTE(review): this path enqueues a bare frame, while the AEC path
    # enqueues a ``(frame, is_voice_active)`` tuple — confirm the consumer
    # handles both payload shapes.
    chunk = 512
    sample_rate = 16000
    # Loop-invariant scale factor for int16 -> float conversion.
    int16_max = np.iinfo(np.int16).max

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try block so that a failure to open the device
        # is logged and still releases the PyAudio instance below.
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk,
        )

        logger.info("使用 PyAudio 开始音频采集...")
        self.is_ready = True

        while not self.is_exited:
            # A transient input overflow (e.g. while the consumer is slow)
            # must not terminate the capture loop.
            data = stream.read(chunk, exception_on_overflow=False)

            if self.is_paused:
                # The blocking read above already paces the loop, so the
                # chunk is simply dropped without an extra sleep.
                continue

            # Convert the raw samples to floats in the [-1.0, 1.0] range.
            audio_frame = np.frombuffer(data, dtype=np.int16).astype(np.float32) / int16_max
            self.audio_frames_queue.put(audio_frame)

    except Exception as e:
        logger.error(f'PyAudio 音频捕获器运行时发生错误: {e}')
    finally:
        try:
            if stream is not None:
                logger.info("停止 PyAudio 音频采集...")
                stream.stop_stream()
                stream.close()
        finally:
            # Release PortAudio even if stopping/closing the stream fails.
            p.terminate()
113
+
114
def _run_with_aec(self):
    """
    Capture audio through the native ``libAudioCapture.dylib`` library.

    The library is loaded with ctypes from ``LIBRARIES_PATH``. After
    ``startRecord()`` the loop polls ``getAudioData`` until
    ``self.is_exited`` becomes true. Each returned buffer is copied, handed
    back to the library with ``freeAudioData``, converted to a float32
    array scaled by the int16 maximum, and put on
    ``self.audio_frames_queue`` together with the voice-activity flag as
    ``(audio_frame, is_voice_active)``. Buffers received while paused are
    discarded. Errors inside the loop are logged and end the capture.
    """
    # str() keeps loading independent of path-like support in ctypes.CDLL.
    audio_recorder = ctypes.CDLL(str(LIBRARIES_PATH / 'libAudioCapture.dylib'))
    audio_recorder.getAudioData.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.POINTER(ctypes.c_bool)]
    audio_recorder.getAudioData.restype = ctypes.POINTER(ctypes.c_ubyte)
    audio_recorder.freeAudioData.argtypes = [ctypes.POINTER(ctypes.c_ubyte)]

    # Loop-invariant scale factor for int16 -> float conversion.
    int16_max = np.iinfo(np.int16).max

    # NOTE(review): no matching stop call is visible for startRecord();
    # confirm whether the library expects one when the thread exits.
    audio_recorder.startRecord()
    self.is_ready = True

    try:
        while not self.is_exited:
            size = ctypes.c_int(0)
            is_voice_active = ctypes.c_bool(False)
            # Fetch the next buffer from the native library.
            data_ptr = audio_recorder.getAudioData(ctypes.byref(size), ctypes.byref(is_voice_active))

            if not data_ptr:
                # Nothing available yet: sleep briefly instead of spinning.
                time.sleep(0.01)
                continue

            try:
                # One memcpy of the native buffer; slicing the pointer would
                # build a Python list of ints first.
                audio_data = ctypes.string_at(data_ptr, size.value) if size.value > 0 else b''
            finally:
                # Return every non-null buffer to the library, including
                # zero-length ones and when copying fails.
                # NOTE(review): assumes freeAudioData accepts any pointer
                # returned by getAudioData — confirm against the library.
                audio_recorder.freeAudioData(data_ptr)

            if not audio_data:
                time.sleep(0.01)
                continue

            if self.is_paused:
                continue

            # Convert the raw samples to floats in the [-1.0, 1.0] range.
            audio_frame = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / int16_max
            # Enqueue the frame together with its voice-activity flag.
            self.audio_frames_queue.put((audio_frame, is_voice_active.value))
    except Exception as e:
        logger.error(f'回声消除音频捕获器运行时发生错误: {e}')