daihui.zhang committed on
Commit
69bc449
·
1 Parent(s): d4a5f4a

add translate

Browse files
Files changed (1) hide show
  1. transcribe/whisper_llm_serve.py +16 -13
transcribe/whisper_llm_serve.py CHANGED
@@ -116,6 +116,7 @@ class PywhisperInference:
116
  whisper_model = None
117
  llm_model = None
118
  vad_model = None
 
119
 
120
  @classmethod
121
  def initializer(cls, event:mp.Event, warmup=True):
@@ -133,7 +134,14 @@ class PywhisperInference:
133
  # init llamacpp
134
  cls.llm_model = QwenTranslator(config.LLM_MODEL_PATH, config.LLM_SYS_PROMPT)
135
  cls.vad_model = VoiceActivityDetector()
 
136
  event.set()
 
 
 
 
 
 
137
 
138
  @classmethod
139
  def warmup(cls, warmup_steps=1):
@@ -190,8 +198,8 @@ class PyWhiperCppServe(ServeClientBase):
190
  # 进程初始化后再开始收音频
191
  self._ready_state = mp.Event()
192
  self._pool = PPool(
193
- max_workers=1, initializer=partial(PywhisperInference.initializer, event=self._ready_state))
194
-
195
  logger.info('Create a process to process audio.')
196
  self.send_ready_state()
197
  # self.load_frame_thread = threading.Thread(target=self.load_frame_from_queue)
@@ -203,8 +211,7 @@ class PyWhiperCppServe(ServeClientBase):
203
  self.trans_thread.start()
204
 
205
  def send_ready_state(self):
206
- while not self._ready_state:
207
- time.sleep(0.1)
208
 
209
  self.websocket.send(json.dumps({
210
  "uid": self.client_uid,
@@ -238,7 +245,6 @@ class PyWhiperCppServe(ServeClientBase):
238
  else:
239
  self.frames_np = np.append(self.frames_np,frame_np)
240
 
241
-
242
 
243
  def update_audio_buffer(self, last_offset):
244
  with self.lock:
@@ -275,10 +281,9 @@ class PyWhiperCppServe(ServeClientBase):
275
  markers = config.PAUSE_END_MARKERS
276
  is_end = False
277
  else:
278
- # 使用句号 问好等长句结尾符号作为判断
279
  markers = config.SENTENCE_END_MARKERS
280
  is_end = True
281
-
282
 
283
  for idx, seg in enumerate(segments):
284
  left_watch_sequences.append(seg)
@@ -287,7 +292,7 @@ class PyWhiperCppServe(ServeClientBase):
287
  rest_buffer_duration = (len(audio_buffer) - seg_index) / self.sample_rate
288
  # is_end = any(i in seg.text for i in config.SENTENCE_END_MARKERS)
289
  right_watch_sequences = segments[min(idx+1, len(segments)):]
290
- if rest_buffer_duration >= 1.5:
291
  left_watch_idx = seg_index
292
  break
293
  return left_watch_idx, left_watch_sequences, right_watch_sequences, is_end
@@ -321,8 +326,6 @@ class PyWhiperCppServe(ServeClientBase):
321
  continue
322
 
323
  audio_buffer = self.get_audio_chunk_for_processing()
324
- logger.info(f"Processing audio with duration: {len(audio_buffer)}")
325
- # segments = self.transcribe_audio(audio_buffer)
326
  # c+= 1
327
  # name = f"dev-{c}.wav"
328
  # save_to_wave(name, audio_buffer)
@@ -362,7 +365,7 @@ class PyWhiperCppServe(ServeClientBase):
362
  context=message,
363
  from_=self.language,
364
  to=self.dst_lang,
365
- tran_content="this is english sample",
366
  partial=False
367
  )
368
  if self._segment_manager.string.strip():
@@ -372,7 +375,7 @@ class PyWhiperCppServe(ServeClientBase):
372
  context=self._segment_manager.string,
373
  from_=self.language,
374
  to=self.dst_lang,
375
- tran_content="this is english sample",
376
  )
377
 
378
  else:
@@ -384,7 +387,7 @@ class PyWhiperCppServe(ServeClientBase):
384
  context=message,
385
  from_=self.language,
386
  to=self.dst_lang,
387
- tran_content="this is english sample",
388
  )
389
 
390
  def send_to_client(self, data:TransResult):
 
116
  whisper_model = None
117
  llm_model = None
118
  vad_model = None
119
+ init_event = None
120
 
121
  @classmethod
122
  def initializer(cls, event:mp.Event, warmup=True):
 
134
  # init llamacpp
135
  cls.llm_model = QwenTranslator(config.LLM_MODEL_PATH, config.LLM_SYS_PROMPT)
136
  cls.vad_model = VoiceActivityDetector()
137
+ cls.init_event = event
138
  event.set()
139
+
140
+
141
+ @classmethod
142
+ def init(cls):
143
+ pass
144
+
145
 
146
  @classmethod
147
  def warmup(cls, warmup_steps=1):
 
198
  # 进程初始化后再开始收音频
199
  self._ready_state = mp.Event()
200
  self._pool = PPool(
201
+ max_workers=1, initializer=PywhisperInference.initializer, initargs=(self._ready_state, ))
202
+ self._pool.submit(PywhisperInference.init)
203
  logger.info('Create a process to process audio.')
204
  self.send_ready_state()
205
  # self.load_frame_thread = threading.Thread(target=self.load_frame_from_queue)
 
211
  self.trans_thread.start()
212
 
213
  def send_ready_state(self):
214
+ self._ready_state.wait()
 
215
 
216
  self.websocket.send(json.dumps({
217
  "uid": self.client_uid,
 
245
  else:
246
  self.frames_np = np.append(self.frames_np,frame_np)
247
 
 
248
 
249
  def update_audio_buffer(self, last_offset):
250
  with self.lock:
 
281
  markers = config.PAUSE_END_MARKERS
282
  is_end = False
283
  else:
284
+ # 使用句号 长句结尾符号作为判断
285
  markers = config.SENTENCE_END_MARKERS
286
  is_end = True
 
287
 
288
  for idx, seg in enumerate(segments):
289
  left_watch_sequences.append(seg)
 
292
  rest_buffer_duration = (len(audio_buffer) - seg_index) / self.sample_rate
293
  # is_end = any(i in seg.text for i in config.SENTENCE_END_MARKERS)
294
  right_watch_sequences = segments[min(idx+1, len(segments)):]
295
+ if rest_buffer_duration >= 1:
296
  left_watch_idx = seg_index
297
  break
298
  return left_watch_idx, left_watch_sequences, right_watch_sequences, is_end
 
326
  continue
327
 
328
  audio_buffer = self.get_audio_chunk_for_processing()
 
 
329
  # c+= 1
330
  # name = f"dev-{c}.wav"
331
  # save_to_wave(name, audio_buffer)
 
365
  context=message,
366
  from_=self.language,
367
  to=self.dst_lang,
368
+ tran_content=self.translate_text(message),
369
  partial=False
370
  )
371
  if self._segment_manager.string.strip():
 
375
  context=self._segment_manager.string,
376
  from_=self.language,
377
  to=self.dst_lang,
378
+ tran_content=self.translate_text(self._segment_manager.string.strip()),
379
  )
380
 
381
  else:
 
387
  context=message,
388
  from_=self.language,
389
  to=self.dst_lang,
390
+ tran_content=self.translate_text(message),
391
  )
392
 
393
  def send_to_client(self, data:TransResult):