daihui.zhang committed on
Commit
c54125b
·
1 Parent(s): 8be1cbc

change logic of persist words

Browse files
transcribe/strategy.py CHANGED
@@ -250,7 +250,7 @@ class TranscriptStabilityAnalyzer:
250
  return
251
 
252
  # yield from self._handle_short_buffer(current, prev)
253
- if buffer_duration <= 12:
254
  yield from self._handle_short_buffer(current, prev)
255
  else:
256
  yield from self._handle_long_buffer(current)
@@ -281,8 +281,8 @@ class TranscriptStabilityAnalyzer:
281
 
282
  def _handle_long_buffer(self, curr: TranscriptChunk) -> Iterator[TranscriptResult]:
283
  chunks = curr.split_by(SplitMode.PUNCTUATION)
284
- if len(chunks) > 2:
285
- stable, remaining = chunks[:-2], chunks[-2:]
286
  # stable_str = self.merge_chunks(stable)
287
  # remaining_str = self.merge_chunks(remaining)
288
  yield from self._yield_commit_results(
 
250
  return
251
 
252
  # yield from self._handle_short_buffer(current, prev)
253
+ if buffer_duration < 4:
254
  yield from self._handle_short_buffer(current, prev)
255
  else:
256
  yield from self._handle_long_buffer(current)
 
281
 
282
  def _handle_long_buffer(self, curr: TranscriptChunk) -> Iterator[TranscriptResult]:
283
  chunks = curr.split_by(SplitMode.PUNCTUATION)
284
+ if len(chunks) > 1:
285
+ stable, remaining = chunks[:-1], chunks[-1:]
286
  # stable_str = self.merge_chunks(stable)
287
  # remaining_str = self.merge_chunks(remaining)
288
  yield from self._yield_commit_results(
transcribe/whisper_llm_serve.py CHANGED
@@ -141,8 +141,9 @@ class WhisperTranscriptionService(ServeClientBase):
141
 
142
  result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
143
  segments = result.segments
144
-
145
- log_block("Whisper transcription output", f"{''.join(seg.text for seg in segments)}", "")
 
146
  log_block("Whisper transcription time", f"{(time.perf_counter() - start_time):.3f}", "s")
147
 
148
  return [
@@ -203,7 +204,7 @@ class WhisperTranscriptionService(ServeClientBase):
203
  if audio_buffer is None:
204
  time.sleep(0.2)
205
  continue
206
-
207
  # c+= 1
208
  # save_to_wave(f"dev-{c}.wav", audio_buffer)
209
 
 
141
 
142
  result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
143
  segments = result.segments
144
+ logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
145
+
146
+ log_block("Whisper transcription output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
147
  log_block("Whisper transcription time", f"{(time.perf_counter() - start_time):.3f}", "s")
148
 
149
  return [
 
204
  if audio_buffer is None:
205
  time.sleep(0.2)
206
  continue
207
+ logger.debug(f"🥤 Buffer Length: {len(audio_buffer)/self.sample_rate:.2f} ")
208
  # c+= 1
209
  # save_to_wave(f"dev-{c}.wav", audio_buffer)
210