daihui.zhang
committed on
Commit
·
c54125b
1
Parent(s):
8be1cbc
change logic of persist words
Browse files
transcribe/strategy.py
CHANGED
|
@@ -250,7 +250,7 @@ class TranscriptStabilityAnalyzer:
|
|
| 250 |
return
|
| 251 |
|
| 252 |
# yield from self._handle_short_buffer(current, prev)
|
| 253 |
-
if buffer_duration
|
| 254 |
yield from self._handle_short_buffer(current, prev)
|
| 255 |
else:
|
| 256 |
yield from self._handle_long_buffer(current)
|
|
@@ -281,8 +281,8 @@ class TranscriptStabilityAnalyzer:
|
|
| 281 |
|
| 282 |
def _handle_long_buffer(self, curr: TranscriptChunk) -> Iterator[TranscriptResult]:
|
| 283 |
chunks = curr.split_by(SplitMode.PUNCTUATION)
|
| 284 |
-
if len(chunks) >
|
| 285 |
-
stable, remaining = chunks[:-
|
| 286 |
# stable_str = self.merge_chunks(stable)
|
| 287 |
# remaining_str = self.merge_chunks(remaining)
|
| 288 |
yield from self._yield_commit_results(
|
|
|
|
| 250 |
return
|
| 251 |
|
| 252 |
# yield from self._handle_short_buffer(current, prev)
|
| 253 |
+
if buffer_duration < 4:
|
| 254 |
yield from self._handle_short_buffer(current, prev)
|
| 255 |
else:
|
| 256 |
yield from self._handle_long_buffer(current)
|
|
|
|
| 281 |
|
| 282 |
def _handle_long_buffer(self, curr: TranscriptChunk) -> Iterator[TranscriptResult]:
|
| 283 |
chunks = curr.split_by(SplitMode.PUNCTUATION)
|
| 284 |
+
if len(chunks) > 1:
|
| 285 |
+
stable, remaining = chunks[:-1], chunks[-1:]
|
| 286 |
# stable_str = self.merge_chunks(stable)
|
| 287 |
# remaining_str = self.merge_chunks(remaining)
|
| 288 |
yield from self._yield_commit_results(
|
transcribe/whisper_llm_serve.py
CHANGED
|
@@ -141,8 +141,9 @@ class WhisperTranscriptionService(ServeClientBase):
|
|
| 141 |
|
| 142 |
result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
|
| 143 |
segments = result.segments
|
| 144 |
-
|
| 145 |
-
|
|
|
|
| 146 |
log_block("Whisper transcription time", f"{(time.perf_counter() - start_time):.3f}", "s")
|
| 147 |
|
| 148 |
return [
|
|
@@ -203,7 +204,7 @@ class WhisperTranscriptionService(ServeClientBase):
|
|
| 203 |
if audio_buffer is None:
|
| 204 |
time.sleep(0.2)
|
| 205 |
continue
|
| 206 |
-
|
| 207 |
# c+= 1
|
| 208 |
# save_to_wave(f"dev-{c}.wav", audio_buffer)
|
| 209 |
|
|
|
|
| 141 |
|
| 142 |
result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
|
| 143 |
segments = result.segments
|
| 144 |
+
logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
|
| 145 |
+
|
| 146 |
+
log_block("Whisper transcription output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
|
| 147 |
log_block("Whisper transcription time", f"{(time.perf_counter() - start_time):.3f}", "s")
|
| 148 |
|
| 149 |
return [
|
|
|
|
| 204 |
if audio_buffer is None:
|
| 205 |
time.sleep(0.2)
|
| 206 |
continue
|
| 207 |
+
logger.debug(f"🥤 Buffer Length: {len(audio_buffer)/self.sample_rate:.2f} ")
|
| 208 |
# c+= 1
|
| 209 |
# save_to_wave(f"dev-{c}.wav", audio_buffer)
|
| 210 |
|