daihui.zhang committed
Commit eabbf72 · 1 Parent(s): 5d48b84

fix bug
transcribe/strategy.py CHANGED
@@ -345,8 +345,8 @@ class TranscriptStabilityAnalyzer:
         if curr_first and prev_first:
 
             core = curr_first.compare(prev_first)
-
-            if core >= 0.8:
+            has_punctuation = curr_first.has_punctuation()
+            if core >= 0.8 and has_punctuation:
                 yield from self._yield_commit_results(curr_first, curr_rest, curr_first.is_end_sentence())
                 return
 
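For reference, a minimal, self-contained sketch of what the new gate does. The Segment class, its compare() and has_punctuation() bodies, and should_commit() are hypothetical stand-ins for illustration, not the project's actual code; only the condition `core >= 0.8 and has_punctuation` mirrors the diff.

# A minimal sketch of the new commit gate, assuming a hypothetical Segment
# stand-in: compare() and has_punctuation() below are illustrative guesses,
# not the implementations in transcribe/strategy.py.
from dataclasses import dataclass
from difflib import SequenceMatcher


@dataclass
class Segment:
    text: str

    def compare(self, other: "Segment") -> float:
        # Similarity ratio between the two segment texts, in [0.0, 1.0].
        return SequenceMatcher(None, self.text, other.text).ratio()

    def has_punctuation(self) -> bool:
        # True if the segment carries any sentence punctuation.
        return any(ch in ",.!?，。！？" for ch in self.text)


def should_commit(curr_first: Segment, prev_first: Segment, threshold: float = 0.8) -> bool:
    # Before this commit a stable similarity score alone triggered a commit;
    # now the segment must also contain punctuation.
    core = curr_first.compare(prev_first)
    return core >= threshold and curr_first.has_punctuation()


if __name__ == "__main__":
    prev = Segment("hello world how are")
    stable_no_punct = Segment("hello world how are")
    stable_punct = Segment("hello world, how are")
    print(should_commit(stable_no_punct, prev))  # False: stable but unpunctuated
    print(should_commit(stable_punct, prev))     # True: stable and punctuated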
transcribe/whisper_llm_serve.py CHANGED
@@ -102,7 +102,7 @@ class WhisperTranscriptionService(ServeClientBase):
         """Apply voice activity detection to optimize the audio buffer"""
         with self.lock:
             if self.frames_np is not None:
-                self._c+= 1
+                # self._c+= 1
                 frame = self.frames_np.copy()
                 processed_audio = self._translate_pipe.voice_detect(frame.tobytes())
                 self.frames_np = np.frombuffer(processed_audio.audio, dtype=np.float32).copy()
@@ -147,6 +147,7 @@ class WhisperTranscriptionService(ServeClientBase):
 
         result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
         segments = result.segments
+        logger.debug(f"📝 Transcrible Segments: {segments} ")
         logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
         log_block("📝 Transcrible output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
         log_block("📝 Transcrible time", f"{(time.perf_counter() - start_time):.3f}", "s")