daihui.zhang committed
Commit eabbf72 · 1 Parent(s): 5d48b84

fix bug
transcribe/strategy.py CHANGED
@@ -345,8 +345,8 @@ class TranscriptStabilityAnalyzer:
         if curr_first and prev_first:
 
             core = curr_first.compare(prev_first)
-
-            if core >= 0.8:
+            has_punctuation = curr_first.has_punctuation()
+            if core >= 0.8 and has_punctuation:
                 yield from self._yield_commit_results(curr_first, curr_rest, curr_first.is_end_sentence())
                 return
 
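For reference, a minimal, self-contained sketch of what the new gate does. The Segment class, its compare() and has_punctuation() bodies, and should_commit() are hypothetical stand-ins for illustration, not the project's actual code; only the condition `core >= 0.8 and has_punctuation` mirrors the diff.

# A minimal sketch of the new commit gate, assuming a hypothetical Segment
# stand-in: compare() and has_punctuation() below are illustrative guesses,
# not the implementations in transcribe/strategy.py.
from dataclasses import dataclass
from difflib import SequenceMatcher


@dataclass
class Segment:
    text: str

    def compare(self, other: "Segment") -> float:
        # Similarity ratio between the two segment texts, in [0.0, 1.0].
        return SequenceMatcher(None, self.text, other.text).ratio()

    def has_punctuation(self) -> bool:
        # True if the segment carries any sentence punctuation.
        return any(ch in ",.!?，。！？" for ch in self.text)


def should_commit(curr_first: Segment, prev_first: Segment, threshold: float = 0.8) -> bool:
    # Before this commit a stable similarity score alone triggered a commit;
    # now the segment must also contain punctuation.
    core = curr_first.compare(prev_first)
    return core >= threshold and curr_first.has_punctuation()


if __name__ == "__main__":
    prev = Segment("hello world how are")
    stable_no_punct = Segment("hello world how are")
    stable_punct = Segment("hello world, how are")
    print(should_commit(stable_no_punct, prev))  # False: stable but unpunctuated
    print(should_commit(stable_punct, prev))     # True: stable and punctuated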
transcribe/whisper_llm_serve.py CHANGED
@@ -102,7 +102,7 @@ class WhisperTranscriptionService(ServeClientBase):
         """Apply voice activity detection to optimize the audio buffer"""
         with self.lock:
             if self.frames_np is not None:
-                self._c+= 1
+                # self._c+= 1
                 frame = self.frames_np.copy()
                 processed_audio = self._translate_pipe.voice_detect(frame.tobytes())
                 self.frames_np = np.frombuffer(processed_audio.audio, dtype=np.float32).copy()
@@ -147,6 +147,7 @@ class WhisperTranscriptionService(ServeClientBase):
 
         result = self._translate_pipe.transcrible(audio_buffer.tobytes(), self.source_language)
         segments = result.segments
+        logger.debug(f"📝 Transcrible Segments: {segments} ")
         logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
         log_block("📝 Transcrible output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
         log_block("📝 Transcrible time", f"{(time.perf_counter() - start_time):.3f}", "s")