Spaces:

utkarsh-dixit
/

WhisperFusion

Paused

jpc committed on Jan 17

Commit

00c7470

•

1 Parent(s): 3d1dc04

Implemented aborting from speculative WhisperSpeech generation

Files changed (1) hide show

tts_service.py CHANGED Viewed

@@ -43,12 +43,18 @@ class WhisperSpeechTTS:
             llm_output = llm_response["llm_output"][0]
             self.eos = llm_response["eos"]
             # only process if the output updated
             if self.last_llm_response != llm_output.strip():
                 logging.INFO("[WhisperSpeech INFO:] Tunning TTS inference ...")
-                audio = self.pipe.vocoder.decode(self.pipe.generate_atoks(llm_output.strip()))
-                self.output_audio = audio.cpu().numpy()
-                self.last_llm_response = llm_output.strip()
             if self.eos and self.output_audio is not None:
                 try:

             llm_output = llm_response["llm_output"][0]
             self.eos = llm_response["eos"]
+            def should_abort():
+                if not audio_queue.empty(): raise TimeoutError()
             # only process if the output updated
             if self.last_llm_response != llm_output.strip():
                 logging.INFO("[WhisperSpeech INFO:] Tunning TTS inference ...")
+                try:
+                    audio = self.pipe.generate(llm_output.strip(), step_callback=should_abort)
+                    self.output_audio = audio.cpu().numpy()
+                    self.last_llm_response = llm_output.strip()
+                except TimeoutError:
+                    pass
             if self.eos and self.output_audio is not None:
                 try: