Spaces:
Runtime error
Runtime error
streaming speech in the debug
Browse files
- audio_stream_processor.py +42 -0
- chat_service.py +1 -1
- debug.py +35 -22
- speech_service.py +9 -0
audio_stream_processor.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
from threading import Thread
|
| 3 |
+
from queue import Queue
|
| 4 |
+
from typing import Iterator
|
| 5 |
+
|
| 6 |
+
class AudioStreamProcessor:
    """Play queued audio streams one at a time through an external player.

    A background worker thread takes streams off a FIFO queue and pipes each
    stream's chunks into the stdin of a freshly spawned player process
    (``mpv`` by default), waiting for that process to exit before starting
    the next stream.

    Call :meth:`close` (or use the instance as a context manager) to let the
    already-queued streams finish and stop the worker.
    """

    # Command used when the caller does not supply one; reads audio from stdin.
    DEFAULT_PLAYER_COMMAND = ("mpv", "--no-cache", "--no-terminal", "--", "fd://0")

    def __init__(self, player_command=None):
        """Create the queue and start the worker thread.

        Args:
            player_command: Optional argv sequence for the player process.
                It must read audio data from its stdin. Defaults to
                ``DEFAULT_PLAYER_COMMAND``.
        """
        self._player_command = list(player_command or self.DEFAULT_PLAYER_COMMAND)
        self._closed = False
        self.queue = Queue()
        # daemon=True: the worker blocks forever on queue.get(), so a
        # non-daemon thread would keep the interpreter alive whenever the
        # caller fails to reach close() (e.g. after an exception).
        self.thread = Thread(target=self._process_audio_streams, daemon=True)
        self.thread.start()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def add_audio_stream(self, audio_stream: Iterator[bytes]):
        """Queue *audio_stream* for playback after all earlier streams."""
        self.queue.put(audio_stream)

    def _process_audio_streams(self):
        """Worker loop: play queued streams in order until the sentinel arrives."""
        # Local import so this class does not depend on an edit to the
        # file-level import block.
        import logging

        logger = logging.getLogger(__name__)
        while True:
            audio_stream = self.queue.get()
            if audio_stream is None:  # None is the sentinel that marks the end
                break
            try:
                self._stream(audio_stream)
            except Exception:
                # Worker boundary: one failing stream (player not installed,
                # stream source error, ...) must not kill the thread and
                # silently drop everything still queued.
                logger.exception("Failed to play audio stream; continuing with the next one")

    def _stream(self, audio_stream: Iterator[bytes]):
        """Pipe every chunk of *audio_stream* into a new player process.

        Blocks until the player exits. Errors raised while iterating the
        stream propagate to the caller after the player has been cleaned up.
        """
        player = subprocess.Popen(
            self._player_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            for chunk in audio_stream:
                if chunk is not None:
                    player.stdin.write(chunk)
                    player.stdin.flush()
        except BrokenPipeError:
            # The player exited before consuming the whole stream; there is
            # nothing left to feed it.
            pass
        finally:
            # Always close stdin and reap the process, even on error, so no
            # player process or pipe is leaked.
            try:
                player.stdin.close()
            except OSError:
                pass
            player.wait()

    def close(self):
        """Finish the queued streams, then stop and join the worker thread.

        Safe to call more than once; later calls are no-ops.
        """
        if self._closed:
            return
        self._closed = True
        self.queue.put(None)  # Signal the processing thread to terminate
        self.thread.join()
|
chat_service.py
CHANGED
|
@@ -55,7 +55,7 @@ class ChatService:
|
|
| 55 |
max_new_tokens=200,
|
| 56 |
do_sample=True,
|
| 57 |
top_k=40,
|
| 58 |
-
temperature=1.0,
|
| 59 |
pad_token_id=self._tokenizer.eos_token_id,
|
| 60 |
)
|
| 61 |
agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
|
|
|
|
| 55 |
max_new_tokens=200,
|
| 56 |
do_sample=True,
|
| 57 |
top_k=40,
|
| 58 |
+
temperature=1.0, # use 1.0 for debugging/deterministic results
|
| 59 |
pad_token_id=self._tokenizer.eos_token_id,
|
| 60 |
)
|
| 61 |
agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
|
debug.py
CHANGED
|
@@ -2,34 +2,47 @@ from clip_transform import CLIPTransform
|
|
| 2 |
from chat_service import ChatService
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from speech_service import SpeechService
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
load_dotenv()
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# print ("CLIP success")
|
| 11 |
|
| 12 |
-
print ("Initializing
|
| 13 |
-
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
|
| 18 |
-
user_speech_service
|
|
|
|
|
|
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
-
prompts = [
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
]
|
| 27 |
-
for prompt in prompts:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
ai_speech_service.speak(response)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from chat_service import ChatService
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from speech_service import SpeechService
|
| 5 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 6 |
+
from audio_stream_processor import AudioStreamProcessor
|
| 7 |
|
|
|
|
| 8 |
|
| 9 |
+
def run_debug_code():
    """Run a scripted chat session and speak both sides of it.

    Loads environment variables, creates the chat service and two speech
    services (one voice for the user prompts, one for the AI responses),
    then for each hard-coded prompt queues the spoken prompt, asks the chat
    service for a response, and queues the spoken response. Audio is queued
    on an ``AudioStreamProcessor`` rather than played inline.
    """
    load_dotenv()

    # CLIP initialisation is currently disabled for this debug run.
    # print ("Initializing CLIP templates")
    # clip_transform = CLIPTransform()
    # print ("CLIP success")

    print ("Initializing Chat")
    chat_service = ChatService()

    user_speech_service = SpeechService(voice_id="Adam")
    ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
    processor = AudioStreamProcessor()

    # user_speech_service.print_voices() # if you want to see your custom voices

    prompts = [
        "hello, how are you today?",
        "tell me about your shadow self?",
        "hmm, interesting, tell me more about that.",
        "wait, that is so interesting, what else?",
    ]
    try:
        for prompt in prompts:
            print ("")
            print (f'prompt: "{prompt}"')
            stream = user_speech_service.stream(prompt)
            processor.add_audio_stream(stream)

            response = chat_service.chat(prompt)
            print ("")
            print (f'response: "{response}"')
            stream = ai_speech_service.stream(response)
            processor.add_audio_stream(stream)
    finally:
        # Always stop the playback worker, even when chat or speech
        # generation raises; otherwise its thread is left blocked on the
        # queue and the process cannot exit.
        processor.close()
    print ("Chat success")


if __name__ == '__main__':
    run_debug_code()
|
speech_service.py
CHANGED
|
@@ -46,3 +46,12 @@ class SpeechService:
|
|
| 46 |
play(audio)
|
| 47 |
return
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
play(audio)
|
| 47 |
return
|
| 48 |
|
| 49 |
+
def stream(self, prompt):
    """Request streamed speech for *prompt* using this service's voice and model.

    Calls ``generate`` with ``stream=True`` and hands back whatever it
    returns without consuming it.
    NOTE(review): presumably an iterator of audio byte chunks, since
    debug.py passes the result to ``AudioStreamProcessor.add_audio_stream``
    -- confirm against the ``generate`` API.
    """
    return generate(
        text=prompt,
        voice=self._voice_id,
        model=self._model_id,
        stream=True,
    )
|
| 57 |
+
|