Spaces:
Runtime error
Runtime error
streaming speech in the debug
Browse files- audio_stream_processor.py +42 -0
- chat_service.py +1 -1
- debug.py +35 -22
- speech_service.py +9 -0
audio_stream_processor.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import subprocess
from queue import Queue
from threading import Thread
from typing import Iterator, Optional


class AudioStreamProcessor:
    """Play queued audio byte-streams sequentially through an external mpv process.

    A single background worker thread consumes streams from a FIFO queue, so
    callers can enqueue audio for playback without blocking on it.
    """

    def __init__(self):
        # Unbounded FIFO of pending streams; None is the shutdown sentinel.
        self.queue: "Queue[Optional[Iterator[bytes]]]" = Queue()
        # daemon=True so a forgotten close() cannot hang interpreter shutdown.
        self.thread = Thread(target=self._process_audio_streams, daemon=True)
        self.thread.start()

    def add_audio_stream(self, audio_stream: Iterator[bytes]):
        """Enqueue an iterator of audio chunks for sequential playback."""
        self.queue.put(audio_stream)

    def _process_audio_streams(self):
        """Worker loop: play each queued stream; survive per-stream failures."""
        while True:
            audio_stream = self.queue.get()
            if audio_stream is None:  # We'll use None as a sentinel to mark the end
                break
            try:
                self._stream(audio_stream)
            except Exception as exc:
                # A failed stream (mpv missing, mpv exiting early, a broken
                # source iterator) must not kill the worker thread and
                # silently drop every later queued stream.
                print(f"AudioStreamProcessor: playback failed: {exc}")

    def _stream(self, audio_stream: Iterator[bytes]):
        """Pipe audio chunks into a fresh mpv process reading from stdin."""
        mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
        mpv_process = subprocess.Popen(
            mpv_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            for chunk in audio_stream:
                if chunk is not None:
                    mpv_process.stdin.write(chunk)
                    mpv_process.stdin.flush()
        finally:
            # Always close stdin and reap the process, even when a write
            # raised (e.g. BrokenPipeError after mpv exits early); otherwise
            # the pipe leaks and the child lingers as a zombie.
            if mpv_process.stdin:
                mpv_process.stdin.close()
            mpv_process.wait()

    def close(self):
        """Signal the worker to stop, then wait for pending streams to finish."""
        self.queue.put(None)  # Signal the processing thread to terminate
        self.thread.join()
chat_service.py
CHANGED
@@ -55,7 +55,7 @@ class ChatService:
|
|
55 |
max_new_tokens=200,
|
56 |
do_sample=True,
|
57 |
top_k=40,
|
58 |
-
temperature=1.0,
|
59 |
pad_token_id=self._tokenizer.eos_token_id,
|
60 |
)
|
61 |
agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
|
|
|
55 |
max_new_tokens=200,
|
56 |
do_sample=True,
|
57 |
top_k=40,
|
58 |
+
temperature=1.0, # use 1.0 for debugging/deterministic results
|
59 |
pad_token_id=self._tokenizer.eos_token_id,
|
60 |
)
|
61 |
agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
|
debug.py
CHANGED
@@ -2,34 +2,47 @@ from clip_transform import CLIPTransform
|
|
2 |
from chat_service import ChatService
|
3 |
from dotenv import load_dotenv
|
4 |
from speech_service import SpeechService
|
|
|
|
|
5 |
|
6 |
-
load_dotenv()
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
# print ("CLIP success")
|
11 |
|
12 |
-
print ("Initializing
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
|
18 |
-
user_speech_service
|
|
|
|
|
19 |
|
|
|
20 |
|
21 |
-
prompts = [
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
]
|
27 |
-
for prompt in prompts:
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
ai_speech_service.speak(response)
|
33 |
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from chat_service import ChatService
|
3 |
from dotenv import load_dotenv
|
4 |
from speech_service import SpeechService
|
5 |
+
from concurrent.futures import ThreadPoolExecutor
|
6 |
+
from audio_stream_processor import AudioStreamProcessor
|
7 |
|
|
|
8 |
|
def run_debug_code():
    """Command-line smoke test of the chat + streaming-speech pipeline."""
    load_dotenv()

    # print ("Initializing CLIP templates")
    # clip_transform = CLIPTransform()
    # print ("CLIP success")

    print("Initializing Chat")
    chat_service = ChatService()

    user_speech_service = SpeechService(voice_id="Adam")
    ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj")  # Chales003
    processor = AudioStreamProcessor()

    # user_speech_service.print_voices() # if you want to see your custom voices

    prompts = (
        "hello, how are you today?",
        "tell me about your shadow self?",
        "hmm, interesting, tell me more about that.",
        "wait, that is so interesting, what else?",
    )
    for prompt in prompts:
        # Speak the user's prompt, then the model's reply, through the
        # shared processor so playback happens sequentially in the background.
        print("")
        print(f'prompt: "{prompt}"')
        processor.add_audio_stream(user_speech_service.stream(prompt))

        response = chat_service.chat(prompt)
        print("")
        print(f'response: "{response}"')
        processor.add_audio_stream(ai_speech_service.stream(response))

    processor.close()
    print("Chat success")


if __name__ == '__main__':
    run_debug_code()
|
speech_service.py
CHANGED
@@ -46,3 +46,12 @@ class SpeechService:
|
|
46 |
play(audio)
|
47 |
return
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
play(audio)
|
47 |
return
|
48 |
|
def stream(self, prompt):
    """Return a streaming audio-chunk generator for *prompt*.

    Unlike full synthesis, this does not wait for the whole clip;
    the caller consumes chunks as they arrive.
    """
    # NOTE(review): `generate` is presumably the TTS client's synthesis
    # call imported at module level — confirm against the file's imports.
    return generate(
        text=prompt,
        voice=self._voice_id,
        model=self._model_id,
        stream=True,
    )