created chat_pipeline
- audio_stream_processor.py +14 -9
- chat_pipeline.py +96 -0
- debug.py +24 -1
- pipeline.py +23 -17
- speech_service.py +2 -1
- streaming_chat_service.py +53 -2
audio_stream_processor.py
CHANGED

@@ -10,6 +10,7 @@ class AudioStreamProcessor:
         self.queue = Queue()
         self._is_running = threading.Event()
         self._is_running.set()
+        self.mpv_process = None
         self.thread = Thread(target=self._process_audio_streams)
         self.thread.start()

@@ -18,30 +19,34 @@
         self.queue.put(audio_stream)

     def _process_audio_streams(self):
+        self._start_mpv()
         while self._is_running.is_set() or not self.queue.empty():
             try:
                 audio_stream = self.queue.get(timeout=1)
             except Empty:
                 continue
             self._stream(audio_stream)
+        self._close_mpv()

     def _stream(self, audio_stream: Iterator[bytes]):
+        for chunk in audio_stream:
+            if chunk is not None:
+                self.mpv_process.stdin.write(chunk)
+                self.mpv_process.stdin.flush()
+
+    def _start_mpv(self):
         mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
-        mpv_process = subprocess.Popen(
+        self.mpv_process = subprocess.Popen(
            mpv_command,
             stdin=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL,
         )

-        for chunk in audio_stream:
-            if chunk is not None:
-                mpv_process.stdin.write(chunk)
-                mpv_process.stdin.flush()
-
-        if mpv_process.stdin:
-            mpv_process.stdin.close()
-        mpv_process.wait()
+    def _close_mpv(self):
+        if self.mpv_process.stdin:
+            self.mpv_process.stdin.close()
+        self.mpv_process.wait()

     def close(self):
         self._is_running.clear()
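
The practical effect of this refactor: one mpv process is started by the worker thread and reused across streams, so consecutive sentences play back-to-back instead of respawning the player for every stream. A minimal usage sketch (not part of the commit; the file names are hypothetical and mpv must be on PATH):

    from audio_stream_processor import AudioStreamProcessor

    def mp3_chunks(path, chunk_size=2048):
        # Yield a file's bytes in chunks, standing in for a TTS byte stream.
        with open(path, "rb") as f:
            while chunk := f.read(chunk_size):
                yield chunk

    processor = AudioStreamProcessor()                   # worker thread calls _start_mpv()
    processor.add_audio_stream(mp3_chunks("hello.mp3"))  # hypothetical file
    processor.add_audio_stream(mp3_chunks("world.mp3"))  # same mpv stdin, no respawn
    processor.close()                                    # worker drains the queue, then _close_mpv()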
chat_pipeline.py
ADDED

@@ -0,0 +1,96 @@
+import asyncio
+import time
+from clip_transform import CLIPTransform
+from chat_service import ChatService
+from dotenv import load_dotenv
+from speech_service import SpeechService
+from concurrent.futures import ThreadPoolExecutor
+from audio_stream_processor import AudioStreamProcessor
+from streaming_chat_service import StreamingChatService
+from pipeline import Pipeline, Node, Job
+from typing import List
+
+class ChatJob(Job):
+    def __init__(self, data, chat_service: ChatService):
+        super().__init__(data)
+        self.chat_service = chat_service
+
+class Node1(Node):
+    next_id = 0
+
+    async def process_job(self, job: ChatJob):
+        # input job.data is the input string
+        # output job.data is the next sentence
+        async for sentence in job.chat_service.get_responses_as_sentances_async(job.data):
+            if job.chat_service.ignore_sentence(sentence):
+                continue
+            print(f"{sentence}")
+            new_job = ChatJob(sentence, job.chat_service)
+            new_job.id = self.next_id
+            self.next_id += 1
+            yield new_job
+
+class Node2(Node):
+    next_id = 0
+
+    async def process_job(self, job: ChatJob):
+        # input job.data is the sentence
+        # output job.data is the streamed speech bytes
+        async for chunk in job.chat_service.get_speech_chunks_async(job.data):
+            new_job = ChatJob(chunk, job.chat_service)
+            new_job.id = self.next_id
+            self.next_id += 1
+            yield new_job
+
+
+class Node3(Node):
+    # sync_size = 64
+    # sync = []
+
+    async def process_job(self, job: ChatJob):
+        # input job.data is the streamed speech bytes
+        # Node3.sync.append(job.data)
+        job.chat_service.enqueue_speech_bytes_to_play([job.data])
+        yield job
+        # if len(Node3.sync) >= Node3.sync_size:
+        #     audio_chunks = Node3.sync[:Node3.sync_size]
+        #     Node3.sync = Node3.sync[Node3.sync_size:]
+        #     job.chat_service.enqueue_speech_bytes_to_play(audio_chunks)
+        #     yield job
+
+class ChatPipeline():
+    def __init__(self):
+        load_dotenv()
+        self.pipeline = Pipeline()
+        self.audio_processor = AudioStreamProcessor()
+        self.chat_service = StreamingChatService(self.audio_processor, voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.audio_processor.close()
+        self.audio_processor = None
+
+    def __del__(self):
+        if self.audio_processor:
+            self.audio_processor.close()
+            self.audio_processor = None
+
+    async def start(self):
+        self.node1_queue = asyncio.Queue()
+        self.node2_queue = asyncio.Queue()
+        self.node3_queue = asyncio.Queue()
+        self.sync = []
+        await self.pipeline.add_node(Node1, 1, self.node1_queue, self.node2_queue, sequential_node=True)
+        await self.pipeline.add_node(Node2, 1, self.node2_queue, self.node3_queue, sequential_node=True)
+        await self.pipeline.add_node(Node3, 1, self.node3_queue, None, sequential_node=True)
+
+    async def enqueue(self, prompt):
+        job = ChatJob(prompt, self.chat_service)
+        await self.pipeline.enqueue_job(job)
+
+    async def wait_until_all_jobs_idle(self):
+        # TODO - implement this
+        while True:
+            await asyncio.sleep(0.1)
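
Note that wait_until_all_jobs_idle() is still a TODO that sleeps forever, so anything awaiting it never returns. A possible implementation (a sketch, not the commit's code, using the queues created in start(); a stricter check could also compare each node's _jobs_dequeued and _jobs_processed counters from pipeline.py):

    async def wait_until_all_jobs_idle(self):
        # Sketch: treat the pipeline as idle once every inter-node queue is empty.
        queues = [self.node1_queue, self.node2_queue, self.node3_queue]
        while not all(q.empty() for q in queues):
            await asyncio.sleep(0.1)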
debug.py
CHANGED

@@ -1,4 +1,6 @@
+import asyncio
 import time
+from chat_pipeline import ChatPipeline
 from clip_transform import CLIPTransform
 from chat_service import ChatService
 from dotenv import load_dotenv

@@ -125,8 +127,29 @@ def run_debug_code():
     audio_processor.close()
     print ("Chat success")

+async def run_pipeline():
+    load_dotenv()
+
+    try:
+        chat_pipeline = ChatPipeline()
+        await chat_pipeline.start()
+        prompts = [
+            "hello, how are you today?",
+            "tell me about your shadow self?",
+            "hmm, interesting, tell me more about that.",
+            "wait, that is so interesting, what else?",
+        ]
+        for prompt in prompts:
+            await chat_pipeline.enqueue(prompt)
+        await chat_pipeline.wait_until_all_jobs_idle()
+    except KeyboardInterrupt:
+        print("Pipeline interrupted by user")
+    except Exception as e:
+        print(f"An error occurred: {e}")

 if __name__ == '__main__':
     # time_sentance_lenghts()
     # test_sentance_lenghts()
-    run_debug_code()
+    # run_debug_code()
+    asyncio.run(run_pipeline())
+
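
Since ChatPipeline defines __enter__/__exit__, the driver could also use it as a context manager so the audio processor is torn down even when an exception escapes; a hypothetical variant of run_pipeline(), not part of the commit:

    async def run_pipeline_managed():
        load_dotenv()
        with ChatPipeline() as chat_pipeline:  # __exit__ closes the audio processor
            await chat_pipeline.start()
            await chat_pipeline.enqueue("hello, how are you today?")
            await chat_pipeline.wait_until_all_jobs_idle()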
pipeline.py
CHANGED

@@ -1,4 +1,5 @@
 import asyncio
+import traceback

 class Job:
     def __init__(self, data):

@@ -23,28 +24,33 @@ class Node:
             raise ValueError('job_sync is not None and sequential_node is False')

     async def run(self):
-        while True:
-            job: Job = await self.input_queue.get()
-            self._jobs_dequeued += 1
-            if self.sequential_node == False:
-                async for job in self.process_job(job):
-                    if self.output_queue is not None:
-                        await self.output_queue.put(job)
-                    if self.job_sync is not None:
-                        self.job_sync.append(job)
-                    self._jobs_processed += 1
-            else:
-                # ensure that jobs are processed in order
-                self.buffer[job.id] = job
-                while self.next_i in self.buffer:
-                    job = self.buffer.pop(self.next_i)
-                    async for job in self.process_job(job):
-                        if self.output_queue is not None:
-                            await self.output_queue.put(job)
-                        if self.job_sync is not None:
-                            self.job_sync.append(job)
-                        self._jobs_processed += 1
-                    self.next_i += 1
+        try:
+            while True:
+                job: Job = await self.input_queue.get()
+                self._jobs_dequeued += 1
+                if self.sequential_node == False:
+                    async for job in self.process_job(job):
+                        if self.output_queue is not None:
+                            await self.output_queue.put(job)
+                        if self.job_sync is not None:
+                            self.job_sync.append(job)
+                        self._jobs_processed += 1
+                else:
+                    # ensure that jobs are processed in order
+                    self.buffer[job.id] = job
+                    while self.next_i in self.buffer:
+                        job = self.buffer.pop(self.next_i)
+                        async for job in self.process_job(job):
+                            if self.output_queue is not None:
+                                await self.output_queue.put(job)
+                            if self.job_sync is not None:
+                                self.job_sync.append(job)
+                            self._jobs_processed += 1
+                        self.next_i += 1
+        except Exception as e:
+            print(f"An error occurred in node: {self.__class__.__name__} worker: {self.worker_id}: {e}")
+            traceback.print_exc()
+            raise # Re-raises the last exception.

     async def process_job(self, job: Job):
         raise NotImplementedError()

@@ -79,7 +85,7 @@ class Pipeline:
         self.nodes.append(node_name)

         # if input_queue is None then this is the root node
-        if len(self.input_queues):
+        if len(self.input_queues) == 0:
             self.root_queue = input_queue

         self.input_queues.append(input_queue)
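
The buffer/next_i logic in the sequential branch is the heart of run(): results may arrive with out-of-order ids, but a sequential node only releases id N after ids 0..N-1 have been seen. A self-contained sketch of just that reordering step:

    # Standalone illustration of the sequential-node ordering buffer.
    buffer, next_i = {}, 0

    def release(job_id):
        global next_i
        buffer[job_id] = f"job-{job_id}"
        released = []
        while next_i in buffer:        # same condition Node.run() uses
            released.append(buffer.pop(next_i))
            next_i += 1
        return released

    print(release(1))  # []                  -> id 1 parked until id 0 arrives
    print(release(0))  # ['job-0', 'job-1']
    print(release(2))  # ['job-2']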
speech_service.py
CHANGED

@@ -38,7 +38,8 @@ class SpeechService:
             text=prompt,
             voice=self._voice_id,
             model=self._model_id,
-            stream=True
+            stream_chunk_size=2048,
+            stream=True,
         )
         return audio_stream

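With stream=True the ElevenLabs call returns an iterator of encoded audio chunks (roughly stream_chunk_size bytes each) instead of one bytes object, which matches the Iterator[bytes] that AudioStreamProcessor.add_audio_stream() consumes. A consumption sketch (the SpeechService constructor arguments are assumed; the real ones live in the repo):

    speech_service = SpeechService()  # assumed constructor
    audio_stream = speech_service.stream("Testing one two three.")
    for chunk in audio_stream:        # each chunk is ~stream_chunk_size bytes
        if chunk is not None:
            print(f"got {len(chunk)} bytes")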
streaming_chat_service.py
CHANGED

@@ -1,3 +1,5 @@
+import asyncio
+import itertools
 import json
 import os
 import torch

@@ -51,14 +53,19 @@ class StreamingChatService:
         text_to_speak = sentence[:last_termination_index+1]
         return text_to_speak

-    def _safe_enqueue_text_to_speak(self, text_to_speak):
+    def ignore_sentence(self, text_to_speak):
         # exit if empty, white space or a single bracket
         if text_to_speak.isspace():
-            return
+            return True
         # exit if not letters or numbers
         has_letters = any(char.isalpha() for char in text_to_speak)
         has_numbers = any(char.isdigit() for char in text_to_speak)
         if not has_letters and not has_numbers:
+            return True
+        return False
+
+    def _safe_enqueue_text_to_speak(self, text_to_speak):
+        if self.ignore_sentence(text_to_speak):
             return
         stream = self._speech_service.stream(text_to_speak)
         self._audio_processor.add_audio_stream(stream)

@@ -93,3 +100,47 @@ class StreamingChatService:
         print(current_sentence)
         self._messages.append({"role": "assistant", "content": agent_response})
         return agent_response
+
+    async def get_responses_as_sentances_async(self, prompt):
+        self._messages.append({"role": "user", "content": prompt})
+        agent_response = ""
+        current_sentence = ""
+
+        response = await openai.ChatCompletion.acreate(
+            model=self._model_id,
+            messages=self._messages,
+            temperature=1.0, # use 1.0 for debugging/deterministic results
+            stream=True
+        )
+
+        async for chunk in response:
+            chunk_message = chunk['choices'][0]['delta']
+            if 'content' in chunk_message:
+                chunk_text = chunk_message['content']
+                current_sentence += chunk_text
+                agent_response += chunk_text
+                text_to_speak = self._should_we_send_to_voice(current_sentence)
+                if text_to_speak:
+                    yield text_to_speak
+                    current_sentence = current_sentence[len(text_to_speak):]
+
+        if len(current_sentence) > 0:
+            yield current_sentence
+        self._messages.append({"role": "assistant", "content": agent_response})
+
+    async def get_speech_chunks_async(self, text_to_speak):
+        stream = self._speech_service.stream(text_to_speak)
+        stream, stream_backup = itertools.tee(stream)
+        while True:
+            # Check if there's a next item in the stream
+            next_item = next(stream_backup, None)
+            if next_item is None:
+                # Stream is exhausted, exit the loop
+                break
+
+            # Run next(stream) in a separate thread to avoid blocking the event loop
+            chunk = await asyncio.to_thread(next, stream)
+            yield chunk
+
+    def enqueue_speech_bytes_to_play(self, speech_bytes):
+        self._audio_processor.add_audio_stream(speech_bytes)