sohojoe committed on
Commit
e9d6e62
1 Parent(s): df0ea75

reduce complexity by removing AudioStreamProcessor from StreamingChatService

Browse files
respond_to_prompt_actor.py CHANGED
@@ -12,8 +12,7 @@ class PromptToLLMActor:
12
  load_dotenv()
13
  self.input_queue = input_queue
14
  self.output_queue = output_queue
15
- self.audio_processor = AudioStreamProcessor()
16
- self.chat_service = StreamingChatService(self.audio_processor, voice_id=voice_id)
17
  self.cancel_event = None
18
 
19
  async def run(self):
@@ -40,8 +39,7 @@ class LLMSentanceToSpeechActor:
40
  load_dotenv()
41
  self.input_queue = input_queue
42
  self.output_queue = output_queue
43
- self.audio_processor = AudioStreamProcessor()
44
- self.chat_service = StreamingChatService(self.audio_processor, voice_id=voice_id)
45
  self.cancel_event = None
46
 
47
  async def run(self):
@@ -66,13 +64,14 @@ class SpeechToSpeakerActor:
66
  load_dotenv()
67
  self.input_queue = input_queue
68
  self.audio_processor = AudioStreamProcessor()
69
- self.chat_service = StreamingChatService(self.audio_processor, voice_id=voice_id)
70
 
71
  async def run(self):
72
  while True:
73
  audio_chunk = await self.input_queue.get_async()
74
  # print (f"Got audio chunk {len(audio_chunk)}")
75
  self.chat_service.enqueue_speech_bytes_to_play([audio_chunk])
 
76
 
77
  async def cancel(self):
78
  while not self.input_queue.empty():
 
12
  load_dotenv()
13
  self.input_queue = input_queue
14
  self.output_queue = output_queue
15
+ self.chat_service = StreamingChatService(voice_id=voice_id)
 
16
  self.cancel_event = None
17
 
18
  async def run(self):
 
39
  load_dotenv()
40
  self.input_queue = input_queue
41
  self.output_queue = output_queue
42
+ self.chat_service = StreamingChatService(voice_id=voice_id)
 
43
  self.cancel_event = None
44
 
45
  async def run(self):
 
64
  load_dotenv()
65
  self.input_queue = input_queue
66
  self.audio_processor = AudioStreamProcessor()
67
+ self.chat_service = StreamingChatService(voice_id=voice_id)
68
 
69
  async def run(self):
70
  while True:
71
  audio_chunk = await self.input_queue.get_async()
72
  # print (f"Got audio chunk {len(audio_chunk)}")
73
  self.chat_service.enqueue_speech_bytes_to_play([audio_chunk])
74
+ self.audio_processor.add_audio_stream([audio_chunk])
75
 
76
  async def cancel(self):
77
  while not self.input_queue.empty():
streaming_chat_service.py CHANGED
@@ -5,13 +5,11 @@ import os
5
  import torch
6
  import openai
7
 
8
- from audio_stream_processor import AudioStreamProcessor
9
  from speech_service import SpeechService
10
 
11
 
12
  class StreamingChatService:
13
- def __init__(self, audio_processor:AudioStreamProcessor()=None, api="openai", model_id = "gpt-3.5-turbo", voice_id="Bella"):
14
- self._audio_processor = audio_processor
15
  self._speech_service = SpeechService(voice_id=voice_id)
16
  self._api = api
17
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -106,43 +104,6 @@ I fell off the pink step, and I had an accident.
106
  if not has_letters and not has_numbers:
107
  return True
108
  return False
109
-
110
- def _safe_enqueue_text_to_speak(self, text_to_speak):
111
- if self.ignore_sentence(text_to_speak):
112
- return
113
- stream = self._speech_service.stream(text_to_speak)
114
- self._audio_processor.add_audio_stream(stream)
115
-
116
- def respond_to(self, prompt):
117
- self._messages.append({"role": "user", "content": prompt})
118
- agent_response = ""
119
- current_sentence = ""
120
-
121
- response = openai.ChatCompletion.create(
122
- model=self._model_id,
123
- messages=self._messages,
124
- temperature=1.0, # use 0.0 for debugging/deterministic results
125
- stream=True
126
- )
127
-
128
- for chunk in response:
129
- chunk_message = chunk['choices'][0]['delta']
130
- if 'content' in chunk_message:
131
- chunk_text = chunk_message['content']
132
- # print(chunk_text)
133
- current_sentence += chunk_text
134
- agent_response += chunk_text
135
- text_to_speak = self._should_we_send_to_voice(current_sentence)
136
- if text_to_speak:
137
- self._safe_enqueue_text_to_speak(text_to_speak)
138
- print(text_to_speak)
139
- current_sentence = current_sentence[len(text_to_speak):]
140
-
141
- if len(current_sentence) > 0:
142
- self._safe_enqueue_text_to_speak(current_sentence)
143
- print(current_sentence)
144
- self._messages.append({"role": "assistant", "content": agent_response})
145
- return agent_response
146
 
147
  async def get_responses_as_sentances_async(self, prompt, cancel_event):
148
  self._messages.append({"role": "user", "content": prompt})
@@ -190,6 +151,3 @@ I fell off the pink step, and I had an accident.
190
  if cancel_event.is_set():
191
  return
192
  yield chunk
193
-
194
- def enqueue_speech_bytes_to_play(self, speech_bytes):
195
- self._audio_processor.add_audio_stream(speech_bytes)
 
5
  import torch
6
  import openai
7
 
 
8
  from speech_service import SpeechService
9
 
10
 
11
  class StreamingChatService:
12
+ def __init__(self, api="openai", model_id = "gpt-3.5-turbo", voice_id="Bella"):
 
13
  self._speech_service = SpeechService(voice_id=voice_id)
14
  self._api = api
15
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
104
  if not has_letters and not has_numbers:
105
  return True
106
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  async def get_responses_as_sentances_async(self, prompt, cancel_event):
109
  self._messages.append({"role": "user", "content": prompt})
 
151
  if cancel_event.is_set():
152
  return
153
  yield chunk