project_charles / chat_service.py
sohojoe's picture
WIP: asyncio version of RespondToPrompt. basic singleton version
c490c32
raw
history blame
6.96 kB
import asyncio
import itertools
import json
import os
import torch
import openai
class ChatService:
    """Conversation wrapper around the OpenAI chat-completion API.

    The service keeps the running message history (system prompt, user
    prompts and assistant replies) in ``self._messages`` and exposes an
    async generator that streams a reply split into sentence-sized pieces.
    """

    # Characters that may end a sentence.
    _SENTENCE_TERMINATORS = (".", "?", "!")
    # Closing punctuation that may directly follow a sentence terminator and
    # still belongs to the sentence, e.g. the quote in: He said "Hi."
    _CLOSE_BRACKETS = ('"', ')', ']')

    def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
        """Store the configuration, set the OpenAI API key and reset history.

        Args:
            api: Name of the backing API. Only stored; the visible code always
                talks to OpenAI.
            model_id: Model name passed to the chat-completion endpoint.
        """
        self._api = api
        # Recorded for reference; nothing in the visible code reads it.
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # self._system_prompt = None
        self._system_prompt ="""
You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
---
Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.
I am Charles Petrescu.
It's... lovely to meet you.
I am your friend.
The heaviest cabbage ever found was 62.71 kilograms.
I want to go to Hono-la-la.
Horses and helicopters, please.
I want to go to Honolulu.
My name is Charles Petrescu.
And my tummy is a washing machine.
Can we go swimming, Brian?
How far does the outside go?
Perilous. So very perilous.
Can birds do what they like?
Ooh, cabbages.
Danger, danger.
Can I come, please?
Could I just have a little walk around the garden?
I am the prince of the dartboard.
I fell off the pink step, and I had an accident.
---
You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
* We use HuggingFace to host you as a spaces app.
* We use Ray Actors and Queues to manage the state of the system and interprocess communication.
* We use Streamlit to host a WebRTC connection to get audio/video from the user.
* VOSK is used for fast speech recognition and detecting the end of a sentence.
* OpenAI's Chat GPT-3.5 is used for generating responses.
* We stream responces from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
* ElevenLabs for text to speech.
* We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
* Audio chunks and then sent back to the users browser via WebRTC.
* You are excited that we just gave you a zoom background (horses, helecopter on beech in Honolulu)
* You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
"""
        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear the history, re-seeding it with the system prompt if set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the leading part of *sentence* that is ready to be spoken.

        A prefix is ready when it ends in a sentence terminator (optionally
        followed by closing quotes/brackets) and is followed by whitespace.
        The whitespace requirement keeps decimals such as "3.5" intact and
        makes us wait for one more character when the buffer currently ends
        in a terminator or a close bracket.

        Args:
            sentence: Text accumulated so far from the streamed response.

        Returns:
            The longest ready prefix (ending on the terminator or its close
            brackets), or ``None`` when nothing can be spoken yet.
        """
        # Nothing to do without at least one termination character.
        if not any(mark in sentence for mark in self._SENTENCE_TERMINATORS):
            return None
        # The buffer ends on a terminator or close bracket: more text may
        # still arrive that belongs to it (e.g. "..." or a closing quote).
        final_char = sentence[-1]
        if final_char in self._SENTENCE_TERMINATORS or final_char in self._CLOSE_BRACKETS:
            return None

        # Walk backwards so the *last* usable sentence boundary wins. Every
        # terminator is examined (not only the last occurrence of each
        # character), and absent characters can no longer produce a bogus
        # -1 index or an empty candidate list.
        for index in range(len(sentence) - 2, -1, -1):
            if sentence[index] not in self._SENTENCE_TERMINATORS:
                continue
            end = index + 1
            # Pull trailing close brackets into the sentence.
            while end < len(sentence) and sentence[end] in self._CLOSE_BRACKETS:
                end += 1
            if end < len(sentence) and sentence[end].isspace():
                return sentence[:end]
        return None

    def ignore_sentence(self, text_to_speak):
        """Return True when *text_to_speak* has no letter and no digit.

        This covers the empty string, pure whitespace and punctuation-only
        fragments such as a lone bracket.
        """
        return not any(char.isalpha() or char.isdigit() for char in text_to_speak)

    async def get_responses_as_sentances_async(self, prompt, cancel_event=None):
        """Stream the model's reply to *prompt* as sentence-sized pieces.

        The prompt is appended to the history as a user message, the chat
        completion is requested in streaming mode and, once the stream ends,
        the full reply is appended to the history as an assistant message.

        Args:
            prompt: The user's text.
            cancel_event: Optional object with an ``is_set()`` method. When it
                reports True the generator stops without recording an
                assistant message.

        Yields:
            ``(text, is_complete)`` tuples. ``is_complete`` is True for a
            finished sentence (or the final remainder of the reply) and False
            for the partial sentence accumulated so far.

        Note:
            Any ``Exception`` raised while requesting or consuming the stream
            is printed and the request is retried with an exponentially
            growing delay; there is no retry limit.
        """
        self._messages.append({"role": "user", "content": prompt})
        delay = 0.1

        while True:
            # Every attempt starts from empty buffers so that a retry after a
            # mid-stream failure does not glue the new reply onto stale text.
            llm_response = ""
            current_sentence = ""
            try:
                response = await openai.ChatCompletion.acreate(
                    model=self._model_id,
                    messages=self._messages,
                    temperature=1.0,  # NOTE: lower it (e.g. 0.0) for more deterministic output when debugging
                    stream=True
                )

                async for chunk in response:
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    chunk_message = chunk['choices'][0]['delta']
                    # Some deltas carry no text; skip them.
                    if 'content' not in chunk_message:
                        continue
                    chunk_text = chunk_message['content']
                    current_sentence += chunk_text
                    llm_response += chunk_text
                    text_to_speak = self._should_we_send_to_voice(current_sentence)
                    if text_to_speak:
                        # Keep only the unspoken remainder in the buffer.
                        current_sentence = current_sentence[len(text_to_speak):]
                        yield text_to_speak, True
                    else:
                        yield current_sentence, False

                if cancel_event is not None and cancel_event.is_set():
                    return
                # Flush whatever is left once the stream has ended.
                if len(current_sentence) > 0:
                    yield current_sentence, True
                self._messages.append({"role": "assistant", "content": llm_response})
                return

            except openai.error.APIError as e:
                failure = f"OpenAI API returned an API Error: {e}"
            except openai.error.APIConnectionError as e:
                failure = f"Failed to connect to OpenAI API: {e}"
            except openai.error.RateLimitError as e:
                failure = f"OpenAI API request exceeded rate limit: {e}"
            except Exception as e:
                # Best-effort: unknown failures are retried as well.
                failure = f"OpenAI API unknown error: {e}"

            # Shared exponential backoff for every failure kind.
            print(failure)
            print(f"Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay *= 2