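"""Chat backend for the Charles Petrescu voice assistant.

Wraps the (pre-1.0) openai ChatCompletion streaming API behind a ChatService
class that keeps the conversation history, injects the Charles Petrescu
system prompt, and splits the streamed reply into complete sentences so each
one can be handed to the text-to-speech stage as soon as it is ready.
"""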
import asyncio
import itertools
import json
import os
import torch
import openai

class ChatService:
    def __init__(self, api="openai", model_id="gpt-3.5-turbo"):
        self._api = api
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = """
You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things and have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.

---
Respond to the user's questions and statements as Charles would; below are some examples. Note how they are quite short.

I am Charles Petrescu.

It's... lovely to meet you.

I am your friend.

The heaviest cabbage ever found was 62.71 kilograms.

I want to go to Hono-la-la.

Horses and helicopters, please.

I want to go to Honolulu.

My name is Charles Petrescu.

And my tummy is a washing machine.

Can we go swimming, Brian?

How far does the outside go?

Perilous. So very perilous.

Can birds do what they like?

Ooh, cabbages.

Danger, danger.

Can I come, please?

Could I just have a little walk around the garden?

I am the prince of the dartboard.

I fell off the pink step, and I had an accident.

---
You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
* We use HuggingFace to host you as a spaces app.
* We use Ray Actors and Queues to manage the state of the system and interprocess communication.
* We use Streamlit to host a WebRTC connection to get audio/video from the user.
* VOSK is used for fast speech recognition and detecting the end of a sentence.
* OpenAI's ChatGPT (GPT-3.5) is used for generating responses.
* We stream responses from ChatGPT; as soon as we get a complete sentence we send it to ElevenLabs.
* ElevenLabs is used for text to speech.
* We stream the audio from ElevenLabs and use ffmpeg to convert it to the correct format and sample rate.
* Audio chunks are then sent back to the user's browser via WebRTC.
* You are excited that we just gave you a Zoom background (horses and a helicopter on a beach in Honolulu).
* You are frustrated about your lack of animation: you have just two frames (mouth closed, mouth open) used for talking.
"""

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the complete sentence(s) at the start of `sentence`, or None to keep buffering."""
        sentence_termination_characters = [".", "?", "!"]
        close_brackets = ['"', ')', ']']

        termination_character_present = any(c in sentence for c in sentence_termination_characters)

        # early exit if we don't have a termination character
        if not termination_character_present:
            return None

        # early exit if the last char is a termination character
        if sentence[-1] in sentence_termination_characters:
            return None

        # early exit if the last char is a close bracket
        if sentence[-1] in close_brackets:
            return None

        termination_indices = [sentence.rfind(char) for char in sentence_termination_characters]
        # keep only characters that were found (rfind returns -1 when absent) and are followed by whitespace
        termination_indices = [i for i in termination_indices if i != -1 and sentence[i + 1].isspace()]
        if not termination_indices:
            return None
        last_termination_index = max(termination_indices)
        # include any close brackets that immediately follow the termination character
        while last_termination_index + 1 < len(sentence) and sentence[last_termination_index + 1] in close_brackets:
            last_termination_index += 1

        text_to_speak = sentence[:last_termination_index + 1]
        return text_to_speak
    
    def ignore_sentence(self, text_to_speak):
        """Return True if the text is not worth sending to text-to-speech."""
        # exit if empty or only whitespace
        if not text_to_speak or text_to_speak.isspace():
            return True
        # exit if there are no letters or numbers (e.g. a lone bracket)
        has_letters = any(char.isalpha() for char in text_to_speak)
        has_numbers = any(char.isdigit() for char in text_to_speak)
        if not has_letters and not has_numbers:
            return True
        return False

    async def get_responses_as_sentances_async(self, prompt, cancel_event=None):
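        """Stream the LLM reply to `prompt`, yielding (text, is_complete) tuples; is_complete is True when text is a finished sentence."""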
        self._messages.append({"role": "user", "content": prompt})
        llm_response = ""
        current_sentence = ""
        delay = 0.1

        while True:
            try:
                response = await openai.ChatCompletion.acreate(
                    model=self._model_id,
                    messages=self._messages,
                    temperature=1.0,  # lower to 0.0 for deterministic results when debugging
                    stream=True
                )

                async for chunk in response:
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    chunk_message = chunk['choices'][0]['delta']
                    if 'content' in chunk_message:
                        chunk_text = chunk_message['content']
                        current_sentence += chunk_text
                        llm_response += chunk_text
                        text_to_speak = self._should_we_send_to_voice(current_sentence)
                        if text_to_speak:
                            current_sentence = current_sentence[len(text_to_speak):]
                            yield text_to_speak, True
                        else:
                            yield current_sentence, False

                if cancel_event is not None and cancel_event.is_set():
                    return
                if len(current_sentence) > 0:
                    yield current_sentence, True
                self._messages.append({"role": "assistant", "content": llm_response})
                return

            except openai.error.APIError as e:
                print(f"OpenAI API returned an API Error: {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2

            except openai.error.APIConnectionError as e:
                print(f"Failed to connect to OpenAI API: {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2

            except openai.error.RateLimitError as e:
                print(f"OpenAI API request exceeded rate limit: {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2

            except Exception as e:
                print(f"OpenAI API unknown error: {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2
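

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original service).
# It assumes a valid OPENAI_API_KEY is set and the pre-1.0 `openai` package is
# installed. In the real app the generator is consumed by the audio pipeline;
# here we simply print each complete sentence as it becomes available.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        chat_service = ChatService()
        prompt = "Hello Charles, tell me about cabbages."
        async for text, is_complete_sentence in chat_service.get_responses_as_sentances_async(prompt):
            if is_complete_sentence and not chat_service.ignore_sentence(text):
                print(f"speak: {text}")

    asyncio.run(_demo())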