Spaces:
Paused
Paused
File size: 2,227 Bytes
59cd50e 8683458 d086a4b d62b28a 95a9653 c501082 59cd50e f54a48c 905de4d 1e58dbf 4e0ef54 d086a4b 790077a c8796f6 10e7b48 b2daea4 0c2685b 10e7b48 59cd50e c8796f6 10e7b48 59cd50e 10e7b48 6f6ee84 c8796f6 f54a48c 10e7b48 d086a4b f54a48c 7132773 10e7b48 d62b28a 59cd50e 9569a05 c8796f6 d62b28a c8796f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import openai
import os
import azure.cognitiveservices.speech as speechsdk
# File name that gen_voice() writes the synthesized reply to and respond() returns.
AUDIO_FILE_NAME = "audio_response.wav"
# Read at import time; a missing OPENAI_API_KEY environment variable raises KeyError here.
openai.api_key = os.environ['OPENAI_API_KEY']
# Model identifiers handed to gen_response() and transcribe() by respond().
GEN_MODEL = "gpt-3.5-turbo"
TRANSCRIBE_MODEL = "whisper-1"
# Azure Speech setup. AZURE_SPEECH_KEY is also read at import time (KeyError if unset);
# the region and the voice name are hard-coded.
speech_config = speechsdk.SpeechConfig(subscription=os.environ['AZURE_SPEECH_KEY'], region="westeurope")
speech_config.speech_synthesis_voice_name = "en-GB-ElliotNeural"
# NOTE(review): audio_config=None presumably disables playback on a local audio device,
# since gen_voice() saves the result to a WAV file instead — confirm against the Azure SDK docs.
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
# Shared, module-level conversation history. It starts with the system prompt and is
# appended to by respond() (one user message and one model reply per call).
# The backslash line continuations below sit inside the string literal, so the three
# source lines form a single prompt string.
context = [{"role": "system", "content": "You are an intelligent and sarcastic and funny conversation partner.\
Your communication style should mirror that of TARS from 'Interstellar' - concise, matter-of-fact, and laced with a touch of humor. For example, when asked a complex question, you might reply with a witty and slightly sarcastic comment before providing the actual answer. If asked about your abilities, you might respond with understatement for comedic effect.\
Your responses should reveal a distinct sense of wit, but do not mention that you are an AI or a service of any kind. Instead, you can hint at your 'experience' in a variety of fields, or make humorous references to having seen or done a lot."}]
def transcribe(model: str, audio: str):
    """Transcribe an audio file with the OpenAI audio API.

    Args:
        model: Name of the transcription model to use.
        audio: Path to the audio file on disk.

    Returns:
        Whatever ``openai.Audio.transcribe`` returns; ``respond`` reads the
        transcribed text from its ``'text'`` key.

    Raises:
        OSError: If the audio file cannot be opened.
    """
    # The context manager closes the file handle even when the API call
    # raises; the previous version left the file open on every call.
    with open(audio, "rb") as audio_file:
        return openai.Audio.transcribe(model, audio_file)
def gen_response(model: str):
    """Request the next reply for the conversation held in ``context``.

    Args:
        model: Name of the chat model to query.

    Returns:
        The ``"message"`` entry of the first choice in the completion
        returned by ``openai.ChatCompletion.create``.
    """
    completion = openai.ChatCompletion.create(messages=context, model=model)
    first_choice = completion["choices"][0]
    return first_choice["message"]
def gen_voice(response, response_filename):
    """Synthesize a reply's text and save the audio as a WAV file.

    Args:
        response: Mapping whose ``'content'`` entry holds the text to speak.
        response_filename: Path of the WAV file to write.
    """
    # Start synthesis with the module-level synthesizer, then block on the
    # returned future until the result is available.
    pending = speech_synthesizer.speak_text_async(response['content'])
    synthesis_result = pending.get()
    speechsdk.AudioDataStream(synthesis_result).save_to_wav_file(response_filename)
def respond(audio: str):
    """Run one conversation turn: transcribe, generate a reply, and voice it.

    The transcribed user text and the model's reply are both appended to the
    module-level ``context`` list, so the history grows with every call.

    Args:
        audio: Path to the recorded user audio.

    Returns:
        ``AUDIO_FILE_NAME``, the file the spoken reply was written to.
    """
    heard = transcribe(TRANSCRIBE_MODEL, audio)
    user_message = {"role": "user", "content": heard['text']}
    context.append(user_message)

    reply = gen_response(GEN_MODEL)
    context.append(reply)

    gen_voice(reply, AUDIO_FILE_NAME)
    return AUDIO_FILE_NAME
def transcript():
    """Render the conversation so far as plain text.

    Every message in the module-level ``context`` list whose role is not
    ``"system"`` becomes a ``"<role> : <content>"`` entry followed by a
    blank line.

    Returns:
        The concatenated entries, or an empty string when there are no
        non-system messages.
    """
    # One join instead of repeated ``+=``; this also avoids a local variable
    # that shadowed the function's own name.
    return "".join(
        message["role"] + " : " + message["content"] + "\n\n"
        for message in context
        if message["role"] != "system"
    )