import csv
import os

import azure.cognitiveservices.speech as speechsdk
import openai
from huggingface_hub import Repository
DATASET_REPO_URL = "https://huggingface.co/datasets/FranklinWillemen/persistent-space-dataset"
DATA_FILENAME = "data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")

LANGUAGE = "nl"
AUDIO_FILE_NAME = "audio_response.wav"

# Clone the persistent dataset repo locally so conversation history can be stored.
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)

openai.api_key = os.environ['OPEN_AI_KEY']

# Azure text-to-speech, configured for a Dutch neural voice.
speech_config = speechsdk.SpeechConfig(subscription=os.environ['AZURE_SPEECH_KEY'], region="westeurope")
speech_config.speech_synthesis_voice_name = "nl-NL-ColetteNeural"
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

# System prompt (Dutch): "You are a smart and helpful conversation partner.
# Answer concisely and to the point. Do not mention that you are an AI or any
# kind of service."
context = [{
    "role": "system",
    "content": (
        "Je bent een slimme en behulpzame gesprekspartner. "
        "Antwoord beknopt en ter zake. "
        "Vermeld niet dat je een AI of een soort service bent."
    ),
}]

def transcribe(model: str, audio: str):
    # Transcribe the recorded audio with OpenAI Whisper; close the file handle when done.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe(model, audio_file, language=LANGUAGE)
    return transcript

def gen_response(model: str, context: list):
    # Ask the chat model for the next assistant message given the conversation so far.
    response = openai.ChatCompletion.create(model=model, messages=context)
    return response["choices"][0]["message"]

def gen_voice(response, response_filename):
    # Synthesize the assistant reply with Azure TTS and save it as a WAV file.
    response_audio = speech_synthesizer.speak_text_async(response['content']).get()
    stream = speechsdk.AudioDataStream(response_audio)
    stream.save_to_wav_file(response_filename)

def memory():
    # Append the conversation to the dataset CSV, then push the update to the Hub
    # after the file is closed so all rows are flushed before the commit.
    with open(DATA_FILE, "a") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["name", "message"])
        for message in context:
            writer.writerow(
                {"name": message['role'], "message": message['content']}
            )
    commit_url = repo.push_to_hub()
    print(commit_url)
    
def respond(audio: str):
    # Transcribe the user's audio and add it to the conversation context.
    transcript = transcribe("whisper-1", audio)
    context.append({"role": "user", "content": transcript['text']})

    # Generate the assistant reply and keep it in the context.
    response = gen_response("gpt-3.5-turbo", context)
    context.append(response)

    # Synthesize the reply to a WAV file for playback.
    gen_voice(response, AUDIO_FILE_NAME)

    # Build a readable transcript of the conversation, excluding the system prompt.
    transcript = ""
    for m in context:
        if m["role"] != "system":
            transcript += m["role"] + " : " + m["content"] + "\n\n"

    return AUDIO_FILE_NAME, transcript
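
# --- Hypothetical usage sketch (not in the original file) ---
# One plausible way to expose respond() in a Hugging Face Space is a small
# Gradio app. This assumes gradio 3.x is installed; the component names and
# arguments below come from that library, not from this repo.
if __name__ == "__main__":
    import gradio as gr

    demo = gr.Interface(
        fn=respond,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=[gr.Audio(type="filepath"), gr.Textbox(label="Transcript")],
    )
    demo.launch()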