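# app.py — voice chat demo ("dost"): Whisper transcribes the user's speech,
# GPT-3 (text-davinci-002) drafts a reply, and Coqui TTS reads it aloud.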
import os
import gradio as gr
import whisper
import openai
import tempfile
from neon_tts_plugin_coqui import CoquiTTS
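
# Load the multilingual Whisper "small" model once at startup so every
# request reuses the same weights.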
model = whisper.load_model("small")
class Dost:
    LANGUAGES = list(CoquiTTS.langs.keys())
    coquiTTS = CoquiTTS()
    OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
    # The key was read from the environment but never handed to the client;
    # set it here so openai.Completion.create can authenticate.
    openai.api_key = OPENAI_API_KEY

    def __init__(self):
        self.convHistory = []
        self.voice = None
        self.result = []
    def recognize(self, audio):
        # Pad/trim the recording to Whisper's 30-second window and build
        # the log-Mel spectrogram on the model's device.
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        # Detect the spoken language so the voiced reply can match it.
        _, probs = model.detect_language(mel)
        lang = max(probs, key=probs.get)
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)
        print("-------------------RECOGNIZE---------------------")
        print(result)
        self.response(result.text, lang)
    def response(self, prompt, lang):
        # Ask GPT-3 for a short reply; "You:" is the stop sequence so the
        # model does not continue writing the user's next turn. A newline
        # separates the user's text from the "Friend:" cue.
        response = openai.Completion.create(
            model="text-davinci-002",
            prompt=f"You: {prompt}\nFriend: ",
            temperature=0.5,
            max_tokens=60,
            top_p=1.0,
            frequency_penalty=0.5,
            presence_penalty=0.0,
            stop=["You:"]
        )
        choice = response['choices'][0]['text']
        print("-------------------RESPONSE---------------------")
        print(choice)
        self.convHistory.append((prompt, choice))
        self.result.append(self.convHistory)
        self.say(choice, lang)
    def say(self, text, language):
        # Fall back to English when Whisper detects a language that
        # Coqui TTS does not support.
        coqui_langs = ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
        if language not in coqui_langs:
            language = 'en'
        # Write the synthesized speech to a temp WAV; delete=False keeps the
        # file on disk so Gradio can serve it after the handler returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            self.coquiTTS.get_tts(text, fp, speaker={"language": language})
            print("-------------------AUDIOOUTPUT---------------------")
            print("DONE", fp.name)
        self.result.append(fp.name)
    def start(self, audio, state):
        # Entry point wired to the Submit button: restore this session's
        # history from gr.State, then run recognize -> response -> say.
        self.convHistory = state
        self.result = []
        self.recognize(audio)
        print(self.result)
        return tuple(self.result)
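
# A single shared assistant instance; each click re-binds convHistory to
# the session's gr.State list before handling the request.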
dost = Dost()

with gr.Blocks() as demo:
    state = gr.State([])
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="microphone", type="filepath")
            btn = gr.Button("Submit")
        conversation = gr.Chatbot(value=dost.convHistory)
        output_audio = gr.Audio(label="AI voice response")
    btn.click(dost.start, inputs=[input_audio, state], outputs=[conversation, output_audio])

demo.launch(debug=True)