Akmyradov committed on
Commit
3f492a3
1 Parent(s): 7913b34

Create app.py

Browse files

This is an early version of the Dost.ai application. It helps you practice your language skills. Currently it only supports whatever language you speak. Just kidding.

Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ import openai
5
+ import tempfile
6
+ from neon_tts_plugin_coqui import CoquiTTS
7
+
8
# Whisper "small" checkpoint, loaded once at module import and read as a global by Dost.recognize.
+ model = whisper.load_model("small")
9
+
10
class Dost:
    """Speech-to-speech chat companion.

    One conversational turn runs ``start`` -> ``recognize`` -> ``response``
    -> ``say``: the recording is transcribed with the module-level Whisper
    ``model``, the transcript is sent to the OpenAI completion endpoint, and
    the reply is synthesised to a WAV file through the Coqui TTS plugin.

    Attributes:
        convHistory: List of ``(user_text, reply_text)`` tuples, one per
            completed turn, kept for the lifetime of the instance.
        voice: Initialised to ``None``; not read anywhere in this class.
        result: Per-turn accumulator. After a successful turn it holds
            ``[convHistory, wav_path]``, which ``start`` returns as a tuple.
    """

    LANGUAGES = list(CoquiTTS.langs.keys())
    coquiTTS = CoquiTTS()

    # Language codes accepted by ``say``; anything else falls back to English.
    _TTS_LANGS = frozenset(
        ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
    )

    # SECURITY: the secret key used to be committed in source. It is now read
    # from the environment; the previously committed key must be revoked.
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    def __init__(self):
        self.convHistory = []
        self.voice = None
        self.result = []

    def recognize(self, audio):
        """Transcribe the recording at path ``audio`` and continue the turn.

        Detects the spoken language, decodes the speech to text, then hands
        both to ``response``. Returns nothing; output accumulates in
        ``self.result``.
        """
        audio = whisper.load_audio(audio)
        audio = whisper.pad_or_trim(audio)

        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        # Most probable language code according to Whisper.
        _, probs = model.detect_language(mel)
        lang = max(probs, key=probs.get)

        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)

        print("-------------------RECOGNIZE---------------------")
        print(self.result)
        self.response(result.text, lang)

    def response(self, prompt, lang):
        """Ask the completion model for a reply to ``prompt`` and speak it.

        Appends ``(prompt, reply)`` to ``convHistory``, pushes the history
        onto ``self.result`` and forwards the reply to ``say`` together with
        the detected language ``lang``.
        """
        response = openai.Completion.create(
            model="text-davinci-002",
            # The newline keeps the user's text from running into the
            # "Friend:" speaker label.
            prompt=f"You: {prompt}\nFriend: ",
            temperature=0.5,
            max_tokens=60,
            top_p=1.0,
            frequency_penalty=0.5,
            presence_penalty=0.0,
            stop=["You:"],
        )
        # Trim surrounding whitespace so it is neither displayed nor spoken.
        choice = response['choices'][0]['text'].strip()
        print("-------------------RESPONSE---------------------")
        print(self.result)
        self.convHistory.append((prompt, choice))
        self.result.append(self.convHistory)
        print(self.convHistory[0])
        print(type(self.convHistory[0]))
        self.say(choice, lang)

    def say(self, text, language):
        """Synthesise ``text`` to a temporary WAV file.

        ``language`` falls back to ``'en'`` when it is not one of the codes
        in ``_TTS_LANGS``. The file path is appended to ``self.result``; the
        file is created with ``delete=False`` so it remains on disk after
        this method returns.
        """
        if language not in self._TTS_LANGS:
            language = 'en'
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            self.coquiTTS.get_tts(text, fp, speaker={"language": language})
        print("-------------------AUDIOOUTPUT---------------------")
        print(self.result)
        self.result.append(fp.name)

    def start(self, audio):
        """Run one full turn for the recording at path ``audio``.

        Returns:
            A ``(conversation_history, wav_path)`` tuple. When no recording
            was supplied, the unchanged history and ``None`` are returned
            instead of failing inside the audio loader.
        """
        self.result = []
        if not audio:
            return self.convHistory, None
        self.recognize(audio)
        print(self.result)
        return tuple(self.result)
70
+
71
# A single Dost instance serves every request, so its conversation history
# is shared by everyone using this app.
dost = Dost()

# NOTE(review): widget nesting below is reconstructed from a listing that
# lost its indentation - confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Microphone capture is handed to the callback as a file path.
            input_audio = gr.Audio(source="microphone", type="filepath")
            btn = gr.Button("Submit")
        conversation = gr.Chatbot(value=dost.convHistory)
        output_audio = gr.Audio(label="AI voice response")
    # dost.start returns (history, wav_path), matching the two outputs.
    btn.click(dost.start, inputs=[input_audio], outputs=[conversation, output_audio])

# Launch only when executed as a script, so importing this module (for
# example from tooling) does not start a server.
if __name__ == "__main__":
    demo.launch(debug=True)