sanchit-gandhi HF staff committed on
Commit c39fa45
1 Parent(s): c15645d

Create app.py

Files changed (1)
  1. app.py +189 -0
app.py ADDED
@@ -0,0 +1,189 @@
+ import os
+ import torch
+
+ import gradio as gr
+ from transformers import pipeline
+
+ from pyChatGPT import ChatGPT
+
+ from speechbrain.pretrained import Tacotron2
+ from speechbrain.pretrained import HIFIGAN
+
+ import json
+ import soundfile as sf
+
+
+ # use GPU 0 if available, otherwise fall back to CPU
+ device = 0 if torch.cuda.is_available() else "cpu"
+
+ pipe = pipeline(
+     task="automatic-speech-recognition",
+     model="openai/whisper-tiny.en",
+     chunk_length_s=30,
+     device=device,
+ )
+
+ session_token = os.environ.get("SessionToken")
+
+ # ChatGPT client (pyChatGPT) authenticated with the session token,
+ # used by get_response_from_chatbot below
+ api = ChatGPT(session_token)
+
+ # Initialize TTS (Tacotron 2) and vocoder (HiFi-GAN)
+ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts", overrides={"max_decoder_steps": 2000}, run_opts={"device": device})
+ hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
+
+ def get_response_from_chatbot(text, reset_conversation):
+     try:
+         if reset_conversation:
+             api.refresh_auth()
+             api.reset_conversation()
+         resp = api.send_message(text)
+         response = resp["message"]
+     except Exception:
+         response = "Sorry, the chatGPT queue is full. Please try again later."
+     return response
+
+
+ def chat(input_audio, chat_history, reset_conversation):
+     # speech -> text (Whisper)
+     message = pipe(input_audio)["text"]
+
+     # text -> response (chatGPT)
+     response = get_response_from_chatbot(message, reset_conversation)
+
+     # response -> speech (Tacotron 2 + HiFi-GAN)
+     mel_output, mel_length, alignment = tacotron2.encode_text(response)
+     wav = hifi_gan.decode_batch(mel_output)
+     sf.write("out.wav", wav.squeeze().cpu().numpy(), 22050)
+
+     out_chat = []
+     chat_history = chat_history if not reset_conversation else ""
+     if chat_history != "":
+         out_chat = json.loads(chat_history)
+
+     out_chat.append((message, response))
+     chat_history = json.dumps(out_chat)
+
+     return out_chat, chat_history, "out.wav"
+
+
+ start_work = """async() => {
+     function isMobile() {
+         try {
+             document.createEvent("TouchEvent"); return true;
+         } catch(e) {
+             return false;
+         }
+     }
+     function getClientHeight()
+     {
+         var clientHeight = 0;
+         if (document.body.clientHeight && document.documentElement.clientHeight) {
+             var clientHeight = (document.body.clientHeight < document.documentElement.clientHeight) ? document.body.clientHeight : document.documentElement.clientHeight;
+         } else {
+             var clientHeight = (document.body.clientHeight > document.documentElement.clientHeight) ? document.body.clientHeight : document.documentElement.clientHeight;
+         }
+         return clientHeight;
+     }
+
+     function setNativeValue(element, value) {
+         const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
+         const prototype = Object.getPrototypeOf(element);
+         const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
+
+         if (valueSetter && valueSetter !== prototypeValueSetter) {
+             prototypeValueSetter.call(element, value);
+         } else {
+             valueSetter.call(element, value);
+         }
+     }
+     var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
+     if (!gradioEl) {
+         gradioEl = document.querySelector('body > gradio-app');
+     }
+
+     if (typeof window['gradioEl'] === 'undefined') {
+         window['gradioEl'] = gradioEl;
+
+         const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
+         const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
+
+         page1.style.display = "none";
+         page2.style.display = "block";
+         window['div_count'] = 0;
+         window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
+         window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
+         chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
+         prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
+         window['chat_bot1'].children[1].textContent = '';
+
+         clientHeight = getClientHeight();
+         new_height = (clientHeight - 300) + 'px';
+         chat_row.style.height = new_height;
+         window['chat_bot'].style.height = new_height;
+         window['chat_bot'].children[2].style.height = new_height;
+         window['chat_bot1'].style.height = new_height;
+         window['chat_bot1'].children[2].style.height = new_height;
+         prompt_row.children[0].style.flex = 'auto';
+         prompt_row.children[0].style.width = '100%';
+
+         window['checkChange'] = function checkChange() {
+             try {
+                 if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
+                     new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
+                     for (var i = 0; i < new_len; i++) {
+                         new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
+                         window['chat_bot1'].children[2].children[0].appendChild(new_div);
+                     }
+                     window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
+                 }
+                 if (window['chat_bot'].children[0].children.length > 1) {
+                     window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
+                 } else {
+                     window['chat_bot1'].children[1].textContent = '';
+                 }
+
+             } catch(e) {
+             }
+         }
+         window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
+     }
+
+     return false;
+ }"""
+
+
+ with gr.Blocks(title="Talk to chatGPT") as demo:
+     gr.Markdown("## Talk to chatGPT ##")
+     gr.HTML("<p>You can duplicate this space and use your own session token: <a style='display:inline-block' href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10' alt='Duplicate Space'></a></p>")
+     gr.HTML("<p> Instructions on how to get a session token can be found in the video <a style='display:inline-block' href='https://www.youtube.com/watch?v=TdNSj_qgdFk'><font style='color:blue;weight:bold;'>here</font></a>. Add your session token by going to Settings and adding it under Secrets. </p>")
+     with gr.Group(elem_id="page_1", visible=True) as page_1:
+         with gr.Box():
+             with gr.Row():
+                 start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
+                 start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
+
+     with gr.Group(elem_id="page_2", visible=False) as page_2:
+         with gr.Row(elem_id="chat_row"):
+             chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
+             chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
+         with gr.Row():
+             prompt_input_audio = gr.Audio(
+                 source="microphone",
+                 type="filepath",
+                 label="Record Audio Input",
+             )
+             prompt_output_audio = gr.Audio()
+
+         reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)
+         with gr.Row(elem_id="prompt_row"):
+             chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
+             submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
+                 margin=True,
+                 rounded=(True, True, True, True),
+                 width=100,
+             )
+
+     submit_btn.click(fn=chat,
+                      inputs=[prompt_input_audio, chat_history, reset_conversation],
+                      outputs=[chatbot, chat_history, prompt_output_audio],
+                      )
+
+ demo.launch(debug=True)