import torch
import os
import gradio as gr
from transformers import pipeline
from pyChatGPT import ChatGPT
from speechbrain.pretrained import Tacotron2
from speechbrain.pretrained import HIFIGAN
import json
import soundfile as sf
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Initialise STT (Whisper)
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)
# Initialise ChatGPT session
session_token = os.environ.get("SessionToken")
api = ChatGPT(session_token=session_token)
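# The ChatGPT session token is read from the Space secret named "SessionToken"
# (added under Settings -> New secret, as described in the page instructions below)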
# Initialise TTS (Tacotron2) and vocoder (HiFi-GAN)
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    overrides={"max_decoder_steps": 10000},
    run_opts={"device": device},
)
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
def get_response_from_chatbot(text, reset_conversation):
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        resp = api.send_message(text)
        response = resp["message"]
    except Exception:
        response = "Sorry, the chatGPT queue is full. Please try again later."
    return response
def chat(input_audio, chat_history, reset_conversation):
    # speech -> text (Whisper)
    message = pipe(input_audio)["text"]
    # text -> response (chatGPT)
    response = get_response_from_chatbot(message, reset_conversation)
    # response -> speech (Tacotron2 mel spectrogram, vocoded by HiFi-GAN)
    mel_output, mel_length, alignment = tacotron2.encode_text(response)
    wav = hifi_gan.decode_batch(mel_output)
    # 22050 Hz is the native sampling rate of the LJSpeech models
    sf.write("out.wav", wav.squeeze().cpu().numpy(), 22050)
    # chat history is persisted between calls as a JSON string in a hidden Textbox
    out_chat = []
    chat_history = chat_history if not reset_conversation else ""
    if chat_history != "":
        out_chat = json.loads(chat_history)
    out_chat.append((message, response))
    chat_history = json.dumps(out_chat)
    return out_chat, chat_history, "out.wav"
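# JavaScript injected via the start button's `_js` hook: it hides #page_1, reveals #page_2,
# resizes the chat panels to the viewport, and copies new messages from the hidden
# #chat_bot component into the visible #chat_bot1 copy every 500 ms.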
start_work = """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent");
            return true;
        } catch(e) {
            return false;
        }
    }
    function getClientHeight() {
        var clientHeight = 0;
        if (document.body.clientHeight && document.documentElement.clientHeight) {
            clientHeight = (document.body.clientHeight < document.documentElement.clientHeight) ? document.body.clientHeight : document.documentElement.clientHeight;
        } else {
            clientHeight = (document.body.clientHeight > document.documentElement.clientHeight) ? document.body.clientHeight : document.documentElement.clientHeight;
        }
        return clientHeight;
    }
    function setNativeValue(element, value) {
        const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
        const prototype = Object.getPrototypeOf(element);
        const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
        if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
        } else {
            valueSetter.call(element, value);
        }
    }
    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }
    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;
        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
        page1.style.display = "none";
        page2.style.display = "block";
        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
        chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
        prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
        window['chat_bot1'].children[1].textContent = '';
        clientHeight = getClientHeight();
        new_height = (clientHeight - 300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';
        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) {
                        new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                    window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }
            } catch(e) {
            }
        }
        window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
    }
    return false;
}"""
with gr.Blocks(title="Talk to chatGPT") as demo:
    gr.Markdown("## Talk to chatGPT ##")
    gr.HTML(
        "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en' class='underline'>Whisper</a> to convert the input speech"
        " to transcribed text, <a href='https://chat.openai.com/chat' class='underline'>chatGPT</a> to generate responses, and <a"
        " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech' class='underline'>tacotron2</a> to convert the response to"
        " output speech: </p>"
    )
    gr.HTML("<p> <center><img src='https://raw.githubusercontent.com/sanchit-gandhi/codesnippets/main/pipeline.png' width='870'></center> </p>")
    gr.HTML(
        "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
        " href='https://huggingface.co/spaces/sanchit-gandhi/chatGPT?duplicate=true'><img"
        " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
        " alt='Duplicate Space'></a></p>"
    )
    gr.HTML(
        "<p> Instructions on how to obtain your session token can be found in the video <a style='display:inline-block'"
        " href='https://youtu.be/TdNSj_qgdFk?t=175'><font style='color:blue;weight:bold;'>here</font></a>."
        " Add your session token by going to <i>Settings</i> -> <i>New secret</i> and add the token under the name <i>SessionToken</i>. </p>"
    )
with gr.Group(elem_id="page_1", visible=True) as page_1:
with gr.Box():
with gr.Row():
start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
with gr.Group(elem_id="page_2", visible=False) as page_2:
with gr.Row(elem_id="chat_row"):
chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
with gr.Row():
prompt_input_audio = gr.Audio(
source="microphone",
type="filepath",
label="Record Audio Input",
)
prompt_output_audio = gr.Audio()
reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)
with gr.Row(elem_id="prompt_row"):
chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
margin=True,
rounded=(True, True, True, True),
width=100,
)
submit_btn.click(
fn=chat,
inputs=[prompt_input_audio, chat_history, reset_conversation],
outputs=[chatbot, chat_history, prompt_output_audio],
)
demo.launch(debug=True)