Yusin's picture
Update app.py (#10)
9922164
raw
history blame
3.89 kB
import tempfile
import gradio as gr
from neon_tts_plugin_coqui import CoquiTTS
# Language codes offered in the UI: the keys of CoquiTTS.langs.
LANGUAGES = list(CoquiTTS.langs.keys())
# Language preselected in the UI's language selector.
default_lang = "en"
# ChatGPT client
from pyChatGPT import ChatGPT
import whisper
# Load the "small" Whisper model once at import time; translate() reuses it.
whisper_model = whisper.load_model("small")
# Disabled alternative: use a hosted Whisper Space instead of the local model.
#whisper = gr.Blocks.load(name="spaces/sanchit-gandhi/whisper-large-v2")
import os
# Default ChatGPT session token, read from the SessionToken environment
# variable (None when the variable is not set).
session_token = os.environ.get('SessionToken')
# Heading rendered at the top of the page.
title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
# Single text-to-speech engine instance shared by all requests.
coquiTTS = CoquiTTS()
# ChatGPT
def _ask_chatgpt(token, prompt):
    """Send *prompt* to ChatGPT using *token* and return the reply text."""
    api = ChatGPT(token)
    resp = api.send_message(prompt)
    api.refresh_auth()  # refresh the authorization token
    api.reset_conversation()  # reset the conversation
    return resp['message']


def chat_hf(audio, custom_token, language):
    """Transcribe a recording, send it to ChatGPT and synthesize the reply.

    Args:
        audio: Path of the recorded audio file; passed to ``translate``.
        custom_token: User-supplied ChatGPT session token. Only used when
            the request made with the module-level ``session_token`` fails.
        language: Language code passed to the TTS engine as the speaker
            language.

    Returns:
        A ``(transcript, chatgpt_reply, wav_path)`` tuple. ``wav_path`` is
        the name of a temporary ``.wav`` file containing the spoken reply;
        the file is created with ``delete=False`` so it outlives this call.

    Raises:
        Exception: Any error raised by ``translate``, the ChatGPT client or
            the TTS engine. When the default token fails and no custom
            token was supplied, the original failure is re-raised.
    """
    # Transcribe exactly once: the transcript does not depend on which token
    # is used, so a ChatGPT failure must not trigger a second Whisper pass.
    whisper_text = translate(audio)

    try:
        gpt_response = _ask_chatgpt(session_token, whisper_text)
    except Exception:
        # Without a fallback token there is nothing left to try; surface the
        # real cause instead of a secondary "missing token" error.
        if not custom_token:
            raise
        gpt_response = _ask_chatgpt(custom_token, whisper_text)

    # to voice
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
    return whisper_text, gpt_response, fp.name
# whisper
def translate(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Despite its name, this function runs Whisper with ``task="transcribe"``;
    it returns the text in the spoken language rather than a translation.

    Args:
        audio: Path of the audio file to load.

    Returns:
        The transcript text produced by ``whisper.decode``.
    """
    print("\n—\nSending audio to Whisper ...\n—\n")

    waveform = whisper.load_audio(audio)
    # Fit the waveform to the fixed input length the model expects
    # (longer recordings are cut, shorter ones padded).
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(whisper_model.device)

    # No language is set in the options, so whisper.decode detects it itself
    # and reports it on the result; a separate detect_language() call would
    # only repeat that work.
    options = whisper.DecodingOptions(task="transcribe", fp16=False)
    transcription = whisper.decode(whisper_model, mel, options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    print("—" * 43)
    return transcription.text
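# Disabled alternative: transcribe via a remotely loaded `whisper` callable
# instead of the local model.
#def translate(audio):
# print("""
# —
# Sending audio to Whisper ...
# —
# """)
#
# text_result = whisper(audio, None, "transcribe", fn_index=0)
# print(text_result)
# return text_result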
# Build the page: inputs (language, microphone recording, optional session
# token, submit button) in the first column, outputs (transcript, ChatGPT
# reply, synthesized audio) in the second.
with gr.Blocks() as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    #gr.Markdown(description)
    with gr.Row(equal_height=True):# equal_height=False
        with gr.Column():# variant="panel"
            radio = gr.Radio(
                label="Language",
                choices=LANGUAGES,
                value=default_lang
            )
            # gr.Audio is used instead of the deprecated gr.inputs.Audio,
            # matching the output component below; "filepath" makes the
            # callback receive a path that Whisper can load.
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():# mobile_collapse=False
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT response")
            audio = gr.Audio(label="Output", interactive=False)
    #gr.Markdown(info)
    #gr.Markdown("<center>"
    #            +f'<img src={badge} alt="visitors badge"/>'
    #            +"</center>")

    # actions
    # Submit runs the speech -> ChatGPT -> speech pipeline and fills the
    # three output components.
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    # Changing the language writes that language's sample sentence into the
    # "ChatGPT response" box.
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)

blocks.launch(debug=True)