Spaces:
Runtime error
Runtime error
File size: 4,001 Bytes
16d35dc 95819a5 ba11b0c 3caa7d1 1baef98 ba11b0c 93641fd 16d35dc 0e00791 cda6806 16d35dc ca70bdc 69b01bf cda6806 7010662 33794fa 50a4f2e 72d7b91 cda6806 7010662 72d7b91 cda6806 16d35dc ff44154 cda6806 9922164 95819a5 43b3f79 95819a5 a06df0b 16d35dc cda6806 c171280 8752186 16d35dc c171280 ded54c4 16d35dc ff44154 c598d42 ff44154 cda6806 16d35dc cda6806 b270fe6 cda6806 16d35dc ff44154 16d35dc cda6806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import tempfile
import gradio as gr
from neon_tts_plugin_coqui import CoquiTTS
# Language codes offered in the UI: the keys of the TTS plugin's language table.
LANGUAGES = list(CoquiTTS.langs.keys())
# Language preselected in the "Language" radio control.
default_lang = "en"
#import whisper
#whisper_model = whisper.load_model("small")
# Speech-to-text is delegated to a hosted Whisper Space; the two commented-out
# lines above are the earlier approach that loaded a local model instead.
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
#chatgpt = gr.Blocks.load(name="spaces/fffiloni/whisper-to-chatGPT")
# Chat backend: another hosted Space, invoked by chat_hf with fn_index=0.
chatgpt = gr.Blocks.load(name="spaces/seawolf2357/chatgptclone")
# NOTE(review): os and json are only referenced by the commented-out lines
# below in the visible code -- confirm nothing else needs them before removing.
import os
import json
#session_token = os.environ.get('SessionToken')
#api_endpoint = os.environ.get('API_EndPoint')
# Heading text rendered at the top of the page.
title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"
# Single TTS engine instance, created once at import time and reused by chat_hf.
coquiTTS = CoquiTTS()
# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe a recording, fetch a chat reply for it, and speak the reply.

    Args:
        audio: Value of the audio input component; forwarded unchanged to
            ``translate``.
        custom_token: Value of the session-token textbox. Accepted so the
            signature matches the UI wiring, but not used by this function.
        language: Language code handed to the TTS engine as
            ``speaker={"language": language}``.

    Returns:
        A ``(whisper_text, gpt_response, wav_path)`` tuple: the transcription,
        the reply text (or a fallback message), and the path of the temporary
        ``.wav`` file containing the synthesised reply.

    Raises:
        Exception: Whatever ``translate`` raises when transcription fails on
            both the first attempt and the single retry.
    """
    missing_audio_error = "ERROR: You have to either use the microphone or upload an audio file"
    transcribed = False
    whisper_text = None
    try:
        whisper_text = translate(audio)
        transcribed = True
        if whisper_text == missing_audio_error:
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = chatgpt(whisper_text, [], fn_index=0)
            print(gpt_response)
            # The chat call returns a sequence; only its first item is used.
            gpt_response = gpt_response[0]
    except Exception as exc:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and log the cause instead of hiding it.
        print(f"chat_hf: request failed: {exc!r}")
        if not transcribed:
            # Transcription itself failed: retry it once. When only the chat
            # call failed, the transcript already obtained is reused instead
            # of paying for a second remote round-trip.
            whisper_text = translate(audio)
        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""
    print(gpt_response)
    # to voice
    # delete=False keeps the file on disk after the handle closes, because the
    # path is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
    return whisper_text, gpt_response, fp.name
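# whisper: earlier local-model implementation of translate(), kept commented out for reference; the live translate() that calls the hosted Space follows it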
#def translate(audio):
# print("""
# β
# Sending audio to Whisper ...
# β
# """)
#
# audio = whisper.load_audio(audio)
# audio = whisper.pad_or_trim(audio)
#
# mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
#
# _, probs = whisper_model.detect_language(mel)
#
# transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
#
# transcription = whisper.decode(whisper_model, mel, transcript_options)
#
# print("language spoken: " + transcription.language)
# print("transcript: " + transcription.text)
# print("βββββββββββββββββββββββββββββββββββββββββββ")
#
# return transcription.text
def translate(audio):
    """Run *audio* through the loaded Whisper Space and return its output.

    A short banner is printed before the request and the returned value is
    printed afterwards, so both show up in the application log.

    Args:
        audio: Passed as the first positional argument to ``whisper``.

    Returns:
        Whatever ``whisper`` returns for endpoint ``fn_index=0``
        (presumably the transcription text -- verify against the Space).
    """
    print("""
β
Sending audio to Whisper ...
β
""")
    # The second and third positional arguments are fixed at None and
    # "transcribe"; their meaning is defined by the remote Space's endpoint.
    transcript = whisper(audio, None, "transcribe", fn_index=0)
    print(transcript)
    return transcript
# Build the page: heading, language selector, an input column (microphone,
# token box, submit button) and an output column (transcript, reply, audio).
with gr.Blocks() as blocks:
    gr.Markdown(f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>")
    # (description Markdown is currently disabled)

    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)

    with gr.Row(equal_height=True):
        # Input side.
        with gr.Column():
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(
                label='If it fails, use your own session token',
                placeholder="your own session token",
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")

        # Output side.
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)

    # (info Markdown and visitor-badge footer are currently disabled)

    # Event wiring.
    # Submit runs the full speech -> chat -> speech pipeline.
    submit.click(
        fn=chat_hf,
        inputs=[audio_file, custom_token, radio],
        outputs=[text1, text2, audio],
    )
    # Changing the language writes that language's "sentence" entry from the
    # TTS plugin's language table into the response box.
    radio.change(
        fn=lambda lang: CoquiTTS.langs[lang]["sentence"],
        inputs=radio,
        outputs=text2,
    )

# Start the web server once the layout is fully defined.
blocks.launch(debug=True)
|