File size: 3,889 Bytes
16d35dc
 
 
 
 
 
cda6806
 
9922164
 
 
3caa7d1
cda6806
16d35dc
0e00791
cda6806
 
16d35dc
 
 
 
cda6806
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16d35dc
ff44154
cda6806
 
 
 
 
 
 
 
 
 
 
9922164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a06df0b
16d35dc
 
 
 
 
cda6806
8752186
16d35dc
 
 
 
 
 
ded54c4
 
16d35dc
 
ff44154
 
a06df0b
ff44154
cda6806
 
 
 
16d35dc
 
 
cda6806
b270fe6
cda6806
16d35dc
ff44154
16d35dc
 
cda6806
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Module setup: imports, model loading and configuration shared by the
# request handler and the UI below. Statement order matters here because the
# models are instantiated at import time.
import tempfile
import gradio as gr
from neon_tts_plugin_coqui import CoquiTTS
# Language keys exposed by CoquiTTS; offered as the choices of the language radio.
LANGUAGES = list(CoquiTTS.langs.keys())
# Language pre-selected in the UI.
default_lang = "en"

# ChatGPT
from pyChatGPT import ChatGPT
import whisper
# Load the "small" Whisper checkpoint once at import time; translate() reuses it.
whisper_model = whisper.load_model("small")
#whisper = gr.Blocks.load(name="spaces/sanchit-gandhi/whisper-large-v2")
import os
# Server-side ChatGPT session token, read from the `SessionToken` environment
# variable (None when the variable is unset).
session_token = os.environ.get('SessionToken') 

# Heading rendered at the top of the page.
title = "Speech to ChatGPT to Speech"
#info = "more info at [Neon Coqui TTS Plugin](https://github.com/NeonGeckoCom/neon-tts-plugin-coqui), [Coqui TTS](https://github.com/coqui-ai/TTS)"
#badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=neongeckocom.neon-tts-plugin-coqui"

# Single TTS engine instance shared by every chat_hf() call.
coquiTTS = CoquiTTS()


# ChatGPT
def _ask_chatgpt(token, prompt):
    """Send *prompt* to ChatGPT authenticated with *token*; return the reply text.

    A new client is created for each call. After the message is sent the
    authorization is refreshed and the conversation is reset, in that order,
    before the reply text is extracted from the response.
    """
    api = ChatGPT(token)
    resp = api.send_message(prompt)

    api.refresh_auth()  # refresh the authorization token
    api.reset_conversation()  # reset the conversation
    return resp['message']


def chat_hf(audio, custom_token, language):
    """Transcribe recorded speech, ask ChatGPT, and voice the answer.

    Args:
        audio: Path of the recorded audio file (the UI supplies a filepath).
        custom_token: User-supplied ChatGPT session token. Only used when the
            request made with the module-level ``session_token`` fails.
        language: Language key handed to the TTS engine as the speaker language.

    Returns:
        A tuple ``(transcript, chatgpt_reply, wav_path)`` where ``wav_path``
        is the name of a temporary ``.wav`` file holding the spoken reply.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        Exception: Errors from transcription or speech synthesis propagate
            unchanged. An error from the ChatGPT request is re-raised when no
            ``custom_token`` is available; otherwise the error raised by the
            retry with ``custom_token`` propagates.
    """
    # Transcribe once, outside the retry logic: the transcript does not depend
    # on which token is used, so a ChatGPT failure must not re-run Whisper.
    whisper_text = translate(audio)

    try:
        gpt_response = _ask_chatgpt(session_token, whisper_text)
    except Exception:
        # Without a fallback token there is nothing to retry with; surface the
        # original failure instead of masking it with an empty-token error.
        if not custom_token:
            raise
        gpt_response = _ask_chatgpt(custom_token, whisper_text)

    # to voice
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name

# whisper
def translate(audio):
    """Transcribe the audio file at *audio* with Whisper and return the text.

    Despite the name, this runs Whisper's ``"transcribe"`` task. Progress,
    the language reported by the decoder, and the transcript are printed to
    stdout.

    Args:
        audio: Path of the audio file to load.

    Returns:
        The transcribed text.
    """
    print("""
    —
    Sending audio to Whisper ...
    —
    """)

    # Pad or trim the waveform before computing the log-Mel spectrogram, then
    # move the spectrogram to the device the model lives on.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(waveform).to(whisper_model.device)

    # No language is set in the options, so the decoder reports the language
    # itself via `transcription.language`; a separate detect_language() pass
    # (whose result was never used) is not needed. Half precision is disabled.
    transcript_options = whisper.DecodingOptions(task="transcribe", fp16=False)
    transcription = whisper.decode(whisper_model, mel, transcript_options)

    print("language spoken: " + transcription.language)
    print("transcript: " + transcription.text)
    print("—" * 43)

    return transcription.text

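# Disabled alternative: transcribe through a hosted Whisper Space loaded with
# gr.Blocks.load() instead of the local model.
#def translate(audio):
#    print("""
#    —
#    Sending audio to Whisper ...
#    —
#    """)
#
#    text_result = whisper(audio, None, "transcribe", fn_index=0)
#    print(text_result)
#    return text_result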


# Page layout: inputs (language, microphone recording, fallback token) in the
# first column, outputs (transcript, ChatGPT reply, synthesized audio) in the
# second.
with gr.Blocks() as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    with gr.Row(equal_height=True):
        with gr.Column():
            radio = gr.Radio(
                label="Language",
                choices=LANGUAGES,
                value=default_lang
            )
            # gr.Audio is used directly; gr.inputs.Audio is a deprecated
            # compatibility wrapper around the same component.
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT response")
            audio = gr.Audio(label="Output", interactive=False)

    # actions
    # Submit runs the full speech -> ChatGPT -> speech pipeline and fills the
    # three output components in order.
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )
    # Changing the language writes that language's sample sentence from
    # CoquiTTS.langs into the response textbox.
    radio.change(lambda lang: CoquiTTS.langs[lang]["sentence"], radio, text2)


blocks.launch(debug=True)