File size: 3,956 Bytes
892a82a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe4ba68
892a82a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe4ba68
892a82a
fe4ba68
892a82a
 
 
 
 
 
fe4ba68
892a82a
fe4ba68
892a82a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr

from TTS.api import TTS

# Multilingual YourTTS model; english() uses it for voice-cloned English speech.
tts1 = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)

# Mandarin Tacotron2 model; chinese() uses it together with voice conversion.
tts2 = TTS(model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST")

import os

import openai

import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement

# Pretrained speech-enhancement model; english() runs its synthesized audio
# through this before returning it.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    # Optional: add run_opts={"device": "cuda"} here to run on a GPU.
)

# Running conversation history used by chatgpt(). It lives at module level, so
# every call (and therefore every visitor of the app) shares the same list.
# It starts with the system prompt only.
mes = [
    dict(
        role="system",
        content="You are my personal assistant. Try to be helpful.",
    ),
]

def chatgpt(apikey, result):
    """Send one user message to ChatGPT and return the assistant's reply.

    The conversation so far is kept in the module-level ``mes`` list. The new
    user turn and the assistant's answer are added to that list only after the
    API call has succeeded, so a failed request (bad key, network error, ...)
    does not leave an unanswered user message in the shared history.

    Args:
        apikey: OpenAI API key to use; it is stored on ``openai.api_key``.
        result: Text of the user's message.

    Returns:
        The message content of the first choice in the completion.

    Raises:
        Any exception raised by ``openai.ChatCompletion.create`` is propagated
        unchanged; ``mes`` is not modified in that case.
    """
    openai.api_key = apikey

    user_message = {"role": "user", "content": result}

    # Send a copy of the history plus the new turn instead of mutating ``mes``
    # up front: if the request raises, the shared history stays consistent.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[*mes, user_message],
    )

    chat_response = completion.choices[0].message.content

    # Commit both turns together now that the exchange is complete.
    mes.extend(
        [user_message, {"role": "assistant", "content": chat_response}]
    )

    return chat_response

def english(text_en, upload, VoiceMicrophone):
    """Synthesize ``text_en`` as English speech in a reference voice, then enhance it.

    The uploaded recording is preferred as the reference voice; the microphone
    recording is used only when no file was uploaded.

    Args:
        text_en: Text to speak.
        upload: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone: File path of a microphone reference recording, or
            ``None``. Only consulted when ``upload`` is ``None``.

    Returns:
        The string ``"enhanced.wav"``, the path the enhanced audio is saved to.

    Raises:
        ValueError: If ``text_en`` is empty/blank, or if neither reference
            recording was provided.
    """
    # Fail fast with a readable message instead of letting the TTS library
    # fail later on an empty text or a missing reference voice.
    if not text_en or not text_en.strip():
        raise ValueError("没有可以合成的文本 (there is no text to synthesize).")

    speaker_wav = upload if upload is not None else VoiceMicrophone
    if speaker_wav is None:
        raise ValueError(
            "请先上传或用麦克风录制一段参考声音 "
            "(please upload or record a reference voice first)."
        )

    tts1.tts_to_file(
        text_en,
        speaker_wav=speaker_wav,
        language="en",
        file_path="output.wav",
    )

    # Add a leading batch dimension so the single clip can go through
    # enhance_batch.
    noisy = enhance_model.load_audio("output.wav").unsqueeze(0)

    # NOTE(review): lengths=[1.] presumably marks the one clip as full-length
    # (relative length) -- confirm against the SpeechBrain docs.
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))

    # NOTE(review): 16000 is passed as the sample rate; presumably this matches
    # the enhancement model's rate -- confirm.
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)

    return "enhanced.wav"

def chinese(text_cn, upload1, VoiceMicrophone1):
    """Synthesize ``text_cn`` as Chinese speech converted to a reference voice.

    The uploaded recording is preferred as the reference voice; the microphone
    recording is used only when no file was uploaded. A full stop ("。") is
    appended to the text before synthesis.

    Args:
        text_cn: Text to speak.
        upload1: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone1: File path of a microphone reference recording, or
            ``None``. Only consulted when ``upload1`` is ``None``.

    Returns:
        The path of the generated audio file.

    Raises:
        ValueError: If ``text_cn`` is empty/blank, or if neither reference
            recording was provided.
    """
    # Fail fast with a readable message instead of synthesizing a lone "。" or
    # handing a missing reference voice to the voice-conversion step.
    if not text_cn or not text_cn.strip():
        raise ValueError("没有可以合成的文本 (there is no text to synthesize).")

    speaker_wav = upload1 if upload1 is not None else VoiceMicrophone1
    if speaker_wav is None:
        raise ValueError(
            "请先上传或用麦克风录制一段参考声音 "
            "(please upload or record a reference voice first)."
        )

    # File name (including its spelling) is kept exactly as before so any
    # existing consumer of this path keeps working.
    output_path = "ouptut1.wav"

    tts2.tts_with_vc_to_file(
        text_cn + "。",
        speaker_wav=speaker_wav,
        file_path=output_path,
    )

    return output_path

# Gradio UI: a chat section (API key + message -> ChatGPT reply) followed by
# two buttons that read the reply aloud in a reference voice, one via
# chinese() and one via english().
block = gr.Blocks()

with block:
    with gr.Group():
        # Page title.
        gr.Markdown(
            """ # <center>Talk to AI</center>
            
            
      """
        )
        
        # Chat inputs: the OpenAI API key (password-type textbox), the user's
        # message, and the button that starts the exchange.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
              
                inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type = "password")
                inp2 = gr.Textbox(label='说点什么吧(中英皆可)')

                btn = gr.Button("开始对话吧")

        # Textbox that receives ChatGPT's answer.
        texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
              
        # On click: call chatgpt(api_key, message) and show the result in texts1.
        btn.click(chatgpt, [inp1, inp2], [texts1])

        # Reference-voice inputs shared by both synthesis buttons below.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # The reply textbox doubles as the text input for synthesis.
                inp3 = texts1
                # Both audio inputs hand a file path to the callbacks
                # (type="filepath"); one comes from a file upload, the other
                # from the microphone.
                inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')

                btn1 = gr.Button("用喜欢的声音听一听吧(中文)")

        # Player for the audio returned by chinese().
        out1 = gr.Audio(label="合成的专属声音(中文)")

        # On click: call chinese(text, uploaded_voice, microphone_voice).
        btn1.click(chinese, [inp3, inp4, inp5], [out1])

        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):

                btn2 = gr.Button("用喜欢的声音听一听吧(英文)")

        # Player for the audio returned by english().
        out2 = gr.Audio(label="合成的专属声音(英文)")
        
        # On click: call english(text, uploaded_voice, microphone_voice) with
        # the same three inputs as the Chinese button.
        btn2.click(english, [inp3, inp4, inp5], [out2])

        # Disclaimer and attribution shown at the bottom of the page.
        gr.Markdown(
            """ ### <center>仅供学习交流使用</center>
            
            ### <center>Powered by [ChatGPT](https://chat.openai.com/).</center>
            
      """
        )
        
        # Footer container; its two paragraphs are currently empty.
        gr.HTML('''
        <div class="footer">
                    <p>
                    </p>
                    <p>
                    </p>
        </div>
        ''')

# Start the app.
# NOTE(review): show_error=True presumably makes Gradio surface callback
# exceptions in the browser -- confirm for the installed Gradio version.
block.launch(show_error=True)