File size: 6,381 Bytes
aa95dd5
 
 
530748c
ad81144
 
 
 
 
aa95dd5
 
2b48c61
aa95dd5
875ff99
aa95dd5
8b0db71
 
 
 
 
 
132e7e7
 
 
 
 
 
 
8b0db71
 
ad81144
 
 
 
 
 
 
 
 
 
 
 
132e7e7
8b0db71
 
 
 
 
 
 
 
 
 
79e9146
 
 
 
 
 
8b0db71
 
2b48c61
a4cb207
 
2b48c61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa95dd5
 
 
 
 
 
 
79e9146
aa95dd5
 
 
8b0db71
 
 
2636400
c9eb04b
925df59
1fcb37d
ad81144
8344d66
ad81144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d64a0b1
12d2fcc
7210b03
ad81144
d64a0b1
 
ad81144
 
 
 
 
54dfa91
 
ad81144
8b0db71
ad81144
fc7a5f0
8b0db71
 
 
 
 
 
aa95dd5
8b0db71
aa95dd5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import gradio as gr
import os
import tempfile
from openai import OpenAI
from tts_voice import tts_order_voice
import edge_tts
import tempfile
import anyio


# Set an environment variable for key
#os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY')

#client = OpenAI() # add api_key

import torch
import torchaudio
import gradio as gr
from scipy.io import wavfile
from scipy.io.wavfile import write

from speechbrain.pretrained import SpectralMaskEnhancement

# Speech-enhancement model applied to the converted audio in `voice_change`.
# Loaded once at import time from the "speechbrain/metricgan-plus-voicebank"
# source; `savedir` is the local directory passed to SpeechBrain for the
# downloaded files.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
)

# kNN-VC voice-conversion model fetched through torch.hub from the
# `bshall/knn-vc` repository and placed on the CPU (`device='cpu'`).
# `trust_repo=True` skips torch.hub's interactive trust prompt.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device='cpu')

# Mapping used by `text_to_speech_edge` to look up an Edge TTS voice from the
# language entry selected in the UI (its keys populate the language dropdown).
language_dict = tts_order_voice

async def text_to_speech_edge(text, language_code):
    """Synthesize ``text`` with Edge TTS and save it to a temporary MP3 file.

    Args:
        text: The text to speak.
        language_code: A key of ``language_dict``; the mapped value is passed
            to ``edge_tts.Communicate`` as the voice.

    Returns:
        A ``(message, path)`` tuple: a status message containing the input
        text, and the path of the generated ``.mp3`` file. The file is created
        with ``delete=False``, so it outlives this call.

    Raises:
        KeyError: If ``language_code`` is not a key of ``language_dict``.
        Exception: Whatever ``communicate.save`` raises is propagated after
            the partially written temp file has been removed.
    """
    voice = language_dict[language_code]
    communicate = edge_tts.Communicate(text, voice)

    # Only reserve a unique path here; edge_tts writes the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # The temp file was created with delete=False, so nothing else will
        # clean it up if synthesis fails or the task is cancelled.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return "语音合成完成:{}".format(text), tmp_path


def voice_change(audio_in, audio_ref):
    """Convert the voice in ``audio_in`` to the timbre of ``audio_ref``.

    The source and reference audio are re-written as WAV files, run through
    the kNN-VC model (``knn_vc``), and the converted waveform is then passed
    through the speech-enhancement model (``enhance_model``).

    Intermediate files live in a per-call temporary directory and the result
    is written to a uniquely named temp file, so simultaneous requests do not
    overwrite each other's audio.

    Args:
        audio_in: Path of the audio whose speech content should be kept.
        audio_ref: Path of the reference audio that determines the target
            voice.

    Returns:
        Path of the enhanced, converted ``.wav`` file (saved at 16000 Hz).
        The file is not deleted automatically.

    Raises:
        gr.Error: If either input path is missing.
    """
    if not audio_in or not audio_ref:
        raise gr.Error("Please provide both the source audio and the reference audio.")

    with tempfile.TemporaryDirectory() as work_dir:
        query_path = os.path.join(work_dir, "audio_in.wav")
        reference_path = os.path.join(work_dir, "audio_ref.wav")
        converted_path = os.path.join(work_dir, "output.wav")

        # Round-trip both inputs through scipy, as the original code did,
        # before handing them to kNN-VC.
        samplerate1, data1 = wavfile.read(audio_in)
        samplerate2, data2 = wavfile.read(audio_ref)
        write(query_path, samplerate1, data1)
        write(reference_path, samplerate2, data2)

        query_seq = knn_vc.get_features(query_path)
        matching_set = knn_vc.get_matching_set([reference_path])
        out_wav = knn_vc.match(query_seq, matching_set, topk=4)
        # out_wav[None] adds a leading dimension before saving.
        torchaudio.save(converted_path, out_wav[None], 16000)

        noisy = enhance_model.load_audio(converted_path).unsqueeze(0)
        # lengths=[1.] is the relative length of the single item in the batch.
        enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))

        # Reserve the output path only after processing succeeded, so a
        # failure above does not leave an empty result file behind.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as result_file:
            result_path = result_file.name
        torchaudio.save(result_path, enhanced.cpu(), 16000)

    return result_path


def tts(text, model, voice, api_key):
    """Generate speech with the OpenAI TTS API and save it as an MP3 file.

    Args:
        text: Text to synthesize; at most 300 characters.
        model: OpenAI TTS model name, e.g. "tts-1" or "tts-1-hd".
        voice: OpenAI voice name, e.g. 'alloy', 'echo', 'fable', 'onyx',
            'nova' or 'shimmer'.
        api_key: The user's OpenAI API key.

    Returns:
        Path of a temporary ``.mp3`` file containing the generated audio.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If the text is empty or longer than 300 characters, the API
            key is missing, or the OpenAI request fails.
    """
    if not text or not text.strip():
        raise gr.Error('Please enter some text to synthesize')
    if len(text) > 300:
        raise gr.Error('您输入的文本字符多于300个,请缩短您的文本')
    # Treat None and whitespace-only input the same as an empty key.
    if not api_key or not api_key.strip():
        raise gr.Error('Please enter your OpenAI API Key')

    try:
        client = OpenAI(api_key=api_key.strip())
        response = client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
        )
    except Exception as error:
        # Log the real cause before raising; the user only sees the generic
        # message below.
        print(str(error))
        raise gr.Error(
            "An error occurred while generating speech. Please check your API key and try again."
        ) from error

    # Persist the audio bytes to a temp file that outlives this call.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)

    return temp_file.name


# Gradio UI: two tabs (OpenAI TTS and Edge TTS), each feeding its synthesized
# audio into the shared `voice_change` step.
app = gr.Blocks()

with app:
    gr.Markdown("# <center>🌟 - OpenAI TTS + AI变声</center>")
    # The OPENAI_API_KEY environment variable must never be rendered into the
    # page: doing so exposes the secret to every visitor, and concatenating
    # None (variable unset) with a str raises TypeError at startup.
    gr.Markdown("### <center>🎶 地表最强文本转语音模型 + 3秒实时AI变声,支持中文!Powered by [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech) and [KNN-VC](https://github.com/bshall/knn-vc) </center>")
    gr.Markdown("### <center>🌊 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    with gr.Tab("🤗 OpenAI TTS"):
        with gr.Row(variant='panel'):
            api_key = gr.Textbox(type='password', label='OpenAI API Key', placeholder='请在此填写您的OpenAI API Key')
            model = gr.Dropdown(choices=['tts-1','tts-1-hd'], label='请选择模型(tts-1推理更快,tts-1-hd音质更好)', value='tts-1')
            voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='请选择一个说话人', value='alloy')
        with gr.Row():
            with gr.Column():
                inp_text = gr.Textbox(label="请填写您想生成的文本(中英文皆可)", placeholder="想说却还没说的 还很多 攒着是因为想写成歌", lines=5)
                btn_text = gr.Button("一键开启真实拟声吧", variant="primary")

            with gr.Column():
                # inp1 is filled by the TTS step (not user-editable) and then
                # used as the source audio for voice conversion.
                inp1 = gr.Audio(type="filepath", label="OpenAI TTS真实拟声", interactive=False)
                inp2 = gr.Audio(type="filepath", label="请上传AI变声的参照音频(决定变声后的语音音色)")
                btn1 = gr.Button("一键开启AI变声吧", variant="primary")
            with gr.Column():
                out1 = gr.Audio(type="filepath", label="AI变声后的专属音频")
            btn_text.click(tts, [inp_text, model, voice, api_key], inp1)
            btn1.click(voice_change, [inp1, inp2], out1)

    with gr.Tab("⚡ Edge TTS"):
        with gr.Row():
            input_text = gr.Textbox(lines=5, placeholder="想说却还没说的 还很多 攒着是因为想写成歌", label="请填写您想生成的文本(中英文皆可)")
            language_choices = list(language_dict.keys())
            # Keep the original default (the 16th entry) when it exists, but
            # fall back gracefully if the voice table is shorter.
            if len(language_choices) > 15:
                default_language = language_choices[15]
            elif language_choices:
                default_language = language_choices[0]
            else:
                default_language = None
            language = gr.Dropdown(choices=language_choices, value=default_language, label="请选择文本对应的语言")
            btn_edge = gr.Button("一键开启真实拟声吧", variant="primary")
            # Hidden sink for the status message returned by text_to_speech_edge.
            output_text = gr.Textbox(label="输出文本", visible=False)
            output_audio = gr.Audio(type="filepath", label="Edge TTS真实拟声")

        with gr.Row():
            inp_vc = gr.Audio(type="filepath", label="请上传AI变声的参照音频(决定变声后的语音音色)")
            btn_vc = gr.Button("一键开启AI变声吧", variant="primary")
            out_vc = gr.Audio(type="filepath", label="AI变声后的专属音频")

        btn_edge.click(text_to_speech_edge, [input_text, language], [output_text, output_audio])
        btn_vc.click(voice_change, [output_audio, inp_vc], out_vc)

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。Get your OpenAI API Key [here](https://platform.openai.com/api-keys).</center>")
    gr.HTML('''
        <div class="footer">
                    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
                    </p>
        </div>
    ''')

# show_error=True surfaces raised errors in the browser UI.
app.launch(show_error=True)