Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,23 @@
|
|
1 |
-
#from turtle import title
|
2 |
import gradio as gr
|
3 |
|
4 |
import git
|
5 |
-
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
|
7 |
os.system('pip install -q -e TTS/')
|
8 |
os.system('pip install -q torchaudio==0.9.0')
|
@@ -18,7 +33,6 @@ TTS_PATH = "TTS/"
|
|
18 |
# add libraries into environment
|
19 |
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
|
20 |
|
21 |
-
import os
|
22 |
import string
|
23 |
import time
|
24 |
import argparse
|
@@ -28,14 +42,13 @@ import numpy as np
|
|
28 |
import IPython
|
29 |
from IPython.display import Audio
|
30 |
|
31 |
-
import torch
|
32 |
import torchaudio
|
33 |
from speechbrain.pretrained import SpectralMaskEnhancement
|
34 |
|
35 |
enhance_model = SpectralMaskEnhancement.from_hparams(
|
36 |
source="speechbrain/metricgan-plus-voicebank",
|
37 |
savedir="pretrained_models/metricgan-plus-voicebank",
|
38 |
-
|
39 |
)
|
40 |
|
41 |
from TTS.tts.utils.synthesis import synthesis
|
@@ -169,8 +182,7 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
|
|
169 |
|
170 |
voicefixer.restore(input=out_path, # input wav file path
|
171 |
output="audio1.wav", # output wav file path
|
172 |
-
|
173 |
-
cuda = False,
|
174 |
mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
|
175 |
|
176 |
noisy = enhance_model.load_audio(
|
@@ -182,11 +194,117 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
|
|
182 |
|
183 |
return "enhanced.wav"
|
184 |
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
import git
|
4 |
+
import os, gc, torch
|
5 |
+
from datetime import datetime
|
6 |
+
from huggingface_hub import hf_hub_download
|
7 |
+
from pynvml import *
|
8 |
+
nvmlInit()
|
9 |
+
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
10 |
+
ctx_limit = 1024
|
11 |
+
title1 = "RWKV-4-Raven-7B-v9-Eng99%-Other1%-20230412-ctx8192"
|
12 |
+
|
13 |
+
# Per the rwkv package README these flags must be set BEFORE `rwkv.model` is
# imported; assigning them after the import (as this file previously did)
# leaves the requested CUDA kernel unused.
# NOTE(review): RWKV_CUDA_ON='1' makes rwkv build a custom CUDA kernel, which
# needs a C++/CUDA toolchain on the host -- confirm the Space image has one.
os.environ["RWKV_JIT_ON"] = '1'
os.environ["RWKV_CUDA_ON"] = '1'  # if '1' then use CUDA kernel for seq mode (much faster)

from rwkv.model import RWKV
from rwkv.utils import PIPELINE, PIPELINE_ARGS

# Fetch the checkpoint named by `title1` from the Hub, load it with the
# strategy string below, and wrap it in the tokenizer pipeline used by
# evaluate().
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title1}.pth")
model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
pipeline = PIPELINE(model, "20B_tokenizer.json")
|
21 |
os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
|
22 |
os.system('pip install -q -e TTS/')
|
23 |
os.system('pip install -q torchaudio==0.9.0')
|
|
|
33 |
# add libraries into environment
|
34 |
sys.path.append(TTS_PATH) # set this if TTS is not installed globally
|
35 |
|
|
|
36 |
import string
|
37 |
import time
|
38 |
import argparse
|
|
|
42 |
import IPython
|
43 |
from IPython.display import Audio
|
44 |
|
|
|
45 |
import torchaudio
|
46 |
from speechbrain.pretrained import SpectralMaskEnhancement
|
47 |
|
48 |
enhance_model = SpectralMaskEnhancement.from_hparams(
|
49 |
source="speechbrain/metricgan-plus-voicebank",
|
50 |
savedir="pretrained_models/metricgan-plus-voicebank",
|
51 |
+
run_opts={"device":"cuda"},
|
52 |
)
|
53 |
|
54 |
from TTS.tts.utils.synthesis import synthesis
|
|
|
182 |
|
183 |
voicefixer.restore(input=out_path, # input wav file path
|
184 |
output="audio1.wav", # output wav file path
|
185 |
+
cuda=True, # whether to use gpu acceleration'
|
|
|
186 |
mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
|
187 |
|
188 |
noisy = enhance_model.load_audio(
|
|
|
194 |
|
195 |
return "enhanced.wav"
|
196 |
|
197 |
+
def generate_prompt(instruction, input=None):
    """Build the instruction-style prompt text that is fed to the model.

    Args:
        instruction: Task text, interpolated under the "# Instruction:" line.
        input: Optional extra context. When truthy it is interpolated under an
            additional "# Input:" line; when falsy (None or "") that section
            is left out and the shorter preamble is used.

    Returns:
        The prompt string. It always ends with a "# Response:" line followed
        by a newline.
    """
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# Instruction:
{instruction}
# Input:
{input}
# Response:
"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# Instruction:
{instruction}
# Response:
"""
|
212 |
+
|
213 |
+
def evaluate(
    instruction,
    input=None,
):
    """Stream a model answer for ``instruction`` (plus optional ``input``).

    This is a generator: after every sampled token whose decoded text contains
    no U+FFFD replacement character it yields the whole answer accumulated so
    far (stripped), and it yields the final accumulated answer once more after
    the loop ends.

    Sampling settings are fixed inside the function (temperature 1.0,
    top_p 0.5, presence/frequency penalties 0.4, at most 200 tokens, stop on
    token id 0).

    Args:
        instruction: Instruction text; surrounding whitespace is stripped.
        input: Optional context text. ``None`` is treated as an empty string.

    Yields:
        The stripped answer text generated so far.
    """
    args = PIPELINE_ARGS(
        temperature=max(0.2, float(1.0)),
        top_p=float(0.5),
        alpha_frequency=0.4,
        alpha_presence=0.4,
        token_ban=[],  # ban the generation of some tokens
        token_stop=[0],  # stop generation whenever you see any token here
    )

    instruction = instruction.strip()
    # `input` defaults to None, so guard before calling .strip(); the previous
    # unconditional call raised AttributeError when the argument was omitted.
    input = (input or "").strip()
    ctx = generate_prompt(instruction, input)

    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
    print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')

    all_tokens = []
    out_last = 0
    out_str = ''
    occurrence = {}
    state = None
    try:
        for i in range(200):
            # First step feeds the prompt (only its last `ctx_limit` tokens);
            # every later step feeds just the token sampled in the previous step.
            step_tokens = pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token]
            out, state = model.forward(step_tokens, state)

            # Penalise tokens already emitted: a flat presence term plus a
            # term that grows with how often the token has been produced.
            for n in occurrence:
                out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

            token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
            if token in args.token_stop:
                break
            all_tokens.append(token)
            occurrence[token] = occurrence.get(token, 0) + 1

            # Decode only the tokens not yet flushed. Text containing U+FFFD is
            # held back and retried together with the following tokens
            # (presumably an incomplete multi-byte character -- TODO confirm).
            tmp = pipeline.decode(all_tokens[out_last:])
            if '\ufffd' not in tmp:
                out_str += tmp
                yield out_str.strip()
                out_last = i + 1
    finally:
        # Run the cleanup even if the consumer closes the generator early or
        # the model call raises.
        gc.collect()
        torch.cuda.empty_cache()
    yield out_str.strip()
|
262 |
+
|
263 |
+
|
264 |
+
# Gradio UI: a text panel whose button calls evaluate(), and a voice panel
# whose button calls greet(); the app is launched at the end of this section.
block = gr.Blocks()
|
265 |
+
|
266 |
+
with block:
|
267 |
+
with gr.Group():
|
268 |
+
gr.Markdown(
|
269 |
+
""" <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
|
270 |
+
## <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
|
271 |
+
### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
|
272 |
+
|
273 |
+
### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
|
274 |
+
|
275 |
+
"""
|
276 |
+
)
|
277 |
+
|
278 |
+
with gr.Box():
|
279 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
280 |
+
|
281 |
+
inp1 = gr.components.Textbox(lines=2, label="说些什么吧(中英皆可,英文对话效果更好)", value="Tell me a joke.")
|
282 |
+
inp2 = gr.components.Textbox(lines=2, label="对话的背景信息(选填,请合理合规使用此程序)", placeholder="none")
|
283 |
+
|
284 |
+
btn = gr.Button("开始对话吧")
|
285 |
+
|
286 |
+
text = gr.Textbox(lines=5, label="Raven的回答")
|
287 |
+
|
288 |
+
btn.click(evaluate, [inp1, inp2], [text])
|
289 |
+
|
290 |
+
with gr.Box():
|
291 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
292 |
+
# The answer textbox filled by evaluate() is reused as the text input of greet().
inp3 = text
|
293 |
+
inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="filepath")
|
294 |
+
inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')
|
295 |
+
|
296 |
+
btn1 = gr.Button("用喜欢的声音听一听吧")
|
297 |
+
|
298 |
+
out1 = gr.Audio(label="合成的专属声音")
|
299 |
+
|
300 |
+
btn1.click(greet, [inp3, inp4, inp5], [out1])
|
301 |
+
|
302 |
+
gr.HTML('''
|
303 |
+
<div class="footer">
|
304 |
+
<p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
|
305 |
+
</p>
|
306 |
+
</div>
|
307 |
+
''')
|
308 |
+
|
309 |
+
|
310 |
+
# Start the app; show_error=True is passed through to launch().
block.launch(show_error=True)
|