Raven-with-Voice-Cloning-2.0

Build error

App Files Files Community

Kevin676 committed on Apr 13, 2023

Commit

6a4d447

•

1 Parent(s): 07977e9

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -15

app.py CHANGED Viewed

@@ -1,8 +1,23 @@
-#from turtle import title
 import gradio as gr
 import git
-import os
 os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
 os.system('pip install -q -e TTS/')
 os.system('pip install -q torchaudio==0.9.0')
@@ -18,7 +33,6 @@ TTS_PATH = "TTS/"
 # add libraries into environment
 sys.path.append(TTS_PATH) # set this if TTS is not installed globally
-import os
 import string
 import time
 import argparse
@@ -28,14 +42,13 @@ import numpy as np
 import IPython
 from IPython.display import Audio
-import torch
 import torchaudio
 from speechbrain.pretrained import SpectralMaskEnhancement
 enhance_model = SpectralMaskEnhancement.from_hparams(
 source="speechbrain/metricgan-plus-voicebank",
 savedir="pretrained_models/metricgan-plus-voicebank",
-#run_opts={"device":"cuda"},
 )
 from TTS.tts.utils.synthesis import synthesis
@@ -169,8 +182,7 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
       voicefixer.restore(input=out_path, # input wav file path
                       output="audio1.wav", # output wav file path
-#                      cuda=True, # whether to use gpu acceleration'
-                      cuda = False,
                       mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
       noisy = enhance_model.load_audio(
@@ -182,11 +194,117 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
       return "enhanced.wav"
-gr.Interface(
-    fn=greet,
-    inputs=[gr.inputs.Textbox(label='请输入您想要合成的文字，请自觉合法合规使用！'),gr.Audio(type="filepath", source="upload",label='请上传您喜欢的声音(wav/mp3文件, max. 30mb)'),gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音，与文件上传二选一即可')],
-    outputs="audio",
-    title="🥳💬💕 - Voice Cloning/声音合成测试版（目前只支持英文文本合成，中文版正在开发中，敬请期待）",
-    description = "注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习使用。用户生成内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。",
-    article = "🤖 - 让有人文关怀的AI造福每一个人！AI向善，文明璀璨！TalktoAI - Enable the future！",
-).launch()

 import gradio as gr
 import git
+import os, gc, torch
+from datetime import datetime
+from huggingface_hub import hf_hub_download
+from pynvml import *
+# The rwkv package reads these flags while rwkv.model is being imported, so
+# they must be set before the import below; assigning them afterwards has no
+# effect on the already-loaded module.
+os.environ["RWKV_JIT_ON"] = '1'
+os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
+# NVML handle for GPU 0; used later to log VRAM usage on each request.
+nvmlInit()
+gpu_h = nvmlDeviceGetHandleByIndex(0)
+# Only the last ctx_limit prompt tokens are fed to the model.
+ctx_limit = 1024
+title1 = "RWKV-4-Raven-7B-v9-Eng99%-Other1%-20230412-ctx8192"
+from rwkv.model import RWKV
+# Download (or reuse the cached copy of) the checkpoint named by title1.
+model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title1}.pth")
+model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
+from rwkv.utils import PIPELINE, PIPELINE_ARGS
+pipeline = PIPELINE(model, "20B_tokenizer.json")
 os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
 os.system('pip install -q -e TTS/')
 os.system('pip install -q torchaudio==0.9.0')
 # add libraries into environment
 sys.path.append(TTS_PATH) # set this if TTS is not installed globally
 import string
 import time
 import argparse
 import IPython
 from IPython.display import Audio
 import torchaudio
 from speechbrain.pretrained import SpectralMaskEnhancement
 enhance_model = SpectralMaskEnhancement.from_hparams(
 source="speechbrain/metricgan-plus-voicebank",
 savedir="pretrained_models/metricgan-plus-voicebank",
+run_opts={"device":"cuda"},
 )
 from TTS.tts.utils.synthesis import synthesis
       voicefixer.restore(input=out_path, # input wav file path
                       output="audio1.wav", # output wav file path
+                      cuda=True, # whether to use gpu acceleration'
                       mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
       noisy = enhance_model.load_audio(
       return "enhanced.wav"
+def generate_prompt(instruction, input=None):
+    """Build the instruction-style prompt text that is fed to the model.
+
+    Args:
+        instruction: The task or question to put under "# Instruction:".
+        input: Optional extra context. When truthy it is placed under
+            "# Input:"; when ``None`` or empty the shorter template without
+            an input section is used.
+
+    Returns:
+        The formatted prompt string, ending with the "# Response:" header.
+    """
+    if input:
+        # Template with both an instruction and a context section.
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+# Instruction:
+{instruction}
+# Input:
+{input}
+# Response:
+"""
+    else:
+        # Template for an instruction on its own.
+        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+# Instruction:
+{instruction}
+# Response:
+"""
+def evaluate(
+    instruction,
+    input=None,
+):
+    """Stream a model reply to ``instruction``, yielding the growing text.
+
+    Sampling settings are fixed inside the function (temperature 1.0,
+    top_p 0.5, presence/frequency penalties 0.4, at most 200 tokens).
+
+    Args:
+        instruction: The user's request; surrounding whitespace is stripped.
+        input: Optional background context. ``None`` or blank text selects
+            the instruction-only prompt.
+
+    Yields:
+        The stripped response accumulated so far, each time the pending
+        tokens decode cleanly, and once more after generation has ended.
+    """
+    args = PIPELINE_ARGS(temperature = max(0.2, float(1.0)), top_p = float(0.5),
+                     alpha_frequency = 0.4,
+                     alpha_presence = 0.4,
+                     token_ban = [], # ban the generation of some tokens
+                     token_stop = [0]) # stop generation whenever you see any token here
+    instruction = instruction.strip()
+    # `input` defaults to None, so guard before stripping; an empty string
+    # makes generate_prompt fall back to the instruction-only template.
+    input = input.strip() if input else ''
+    ctx = generate_prompt(instruction, input)
+    gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
+    print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
+    all_tokens = []
+    out_last = 0
+    out_str = ''
+    occurrence = {}
+    state = None
+    try:
+        for i in range(200):
+            # First step feeds the (truncated) prompt; later steps feed only
+            # the previously sampled token together with the carried state.
+            out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
+            # Penalise tokens that were already generated.
+            for n, count in occurrence.items():
+                out[n] -= (args.alpha_presence + count * args.alpha_frequency)
+            token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
+            if token in args.token_stop:
+                break
+            all_tokens.append(token)
+            occurrence[token] = occurrence.get(token, 0) + 1
+            tmp = pipeline.decode(all_tokens[out_last:])
+            # Hold text back while it still contains U+FFFD (the pending
+            # tokens do not yet decode to complete characters).
+            if '\ufffd' not in tmp:
+                out_str += tmp
+                yield out_str.strip()
+                out_last = i + 1
+    finally:
+        # Release memory even if the consumer stops iterating early or the
+        # model raises part-way through generation.
+        gc.collect()
+        torch.cuda.empty_cache()
+    yield out_str.strip()
+# Two-step UI: chat with the language model, then read the reply aloud in a
+# cloned voice.
+block = gr.Blocks()
+with block:
+    with gr.Group():
+        gr.Markdown(
+            """  <center>🥳💬💕 - TalktoAI，随时随地，谈天说地！</center>
+            ## <center>🤖 - 让有人文关怀的AI造福每一个人！AI向善，文明璀璨！TalktoAI - Enable the future！</center>
+            ### <center>注意❗：请不要输入或生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。</center>
+            ### <center>Model by [Raven](https://huggingface.co/spaces/BlinkDL/Raven-RWKV-7B). Thanks to [PENG Bo](https://github.com/BlinkDL). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
+      """
+        )
+        # Step 1: instruction + optional context -> streamed text reply.
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                inp1 = gr.components.Textbox(lines=2, label="说些什么吧(中英皆可，英文对话效果更好)", value="Tell me a joke.")
+                inp2 = gr.components.Textbox(lines=2, label="对话的背景信息(选填，请合理合规使用此程序)", placeholder="none")
+                btn = gr.Button("开始对话吧")
+        text = gr.Textbox(lines=5, label="Raven的回答")
+        btn.click(evaluate, [inp1, inp2], [text])
+        # Step 2: the reply textbox doubles as the text-to-speech input,
+        # combined with an uploaded or recorded reference voice.
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                inp3 = text
+                inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件, max. 30mb)", type="filepath")
+                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音，与文件上传二选一即可')
+                btn1 = gr.Button("用喜欢的声音听一听吧")
+        out1 = gr.Audio(label="合成的专属声音")
+        btn1.click(greet, [inp3, inp4, inp5], [out1])
+        gr.HTML('''
+        <div class="footer">
+                    <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
+                    </p>
+        </div>
+        ''')
+# evaluate is a generator; enable the queue explicitly so streaming also
+# works when the app is not started inside a hosted Space.
+block.queue()
+block.launch(show_error=True)