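# Spaces: Runtime error / Runtime error
# (The line above appears to be header text from the hosting page captured
# together with the file; it is not part of the program.)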
import subprocess | |
import random | |
import os | |
from pathlib import Path | |
import librosa | |
from scipy.io import wavfile | |
import numpy as np | |
import torch | |
import csv | |
import whisper | |
import gradio as gr | |
# Runtime dependency bootstrap: these packages are installed when the script
# starts, before the later imports that presumably rely on them.  The order is
# kept from the original: Cython is pinned first, then pysptk is installed with
# --no-build-isolation (so its build uses the already-installed packages).
import sys

_MODELSCOPE_FIND_LINKS = "https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html"
_BOOTSTRAP_INSTALLS = (
    ["--upgrade", "Cython==0.29.35"],
    ["pysptk", "--no-build-isolation"],
    ["kantts", "-f", _MODELSCOPE_FIND_LINKS],
    ["tts-autolabel", "-f", _MODELSCOPE_FIND_LINKS],
)
for _pip_args in _BOOTSTRAP_INSTALLS:
    # "python -m pip" guarantees the install lands in the interpreter that is
    # running this script; an argument list avoids going through a shell.
    _completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", *_pip_args],
        check=False,
    )
    if _completed.returncode != 0:
        # Stay best-effort like the original, but do not fail silently.
        print(
            f"WARNING: 'pip install {' '.join(_pip_args)}' "
            f"exited with code {_completed.returncode}"
        )
import sox | |
def split_long_audio(model, filepaths, save_dir="data_dir", out_sr=44100):
    """Transcribe audio files and write one WAV clip per transcribed segment.

    Each file is transcribed with ``model.transcribe``; the audio is then
    loaded, silence-trimmed, resampled to ``out_sr``, peak-normalised and cut
    at the segment boundaries.  Clips are written into ``save_dir`` as 16-bit
    PCM files named ``{file_idx}_{segment_idx}.wav``.

    Args:
        model: Object exposing ``transcribe(path, ...)`` that returns a mapping
            with a ``'segments'`` list whose items carry ``'start'`` and
            ``'end'`` times in seconds.
        filepaths: A single path string or an iterable of path strings.
        save_dir: Output directory; created (with parents) if missing.
        out_sr: Sample rate of the written clips.

    Returns:
        None.  The result is the set of files written to ``save_dir``.
    """
    if isinstance(filepaths, str):
        filepaths = [filepaths]

    # The output directory does not depend on the file being processed.
    save_path = Path(save_dir)
    save_path.mkdir(exist_ok=True, parents=True)
    int16_max = np.iinfo(np.int16).max

    for file_idx, filepath in enumerate(filepaths):
        print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
        result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
        segments = result['segments']

        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
        wav, trim_index = librosa.effects.trim(wav, top_db=20)
        if wav.size == 0:
            print(f"Skipping file {file_idx}: '{filepath}' has no audio left after trimming.")
            continue

        # The transcription ran on the untrimmed file, so its timestamps are
        # relative to the original start.  Remember how much leading audio the
        # trim removed and shift every cut point by that amount.
        trim_offset = trim_index[0] / sr

        peak = np.abs(wav).max()
        if peak > 1.0:
            wav = 0.98 * wav / peak

        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
        resampled_peak = np.abs(wav2).max()
        if resampled_peak > 0:
            # Guarded so that all-zero audio does not turn into NaN/inf.
            wav2 /= resampled_peak

        for i, seg in enumerate(segments):
            start_sample = max(0, int((seg['start'] - trim_offset) * out_sr))
            end_sample = max(0, int((seg['end'] - trim_offset) * out_sr))
            wav_seg = wav2[start_sample:end_sample]
            if wav_seg.size == 0:
                # Segment lies entirely in audio that was trimmed away.
                continue
            out_fpath = save_path / f"{file_idx}_{i}.wav"
            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))
# Model name handed to whisper.load_model below.
whisper_size = "medium"
# Loaded once when the script starts; auto_label passes this model to
# split_long_audio for transcription.
whisper_model = whisper.load_model(whisper_size)
from modelscope.tools import run_auto_label | |
from modelscope.models.audio.tts import SambertHifigan | |
from modelscope.pipelines import pipeline | |
from modelscope.utils.constant import Tasks | |
from modelscope.metainfo import Trainers | |
from modelscope.trainers import build_trainer | |
from modelscope.utils.audio.audio_utils import TtsTrainType | |
# Model id that train() passes to the trainer as the model to fine-tune.
pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
# Passed to the trainer as train_dataset; auto_label writes its labelling
# output to this same directory path.
dataset_id = "/home/user/app/output_training_data/"
# Trainer work_dir; infer() reads checkpoints and configs from this same path.
pretrain_work_dir = "/home/user/app/pretrain_work_dir/"
def auto_label(Voicetoclone, VoiceMicrophone):
    """Split the reference recording into clips and run automatic labelling.

    The microphone recording takes precedence over the uploaded file when both
    are given.  The recording is cut into clips under
    ``/home/user/app/test_wavs/`` and ``run_auto_label`` writes its output to
    ``/home/user/app/output_training_data/``.

    Args:
        Voicetoclone: Path of the uploaded audio file, or None.
        VoiceMicrophone: Path of the microphone recording, or None.

    Returns:
        A status string for the UI: "标注成功" on success, otherwise a message
        starting with "标注失败" that describes what went wrong.
    """
    import traceback

    audio = VoiceMicrophone if VoiceMicrophone is not None else Voicetoclone
    if audio is None:
        return "标注失败:请先上传或录制一段语音"

    input_wav = "/home/user/app/test_wavs/"
    output_data = "/home/user/app/output_training_data/"
    try:
        split_long_audio(whisper_model, audio, input_wav)
        run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision="v1.0.7")
    except Exception as exc:
        # Keep the UI responsive, but report the failure instead of claiming
        # success; the full traceback goes to the server log.
        traceback.print_exc()
        return f"标注失败:{exc}"
    return "标注成功"
def train(a):
    """Fine-tune the pretrained model on the auto-labelled dataset.

    Args:
        a: Unused.  It only receives the value of the component wired as the
            click input of the training button.

    Returns:
        A status string for the UI: "训练完成" on success, otherwise a message
        starting with "训练失败" that describes what went wrong.
    """
    import traceback

    try:
        train_info = {
            TtsTrainType.TRAIN_TYPE_SAMBERT: {  # settings for training the AM (sambert) model
                'train_steps': 52,  # number of training steps
                'save_interval_steps': 50,  # save a checkpoint every N steps
                'log_interval': 10,  # print a training log line every N steps
            }
        }
        # Trainer arguments: dataset, temporary work directory and train_info.
        kwargs = dict(
            model=pretrained_model_id,  # model to fine-tune
            model_revision="v1.0.6",
            work_dir=pretrain_work_dir,  # temporary work directory
            train_dataset=dataset_id,  # dataset id
            train_type=train_info,  # what to train and with which parameters
        )
        trainer = build_trainer(Trainers.speech_kantts_trainer,
                                default_args=kwargs)
        trainer.train()
    except Exception as exc:
        # Report the failure instead of claiming the training finished; the
        # full traceback goes to the server log.
        traceback.print_exc()
        return f"训练失败:{exc}"
    return "训练完成"
import random | |
def infer(text):
    """Synthesize ``text`` with the fine-tuned voice and return a WAV path.

    The acoustic-model checkpoint and config are read from ``tmp_am`` inside
    the training work directory; the vocoder comes from the ``orig_model``
    copy in the same directory.

    Args:
        text: Text to synthesize.

    Returns:
        Path of a newly created ``.wav`` file holding the synthesized audio.

    Raises:
        ValueError: If ``text`` is None or blank.
    """
    import tempfile

    if text is None or not text.strip():
        raise ValueError("请先填写想要合成的文本")

    model_dir = "/home/user/app/pretrain_work_dir/"
    custom_infer_abs = {
        'voice_name': 'F7',
        'am_ckpt': os.path.join(model_dir, 'tmp_am', 'ckpt'),
        'am_config': os.path.join(model_dir, 'tmp_am', 'config.yaml'),
        'voc_ckpt': os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),
        'voc_config': os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'config.yaml'),
        'audio_config': os.path.join(model_dir, 'data', 'audio_config.yaml'),
        'se_file': os.path.join(model_dir, 'data', 'se', 'se.npy'),
    }
    kwargs = {'custom_ckpt': custom_infer_abs}
    model_id = SambertHifigan(os.path.join(model_dir, "orig_model"), **kwargs)
    inference = pipeline(task=Tasks.text_to_speech, model=model_id)
    output = inference(input=text)

    # A uniquely named temp file replaces the random-number filename in the
    # working directory: no name collisions and no clutter next to the script.
    # delete=False because the caller needs the file after this returns.
    with tempfile.NamedTemporaryFile(suffix="myfile.wav", delete=False) as f:
        f.write(output["output_wav"])
    return f.name
from textwrap import dedent | |
# Gradio front end: three buttons drive labelling, fine-tuning and synthesis,
# followed by a usage guide and a footer.
with gr.Blocks() as app:
    # Page headings.
    gr.Markdown("# <center>🥳🎶🎡 - Sambert中文声音克隆</center>")
    gr.Markdown("## <center>🌟 - 训练3分钟,推理5秒钟,中英真实拟声 </center>")
    gr.Markdown("### <center>🌊 - 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    with gr.Row():
        # Reference voice: uploaded file or microphone recording.
        with gr.Column():
            inp1 = gr.Audio(type="filepath", source="upload", label="方案一:请从本地上传一段语音")
            inp_micro = gr.Audio(type="filepath", source="microphone", label="方案二:请用麦克风录制您的声音")
        # Status read-outs and the text to synthesize.
        with gr.Column():
            out1 = gr.Textbox(label="标注情况", lines=1, interactive=False)
            out2 = gr.Textbox(label="训练情况", lines=1, interactive=False)
            inp2 = gr.Textbox(label="请在这里填写您想合成的文本", placeholder="想说却还没说的 还很多...", lines=3)
        # Synthesized audio.
        with gr.Column():
            out3 = gr.Audio(type="filepath", label="为您合成的专属音频")

    with gr.Row():
        btn1 = gr.Button("1.标注数据")
        btn2 = gr.Button("2.开始训练")
        btn3 = gr.Button("3.一键推理", variant="primary")

    # Wire each button to its handler.
    btn1.click(fn=auto_label, inputs=[inp1, inp_micro], outputs=out1)
    btn2.click(fn=train, inputs=out1, outputs=out2)
    btn3.click(fn=infer, inputs=inp2, outputs=out3)

    with gr.Accordion("📒 使用指南", open=True):
        usage_guide = """ 如何使用此程序:
* 使用方案一或方案二,上传一分钟左右的语音后,依次点击“1.标注数据”、“2.开始训练”、“3.一键推理”即可开启声音克隆之旅
* 选择两个方案中的一个即可,程序会优先使用麦克风上传的语音;如果您需要从本地上传语音文件,请不要同时用方案二录制语音
* 您可以随时编辑想要合成的文本内容,但请不要生成会对个人以及组织造成侵害的内容
* 如果您需要用方案二录制您的声音,以下是一段长度合适的文本,供您朗读并录制:
我看到鸟儿飞到天空,它们飞得多快呀。明天它们再飞过同样的路线,也永远不是今天了。或许明天飞过这条路线的,不是老鸟,而是小鸟了。时间过得飞快,使我小心眼里不只是着急,还有悲伤。有一天我放学回家,看到太阳快落山了,就下决心说:“我要比太阳更快地回家。”我狂奔回去,站在庭院里喘气的时候,看到太阳还露着半边脸,我高兴地跳起来。那一天我跑赢了太阳。以后我常做这样的游戏,有时和太阳赛跑,有时和西北风比赛,有时一个暑假的作业,我十天就做完了。那时我三年级,常把哥哥五年级的作业拿来做。后来的二十年里,我因此受益无穷。虽然我知道人永远跑不过时间,但是可以比原来快跑几步。那几步虽然很小很小,但作用却很大很大。如果将来我有什么要教给我的孩子,我会告诉他:假若你一直和时间赛跑,你就可以成功。
"""
        gr.Markdown(dedent(usage_guide))

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
<div class="footer">
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
</p>
</div>
''')

# Start the web server; show_error=True is passed through to launch().
app.launch(show_error=True)