Spaces:
Runtime error
Runtime error
Commit
·
39b6e8b
1
Parent(s):
d6c5415
Upload 4 files
Browse files- consts.py +3 -0
- utils_base.py +56 -0
- utils_label.py +99 -0
- utils_sambert.py +141 -0
consts.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
port = 6006  # NOTE(review): presumably the port the web UI listens on; not referenced in the visible files -- confirm
|
2 |
+
base_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'  # model id passed to the trainer as `model` and joined onto the cache dir at inference
|
3 |
+
base_model_version = 'v1.0.6'  # passed to the trainer as `model_revision` for the base model above
|
utils_base.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 基础方法封装
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
import glob
|
5 |
+
|
6 |
+
# 绝对路径获取方法
|
7 |
+
# Directory containing this module; every relative path below is anchored here.
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Resolve ``relativePath`` against this module's directory.

    Returns the normalized absolute path as a string.
    """
    combined = os.path.join(curPath, relativePath)
    absolute = os.path.abspath(combined)
    return os.path.normpath(absolute)
|
13 |
+
|
14 |
+
# 数据集存放路径
|
15 |
+
datasets_dir = getAbsPath('./datasets')
# exist_ok=True replaces the check-then-create pair (which could race with
# another process) and avoids reaching ``os`` through the ``shutil.os`` alias.
os.makedirs(datasets_dir, exist_ok=True)
|
18 |
+
|
19 |
+
# 获取数据集列表 ----------------------------------------------------
|
20 |
+
def get_dataset_list():
    """Return the names of the directories directly inside ``datasets_dir``.

    Plain files are skipped; the order is whatever ``os.listdir`` yields.
    """
    dataset_names = []
    for entry in os.listdir(datasets_dir):
        if os.path.isdir(os.path.join(datasets_dir, entry)):
            dataset_names.append(entry)
    return dataset_names
|
28 |
+
|
29 |
+
# 小模型存放路径
|
30 |
+
models_dir = getAbsPath('./models')
# exist_ok=True replaces the check-then-create pair (which could race with
# another process) and avoids reaching ``os`` through the ``shutil.os`` alias.
os.makedirs(models_dir, exist_ok=True)
|
33 |
+
|
34 |
+
# 获取模型列表 ----------------------------------------------------
|
35 |
+
def get_model_list():
    """Return the names of the directories directly inside ``models_dir``.

    Plain files are skipped; the order is whatever ``os.listdir`` yields.
    """
    model_names = []
    for entry in os.listdir(models_dir):
        if os.path.isdir(os.path.join(models_dir, entry)):
            model_names.append(entry)
    return model_names
|
43 |
+
|
44 |
+
# 确保对应的空目录存在
|
45 |
+
def ensure_empty_dir(dirpath):
    """Make ``dirpath`` an existing, empty directory.

    Whatever is already at ``dirpath`` is deleted first: a real directory is
    removed recursively, a regular file or symlink is unlinked. Missing
    parent directories are created.
    """
    if os.path.isdir(dirpath) and not os.path.islink(dirpath):
        shutil.rmtree(dirpath)
    elif os.path.lexists(dirpath):
        # rmtree refuses files and symlinks, so these are unlinked instead of
        # letting the call crash.
        os.remove(dirpath)
    os.makedirs(dirpath)
|
49 |
+
|
50 |
+
# 获取目录中的最后一个文件
|
51 |
+
def get_last_file(dirpath):
    """Return the path of the entry in ``dirpath`` whose name sorts last.

    Names are compared in natural order: runs of digits compare as numbers,
    so ``checkpoint_100.pth`` ranks after ``checkpoint_80.pth``. A plain
    string sort ranks it before, which selected a stale checkpoint once step
    counts reached a new digit length.

    Returns ``False`` when nothing in ``dirpath`` matches ``*`` (kept for
    existing callers).
    """
    import re  # local import: this module does not import ``re`` at top level

    def natural_key(path):
        name = os.path.basename(path)
        # re.split with a capturing group alternates text and digit chunks,
        # so odd positions are always digit runs and compare type-safely.
        chunks = re.split(r'(\d+)', name)
        parsed = [int(c) if i % 2 else c for i, c in enumerate(chunks)]
        return parsed, name  # the raw name breaks ties such as "a1" vs "a01"

    files = glob.glob(os.path.join(dirpath, '*'))
    if not files:
        return False
    return max(files, key=natural_key)
|
utils_label.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 对 sambert 训练的数据标注处理
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
import uuid
|
5 |
+
import librosa
|
6 |
+
import gradio as gr
|
7 |
+
from scipy.io import wavfile
|
8 |
+
import numpy as np
|
9 |
+
import whisper
|
10 |
+
from modelscope.tools import run_auto_label
|
11 |
+
from utils_base import ensure_empty_dir, datasets_dir, get_dataset_list
|
12 |
+
|
13 |
+
# 绝对路径获取方法
|
14 |
+
# Directory containing this module; relative paths are resolved against it.
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Return ``relativePath`` as a normalized absolute path.

    The path is interpreted relative to this module's directory.
    """
    resolved = os.path.abspath(os.path.join(curPath, relativePath))
    return os.path.normpath(resolved)
|
20 |
+
|
21 |
+
# 初始化 whisper 模型的加载
|
22 |
+
# Load the Whisper model once at import time. A checkpoint file at
# ``model_path`` is used when present; otherwise the model is requested by
# name ('medium') and whisper resolves it itself.
model_path = getAbsPath('../../models/whisper/medium.pt')
whisper_model = whisper.load_model(
    model_path if os.path.exists(model_path) else 'medium'
)
|
28 |
+
|
29 |
+
# whisper 音频分割方法 ----------------------------------------------
|
30 |
+
def split_long_audio(model, filepaths, save_path, out_sr=44100):
    """Cut audio files into one WAV clip per transcription segment.

    Args:
        model: object whose ``transcribe`` returns a dict with a ``segments``
            list; each segment carries ``start`` and ``end`` in seconds.
        filepaths: a single audio path or a list of paths.
        save_path: directory that receives the clips, named
            ``{file_idx}_{segment_idx}.wav``.
        out_sr: sample rate of the written clips.
    """
    # Accept both a single path and a list of paths.
    if isinstance(filepaths, str):
        filepaths = [filepaths]

    int16_max = np.iinfo(np.int16).max

    for file_idx, filepath in enumerate(filepaths):
        print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
        result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
        segments = result['segments']

        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
        wav, trim_index = librosa.effects.trim(wav, top_db=20)
        if wav.size == 0:
            # Nothing but silence: there is no audio to cut.
            continue

        # The segment timestamps refer to the untrimmed file, while the
        # samples below come from the trimmed signal. Remember how much
        # leading audio the trim removed so the two can be re-aligned.
        trim_offset = trim_index[0] / sr

        peak = np.abs(wav).max()
        if peak > 1.0:
            wav = 0.98 * wav / peak
        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
        scale = np.abs(wav2).max()
        if scale > 0:  # an all-zero signal would otherwise divide by zero
            wav2 /= scale

        for i, seg in enumerate(segments):
            start = max(int((seg['start'] - trim_offset) * out_sr), 0)
            end = max(int((seg['end'] - trim_offset) * out_sr), 0)
            wav_seg = wav2[start:end]
            if wav_seg.size == 0:
                # The segment lies entirely in trimmed-away audio.
                continue
            wav_seg_name = f"{file_idx}_{i}.wav"
            out_fpath = os.path.join(save_path, wav_seg_name)
            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))
|
58 |
+
|
59 |
+
# 自动标注与标注后的文件打包 --------------------------------------------
|
60 |
+
def auto_label(audio, name):
    """Split ``audio`` into clips, auto-label them and store dataset ``name``.

    Args:
        audio: audio path (or list of paths) handed to ``split_long_audio``.
        name: dataset name; must be a direct child of ``datasets_dir``.

    Returns:
        A ``(status message, gr.update)`` pair; the update refreshes the
        dataset choices. The message is empty when an input is missing.
    """
    if not audio or not name:
        return '', gr.update(choices=get_dataset_list())

    # The target directory is wiped before use, so refuse names such as
    # "..", "a/b" or absolute paths that would resolve outside datasets_dir.
    datasets_root = os.path.abspath(datasets_dir)
    work_dir = os.path.abspath(os.path.join(datasets_root, name))
    if os.path.dirname(work_dir) != datasets_root:
        return '数据集名称不合法', gr.update(choices=get_dataset_list())

    # Temporary directory holding the split clips.
    input_wav = getAbsPath(f'./temp/input-{ uuid.uuid4() }')
    ensure_empty_dir(input_wav)
    try:
        ensure_empty_dir(work_dir)

        # Split the audio into clips.
        split_long_audio(whisper_model, audio, input_wav)

        # Label the clips. Per the original comment, the first run downloads
        # the required model automatically.
        run_auto_label(
            input_wav=input_wav,
            work_dir=work_dir,
            resource_revision='v1.0.7'
        )
    finally:
        # Remove the temporary clips even when splitting or labelling fails.
        shutil.rmtree(input_wav, ignore_errors=True)

    return '打标成功', gr.update(choices=get_dataset_list())
|
87 |
+
|
88 |
+
# 删除数据集 ----------------------------------------------------
|
89 |
+
# name - 删除的数据集名称
|
90 |
+
def delete_dataset(name):
    """Delete dataset ``name`` and return an update for the dataset selector.

    Args:
        name: name of a directory directly inside ``datasets_dir``.

    Returns:
        ``gr.update`` with the refreshed dataset list; the selected value is
        cleared unless ``name`` was empty.
    """
    if not name:
        return gr.update(choices=get_dataset_list())

    try:
        datasets_root = os.path.abspath(datasets_dir)
        target_dir = os.path.abspath(os.path.join(datasets_root, name))
        # Only direct children may be removed; "..", "." or an absolute path
        # would otherwise delete something outside the datasets directory.
        if os.path.dirname(target_dir) == datasets_root:
            shutil.rmtree(target_dir)
        else:
            print(f'delete_dataset: refusing to delete {name!r}')
    except Exception as err:
        # Deletion stays best-effort, but the failure is no longer silent.
        print(f'delete_dataset: failed to delete {name!r}: {err!r}')
    return gr.update(choices=get_dataset_list(), value=None)
|
utils_sambert.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 训练部分实现
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
import uuid
|
5 |
+
import gradio as gr
|
6 |
+
from modelscope.models.audio.tts import SambertHifigan
|
7 |
+
from modelscope.pipelines import pipeline
|
8 |
+
from modelscope.utils.constant import Tasks
|
9 |
+
|
10 |
+
from modelscope.metainfo import Trainers
|
11 |
+
from modelscope.trainers import build_trainer
|
12 |
+
from modelscope.utils.audio.audio_utils import TtsTrainType
|
13 |
+
from modelscope.hub.utils.utils import get_cache_dir
|
14 |
+
|
15 |
+
from utils_base import ensure_empty_dir, get_last_file, models_dir, get_model_list
|
16 |
+
import consts
|
17 |
+
|
18 |
+
# 绝对路径获取方法
|
19 |
+
# Directory containing this module; used as the base for relative paths.
curPath = os.path.dirname(os.path.abspath(__file__))


def getAbsPath(relativePath):
    """Turn a path relative to this module's directory into an absolute one.

    The result is normalized (no ``..`` or redundant separators).
    """
    joined = os.path.join(curPath, relativePath)
    return os.path.normpath(os.path.abspath(joined))
|
25 |
+
|
26 |
+
# 模型训练 ---------------------------------------------------------
|
27 |
+
# name - 训练结果(小模型)命名
|
28 |
+
# steps - 训练步数
|
29 |
+
# train_dataset_zip - 数据集zip包路径
|
30 |
+
def train(name, steps, train_dataset_name):
    """Fine-tune the base model on a dataset and store the result as ``name``.

    Args:
        name: name of the resulting model directory inside ``models_dir``.
        steps: number of training steps; the trainer gets ``steps + 1``.
        train_dataset_name: name of the dataset directory under
            ``./datasets`` next to this module.

    Returns:
        A ``(status message, gr.update)`` pair; the update refreshes the
        model choices.

    Raises:
        FileNotFoundError: if training produced no checkpoint file.

    Note:
        After a successful run the dataset directory is deleted, as in the
        original implementation.
    """
    if not name or not train_dataset_name:
        return '模型名称或数据集名称不合法', gr.update(choices=get_model_list())

    # Both directories get removed further down, so each name must be a
    # direct child of its root ("..", "a/b" and absolute paths are refused).
    models_root = os.path.abspath(models_dir)
    target_dir = os.path.abspath(os.path.join(models_root, name))
    datasets_root = getAbsPath('./datasets')
    train_dataset = os.path.abspath(os.path.join(datasets_root, train_dataset_name))
    if (os.path.dirname(target_dir) != models_root
            or os.path.dirname(train_dataset) != datasets_root):
        return '模型名称或数据集名称不合法', gr.update(choices=get_model_list())

    # Temporary directory receiving the raw training output.
    work_dir = getAbsPath(f'./temp/work-{ uuid.uuid4() }')
    ensure_empty_dir(work_dir)
    try:
        trainer = build_trainer(
            Trainers.speech_kantts_trainer,
            default_args=dict(
                # Base model and revision to fine-tune.
                model=consts.base_model_id,
                model_revision=consts.base_model_version,

                work_dir=work_dir,
                train_dataset=train_dataset,

                train_type={
                    # Train the acoustic model (sambert).
                    TtsTrainType.TRAIN_TYPE_SAMBERT: {
                        # int() because UI widgets may deliver a float.
                        'train_steps': int(steps) + 1,
                        'save_interval_steps': 20,  # checkpoint every N steps
                        'log_interval': 10  # log every N steps
                    }
                }
            )
        )
        trainer.train()

        # Look up the checkpoint before touching target_dir so an existing
        # model is not wiped when training produced nothing.
        last_ckpt = get_last_file(os.path.join(work_dir, 'tmp_am', 'ckpt'))
        if not last_ckpt:
            raise FileNotFoundError(
                f'no checkpoint found in {os.path.join(work_dir, "tmp_am", "ckpt")}'
            )

        # Copy only the files needed for inference into the result directory.
        ensure_empty_dir(target_dir)
        os.makedirs(os.path.join(target_dir, 'tmp_am', 'ckpt'))
        os.makedirs(os.path.join(target_dir, 'data', 'se'))

        shutil.copy(
            last_ckpt,
            os.path.join(target_dir, 'tmp_am', 'ckpt')
        )
        shutil.copy(
            os.path.join(work_dir, 'tmp_am', 'config.yaml'),
            os.path.join(target_dir, 'tmp_am'),
        )
        shutil.copy(
            os.path.join(work_dir, 'data', 'audio_config.yaml'),
            os.path.join(target_dir, 'data'),
        )
        shutil.copy(
            os.path.join(work_dir, 'data', 'se', 'se.npy'),
            os.path.join(target_dir, 'data', 'se'),
        )
    finally:
        # Remove the temporary output even when training or copying fails.
        shutil.rmtree(work_dir, ignore_errors=True)

    # The dataset is consumed by a successful training run.
    shutil.rmtree(train_dataset)

    return '训练完成', gr.update(choices=get_model_list())
|
90 |
+
|
91 |
+
# 模型推理 ---------------------------------------------------------
|
92 |
+
# name - 使用的小模型名称
|
93 |
+
# txt - 需要合成音频的文字
|
94 |
+
def infer(name, txt):
    """Synthesize ``txt`` with the fine-tuned model ``name``.

    Args:
        name: name of a model directory inside ``models_dir``.
        txt: text to synthesize.

    Returns:
        Path of the written WAV file, or ``False`` when anything fails
        (the failure is printed).
    """
    import tempfile  # local import: not imported at module top level

    try:
        base_model_path = os.path.join(get_cache_dir(), consts.base_model_id)
        model_path = os.path.join(models_dir, name)
        custom_infer_abs = {
            'voice_name': 'F7',

            # Files from the fine-tuned model.
            'am_ckpt': os.path.join(model_path, 'tmp_am', 'ckpt'),
            'am_config': os.path.join(model_path, 'tmp_am', 'config.yaml'),
            'audio_config': os.path.join(model_path, 'data', 'audio_config.yaml'),
            'se_file': os.path.join(model_path, 'data', 'se', 'se.npy'),

            # Files from the base model.
            'voc_ckpt': os.path.join(
                base_model_path, 'basemodel_16k', 'hifigan', 'ckpt'
            ),
            'voc_config': os.path.join(
                base_model_path, 'basemodel_16k', 'hifigan', 'config.yaml'
            )
        }

        model = SambertHifigan(
            base_model_path,
            **{'custom_ckpt': custom_infer_abs}
        )
        inference = pipeline(task=Tasks.text_to_speech, model=model)
        output = inference(input=txt)

        # Use the platform temp directory instead of a hard-coded /tmp.
        # delete=False keeps the file so the caller can read it afterwards.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            f.write(output['output_wav'])
            output_path = f.name
        return output_path
    except Exception as err:
        # Callers rely on the False result; report the cause rather than
        # dropping it silently.
        print(f'infer: synthesis failed for model {name!r}: {err!r}')
        return False
|
129 |
+
|
130 |
+
# 删除模型 ---------------------------------------------------------
|
131 |
+
# name - 删除的小模型名称
|
132 |
+
def delete_model(name):
    """Delete model ``name`` and return an update for the model selector.

    Args:
        name: name of a directory directly inside ``models_dir``.

    Returns:
        ``gr.update`` with the refreshed model list; the selected value is
        cleared unless ``name`` was empty.
    """
    if not name:
        return gr.update(choices=get_model_list())

    try:
        models_root = os.path.abspath(models_dir)
        target_dir = os.path.abspath(os.path.join(models_root, name))
        # Only direct children may be removed; "..", "." or an absolute path
        # would otherwise delete something outside the models directory.
        if os.path.dirname(target_dir) == models_root:
            shutil.rmtree(target_dir)
        else:
            print(f'delete_model: refusing to delete {name!r}')
    except Exception as err:
        # Deletion stays best-effort, but the failure is no longer silent.
        print(f'delete_model: failed to delete {name!r}: {err!r}')
    return gr.update(choices=get_model_list(), value=None)
|