kevinwang676 committed
Commit 39b6e8b · 1 Parent(s): d6c5415

Upload 4 files

Files changed (4)
  1. consts.py +3 -0
  2. utils_base.py +56 -0
  3. utils_label.py +99 -0
  4. utils_sambert.py +141 -0
consts.py ADDED
@@ -0,0 +1,3 @@
+ port = 6006
+ base_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'
+ base_model_version = 'v1.0.6'
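
For context, `port` is presumably consumed by the Gradio entry point that launches the web UI; that file is not part of this upload, so the following is only a minimal launch sketch under that assumption.

    # Hypothetical app entry point -- not included in this commit
    import gradio as gr
    import consts

    with gr.Blocks() as demo:
        ...  # UI wired up from the helpers in utils_label.py / utils_sambert.py

    demo.launch(server_port=consts.port)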
utils_base.py ADDED
@@ -0,0 +1,56 @@
+ # Basic helper functions
+ import os
+ import shutil
+ import glob
+
+ # Resolve a path relative to this file into an absolute path
+ curPath = os.path.dirname(os.path.abspath(__file__))
+ def getAbsPath(relativePath):
+     joinPath = os.path.join(curPath, relativePath)
+     return os.path.normpath(
+         os.path.abspath(joinPath)
+     )
+
+ # Directory where datasets are stored
+ datasets_dir = getAbsPath('./datasets')
+ if not os.path.exists(datasets_dir):
+     os.makedirs(datasets_dir)
+
+ # List the available datasets ----------------------------------------------------
+ def get_dataset_list():
+     contents = os.listdir(datasets_dir)
+     sub_dirs = [
+         content
+         for content in contents
+         if os.path.isdir(os.path.join(datasets_dir, content))
+     ]
+     return sub_dirs
+
+ # Directory where the small (finetuned) models are stored
+ models_dir = getAbsPath('./models')
+ if not os.path.exists(models_dir):
+     os.makedirs(models_dir)
+
+ # List the available models ----------------------------------------------------
+ def get_model_list():
+     contents = os.listdir(models_dir)
+     sub_dirs = [
+         content
+         for content in contents
+         if os.path.isdir(os.path.join(models_dir, content))
+     ]
+     return sub_dirs
+
+ # Ensure the given directory exists and is empty
+ def ensure_empty_dir(dirpath):
+     if os.path.exists(dirpath):
+         shutil.rmtree(dirpath)
+     os.makedirs(dirpath)
+
+ # Get the last file (by name) in a directory
+ def get_last_file(dirpath):
+     files = glob.glob(os.path.join(dirpath, '*'))
+     sorted_files = sorted(files, key=os.path.basename)
+     if sorted_files:
+         return sorted_files[-1]
+     return False
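
A quick, hypothetical interactive check of these helpers (the directory names below are only illustrative):

    # Minimal sketch: exercising utils_base from a Python shell
    from utils_base import getAbsPath, ensure_empty_dir, get_dataset_list, get_last_file

    tmp_dir = getAbsPath('./temp')      # absolute path next to utils_base.py
    ensure_empty_dir(tmp_dir)           # (re)create ./temp as an empty directory
    print(get_dataset_list())           # dataset subdirectory names, [] when none exist
    print(get_last_file(tmp_dir))       # False, since the directory is empty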
utils_label.py ADDED
@@ -0,0 +1,99 @@
+ # Data labeling for SAMBERT training
+ import os
+ import shutil
+ import uuid
+ import librosa
+ import gradio as gr
+ from scipy.io import wavfile
+ import numpy as np
+ import whisper
+ from modelscope.tools import run_auto_label
+ from utils_base import ensure_empty_dir, datasets_dir, get_dataset_list
+
+ # Resolve a path relative to this file into an absolute path
+ curPath = os.path.dirname(os.path.abspath(__file__))
+ def getAbsPath(relativePath):
+     joinPath = os.path.join(curPath, relativePath)
+     return os.path.normpath(
+         os.path.abspath(joinPath)
+     )
+
+ # Load the whisper model at startup
+ model_path = getAbsPath('../../models/whisper/medium.pt')
+ whisper_model = None
+ if os.path.exists(model_path):
+     whisper_model = whisper.load_model(model_path)
+ else:
+     whisper_model = whisper.load_model('medium')
+
+ # Split long audio into segments with whisper ----------------------------------------------
+ def split_long_audio(model, filepaths, save_path, out_sr=44100):
+     # Normalize the input (accept a single path or a list of paths)
+     if isinstance(filepaths, str):
+         filepaths = [filepaths]
+
+     # Split each audio file and write the segments to the temporary directory
+     for file_idx, filepath in enumerate(filepaths):
+         print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
+         result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
+         segments = result['segments']
+
+         # Use librosa together with scipy to cut the audio data
+         wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
+         wav, _ = librosa.effects.trim(wav, top_db=20)
+         peak = np.abs(wav).max()
+         if peak > 1.0:
+             wav = 0.98 * wav / peak
+         wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
+         wav2 /= max(wav2.max(), -wav2.min())
+
+         # Write each whisper segment of the long recording as a short clip in the target directory
+         for i, seg in enumerate(segments):
+             start_time = seg['start']
+             end_time = seg['end']
+             wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)]
+             wav_seg_name = f"{file_idx}_{i}.wav"
+             out_fpath = os.path.join(save_path, wav_seg_name)
+             wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16))
+
+ # Auto labeling and packaging of the labeled files --------------------------------------------
+ def auto_label(audio, name):
+     if not audio or not name:
+         return '', gr.update(choices=get_dataset_list())
+
+     # Create temporary directories for the split audio and the labeling output
+     input_wav = getAbsPath(f'./temp/input-{ uuid.uuid4() }')
+     ensure_empty_dir(input_wav)
+
+     work_dir = os.path.join(datasets_dir, name)
+     ensure_empty_dir(work_dir)
+
+     # Split the audio
+     split_long_audio(whisper_model, audio, input_wav)
+
+     # Automatically label the audio
+     # (the required model is downloaded on the first run)
+     run_auto_label(
+         input_wav=input_wav,
+         work_dir=work_dir,
+         resource_revision='v1.0.7'
+     )
+
+     # Remove the temporary directory
+     shutil.rmtree(input_wav)
+
+     # Return the result
+     return 'Labeling succeeded', gr.update(choices=get_dataset_list())
+
+ # Delete a dataset ----------------------------------------------------
+ # name - name of the dataset to delete
+ def delete_dataset(name):
+     try:
+         if not name:
+             return gr.update(choices=get_dataset_list())
+
+         target_dir = os.path.join(datasets_dir, name)
+         shutil.rmtree(target_dir)
+         return gr.update(choices=get_dataset_list(), value=None)
+     except Exception:
+         return gr.update(choices=get_dataset_list(), value=None)
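
The Gradio app that calls these functions is not included in this upload; the following is only a minimal sketch of how `auto_label` and `delete_dataset` could be wired into a Blocks UI, with component names of my own choosing.

    # Hypothetical UI wiring for the labeling helpers
    import gradio as gr
    from utils_base import get_dataset_list
    from utils_label import auto_label, delete_dataset

    with gr.Blocks() as demo:
        audio_in = gr.Audio(type='filepath', label='Recording')
        dataset_name = gr.Textbox(label='Dataset name')
        status = gr.Textbox(label='Status')
        datasets = gr.Dropdown(choices=get_dataset_list(), label='Datasets')

        # auto_label returns (status message, dropdown update)
        gr.Button('Label').click(auto_label, inputs=[audio_in, dataset_name], outputs=[status, datasets])
        gr.Button('Delete dataset').click(delete_dataset, inputs=datasets, outputs=datasets)

    demo.launch()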
utils_sambert.py ADDED
@@ -0,0 +1,141 @@
+ # Training and inference implementation
+ import os
+ import shutil
+ import uuid
+ import gradio as gr
+ from modelscope.models.audio.tts import SambertHifigan
+ from modelscope.pipelines import pipeline
+ from modelscope.utils.constant import Tasks
+
+ from modelscope.metainfo import Trainers
+ from modelscope.trainers import build_trainer
+ from modelscope.utils.audio.audio_utils import TtsTrainType
+ from modelscope.hub.utils.utils import get_cache_dir
+
+ from utils_base import ensure_empty_dir, get_last_file, models_dir, get_model_list
+ import consts
+
+ # Resolve a path relative to this file into an absolute path
+ curPath = os.path.dirname(os.path.abspath(__file__))
+ def getAbsPath(relativePath):
+     joinPath = os.path.join(curPath, relativePath)
+     return os.path.normpath(
+         os.path.abspath(joinPath)
+     )
+
+ # Model training ---------------------------------------------------------
+ # name - name for the training result (the small model)
+ # steps - number of training steps
+ # train_dataset_name - name of the dataset directory to train on
+ def train(name, steps, train_dataset_name):
+     # Create a temporary directory for the training output
+     work_dir = getAbsPath(f'./temp/work-{ uuid.uuid4() }')
+     ensure_empty_dir(work_dir)
+
+     # Dataset directory
+     train_dataset = getAbsPath(f'./datasets/{ train_dataset_name }')
+
+     # Run the training
+     trainer = build_trainer(
+         Trainers.speech_kantts_trainer,
+         default_args=dict(
+             # Base model / revision to finetune
+             model = consts.base_model_id,
+             model_revision = consts.base_model_version,
+
+             work_dir = work_dir,           # temporary working directory
+             train_dataset = train_dataset, # dataset directory
+
+             # Training parameters
+             train_type = {
+                 TtsTrainType.TRAIN_TYPE_SAMBERT: {  # train the AM (SAMBERT) model
+                     'train_steps': steps + 1,       # number of steps to train
+                     'save_interval_steps': 20,      # save a checkpoint every N steps
+                     'log_interval': 10              # print a training log every N steps
+                 }
+             }
+         )
+     )
+     trainer.train()
+
+     # Copy the files we need into the result directory
+     target_dir = os.path.join(models_dir, name)
+     ensure_empty_dir(target_dir)
+     os.makedirs(os.path.join(target_dir, 'tmp_am', 'ckpt'))
+     os.makedirs(os.path.join(target_dir, 'data', 'se'))
+
+     shutil.copy(
+         get_last_file(os.path.join(work_dir, 'tmp_am', 'ckpt')),
+         os.path.join(target_dir, 'tmp_am', 'ckpt')
+     )
+     shutil.copy(
+         os.path.join(work_dir, 'tmp_am', 'config.yaml'),
+         os.path.join(target_dir, 'tmp_am'),
+     )
+     shutil.copy(
+         os.path.join(work_dir, 'data', 'audio_config.yaml'),
+         os.path.join(target_dir, 'data'),
+     )
+     shutil.copy(
+         os.path.join(work_dir, 'data', 'se', 'se.npy'),
+         os.path.join(target_dir, 'data', 'se'),
+     )
+
+     # Clean up
+     shutil.rmtree(work_dir)
+     shutil.rmtree(train_dataset)
+
+     # Return the result
+     return 'Training finished', gr.update(choices=get_model_list())
+
+ # Model inference ---------------------------------------------------------
+ # name - name of the small model to use
+ # txt - text to synthesize
+ def infer(name, txt):
+     try:
+         base_model_path = os.path.join(get_cache_dir(), consts.base_model_id)
+         model_path = os.path.join(models_dir, name)
+         custom_infer_abs = {
+             'voice_name': 'F7',
+
+             # finetuned (small) model files
+             'am_ckpt': os.path.join(model_path, 'tmp_am', 'ckpt'),
+             'am_config': os.path.join(model_path, 'tmp_am', 'config.yaml'),
+             'audio_config': os.path.join(model_path, 'data', 'audio_config.yaml'),
+             'se_file': os.path.join(model_path, 'data', 'se', 'se.npy'),
+
+             # base model files
+             'voc_ckpt': os.path.join(
+                 base_model_path, 'basemodel_16k', 'hifigan', 'ckpt'
+             ),
+             'voc_config': os.path.join(
+                 base_model_path, 'basemodel_16k', 'hifigan', 'config.yaml'
+             )
+         }
+
+         model = SambertHifigan(
+             base_model_path,
+             **{ 'custom_ckpt': custom_infer_abs }
+         )
+         inference = pipeline(task=Tasks.text_to_speech, model=model)
+         output = inference(input=txt)
+
+         output_path = f'/tmp/{ uuid.uuid4() }.wav'
+         with open(output_path, mode='bx') as f:
+             f.write(output['output_wav'])
+         return output_path
+     except Exception:
+         return False
+
+ # Delete a model ---------------------------------------------------------
+ # name - name of the small model to delete
+ def delete_model(name):
+     try:
+         if not name:
+             return gr.update(choices=get_model_list())
+
+         target_dir = os.path.join(models_dir, name)
+         shutil.rmtree(target_dir)
+         return gr.update(choices=get_model_list(), value=None)
+     except Exception:
+         return gr.update(choices=get_model_list(), value=None)
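
A hypothetical end-to-end call of the training and inference helpers; the dataset and model names are illustrative, and note that `train` deletes the dataset directory once it has finished, so the data must be re-labeled before another run.

    # Minimal sketch: finetune on a labeled dataset, then synthesize with the result
    from utils_sambert import train, infer

    status, _ = train(name='my_voice', steps=200, train_dataset_name='my_dataset')
    print(status)                                    # 'Training finished'

    wav_path = infer('my_voice', '今天天气怎么样?')    # Chinese input text, since the base model is zh-cn
    if wav_path:
        print(f'Synthesized audio written to {wav_path}')   # infer() returns False on error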