"Open

### 0. 如果使用AutoDL,请运行下载packages的加速代码:

In [None]:
!source /etc/network_turbo
!python -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt

### 1. 数据集重采样和标注

In [None]:
import subprocess
import random
import os
from pathlib import Path
import librosa
from scipy.io import wavfile
import numpy as np
import torch
import csv
import whisper

# Speaker name used as the filename prefix of every segment written by
# split_long_audio (files are named "<a>_<index>.wav").
a="linghua" # Change the speaker name here; only Chinese speech is currently supported

def split_long_audio(model, filepaths, save_dir="data_dir", out_sr=44100, speaker=None):
    """Cut long recordings into one WAV file per Whisper segment.

    Each input file is transcribed with ``model.transcribe``; the ``start`` /
    ``end`` times of the returned segments are then used to slice the
    (silence-trimmed, resampled, peak-normalised) waveform, and every slice is
    written to ``save_dir`` as 16-bit PCM named ``"<speaker>_<index>.wav"``.

    Args:
        model: Object exposing ``transcribe(path, ...)`` that returns a dict
            with a ``'segments'`` list whose items carry ``'start'`` and
            ``'end'`` in seconds (e.g. a loaded Whisper model).
        filepaths: A single path or a list of paths to audio files.
        save_dir: Output directory; created (with parents) if missing.
        out_sr: Sample rate of the written files.
        speaker: Filename prefix. When ``None`` the module-level name ``a``
            is used, which is what the function always did before.

    Returns:
        None. The result is the set of files written to ``save_dir``.
    """
    if isinstance(filepaths, str):
        filepaths = [filepaths]

    prefix = a if speaker is None else speaker

    # The output directory does not depend on the file being processed.
    save_path = Path(save_dir)
    save_path.mkdir(exist_ok=True, parents=True)

    int16_max = np.iinfo(np.int16).max
    # Running index shared by all input files, so segments of a later file do
    # not overwrite "<prefix>_0.wav", "<prefix>_1.wav", ... of an earlier one.
    seg_idx = 0

    for file_idx, filepath in enumerate(filepaths):
        print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
        result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
        segments = result['segments']

        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)
        # trim() also returns the [start, end] sample interval it kept. The
        # transcription timestamps refer to the untrimmed file, so the amount
        # of leading audio removed has to be subtracted before slicing.
        wav, trim_index = librosa.effects.trim(wav, top_db=20)
        trim_offset = trim_index[0] / sr

        if wav.size == 0:
            print(f"Skipping '{filepath}': no audio left after trimming silence.")
            seg_idx += len(segments)
            continue

        peak = np.abs(wav).max()
        if peak > 1.0:
            wav = 0.98 * wav / peak
        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)
        # Normalise to full scale; an all-zero signal is left as is instead of
        # being divided by zero.
        peak2 = np.abs(wav2).max()
        if peak2 > 0:
            wav2 = wav2 / peak2

        for seg in segments:
            first = max(int((seg['start'] - trim_offset) * out_sr), 0)
            last = min(int((seg['end'] - trim_offset) * out_sr), len(wav2))
            out_fpath = save_path / f"{prefix}_{seg_idx}.wav"
            # The index advances for every segment, written or not, so the
            # names of written files stay stable.
            seg_idx += 1
            if last <= first:
                # Nothing to write: the segment lies entirely in trimmed silence
                # or has zero length.
                print(f"Skipping empty segment '{out_fpath.name}'.")
                continue
            wav_seg = wav2[first:last]
            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))

In [None]:
# Name of the Whisper checkpoint handed to whisper.load_model below.
whisper_size = "large"
# Loaded once here; the split_long_audio call in a later cell receives this object.
whisper_model = whisper.load_model(whisper_size)

### 请将下方的**linghua.wav**修改成自己的.wav文件名,路径./custom_character_voice/**linghua**/也可以改为自己的角色名


In [None]:
# Arguments: loaded Whisper model, input recording, output directory.
# Note: the written files are prefixed with the module-level name `a`, not with
# the directory name given here, so keep the two in sync when renaming.
split_long_audio(whisper_model, "./linghua.wav", "./custom_character_voice/linghua/")

In [None]:
# Runs the project's short_audio_transcribe.py with language code "C" and the
# large Whisper model. Presumably this produces
# ./filelists/short_character_anno.list (see the note after this cell) -- confirm
# against the script.
!python short_audio_transcribe.py --languages "C" --whisper_size large

#### 处理完成后,可以打开"./filelists/short_character_anno.list"文件进行微调

### 2. 文本处理

In [None]:
# Step 2 (text processing): runs the project's preprocess_text.py with its defaults.
!python preprocess_text.py

### 3. 运行bert_gen.py

In [None]:
# Step 3: runs the project's bert_gen.py with its defaults.
!python bert_gen.py

### 4. 训练

#### 可以在"./configs/config.json"更改训练参数,包括epoch,学习率等

In [None]:
cd monotonic_align

In [None]:
# Builds the extension module in place. Must be run from inside monotonic_align/
# (the previous cell changes into that directory).
!python setup.py build_ext --inplace

In [None]:
cd ..

#### 若为首次训练,请运行:

In [None]:
# First training run: starts train_ms.py with the settings in ./configs/config.json.
!python train_ms.py -c ./configs/config.json

#### 若为继续训练,请运行:

In [None]:
# Continue training: same command as the first run plus the --cont flag.
!python train_ms.py -c ./configs/config.json --cont

### 5. 推理

#### 请将下方的**G_latest.pth**修改为最新的模型文件,如**G_3400.pth**

In [None]:
# Launches inference_webui.py with the checkpoint given by --model_dir; replace
# G_latest.pth with the newest generator checkpoint under ./logs/OUTPUT_MODEL/.
!python inference_webui.py --model_dir ./logs/OUTPUT_MODEL/G_latest.pth