{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyP+PQNqeOew0Ap+hzVH7i8r", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "source": [ "### 0. 如果使用AutoDL,请运行下载packages的加速代码:" ], "metadata": { "id": "CGg4SV4ObQaT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "MgfAJzoHbK2-" }, "outputs": [], "source": [ "!source /etc/network_turbo\n", "!python -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt" ] }, { "cell_type": "markdown", "source": [ "### 1. 数据集重采样和标注" ], "metadata": { "id": "sloMn00-bgxY" } }, { "cell_type": "code", "source": [ "import subprocess\n", "import random\n", "import os\n", "from pathlib import Path\n", "import librosa\n", "from scipy.io import wavfile\n", "import numpy as np\n", "import torch\n", "import csv\n", "import whisper\n", "\n", "a=\"linghua\" # 请在这里修改说话人的名字,目前只支持中文语音\n", "\n",
 "def split_long_audio(model, filepaths, save_dir=\"data_dir\", out_sr=44100, speaker=None):\n",
 "    \"\"\"Cut long recordings into one WAV clip per transcribed segment.\n",
 "\n",
 "    Each file is transcribed with ``model.transcribe``; the ``start``/``end``\n",
 "    times (seconds) of every returned segment are used to slice the trimmed,\n",
 "    resampled, peak-normalised audio, and each slice is written to ``save_dir``\n",
 "    as a 16-bit WAV file named ``{speaker}_{index}.wav``.\n",
 "\n",
 "    Args:\n",
 "        model: Object with a ``transcribe(path, ...)`` method whose result has a\n",
 "            ``'segments'`` list of dicts carrying ``'start'`` and ``'end'``.\n",
 "        filepaths: A single path string or a list of paths.\n",
 "        save_dir: Output directory; created if it does not exist.\n",
 "        out_sr: Sample rate of the written clips.\n",
 "        speaker: Clip name prefix; defaults to the notebook-level ``a``.\n",
 "    \"\"\"\n",
 "    if isinstance(filepaths, str):\n",
 "        filepaths = [filepaths]\n",
 "    if speaker is None:\n",
 "        speaker = a\n",
 "\n",
 "    save_path = Path(save_dir)\n",
 "    save_path.mkdir(exist_ok=True, parents=True)\n",
 "\n",
 "    # Clip numbers already taken by earlier files, so a later file never\n",
 "    # overwrites the clips of a previous one.\n",
 "    clip_base = 0\n",
 "    for file_idx, filepath in enumerate(filepaths):\n",
 "        print(f\"Transcribing file {file_idx}: '{filepath}' to segments...\")\n",
 "        result = model.transcribe(filepath, word_timestamps=True, task=\"transcribe\", beam_size=5, best_of=5)\n",
 "        segments = result['segments']\n",
 "\n",
 "        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)\n",
 "        # trim() drops leading silence, but the segment times refer to the\n",
 "        # untrimmed file; keep the number of seconds removed so the slices\n",
 "        # below can be shifted back into alignment.\n",
 "        wav, trim_index = librosa.effects.trim(wav, top_db=20)\n",
 "        trim_offset = trim_index[0] / sr\n",
 "        if wav.size == 0:\n",
 "            print(f\"Skipping '{filepath}': no audio left after trimming silence.\")\n",
 "            clip_base += len(segments)\n",
 "            continue\n",
 "\n",
 "        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)\n",
 "        peak = np.abs(wav2).max()\n",
 "        if peak > 0:  # an all-zero signal would otherwise divide by zero\n",
 "            wav2 = wav2 / peak\n",
 "\n",
 "        for i, seg in enumerate(segments):\n",
 "            # Clamp to the buffer so segments touching the trimmed edges stay in range.\n",
 "            start = min(max(int((seg['start'] - trim_offset) * out_sr), 0), len(wav2))\n",
 "            end = min(max(int((seg['end'] - trim_offset) * out_sr), 0), len(wav2))\n",
 "            if end <= start:\n",
 "                continue  # zero-length segment: nothing to write\n",
 "            wav_seg = wav2[start:end]\n",
 "            out_fpath = save_path / f\"{speaker}_{clip_base + i}.wav\"\n",
 "            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16))\n",
 "        clip_base += len(segments)"
 ], "metadata": { "id": "LtLBGhGCbYYh" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "whisper_size = \"large\"\n", "whisper_model = whisper.load_model(whisper_size)" ], "metadata": { "id": "--wS7X95b--m" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### 请将下方的**linghua.wav**修改成自己的.wav文件名,路径./custom_character_voice/**linghua**/也可以改为自己的角色名\n" ], "metadata": { "id": "0wAE5HRXcCQ_" } }, { "cell_type": "code", "source": [ "split_long_audio(whisper_model, \"./linghua.wav\", \"./custom_character_voice/linghua/\")" ], "metadata": { "id": "3f7ljJhCcEbd" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!python short_audio_transcribe.py --languages \"C\" --whisper_size large" ], "metadata": { "id": "rBJDPe3ccVrP" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### 处理完成后,可以打开\"./filelists/short_character_anno.list\"文件进行微调" ], "metadata": { "id": "4pesbcMjcikn" } }, { "cell_type": "markdown", "source": [ "### 2. 文本处理" ], "metadata": { "id": "9pxo4KL-ceGI" } }, { "cell_type": "code", "source": [ "!python preprocess_text.py" ], "metadata": { "id": "_xfO2r_0cgCT" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### 3. 运行bert_gen.py" ], "metadata": { "id": "DoDs7lL6cu01" } }, { "cell_type": "code", "source": [ "!python bert_gen.py" ], "metadata": { "id": "jyiT28B3cxWX" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### 4. 
训练" ], "metadata": { "id": "dHQPDFdbc04g" } }, { "cell_type": "markdown", "source": [ "#### 可以在\"./configs/config.json\"更改训练参数,包括epoch,学习率等" ], "metadata": { "id": "gHNws-IUc6Sd" } }, { "cell_type": "code", "source": [ "cd monotonic_align" ], "metadata": { "id": "S56s0emH8BqN" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!python setup.py build_ext --inplace" ], "metadata": { "id": "6rLsyel-8KKc" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "cd .." ], "metadata": { "id": "mUgA6ho2-XAN" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### 若为首次训练,请运行:" ], "metadata": { "id": "8vJ6VF__dCYW" } }, { "cell_type": "code", "source": [ "!python train_ms.py -c ./configs/config.json" ], "metadata": { "id": "iwHCWVijc5h6" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### 若为继续训练,请运行:" ], "metadata": { "id": "skAGULw2dKXW" } }, { "cell_type": "code", "source": [ "!python train_ms.py -c ./configs/config.json --cont" ], "metadata": { "id": "Ru09Gmavc2t4" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### 5. 推理" ], "metadata": { "id": "IinmucfadVLU" } }, { "cell_type": "markdown", "source": [ "#### 请将下方的**G_latest.pth**修改为最新的模型文件,如**G_3400.pth**" ], "metadata": { "id": "psBRLH_TdZDb" } }, { "cell_type": "code", "source": [ "!python inference_webui.py --model_dir ./logs/OUTPUT_MODEL/G_latest.pth" ], "metadata": { "id": "lOWVtUgMdUZa" }, "execution_count": null, "outputs": [] } ] }