File size: 4,792 Bytes
c6070db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import IPython.display as ipd\n",
"\n",
"import os\n",
"import sys\n",
"import json\n",
"import math\n",
"import torch\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"from torch.utils.data import DataLoader\n",
"\n",
"# The project modules (commons, utils, data_utils, models, text) are expected one\n",
"# directory above this notebook. A path such as `../commons` is not valid in an\n",
"# import statement, so the parent directory is put on sys.path instead and the\n",
"# modules are imported by their plain names.\n",
"# NOTE(review): assumes the kernel's working directory is the notebook's own directory - confirm.\n",
"PROJECT_ROOT = os.path.abspath(\"..\")\n",
"if PROJECT_ROOT not in sys.path:\n",
"    sys.path.insert(0, PROJECT_ROOT)\n",
"\n",
"import commons\n",
"import utils\n",
"from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n",
"from models import SynthesizerTrn\n",
"from text.symbols import symbols\n",
"from text import text_to_sequence\n",
"\n",
"from scipy.io.wavfile import write\n",
"\n",
"\n",
"def get_text(text, hps):\n",
"    \"\"\"Turn a text string into a LongTensor of symbol IDs.\n",
"\n",
"    Args:\n",
"        text: The text to synthesize.\n",
"        hps: Hyper-parameter object; `hps.data.text_cleaners` and\n",
"            `hps.data.add_blank` are read.\n",
"\n",
"    Returns:\n",
"        A `torch.LongTensor` built from the output of `text_to_sequence`.\n",
"    \"\"\"\n",
"    text_norm = text_to_sequence(text, hps.data.text_cleaners)\n",
"    if hps.data.add_blank:\n",
"        # Presumably inserts the blank ID 0 between symbols - see commons.intersperse.\n",
"        text_norm = commons.intersperse(text_norm, 0)\n",
"    return torch.LongTensor(text_norm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Single Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Single-speaker setup: build the synthesizer from the config and load its weights.\n",
"# Change the two quoted paths below to your own config file and model checkpoint.\n",
"hps = utils.get_hparams_from_file(\"configs/XXX.json\")\n",
"\n",
"net_g = SynthesizerTrn(\n",
"    len(symbols),\n",
"    hps.data.filter_length // 2 + 1,\n",
"    hps.train.segment_size // hps.data.hop_length,\n",
"    **hps.model\n",
")\n",
"# Move the model to the GPU and switch it to evaluation mode.\n",
"net_g = net_g.cuda().eval()\n",
"\n",
"# Assigning to `_` keeps the return value out of the cell output.\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Synthesize a test phrase with the single-speaker model, export a traced copy of\n",
"# the network to disk, and play the generated audio.\n",
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
"    x_tst = stn_tst.cuda().unsqueeze(0)\n",
"    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
"    # No speaker ID is passed here: `sid` is only created by the multi-speaker\n",
"    # cell, so referencing it in this cell fails with NameError on a fresh kernel.\n",
"    # NOTE(review): assumes net_g's forward accepts (x, x_lengths) without a speaker ID - confirm against models.py.\n",
"    traced_mod = torch.jit.trace(net_g, (x_tst, x_tst_lengths))\n",
"    torch.jit.save(traced_mod, \"OUTPUTLIBTORCHMODEL.pt\")\n",
"    audio = net_g.infer(\n",
"        x_tst,\n",
"        x_tst_lengths,\n",
"        noise_scale=.667,\n",
"        noise_scale_w=0.8,\n",
"        length_scale=1,\n",
"    )[0][0, 0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#############################################################\n",
"# #\n",
"# Multiple Speakers #\n",
"# #\n",
"#############################################################"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Multi-speaker setup: same as the single-speaker case, plus the speaker count\n",
"# read from the config.\n",
"# Change the two quoted paths below to your own config file and model checkpoint.\n",
"hps = utils.get_hparams_from_file(\"./configs/XXX.json\")\n",
"\n",
"net_g = SynthesizerTrn(\n",
"    len(symbols),\n",
"    hps.data.filter_length // 2 + 1,\n",
"    hps.train.segment_size // hps.data.hop_length,\n",
"    n_speakers=hps.data.n_speakers,\n",
"    **hps.model\n",
")\n",
"# Move the model to the GPU and switch it to evaluation mode.\n",
"net_g = net_g.cuda().eval()\n",
"\n",
"# Assigning to `_` keeps the return value out of the cell output.\n",
"_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Synthesize a test phrase with the multi-speaker model, export a traced copy of\n",
"# the network to disk, and play the generated audio.\n",
"stn_tst = get_text(\"こんにちは\", hps)\n",
"with torch.no_grad():\n",
"    x_tst = stn_tst.cuda().unsqueeze(0)\n",
"    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
"    # Speaker index 4 is hard-coded; change it to pick a different speaker.\n",
"    sid = torch.LongTensor([4]).cuda()\n",
"\n",
"    # Trace the network with the example inputs and write the traced module to disk.\n",
"    traced_mod = torch.jit.trace(net_g, (x_tst, x_tst_lengths, sid))\n",
"    torch.jit.save(traced_mod, \"OUTPUTLIBTORCHMODEL.pt\")\n",
"\n",
"    # Element [0] of infer's result, indexed at [0, 0], is converted to a float NumPy array.\n",
"    audio = net_g.infer(\n",
"        x_tst,\n",
"        x_tst_lengths,\n",
"        sid=sid,\n",
"        noise_scale=.667,\n",
"        noise_scale_w=0.8,\n",
"        length_scale=1,\n",
"    )[0][0, 0].data.cpu().float().numpy()\n",
"ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
|