File size: 4,792 Bytes
c6070db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import IPython.display as ipd\n",
    "\n",
    "import os\n",
    "import json\n",
    "import math\n",
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import functional as F\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "import ../commons\n",
    "import ../utils\n",
    "from ../data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n",
    "from ../models import SynthesizerTrn\n",
    "from ../text.symbols import symbols\n",
    "from ../text import text_to_sequence\n",
    "\n",
    "from scipy.io.wavfile import write\n",
    "\n",
    "\n",
    "def get_text(text, hps):\n",
    "    text_norm = text_to_sequence(text, hps.data.text_cleaners)\n",
    "    if hps.data.add_blank:\n",
    "        text_norm = commons.intersperse(text_norm, 0)\n",
    "    text_norm = torch.LongTensor(text_norm)\n",
    "    return text_norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#############################################################\n",
    "#                                                           #\n",
    "#                      Single Speakers                      #\n",
    "#                                                           #\n",
    "#############################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hps = utils.get_hparams_from_file(\"configs/XXX.json\")  #将\"\"内的内容修改为你的模型路径与config路径\n",
    "net_g = SynthesizerTrn(\n",
    "    len(symbols),\n",
    "    hps.data.filter_length // 2 + 1,\n",
    "    hps.train.segment_size // hps.data.hop_length,\n",
    "    **hps.model).cuda()\n",
    "_ = net_g.eval()\n",
    "\n",
    "_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stn_tst = get_text(\"こんにちは\", hps)\n",
    "with torch.no_grad():\n",
    "    x_tst = stn_tst.cuda().unsqueeze(0)\n",
    "    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
    "    traced_mod = torch.jit.trace(net_g,(x_tst, x_tst_lengths,sid))\n",
    "    torch.jit.save(traced_mod,\"OUTPUTLIBTORCHMODEL.pt\")\n",
    "    audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
    "ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#############################################################\n",
    "#                                                           #\n",
    "#                     Multiple Speakers                     #\n",
    "#                                                           #\n",
    "#############################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hps = utils.get_hparams_from_file(\"./configs/XXX.json\")  #将\"\"内的内容修改为你的模型路径与config路径\n",
    "net_g = SynthesizerTrn(\n",
    "    len(symbols),\n",
    "    hps.data.filter_length // 2 + 1,\n",
    "    hps.train.segment_size // hps.data.hop_length,\n",
    "    n_speakers=hps.data.n_speakers,\n",
    "    **hps.model).cuda()\n",
    "_ = net_g.eval()\n",
    "\n",
    "_ = utils.load_checkpoint(\"/path/to/model.pth\", net_g, None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stn_tst = get_text(\"こんにちは\", hps)\n",
    "with torch.no_grad():\n",
    "    x_tst = stn_tst.cuda().unsqueeze(0)\n",
    "    x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()\n",
    "    sid = torch.LongTensor([4]).cuda()\n",
    "    traced_mod = torch.jit.trace(net_g,(x_tst, x_tst_lengths,sid))\n",
    "    torch.jit.save(traced_mod,\"OUTPUTLIBTORCHMODEL.pt\")\n",
    "    audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()\n",
    "ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}