PhoenixStormJr committed
Commit fb17485 · verified · 1 parent: ac44c16

Update export_onnx.py

Files changed (1):
  1. export_onnx.py +54 -54
export_onnx.py CHANGED
@@ -1,54 +1,54 @@
- from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
- import torch
-
- if __name__ == "__main__":
-     MoeVS = True  # 模型是否为MoeVoiceStudio(原MoeSS)使用
-
-     ModelPath = "Shiroha/shiroha.pth"  # 模型路径
-     ExportedPath = "model.onnx"  # 输出路径
-     hidden_channels = 256  # hidden_channels,为768Vec做准备
-     cpt = torch.load(ModelPath, map_location="cpu")
-     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-     print(*cpt["config"])
-
-     test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
-     test_phone_lengths = torch.tensor([200]).long()  # hidden unit 长度(貌似没啥用)
-     test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # 基频(单位赫兹)
-     test_pitchf = torch.rand(1, 200)  # nsf基频
-     test_ds = torch.LongTensor([0])  # 说话人ID
-     test_rnd = torch.rand(1, 192, 200)  # 噪声(加入随机因子)
-
-     device = "cpu"  # 导出时设备(不影响使用模型)
-
-     net_g = SynthesizerTrnMsNSFsidM(
-         *cpt["config"], is_half=False
-     )  # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
-     net_g.load_state_dict(cpt["weight"], strict=False)
-     input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
-     output_names = [
-         "audio",
-     ]
-     # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出
-     torch.onnx.export(
-         net_g,
-         (
-             test_phone.to(device),
-             test_phone_lengths.to(device),
-             test_pitch.to(device),
-             test_pitchf.to(device),
-             test_ds.to(device),
-             test_rnd.to(device),
-         ),
-         ExportedPath,
-         dynamic_axes={
-             "phone": [1],
-             "pitch": [1],
-             "pitchf": [1],
-             "rnd": [2],
-         },
-         do_constant_folding=False,
-         opset_version=16,
-         verbose=False,
-         input_names=input_names,
-         output_names=output_names,
-     )
 
+ from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
+ import torch
+
+ if __name__ == "__main__":
+     MoeVS = True  # Whether the model will be used with MoeVoiceStudio (formerly MoeSS)
+
+     ModelPath = "Shiroha/shiroha.pth"  # Model path
+     ExportedPath = "model.onnx"  # Output path
+     hidden_channels = 256  # hidden_channels, in preparation for 768-dim Vec features
+     cpt = torch.load(ModelPath, map_location="cpu")
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     print(*cpt["config"])
+
+     test_phone = torch.rand(1, 200, hidden_channels)  # hidden units
+     test_phone_lengths = torch.tensor([200]).long()  # Hidden-unit length (appears to be unused)
+     test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # Fundamental frequency (in Hz)
+     test_pitchf = torch.rand(1, 200)  # NSF fundamental frequency
+     test_ds = torch.LongTensor([0])  # Speaker ID
+     test_rnd = torch.rand(1, 192, 200)  # Noise (adds a random factor)
+
+     device = "cpu"  # Device used for export (does not affect how the model is used)
+
+     net_g = SynthesizerTrnMsNSFsidM(
+         *cpt["config"], is_half=False
+     )  # fp32 export (supporting fp16 in C++ requires manually rearranging memory, so fp16 is not used for now)
+     net_g.load_state_dict(cpt["weight"], strict=False)
+     input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+     output_names = [
+         "audio",
+     ]
+     # net_g.construct_spkmixmap(n_speaker) multi-speaker mixed-track export
+     torch.onnx.export(
+         net_g,
+         (
+             test_phone.to(device),
+             test_phone_lengths.to(device),
+             test_pitch.to(device),
+             test_pitchf.to(device),
+             test_ds.to(device),
+             test_rnd.to(device),
+         ),
+         ExportedPath,
+         dynamic_axes={
+             "phone": [1],
+             "pitch": [1],
+             "pitchf": [1],
+             "rnd": [2],
+         },
+         do_constant_folding=False,
+         opset_version=16,
+         verbose=False,
+         input_names=input_names,
+         output_names=output_names,
+     )
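
For reference, a minimal sketch of running the exported model with onnxruntime. This is an assumption-laden illustration, not part of the commit: it presumes the export above succeeded, that onnxruntime and numpy are installed, and it simply mirrors the dummy shapes, dtypes, and 256 hidden_channels from the script (n_frames is an arbitrary illustrative length).

    import numpy as np
    import onnxruntime as ort

    # Load the graph exported by export_onnx.py (assumed path)
    sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

    n_frames = 200  # arbitrary; "phone", "pitch", "pitchf", "rnd" have dynamic time axes
    feed = {
        "phone": np.random.rand(1, n_frames, 256).astype(np.float32),  # hidden units
        "phone_lengths": np.array([n_frames], dtype=np.int64),
        "pitch": np.random.randint(5, 255, size=(1, n_frames)).astype(np.int64),
        "pitchf": np.random.rand(1, n_frames).astype(np.float32),  # NSF f0
        "ds": np.array([0], dtype=np.int64),  # speaker ID
        "rnd": np.random.rand(1, 192, n_frames).astype(np.float32),  # noise
    }
    audio = sess.run(["audio"], feed)[0]
    print(audio.shape)

Real use would replace the random arrays with actual hidden-unit features and extracted f0; this sketch only verifies that the exported graph loads and emits an "audio" tensor.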