Mahiruoshi committed on
Commit
5f275a8
·
verified ·
1 Parent(s): db74c3c

Upload 103 files

Browse files
Data/BangDream/config.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 20000,
5
+ "seed": 42,
6
+ "epochs": 1000,
7
+ "learning_rate": 0.0001,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 14,
14
+ "bf16_run": true,
15
+ "lr_decay": 0.99995,
16
+ "segment_size": 16384,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0,
21
+ "c_commit": 100,
22
+ "skip_optimizer": true,
23
+ "freeze_ZH_bert": false,
24
+ "freeze_JP_bert": false,
25
+ "freeze_EN_bert": true,
26
+ "freeze_emo": false
27
+ },
28
+ "data": {
29
+ "training_files": "Data/test/filelists/train.list",
30
+ "validation_files": "Data/test/filelists/val.list",
31
+ "max_wav_value": 32768.0,
32
+ "sampling_rate": 44100,
33
+ "filter_length": 2048,
34
+ "hop_length": 512,
35
+ "win_length": 2048,
36
+ "n_mel_channels": 128,
37
+ "mel_fmin": 0.0,
38
+ "mel_fmax": null,
39
+ "add_blank": true,
40
+ "n_speakers": 96,
41
+ "cleaned_text": true,
42
+ "spk2id": {
43
+ "紗夜": 0,
44
+ "有咲": 1,
45
+ "たえ": 2,
46
+ "りみ": 3,
47
+ "香澄": 4,
48
+ "ロック": 5,
49
+ "パレオ": 6,
50
+ "レイヤ": 7,
51
+ "千聖": 8,
52
+ "イヴ": 9,
53
+ "蘭": 10,
54
+ "巴": 11,
55
+ "ひまり": 12,
56
+ "つぐみ": 13,
57
+ "華戀": 14,
58
+ "晶": 15,
59
+ "光": 16,
60
+ "未知留": 17,
61
+ "香子": 18,
62
+ "雙葉": 19,
63
+ "真晝": 20,
64
+ "艾露": 21,
65
+ "珠緒": 22,
66
+ "艾露露": 23,
67
+ "純那": 24,
68
+ "克洛迪娜": 25,
69
+ "真矢": 26,
70
+ "奈奈": 27,
71
+ "壘": 28,
72
+ "文": 29,
73
+ "一愛": 30,
74
+ "菈樂菲": 31,
75
+ "司": 32,
76
+ "美空": 33,
77
+ "靜羽": 34,
78
+ "悠悠子": 35,
79
+ "八千代": 36,
80
+ "栞": 37,
81
+ "美帆": 38,
82
+ "安德露": 39,
83
+ "瑪莉亞貝菈": 40,
84
+ "克拉迪亞": 41,
85
+ "桃樂西": 42,
86
+ "瑪麗安": 43,
87
+ "花音": 44,
88
+ "はぐみ": 45,
89
+ "こころ": 46,
90
+ "美咲": 47,
91
+ "沙綾": 48,
92
+ "つくし": 49,
93
+ "瑠唯": 50,
94
+ "透子": 51,
95
+ "七深": 52,
96
+ "ましろ": 53,
97
+ "友希那": 54,
98
+ "リサ": 55,
99
+ "あこ": 56,
100
+ "チュチュ": 57,
101
+ "薫": 58,
102
+ "麻弥": 59,
103
+ "彩": 60,
104
+ "日菜": 61,
105
+ "愛音": 62,
106
+ "楽奈": 63,
107
+ "そよ": 64,
108
+ "立希": 65,
109
+ "燐子": 66,
110
+ "モカ": 67,
111
+ "燈": 68,
112
+ "ますき": 69,
113
+ "祥子": 70,
114
+ "睦": 71,
115
+ "海鈴": 72,
116
+ "にゃむ": 73,
117
+ "初華": 74,
118
+ "三月七1": 75,
119
+ "八重神子1": 76,
120
+ "娜塔莎": 77,
121
+ "宵宫": 78,
122
+ "派蒙11": 79,
123
+ "派蒙13": 80,
124
+ "派蒙3": 81,
125
+ "派蒙7": 82,
126
+ "派蒙8": 83,
127
+ "派蒙9": 84,
128
+ "派蒙10": 85,
129
+ "派蒙6": 86,
130
+ "派蒙4": 87,
131
+ "派蒙1": 88,
132
+ "派蒙2": 89,
133
+ "派蒙15": 90,
134
+ "派蒙16": 91,
135
+ "派蒙14": 92,
136
+ "派蒙12": 93,
137
+ "派蒙5": 94,
138
+ "纳西妲1": 95
139
+ }
140
+ },
141
+ "model": {
142
+ "use_spk_conditioned_encoder": true,
143
+ "use_noise_scaled_mas": true,
144
+ "use_mel_posterior_encoder": false,
145
+ "use_duration_discriminator": true,
146
+ "inter_channels": 192,
147
+ "hidden_channels": 192,
148
+ "filter_channels": 768,
149
+ "n_heads": 2,
150
+ "n_layers": 6,
151
+ "kernel_size": 3,
152
+ "p_dropout": 0.1,
153
+ "resblock": "1",
154
+ "resblock_kernel_sizes": [
155
+ 3,
156
+ 7,
157
+ 11
158
+ ],
159
+ "resblock_dilation_sizes": [
160
+ [
161
+ 1,
162
+ 3,
163
+ 5
164
+ ],
165
+ [
166
+ 1,
167
+ 3,
168
+ 5
169
+ ],
170
+ [
171
+ 1,
172
+ 3,
173
+ 5
174
+ ]
175
+ ],
176
+ "upsample_rates": [
177
+ 8,
178
+ 8,
179
+ 2,
180
+ 2,
181
+ 2
182
+ ],
183
+ "upsample_initial_channel": 512,
184
+ "upsample_kernel_sizes": [
185
+ 16,
186
+ 16,
187
+ 8,
188
+ 2,
189
+ 2
190
+ ],
191
+ "n_layers_q": 3,
192
+ "use_spectral_norm": false,
193
+ "gin_channels": 512,
194
+ "slm": {
195
+ "model": "./slm/wavlm-base-plus",
196
+ "sr": 16000,
197
+ "hidden": 768,
198
+ "nlayers": 13,
199
+ "initial_channel": 64
200
+ }
201
+ },
202
+ "version": "2.3"
203
+ }
Data/BangDream/models/G_60000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcf80ed1889191cb6fe1af0b61643ad9ec1c8648a8c39c7575b351d98b09554
3
+ size 727387388
configs/config.json CHANGED
@@ -10,18 +10,20 @@
10
  0.99
11
  ],
12
  "eps": 1e-09,
13
- "batch_size": 12,
14
- "fp16_run": false,
15
  "lr_decay": 0.99995,
16
  "segment_size": 16384,
17
  "init_lr_ratio": 1,
18
  "warmup_epochs": 0,
19
  "c_mel": 45,
20
  "c_kl": 1.0,
 
21
  "skip_optimizer": true,
22
  "freeze_ZH_bert": false,
23
  "freeze_JP_bert": false,
24
- "freeze_EN_bert": false
 
25
  },
26
  "data": {
27
  "training_files": "filelists/train.list",
@@ -35,7 +37,7 @@
35
  "mel_fmin": 0.0,
36
  "mel_fmax": null,
37
  "add_blank": true,
38
- "n_speakers": 896,
39
  "cleaned_text": true,
40
  "spk2id": {
41
  "派蒙_ZH": 0,
@@ -119,203 +121,203 @@
119
  "伊迪娅_ZH": 78,
120
  "留云借风真君_ZH": 79,
121
  "绮良良_ZH": 80,
122
- "七七_ZH": 81,
123
- "式大将_ZH": 82,
124
- "瑶瑶_ZH": 83,
125
- "奥兹_ZH": 84,
126
- "菲米尼_ZH": 85,
127
- "米卡_ZH": 86,
128
- "哲平_ZH": 87,
129
- "大肉丸_ZH": 88,
130
- "托克_ZH": 89,
131
- "蒂玛乌斯_ZH": 90,
132
- "昆钧_ZH": 91,
133
- "欧菲妮_ZH": 92,
134
- "塞琉斯_ZH": 93,
135
- "仆人_ZH": 94,
136
- "迈勒斯_ZH": 95,
137
- "希格雯_ZH": 96,
138
- "阿守_ZH": 97,
139
- "拉赫曼_ZH": 98,
140
- "杜拉夫_ZH": 99,
141
- "伊利亚斯_ZH": 100,
142
- "阿晃_ZH": 101,
143
- "旁白_ZH": 102,
144
- "爱德琳_ZH": 103,
145
- "埃洛伊_ZH": 104,
146
- "德沃沙克_ZH": 105,
147
- "玛乔丽_ZH": 106,
148
- "塞塔蕾_ZH": 107,
149
- "柊千里_ZH": 108,
150
- "海芭夏_ZH": 109,
151
- "九条镰治_ZH": 110,
152
- "阿娜耶_ZH": 111,
153
- "笼钓瓶一心_ZH": 112,
154
- "回声海螺_ZH": 113,
155
- "劳维克_ZH": 114,
156
- "元太_ZH": 115,
157
- "阿扎尔_ZH": 116,
158
- "查尔斯_ZH": 117,
159
- "阿洛瓦_ZH": 118,
160
- "埃勒曼_ZH": 119,
161
- "纳比尔_ZH": 120,
162
- "莎拉_ZH": 121,
163
- "康纳_ZH": 122,
164
- "博来_ZH": 123,
165
- "玛塞勒_ZH": 124,
166
- "阿祇_ZH": 125,
167
- "博士_ZH": 126,
168
- "玛格丽特_ZH": 127,
169
- "迪尔菲_ZH": 128,
170
- "宛烟_ZH": 129,
171
- "羽生田千鹤_ZH": 130,
172
- "海妮耶_ZH": 131,
173
- "旅行者_ZH": 132,
174
- "霍夫曼_ZH": 133,
175
- "佐西摩斯_ZH": 134,
176
- "鹿野奈奈_ZH": 135,
177
- "舒伯特_ZH": 136,
178
- "天叔_ZH": 137,
179
- "艾莉丝_ZH": 138,
180
- "龙二_ZH": 139,
181
- "莺儿_ZH": 140,
182
- "嘉良_ZH": 141,
183
- "一心传名刀_ZH": 142,
184
- "费迪南德_ZH": 143,
185
- "珊瑚_ZH": 144,
186
- "言笑_ZH": 145,
187
- "久利须_ZH": 146,
188
- "嘉玛_ZH": 147,
189
- "艾文_ZH": 148,
190
- "克洛琳德_ZH": 149,
191
- "丹吉尔_ZH": 150,
192
- "女士_ZH": 151,
193
- "白老先生_ZH": 152,
194
- "天目十五_ZH": 153,
195
- "老孟_ZH": 154,
196
- "巴达维_ZH": 155,
197
- "长生_ZH": 156,
198
- "吴船长_ZH": 157,
199
- "拉齐_ZH": 158,
200
- "艾伯特_ZH": 159,
201
- "松浦_ZH": 160,
202
- "埃泽_ZH": 161,
203
- "阿圆_ZH": 162,
204
- "莫塞伊思_ZH": 163,
205
- "阿拉夫_ZH": 164,
206
- "杜吉耶_ZH": 165,
207
- "石头_ZH": 166,
208
- "百闻_ZH": 167,
209
- "波洛_ZH": 168,
210
- "斯坦利_ZH": 169,
211
- "博易_ZH": 170,
212
- "迈蒙_ZH": 171,
213
- "掇星攫辰天君_ZH": 172,
214
- "毗伽尔_ZH": 173,
215
- "芙卡洛斯_ZH": 174,
216
- "恶龙_ZH": 175,
217
- "恕筠_ZH": 176,
218
- "知易_ZH": 177,
219
- "克列门特_ZH": 178,
220
- "大慈树王_ZH": 179,
221
- "西拉杰_ZH": 180,
222
- "上杉_ZH": 181,
223
- "阿尔卡米_ZH": 182,
224
- "纯水精灵_ZH": 183,
225
- "常九爷_ZH": 184,
226
- "沙扎曼_ZH": 185,
227
- "田铁嘴_ZH": 186,
228
- "克罗索_ZH": 187,
229
- "阿巴图伊_ZH": 188,
230
- "_ZH": 189,
231
  "阿佩普_ZH": 190,
232
  "埃尔欣根_ZH": 191,
233
  "萨赫哈蒂_ZH": 192,
234
  "塔杰·拉德卡尼_ZH": 193,
235
  "安西_ZH": 194,
236
- "埃舍尔_ZH": 195,
237
- "萨齐因_ZH": 196,
238
- "派蒙_JP": 197,
239
- "纳西妲_JP": 198,
240
- "凯亚_JP": 199,
241
- "阿贝多_JP": 200,
242
- "温迪_JP": 201,
243
- "枫原万叶_JP": 202,
244
- "钟离_JP": 203,
245
- "荒泷一斗_JP": 204,
246
- "八重神子_JP": 205,
247
- "艾尔海森_JP": 206,
248
- "提纳里_JP": 207,
249
- "迪希雅_JP": 208,
250
- "卡维_JP": 209,
251
- "宵宫_JP": 210,
252
- "那维莱特_JP": 211,
253
- "莱依拉_JP": 212,
254
- "赛诺_JP": 213,
255
- "莫娜_JP": 214,
256
- "诺艾尔_JP": 215,
257
- "托马_JP": 216,
258
- "凝光_JP": 217,
259
- "林尼_JP": 218,
260
- "北斗_JP": 219,
261
- "柯莱_JP": 220,
262
- "神里绫华_JP": 221,
263
- "可莉_JP": 222,
264
- "芭芭拉_JP": 223,
265
- "雷电将军_JP": 224,
266
- "娜维娅_JP": 225,
267
- "芙宁娜_JP": 226,
268
- "珊瑚宫心海_JP": 227,
269
- "鹿野院平藏_JP": 228,
270
- "迪奥娜_JP": 229,
271
- "_JP": 230,
272
- "五郎_JP": 231,
273
- "班尼特_JP": 232,
274
- "达达利亚_JP": 233,
275
- "安柏_JP": 234,
276
- "莱欧斯利_JP": 235,
277
- "夜兰_JP": 236,
278
- "妮露_JP": 237,
279
- "辛焱_JP": 238,
280
- "丽莎_JP": 239,
281
- "珐露珊_JP": 240,
282
- "_JP": 241,
283
- "香菱_JP": 242,
284
- "迪卢克_JP": 243,
285
- "砂糖_JP": 244,
286
- "烟绯_JP": 245,
287
- "早柚_JP": 246,
288
- "云堇_JP": 247,
289
- "刻晴_JP": 248,
290
- "重云_JP": 249,
291
- "优菈_JP": 250,
292
- "胡桃_JP": 251,
293
- "流浪者_JP": 252,
294
- "久岐忍_JP": 253,
295
- "神里绫人_JP": 254,
296
- "甘雨_JP": 255,
297
- "戴因斯雷布_JP": 256,
298
- "菲谢尔_JP": 257,
299
- "白术_JP": 258,
300
- "行秋_JP": 259,
301
- "九条裟罗_JP": 260,
302
- "夏洛蒂_JP": 261,
303
- "雷泽_JP": 262,
304
- "申鹤_JP": 263,
305
- "_JP": 264,
306
- "_JP": 265,
307
- "迪娜泽黛_JP": 266,
308
- "凯瑟琳_JP": 267,
309
- "多莉_JP": 268,
310
- "坎蒂丝_JP": 269,
311
- "琳妮特_JP": 270,
312
- "萍姥姥_JP": 271,
313
- "罗莎莉亚_JP": 272,
314
- "埃德_JP": 273,
315
- "爱贝尔_JP": 274,
316
- "伊迪娅_JP": 275,
317
- "留云借风真君_JP": 276,
318
- "绮良良_JP": 277,
319
  "七七_JP": 278,
320
  "式大将_JP": 279,
321
  "瑶瑶_JP": 280,
@@ -323,576 +325,571 @@
323
  "菲米尼_JP": 282,
324
  "米卡_JP": 283,
325
  "哲平_JP": 284,
326
- "大肉丸_JP": 285,
327
- "托克_JP": 286,
328
- "蒂玛乌斯_JP": 287,
329
- "昆钧_JP": 288,
330
- "欧菲妮_JP": 289,
331
- "塞琉斯_JP": 290,
332
- "仆人_JP": 291,
333
- "迈勒斯_JP": 292,
334
- "希格雯_JP": 293,
335
- "阿守_JP": 294,
336
- "拉赫曼_JP": 295,
337
- "杜拉夫_JP": 296,
338
- "伊利亚斯_JP": 297,
339
- "阿晃_JP": 298,
340
- "旁白_JP": 299,
341
- "爱德琳_JP": 300,
342
- "埃洛伊_JP": 301,
343
- "德沃沙克_JP": 302,
344
- "玛乔丽_JP": 303,
345
- "塞塔蕾_JP": 304,
346
- "柊千里_JP": 305,
347
- "海芭夏_JP": 306,
348
- "九条镰治_JP": 307,
349
- "阿娜耶_JP": 308,
350
- "笼钓瓶一心_JP": 309,
351
- "回声海螺_JP": 310,
352
- "劳维克_JP": 311,
353
- "元太_JP": 312,
354
- "阿扎尔_JP": 313,
355
- "查尔斯_JP": 314,
356
- "阿洛瓦_JP": 315,
357
- "埃勒曼_JP": 316,
358
- "纳比尔_JP": 317,
359
- "莎拉_JP": 318,
360
- "康纳_JP": 319,
361
- "博来_JP": 320,
362
- "玛塞勒_JP": 321,
363
- "阿祇_JP": 322,
364
- "博士_JP": 323,
365
- "迪尔菲_JP": 324,
366
- "玛格丽特_JP": 325,
367
- "宛烟_JP": 326,
368
- "羽生田千鹤_JP": 327,
369
- "海妮耶_JP": 328,
370
- "霍夫曼_JP": 329,
371
- "旅行者_JP": 330,
372
- "佐西摩斯_JP": 331,
373
- "舒伯特_JP": 332,
374
- "鹿野奈奈_JP": 333,
375
- "天叔_JP": 334,
376
- "龙二_JP": 335,
377
- "艾莉丝_JP": 336,
378
- "莺儿_JP": 337,
379
- "嘉良_JP": 338,
380
- "珊瑚_JP": 339,
381
- "言笑_JP": 340,
382
- "一心传名刀_JP": 341,
383
- "费迪南德_JP": 342,
384
- "久利须_JP": 343,
385
- "嘉玛_JP": 344,
386
- "艾文_JP": 345,
387
- "克洛琳德_JP": 346,
388
- "丹吉尔_JP": 347,
389
- "天目十五_JP": 348,
390
- "女士_JP": 349,
391
- "老孟_JP": 350,
392
- "白老先生_JP": 351,
393
- "舍利夫_JP": 352,
394
- "巴达维_JP": 353,
395
- "拉齐_JP": 354,
396
- "长生_JP": 355,
397
- "吴船长_JP": 356,
398
- "艾伯特_JP": 357,
399
- "松浦_JP": 358,
400
- "埃泽_JP": 359,
401
- "阿圆_JP": 360,
402
- "阿拉夫_JP": 361,
403
- "莫塞伊思_JP": 362,
404
- "石头_JP": 363,
405
- "百闻_JP": 364,
406
- "杜吉耶_JP": 365,
407
- "波洛_JP": 366,
408
- "掇星攫辰天君_JP": 367,
409
- "迈蒙_JP": 368,
410
- "博易_JP": 369,
411
- "诗筠_JP": 370,
412
- "斯坦利_JP": 371,
413
- "毗伽尔_JP": 372,
414
- "芙卡洛斯_JP": 373,
415
- "恶龙_JP": 374,
416
- "小仓澪_JP": 375,
417
- "恕筠_JP": 376,
418
- "知易_JP": 377,
419
- "克列门特_JP": 378,
420
- "大慈树王_JP": 379,
421
- "望雅_JP": 380,
422
- "黑田_JP": 381,
423
- "卡莉娜_JP": 382,
424
- "马姆杜_JP": 383,
425
- "科林斯_JP": 384,
426
- "上杉_JP": 385,
427
- "西拉杰_JP": 386,
428
- "菲尔戈黛特_JP": 387,
429
- "一平_JP": 388,
430
- "纯水精灵_JP": 389,
431
- "阿尔卡米_JP": 390,
432
- "老戴_JP": 391,
433
- "谢赫祖拜尔_JP": 392,
434
- "沙扎曼_JP": 393,
435
- "田铁嘴_JP": 394,
436
- "小野寺_JP": 395,
437
- "百识_JP": 396,
438
- "克罗索_JP": 397,
439
- "莱斯格_JP": 398,
440
- "芷巧_JP": 399,
441
- "加藤洋平_JP": 400,
442
- "阿巴图伊_JP": 401,
443
- "埃尔欣根_JP": 402,
444
- "斯嘉莉_JP": 403,
445
- "阿佩普_JP": 404,
446
- "巫女_JP": 405,
447
- "卡布斯_JP": 406,
448
- "洛伦佐_JP": 407,
449
- "萨赫哈蒂_JP": 408,
450
- "娜德瓦_JP": 409,
451
- "塞德娜_JP": 410,
452
- "塔杰·拉德卡尼_JP": 411,
453
- "绘星_JP": 412,
454
- "泽田_JP": 413,
455
- "安西_JP": 414,
456
- "拉伊德_JP": 415,
457
- "亚卡巴_JP": 416,
458
- "有乐斋_JP": 417,
459
- "莱昂_JP": 418,
460
- "尤苏波夫_JP": 419,
461
- "夏妮_JP": 420,
462
- "埃舍尔_JP": 421,
463
- "萨齐因_JP": 422,
464
- "古山_JP": 423,
465
- "自称渊上之物_JP": 424,
466
- "丹羽_JP": 425,
467
- "塞萨尔的日记_JP": 426,
468
- "派蒙_EN": 427,
469
- "纳西妲_EN": 428,
470
- "凯亚_EN": 429,
471
- "阿贝多_EN": 430,
472
- "温迪_EN": 431,
473
- "枫原万叶_EN": 432,
474
- "钟离_EN": 433,
475
- "荒泷一斗_EN": 434,
476
- "八重神子_EN": 435,
477
- "艾尔海森_EN": 436,
478
- "提纳里_EN": 437,
479
- "迪希雅_EN": 438,
480
- "卡维_EN": 439,
481
- "宵宫_EN": 440,
482
- "莱依拉_EN": 441,
483
- "那维莱特_EN": 442,
484
- "赛诺_EN": 443,
485
- "莫娜_EN": 444,
486
- "诺艾尔_EN": 445,
487
- "托马_EN": 446,
488
- "凝光_EN": 447,
489
- "林尼_EN": 448,
490
- "北斗_EN": 449,
491
- "柯莱_EN": 450,
492
- "神里绫华_EN": 451,
493
- "可莉_EN": 452,
494
- "芭芭拉_EN": 453,
495
- "雷电将军_EN": 454,
496
- "娜维娅_EN": 455,
497
- "芙宁娜_EN": 456,
498
- "珊瑚宫心海_EN": 457,
499
- "鹿野院平藏_EN": 458,
500
- "迪奥娜_EN": 459,
501
- "五郎_EN": 460,
502
- "_EN": 461,
503
- "班尼特_EN": 462,
504
- "达达利亚_EN": 463,
505
- "安柏_EN": 464,
506
- "莱欧斯利_EN": 465,
507
- "夜兰_EN": 466,
508
- "妮露_EN": 467,
509
- "辛焱_EN": 468,
510
- "珐露珊_EN": 469,
511
- "丽莎_EN": 470,
512
- "_EN": 471,
513
- "香菱_EN": 472,
514
- "迪卢克_EN": 473,
515
- "砂糖_EN": 474,
516
- "烟绯_EN": 475,
517
- "早柚_EN": 476,
518
- "云堇_EN": 477,
519
- "刻晴_EN": 478,
520
- "重云_EN": 479,
521
- "优菈_EN": 480,
522
- "胡桃_EN": 481,
523
- "流浪者_EN": 482,
524
- "久岐忍_EN": 483,
525
- "神里绫人_EN": 484,
526
- "甘雨_EN": 485,
527
- "戴因斯雷布_EN": 486,
528
- "菲谢尔_EN": 487,
529
- "白术_EN": 488,
530
- "行秋_EN": 489,
531
- "九条裟罗_EN": 490,
532
- "夏洛蒂_EN": 491,
533
- "雷泽_EN": 492,
534
- "申鹤_EN": 493,
535
- "_EN": 494,
536
- "_EN": 495,
537
- "迪娜泽黛_EN": 496,
538
- "凯瑟琳_EN": 497,
539
- "多莉_EN": 498,
540
- "坎蒂丝_EN": 499,
541
- "琳妮特_EN": 500,
542
- "萍姥姥_EN": 501,
543
- "罗莎莉亚_EN": 502,
544
- "埃德_EN": 503,
545
- "爱贝尔_EN": 504,
546
- "伊迪娅_EN": 505,
547
- "留云借风真君_EN": 506,
548
- "绮良良_EN": 507,
549
- "七七_EN": 508,
550
- "式大将_EN": 509,
551
- "瑶瑶_EN": 510,
552
- "奥兹_EN": 511,
553
- "菲米尼_EN": 512,
554
- "米卡_EN": 513,
555
- "哲平_EN": 514,
556
- "大肉丸_EN": 515,
557
- "托克_EN": 516,
558
- "蒂玛乌斯_EN": 517,
559
- "昆钧_EN": 518,
560
- "欧菲妮_EN": 519,
561
- "塞琉斯_EN": 520,
562
- "仆人_EN": 521,
563
- "迈勒斯_EN": 522,
564
- "希格雯_EN": 523,
565
- "阿守_EN": 524,
566
- "拉赫曼_EN": 525,
567
- "杜拉夫_EN": 526,
568
- "伊利亚斯_EN": 527,
569
- "阿晃_EN": 528,
570
- "旁白_EN": 529,
571
- "爱德琳_EN": 530,
572
- "埃洛伊_EN": 531,
573
- "德沃沙克_EN": 532,
574
- "玛乔丽_EN": 533,
575
- "塞塔蕾_EN": 534,
576
- "柊千里_EN": 535,
577
- "海芭夏_EN": 536,
578
- "九条镰治_EN": 537,
579
- "阿娜耶_EN": 538,
580
- "笼钓瓶一心_EN": 539,
581
- "回声海螺_EN": 540,
582
- "劳维克_EN": 541,
583
- "元太_EN": 542,
584
- "阿扎尔_EN": 543,
585
- "查尔斯_EN": 544,
586
- "阿洛瓦_EN": 545,
587
- "埃勒曼_EN": 546,
588
- "纳比尔_EN": 547,
589
- "莎拉_EN": 548,
590
- "康纳_EN": 549,
591
- "博来_EN": 550,
592
- "玛塞勒_EN": 551,
593
- "阿祇_EN": 552,
594
- "博士_EN": 553,
595
- "迪尔菲_EN": 554,
596
- "宛烟_EN": 555,
597
- "玛格丽特_EN": 556,
598
- "羽生田千鹤_EN": 557,
599
- "海妮耶_EN": 558,
600
- "霍夫曼_EN": 559,
601
- "旅行者_EN": 560,
602
- "佐西摩斯_EN": 561,
603
- "鹿野奈奈_EN": 562,
604
- "舒伯特_EN": 563,
605
- "天叔_EN": 564,
606
- "艾莉丝_EN": 565,
607
- "龙二_EN": 566,
608
- "莺儿_EN": 567,
609
- "嘉良_EN": 568,
610
- "珊瑚_EN": 569,
611
- "费迪南德_EN": 570,
612
- "言笑_EN": 571,
613
- "一心传名刀_EN": 572,
614
- "久利须_EN": 573,
615
- "嘉玛_EN": 574,
616
- "艾文_EN": 575,
617
- "克洛琳德_EN": 576,
618
- "丹吉尔_EN": 577,
619
- "女士_EN": 578,
620
- "天目十五_EN": 579,
621
- "老孟_EN": 580,
622
- "白老先生_EN": 581,
623
- "舍利夫_EN": 582,
624
- "巴达维_EN": 583,
625
- "拉齐_EN": 584,
626
- "长生_EN": 585,
627
- "吴船长_EN": 586,
628
- "艾伯特_EN": 587,
629
- "松浦_EN": 588,
630
- "埃泽_EN": 589,
631
- "阿圆_EN": 590,
632
- "阿拉夫_EN": 591,
633
- "莫塞伊思_EN": 592,
634
- "石头_EN": 593,
635
- "百闻_EN": 594,
636
- "杜吉耶_EN": 595,
637
- "波洛_EN": 596,
638
- "斯坦利_EN": 597,
639
- "掇星攫辰天君_EN": 598,
640
- "迈蒙_EN": 599,
641
- "博易_EN": 600,
642
- "诗筠_EN": 601,
643
- "毗伽尔_EN": 602,
644
- "慧心_EN": 603,
645
- "芙卡洛斯_EN": 604,
646
- "恶龙_EN": 605,
647
- "小仓澪_EN": 606,
648
- "恕筠_EN": 607,
649
- "知易_EN": 608,
650
- "克列门特_EN": 609,
651
- "大慈树王_EN": 610,
652
- "维多利亚_EN": 611,
653
- "黑田_EN": 612,
654
- "马姆杜_EN": 613,
655
- "科林斯_EN": 614,
656
- "上杉_EN": 615,
657
- "西拉杰_EN": 616,
658
- "宁禄_EN": 617,
659
- "纯水精灵_EN": 618,
660
- "常九爷_EN": 619,
661
- "阿尔卡米_EN": 620,
662
- "沙扎曼_EN": 621,
663
- "田铁嘴_EN": 622,
664
- "加萨尼_EN": 623,
665
- "克罗索_EN": 624,
666
- "星稀_EN": 625,
667
- "莱斯格_EN": 626,
668
- "阿巴图伊_EN": 627,
669
- "_EN": 628,
670
- "德田_EN": 629,
671
- "埃尔欣根_EN": 630,
672
- "阿佩普_EN": 631,
673
- "萨赫哈蒂_EN": 632,
674
- "洛伦佐_EN": 633,
675
- "塔杰·拉德卡尼_EN": 634,
676
- "泽田_EN": 635,
677
- "安西_EN": 636,
678
- "理水叠山真君_EN": 637,
679
  "埃舍尔_EN": 638,
680
- "萨齐因_EN": 639,
681
- "古田_EN": 640,
682
- "三月七_ZH": 641,
683
- "丹恒_ZH": 642,
684
- "希儿_ZH": 643,
685
- "娜塔莎_ZH": 644,
686
- "希露瓦_ZH": 645,
687
- "瓦尔特_ZH": 646,
688
- "佩拉_ZH": 647,
689
- "布洛妮娅_ZH": 648,
690
- "虎克_ZH": 649,
691
- "素裳_ZH": 650,
692
- "克拉拉_ZH": 651,
693
- "符玄_ZH": 652,
694
- "白露_ZH": 653,
695
- "杰帕德_ZH": 654,
696
- "景元_ZH": 655,
697
- "藿藿_ZH": 656,
698
- "姬子_ZH": 657,
699
- "_ZH": 658,
700
- "_ZH": 659,
701
- "卡芙卡_ZH": 660,
702
- "桂乃芬_ZH": 661,
703
- "艾丝妲_ZH": 662,
704
- "玲可_ZH": 663,
705
- "彦卿_ZH": 664,
706
- "托帕_ZH": 665,
707
- "驭空_ZH": 666,
708
- "浮烟_ZH": 667,
709
- "停云_ZH": 668,
710
- "镜流_ZH": 669,
711
- "罗刹_ZH": 670,
712
- "卢卡_ZH": 671,
713
- "史瓦罗_ZH": 672,
714
- "黑塔_ZH": 673,
715
- "桑博_ZH": 674,
716
- "伦纳德_ZH": 675,
717
- "明曦_ZH": 676,
718
- "银狼_ZH": 677,
719
- "帕姆_ZH": 678,
720
- "青雀_ZH": 679,
721
- "乔瓦尼_ZH": 680,
722
- "公输师傅_ZH": 681,
723
- "晴霓_ZH": 682,
724
- "螺丝咕姆_ZH": 683,
725
- "阿兰_ZH": 684,
726
- "奥列格_ZH": 685,
727
- "丹枢_ZH": 686,
728
- "尾巴_ZH": 687,
729
- "寒鸦_ZH": 688,
730
- "雪衣_ZH": 689,
731
- "可可利亚_ZH": 690,
732
- "青镞_ZH": 691,
733
- "半夏_ZH": 692,
734
- "银枝_ZH": 693,
735
- "大毫_ZH": 694,
736
- "霄翰_ZH": 695,
737
- "信使_ZH": 696,
738
- "费斯曼_ZH": 697,
739
- "绿芙蓉_ZH": 698,
740
- "dev_成男_ZH": 699,
741
- "金人会长_ZH": 700,
742
- "维利特_ZH": 701,
743
- "维尔德_ZH": 702,
744
- "斯科特_ZH": 703,
745
- "卡波特_ZH": 704,
746
- "刃_ZH": 705,
747
- "岩明_ZH": 706,
748
- "浣溪_ZH": 707,
749
- "三月七_JP": 708,
750
- "丹恒_JP": 709,
751
- "希儿_JP": 710,
752
- "娜塔莎_JP": 711,
753
- "希露瓦_JP": 712,
754
- "瓦尔特_JP": 713,
755
- "佩拉_JP": 714,
756
- "布洛妮娅_JP": 715,
757
- "虎克_JP": 716,
758
- "素裳_JP": 717,
759
- "克拉拉_JP": 718,
760
- "符玄_JP": 719,
761
- "白露_JP": 720,
762
- "杰帕德_JP": 721,
763
- "景元_JP": 722,
764
- "藿藿_JP": 723,
765
- "姬子_JP": 724,
766
- "卡芙卡_JP": 725,
767
- "_JP": 726,
768
- "_JP": 727,
769
- "桂乃芬_JP": 728,
770
- "艾丝妲_JP": 729,
771
- "彦卿_JP": 730,
772
- "玲可_JP": 731,
773
- "托帕_JP": 732,
774
- "驭空_JP": 733,
775
- "浮烟_JP": 734,
776
- "停云_JP": 735,
777
- "镜流_JP": 736,
778
- "罗刹_JP": 737,
779
- "卢卡_JP": 738,
780
- "史瓦罗_JP": 739,
781
- "黑塔_JP": 740,
782
- "桑博_JP": 741,
783
- "伦纳德_JP": 742,
784
- "明曦_JP": 743,
785
- "银狼_JP": 744,
786
- "帕姆_JP": 745,
787
- "青雀_JP": 746,
788
- "乔瓦尼_JP": 747,
789
- "公输师傅_JP": 748,
790
- "晴霓_JP": 749,
791
- "螺丝咕姆_JP": 750,
792
- "阿兰_JP": 751,
793
- "奥列格_JP": 752,
794
- "丹枢_JP": 753,
795
- "尾巴_JP": 754,
796
- "寒鸦_JP": 755,
797
- "雪衣_JP": 756,
798
- "可可利亚_JP": 757,
799
- "青镞_JP": 758,
800
- "半夏_JP": 759,
801
- "银枝_JP": 760,
802
- "大毫_JP": 761,
803
- "霄翰_JP": 762,
804
- "信使_JP": 763,
805
- "费斯曼_JP": 764,
806
- "绿芙蓉_JP": 765,
807
- "dev_成男_JP": 766,
808
- "金人会长_JP": 767,
809
- "维利特_JP": 768,
810
- "维尔德_JP": 769,
811
- "斯科特_JP": 770,
812
- "_JP": 771,
813
- "卡波特_JP": 772,
814
- "岩明_JP": 773,
815
- "浣溪_JP": 774,
816
- "净砚_JP": 775,
817
- "紫月季_JP": 776,
818
- "歌蒂_JP": 777,
819
- "奇怪的云骑_JP": 778,
820
- "幻胧_JP": 779,
821
- "斯薇塔_JP": 780,
822
- "隐书_JP": 781,
823
- "三月七_EN": 782,
824
- "丹恒_EN": 783,
825
- "希儿_EN": 784,
826
- "娜塔莎_EN": 785,
827
- "希露瓦_EN": 786,
828
- "瓦尔特_EN": 787,
829
- "佩拉_EN": 788,
830
- "布洛妮娅_EN": 789,
831
- "虎克_EN": 790,
832
- "素裳_EN": 791,
833
- "克拉拉_EN": 792,
834
- "符玄_EN": 793,
835
- "白露_EN": 794,
836
- "杰帕德_EN": 795,
837
- "景元_EN": 796,
838
- "藿藿_EN": 797,
839
- "姬子_EN": 798,
840
- "卡芙卡_EN": 799,
841
- "_EN": 800,
842
- "_EN": 801,
843
- "桂乃芬_EN": 802,
844
- "艾丝妲_EN": 803,
845
- "彦卿_EN": 804,
846
- "玲可_EN": 805,
847
- "托帕_EN": 806,
848
- "驭空_EN": 807,
849
- "浮烟_EN": 808,
850
- "停云_EN": 809,
851
- "镜流_EN": 810,
852
- "罗刹_EN": 811,
853
- "卢卡_EN": 812,
854
- "史瓦罗_EN": 813,
855
- "黑塔_EN": 814,
856
- "桑博_EN": 815,
857
- "伦纳德_EN": 816,
858
- "明曦_EN": 817,
859
- "银狼_EN": 818,
860
- "帕姆_EN": 819,
861
- "青雀_EN": 820,
862
- "乔瓦尼_EN": 821,
863
- "公输师傅_EN": 822,
864
- "晴霓_EN": 823,
865
- "螺丝咕姆_EN": 824,
866
- "阿兰_EN": 825,
867
- "奥列格_EN": 826,
868
- "丹枢_EN": 827,
869
- "尾巴_EN": 828,
870
- "寒鸦_EN": 829,
871
- "雪衣_EN": 830,
872
- "可可利亚_EN": 831,
873
- "青镞_EN": 832,
874
- "半夏_EN": 833,
875
- "银枝_EN": 834,
876
- "大毫_EN": 835,
877
- "霄翰_EN": 836,
878
- "信使_EN": 837,
879
- "费斯曼_EN": 838,
880
- "绿芙蓉_EN": 839,
881
- "dev_成男_EN": 840,
882
- "金人会长_EN": 841,
883
- "维利特_EN": 842,
884
- "维尔德_EN": 843,
885
- "_EN": 844,
886
- "卡波特_EN": 845,
887
- "岩明_EN": 846,
888
- "浣溪_EN": 847,
889
- "紫月季_EN": 848,
890
- "幻胧_EN": 849,
891
- "女声_EN": 850,
892
- "陆景和": 851,
893
- "莫弈": 852,
894
- "左然": 853,
895
- "夏彦": 854
896
  }
897
  },
898
  "model": {
@@ -947,7 +944,14 @@
947
  ],
948
  "n_layers_q": 3,
949
  "use_spectral_norm": false,
950
- "gin_channels": 256
 
 
 
 
 
 
 
951
  },
952
- "version": "2.2"
953
  }
 
10
  0.99
11
  ],
12
  "eps": 1e-09,
13
+ "batch_size": 16,
14
+ "bf16_run": false,
15
  "lr_decay": 0.99995,
16
  "segment_size": 16384,
17
  "init_lr_ratio": 1,
18
  "warmup_epochs": 0,
19
  "c_mel": 45,
20
  "c_kl": 1.0,
21
+ "c_commit": 100,
22
  "skip_optimizer": true,
23
  "freeze_ZH_bert": false,
24
  "freeze_JP_bert": false,
25
+ "freeze_EN_bert": false,
26
+ "freeze_emo": false
27
  },
28
  "data": {
29
  "training_files": "filelists/train.list",
 
37
  "mel_fmin": 0.0,
38
  "mel_fmax": null,
39
  "add_blank": true,
40
+ "n_speakers": 850,
41
  "cleaned_text": true,
42
  "spk2id": {
43
  "派蒙_ZH": 0,
 
121
  "伊迪娅_ZH": 78,
122
  "留云借风真君_ZH": 79,
123
  "绮良良_ZH": 80,
124
+ "陌生人_ZH": 81,
125
+ "七七_ZH": 82,
126
+ "式大将_ZH": 83,
127
+ "瑶瑶_ZH": 84,
128
+ "奥兹_ZH": 85,
129
+ "菲米尼_ZH": 86,
130
+ "米卡_ZH": 87,
131
+ "哲平_ZH": 88,
132
+ "浮游水蕈兽·元素生命_ZH": 89,
133
+ "大肉丸_ZH": 90,
134
+ "托克_ZH": 91,
135
+ "蒂玛乌斯_ZH": 92,
136
+ "昆钧_ZH": 93,
137
+ "欧菲妮_ZH": 94,
138
+ "塞琉斯_ZH": 95,
139
+ "仆人_ZH": 96,
140
+ "迈勒斯_ZH": 97,
141
+ "希格雯_ZH": 98,
142
+ "阿守_ZH": 99,
143
+ "拉赫曼_ZH": 100,
144
+ "杜拉夫_ZH": 101,
145
+ "伊利亚斯_ZH": 102,
146
+ "阿晃_ZH": 103,
147
+ "旁白_ZH": 104,
148
+ "爱德琳_ZH": 105,
149
+ "埃洛伊_ZH": 106,
150
+ "德沃沙克_ZH": 107,
151
+ "玛乔丽_ZH": 108,
152
+ "塞塔蕾_ZH": 109,
153
+ "柊千里_ZH": 110,
154
+ "海芭夏_ZH": 111,
155
+ "九条镰治_ZH": 112,
156
+ "阿娜耶_ZH": 113,
157
+ "笼钓瓶一心_ZH": 114,
158
+ "回声海螺_ZH": 115,
159
+ "劳维克_ZH": 116,
160
+ "元太_ZH": 117,
161
+ "阿扎尔_ZH": 118,
162
+ "查尔斯_ZH": 119,
163
+ "阿洛瓦_ZH": 120,
164
+ "埃勒曼_ZH": 121,
165
+ "纳比尔_ZH": 122,
166
+ "莎拉_ZH": 123,
167
+ "康纳_ZH": 124,
168
+ "博来_ZH": 125,
169
+ "玛塞勒_ZH": 126,
170
+ "阿祇_ZH": 127,
171
+ "博士_ZH": 128,
172
+ "玛格丽特_ZH": 129,
173
+ "迪尔菲_ZH": 130,
174
+ "宛烟_ZH": 131,
175
+ "羽生田千鹤_ZH": 132,
176
+ "海妮耶_ZH": 133,
177
+ "旅行者_ZH": 134,
178
+ "霍夫曼_ZH": 135,
179
+ "佐西摩斯_ZH": 136,
180
+ "鹿野奈奈_ZH": 137,
181
+ "舒伯特_ZH": 138,
182
+ "天叔_ZH": 139,
183
+ "艾莉丝_ZH": 140,
184
+ "龙二_ZH": 141,
185
+ "莺儿_ZH": 142,
186
+ "嘉良_ZH": 143,
187
+ "一心传名刀_ZH": 144,
188
+ "珊瑚_ZH": 145,
189
+ "言笑_ZH": 146,
190
+ "久利须_ZH": 147,
191
+ "嘉玛_ZH": 148,
192
+ "艾文_ZH": 149,
193
+ "克洛琳德_ZH": 150,
194
+ "丹吉尔_ZH": 151,
195
+ "女士_ZH": 152,
196
+ "白老先生_ZH": 153,
197
+ "天目十五_ZH": 154,
198
+ "老孟_ZH": 155,
199
+ "巴达维_ZH": 156,
200
+ "长生_ZH": 157,
201
+ "吴船长_ZH": 158,
202
+ "拉齐_ZH": 159,
203
+ "艾伯特_ZH": 160,
204
+ "松浦_ZH": 161,
205
+ "埃泽_ZH": 162,
206
+ "阿圆_ZH": 163,
207
+ "莫塞伊思_ZH": 164,
208
+ "阿拉夫_ZH": 165,
209
+ "杜吉耶_ZH": 166,
210
+ "石头_ZH": 167,
211
+ "百闻_ZH": 168,
212
+ "波洛_ZH": 169,
213
+ "斯坦利_ZH": 170,
214
+ "博易_ZH": 171,
215
+ "迈蒙_ZH": 172,
216
+ "掇星攫辰天君_ZH": 173,
217
+ "毗伽尔_ZH": 174,
218
+ "芙卡洛斯_ZH": 175,
219
+ "恶龙_ZH": 176,
220
+ "恕筠_ZH": 177,
221
+ "知易_ZH": 178,
222
+ "克列门特_ZH": 179,
223
+ "大慈树王_ZH": 180,
224
+ "西拉杰_ZH": 181,
225
+ "上杉_ZH": 182,
226
+ "阿尔卡米_ZH": 183,
227
+ "纯水精灵_ZH": 184,
228
+ "常九爷_ZH": 185,
229
+ "沙扎曼_ZH": 186,
230
+ "田铁嘴_ZH": 187,
231
+ "克罗索_ZH": 188,
232
+ "阿巴图伊_ZH": 189,
233
  "阿佩普_ZH": 190,
234
  "埃尔欣根_ZH": 191,
235
  "萨赫哈蒂_ZH": 192,
236
  "塔杰·拉德卡尼_ZH": 193,
237
  "安西_ZH": 194,
238
+ "陆行岩本真蕈·元素生命_ZH": 195,
239
+ "派蒙_JP": 196,
240
+ "纳西妲_JP": 197,
241
+ "凯亚_JP": 198,
242
+ "阿贝多_JP": 199,
243
+ "温迪_JP": 200,
244
+ "枫原万叶_JP": 201,
245
+ "钟离_JP": 202,
246
+ "荒泷一斗_JP": 203,
247
+ "八重神子_JP": 204,
248
+ "艾尔海森_JP": 205,
249
+ "提纳里_JP": 206,
250
+ "迪希雅_JP": 207,
251
+ "卡维_JP": 208,
252
+ "宵宫_JP": 209,
253
+ "那维莱特_JP": 210,
254
+ "莱依拉_JP": 211,
255
+ "赛诺_JP": 212,
256
+ "莫娜_JP": 213,
257
+ "诺艾尔_JP": 214,
258
+ "托马_JP": 215,
259
+ "凝光_JP": 216,
260
+ "林尼_JP": 217,
261
+ "北斗_JP": 218,
262
+ "柯莱_JP": 219,
263
+ "神里绫华_JP": 220,
264
+ "可莉_JP": 221,
265
+ "芭芭拉_JP": 222,
266
+ "雷电将军_JP": 223,
267
+ "娜维娅_JP": 224,
268
+ "芙宁娜_JP": 225,
269
+ "珊瑚宫心海_JP": 226,
270
+ "鹿野院平藏_JP": 227,
271
+ "迪奥娜_JP": 228,
272
+ "_JP": 229,
273
+ "五郎_JP": 230,
274
+ "班尼特_JP": 231,
275
+ "达达利亚_JP": 232,
276
+ "安柏_JP": 233,
277
+ "莱欧斯利_JP": 234,
278
+ "夜兰_JP": 235,
279
+ "妮露_JP": 236,
280
+ "辛焱_JP": 237,
281
+ "丽莎_JP": 238,
282
+ "珐露珊_JP": 239,
283
+ "_JP": 240,
284
+ "香菱_JP": 241,
285
+ "迪卢克_JP": 242,
286
+ "砂糖_JP": 243,
287
+ "烟绯_JP": 244,
288
+ "早柚_JP": 245,
289
+ "云堇_JP": 246,
290
+ "刻晴_JP": 247,
291
+ "重云_JP": 248,
292
+ "优菈_JP": 249,
293
+ "胡桃_JP": 250,
294
+ "流浪者_JP": 251,
295
+ "久岐忍_JP": 252,
296
+ "神里绫人_JP": 253,
297
+ "甘雨_JP": 254,
298
+ "戴因斯雷布_JP": 255,
299
+ "菲谢尔_JP": 256,
300
+ "白术_JP": 257,
301
+ "行秋_JP": 258,
302
+ "九条裟罗_JP": 259,
303
+ "夏洛蒂_JP": 260,
304
+ "雷泽_JP": 261,
305
+ "申鹤_JP": 262,
306
+ "_JP": 263,
307
+ "_JP": 264,
308
+ "迪娜泽黛_JP": 265,
309
+ "凯瑟琳_JP": 266,
310
+ "多莉_JP": 267,
311
+ "坎蒂丝_JP": 268,
312
+ "琳妮特_JP": 269,
313
+ "萍姥姥_JP": 270,
314
+ "罗莎莉亚_JP": 271,
315
+ "埃德_JP": 272,
316
+ "爱贝尔_JP": 273,
317
+ "伊迪娅_JP": 274,
318
+ "留云借风真君_JP": 275,
319
+ "绮良良_JP": 276,
320
+ "陌生人_JP": 277,
321
  "七七_JP": 278,
322
  "式大将_JP": 279,
323
  "瑶瑶_JP": 280,
 
325
  "菲米尼_JP": 282,
326
  "米卡_JP": 283,
327
  "哲平_JP": 284,
328
+ "浮游水蕈兽·元素生命_JP": 285,
329
+ "大肉丸_JP": 286,
330
+ "托克_JP": 287,
331
+ "蒂玛乌斯_JP": 288,
332
+ "昆钧_JP": 289,
333
+ "欧菲妮_JP": 290,
334
+ "塞琉斯_JP": 291,
335
+ "仆人_JP": 292,
336
+ "迈勒斯_JP": 293,
337
+ "希格雯_JP": 294,
338
+ "阿守_JP": 295,
339
+ "拉赫曼_JP": 296,
340
+ "杜拉夫_JP": 297,
341
+ "伊利亚斯_JP": 298,
342
+ "阿晃_JP": 299,
343
+ "旁白_JP": 300,
344
+ "爱德琳_JP": 301,
345
+ "埃洛伊_JP": 302,
346
+ "德沃沙克_JP": 303,
347
+ "玛乔丽_JP": 304,
348
+ "塞塔蕾_JP": 305,
349
+ "柊千里_JP": 306,
350
+ "海芭夏_JP": 307,
351
+ "九条镰治_JP": 308,
352
+ "阿娜耶_JP": 309,
353
+ "笼钓瓶一心_JP": 310,
354
+ "回声海螺_JP": 311,
355
+ "劳维克_JP": 312,
356
+ "元太_JP": 313,
357
+ "阿扎尔_JP": 314,
358
+ "查尔斯_JP": 315,
359
+ "阿洛瓦_JP": 316,
360
+ "埃勒曼_JP": 317,
361
+ "纳比尔_JP": 318,
362
+ "莎拉_JP": 319,
363
+ "康纳_JP": 320,
364
+ "博来_JP": 321,
365
+ "玛塞勒_JP": 322,
366
+ "阿祇_JP": 323,
367
+ "博士_JP": 324,
368
+ "迪尔菲_JP": 325,
369
+ "玛格丽特_JP": 326,
370
+ "宛烟_JP": 327,
371
+ "羽生田千鹤_JP": 328,
372
+ "海妮耶_JP": 329,
373
+ "霍夫曼_JP": 330,
374
+ "旅行者_JP": 331,
375
+ "佐西摩斯_JP": 332,
376
+ "舒伯特_JP": 333,
377
+ "鹿野奈奈_JP": 334,
378
+ "天叔_JP": 335,
379
+ "龙二_JP": 336,
380
+ "艾莉丝_JP": 337,
381
+ "莺儿_JP": 338,
382
+ "嘉良_JP": 339,
383
+ "珊瑚_JP": 340,
384
+ "言笑_JP": 341,
385
+ "一心传名刀_JP": 342,
386
+ "费迪南德_JP": 343,
387
+ "久利须_JP": 344,
388
+ "嘉玛_JP": 345,
389
+ "艾文_JP": 346,
390
+ "克洛琳德_JP": 347,
391
+ "丹吉尔_JP": 348,
392
+ "天目十五_JP": 349,
393
+ "女士_JP": 350,
394
+ "老孟_JP": 351,
395
+ "白老先生_JP": 352,
396
+ "舍利夫_JP": 353,
397
+ "巴达维_JP": 354,
398
+ "拉齐_JP": 355,
399
+ "长生_JP": 356,
400
+ "吴船长_JP": 357,
401
+ "艾伯特_JP": 358,
402
+ "松浦_JP": 359,
403
+ "埃泽_JP": 360,
404
+ "阿圆_JP": 361,
405
+ "阿拉夫_JP": 362,
406
+ "莫塞伊思_JP": 363,
407
+ "石头_JP": 364,
408
+ "百闻_JP": 365,
409
+ "杜吉耶_JP": 366,
410
+ "波洛_JP": 367,
411
+ "掇星攫辰天君_JP": 368,
412
+ "迈蒙_JP": 369,
413
+ "博易_JP": 370,
414
+ "诗筠_JP": 371,
415
+ "斯坦利_JP": 372,
416
+ "毗伽尔_JP": 373,
417
+ "芙卡洛斯_JP": 374,
418
+ "恶龙_JP": 375,
419
+ "小仓澪_JP": 376,
420
+ "恕筠_JP": 377,
421
+ "知易_JP": 378,
422
+ "克列门特_JP": 379,
423
+ "大慈树王_JP": 380,
424
+ "望雅_JP": 381,
425
+ "黑田_JP": 382,
426
+ "卡莉娜_JP": 383,
427
+ "马姆杜_JP": 384,
428
+ "科林斯_JP": 385,
429
+ "上杉_JP": 386,
430
+ "西拉杰_JP": 387,
431
+ "菲尔戈黛特_JP": 388,
432
+ "一平_JP": 389,
433
+ "纯水精灵_JP": 390,
434
+ "阿尔卡米_JP": 391,
435
+ "老戴_JP": 392,
436
+ "谢赫祖拜尔_JP": 393,
437
+ "沙扎曼_JP": 394,
438
+ "田铁嘴_JP": 395,
439
+ "小野寺_JP": 396,
440
+ "百识_JP": 397,
441
+ "克罗索_JP": 398,
442
+ "莱斯格_JP": 399,
443
+ "芷巧_JP": 400,
444
+ "加藤洋平_JP": 401,
445
+ "阿巴图伊_JP": 402,
446
+ "埃尔欣根_JP": 403,
447
+ "斯嘉莉_JP": 404,
448
+ "阿佩普_JP": 405,
449
+ "巫女_JP": 406,
450
+ "卡布斯_JP": 407,
451
+ "洛伦佐_JP": 408,
452
+ "萨赫哈蒂_JP": 409,
453
+ "娜德瓦_JP": 410,
454
+ "塞德娜_JP": 411,
455
+ "塔杰·拉德卡尼_JP": 412,
456
+ "绘星_JP": 413,
457
+ "泽田_JP": 414,
458
+ "安西_JP": 415,
459
+ "拉伊德_JP": 416,
460
+ "亚卡巴_JP": 417,
461
+ "有乐斋_JP": 418,
462
+ "莱昂_JP": 419,
463
+ "尤苏波夫_JP": 420,
464
+ "夏妮_JP": 421,
465
+ "埃舍尔_JP": 422,
466
+ "萨齐因_JP": 423,
467
+ "古山_JP": 424,
468
+ "自称渊上之物_JP": 425,
469
+ "丹羽_JP": 426,
470
+ "塞萨尔的日记_JP": 427,
471
+ "派蒙_EN": 428,
472
+ "纳西妲_EN": 429,
473
+ "凯亚_EN": 430,
474
+ "阿贝多_EN": 431,
475
+ "温迪_EN": 432,
476
+ "枫原万叶_EN": 433,
477
+ "钟离_EN": 434,
478
+ "荒泷一斗_EN": 435,
479
+ "八重神子_EN": 436,
480
+ "艾尔海森_EN": 437,
481
+ "提纳里_EN": 438,
482
+ "迪希雅_EN": 439,
483
+ "卡维_EN": 440,
484
+ "宵宫_EN": 441,
485
+ "莱依拉_EN": 442,
486
+ "那维莱特_EN": 443,
487
+ "赛诺_EN": 444,
488
+ "莫娜_EN": 445,
489
+ "诺艾尔_EN": 446,
490
+ "托马_EN": 447,
491
+ "凝光_EN": 448,
492
+ "林尼_EN": 449,
493
+ "北斗_EN": 450,
494
+ "柯莱_EN": 451,
495
+ "神里绫华_EN": 452,
496
+ "可莉_EN": 453,
497
+ "芭芭拉_EN": 454,
498
+ "雷电将军_EN": 455,
499
+ "娜维娅_EN": 456,
500
+ "芙宁娜_EN": 457,
501
+ "珊瑚宫心海_EN": 458,
502
+ "鹿野院平藏_EN": 459,
503
+ "迪奥娜_EN": 460,
504
+ "五郎_EN": 461,
505
+ "_EN": 462,
506
+ "班尼特_EN": 463,
507
+ "达达利亚_EN": 464,
508
+ "安柏_EN": 465,
509
+ "莱欧斯利_EN": 466,
510
+ "夜兰_EN": 467,
511
+ "妮露_EN": 468,
512
+ "辛焱_EN": 469,
513
+ "珐露珊_EN": 470,
514
+ "丽莎_EN": 471,
515
+ "_EN": 472,
516
+ "香菱_EN": 473,
517
+ "迪卢克_EN": 474,
518
+ "砂糖_EN": 475,
519
+ "烟绯_EN": 476,
520
+ "早柚_EN": 477,
521
+ "云堇_EN": 478,
522
+ "刻晴_EN": 479,
523
+ "重云_EN": 480,
524
+ "优菈_EN": 481,
525
+ "胡桃_EN": 482,
526
+ "流浪者_EN": 483,
527
+ "久岐忍_EN": 484,
528
+ "神里绫人_EN": 485,
529
+ "甘雨_EN": 486,
530
+ "戴因斯雷布_EN": 487,
531
+ "菲谢尔_EN": 488,
532
+ "白术_EN": 489,
533
+ "行秋_EN": 490,
534
+ "九条裟罗_EN": 491,
535
+ "夏洛蒂_EN": 492,
536
+ "雷泽_EN": 493,
537
+ "申鹤_EN": 494,
538
+ "_EN": 495,
539
+ "_EN": 496,
540
+ "迪娜泽黛_EN": 497,
541
+ "凯瑟琳_EN": 498,
542
+ "多莉_EN": 499,
543
+ "坎蒂丝_EN": 500,
544
+ "琳妮特_EN": 501,
545
+ "萍姥姥_EN": 502,
546
+ "罗莎莉亚_EN": 503,
547
+ "埃德_EN": 504,
548
+ "爱贝尔_EN": 505,
549
+ "伊迪娅_EN": 506,
550
+ "留云借风真君_EN": 507,
551
+ "绮良良_EN": 508,
552
+ "陌生人_EN": 509,
553
+ "七七_EN": 510,
554
+ "式大将_EN": 511,
555
+ "瑶瑶_EN": 512,
556
+ "奥兹_EN": 513,
557
+ "菲米尼_EN": 514,
558
+ "米卡_EN": 515,
559
+ "哲平_EN": 516,
560
+ "浮游水蕈兽·元素生命_EN": 517,
561
+ "大肉丸_EN": 518,
562
+ "托克_EN": 519,
563
+ "蒂玛乌斯_EN": 520,
564
+ "昆钧_EN": 521,
565
+ "欧菲妮_EN": 522,
566
+ "塞琉斯_EN": 523,
567
+ "仆人_EN": 524,
568
+ "迈勒斯_EN": 525,
569
+ "希格雯_EN": 526,
570
+ "阿守_EN": 527,
571
+ "拉赫曼_EN": 528,
572
+ "杜拉夫_EN": 529,
573
+ "伊利亚斯_EN": 530,
574
+ "阿晃_EN": 531,
575
+ "旁白_EN": 532,
576
+ "爱德琳_EN": 533,
577
+ "埃洛伊_EN": 534,
578
+ "德沃沙克_EN": 535,
579
+ "玛乔丽_EN": 536,
580
+ "塞塔蕾_EN": 537,
581
+ "柊千里_EN": 538,
582
+ "海芭夏_EN": 539,
583
+ "九条镰治_EN": 540,
584
+ "阿娜耶_EN": 541,
585
+ "笼钓瓶一心_EN": 542,
586
+ "回声海螺_EN": 543,
587
+ "劳维克_EN": 544,
588
+ "元太_EN": 545,
589
+ "阿扎尔_EN": 546,
590
+ "查尔斯_EN": 547,
591
+ "阿洛瓦_EN": 548,
592
+ "埃勒曼_EN": 549,
593
+ "纳比尔_EN": 550,
594
+ "莎拉_EN": 551,
595
+ "康纳_EN": 552,
596
+ "博来_EN": 553,
597
+ "玛塞勒_EN": 554,
598
+ "阿祇_EN": 555,
599
+ "博士_EN": 556,
600
+ "迪尔菲_EN": 557,
601
+ "宛烟_EN": 558,
602
+ "玛格丽特_EN": 559,
603
+ "羽生田千鹤_EN": 560,
604
+ "海妮耶_EN": 561,
605
+ "霍夫曼_EN": 562,
606
+ "旅行者_EN": 563,
607
+ "佐西摩斯_EN": 564,
608
+ "鹿野奈奈_EN": 565,
609
+ "舒伯特_EN": 566,
610
+ "天叔_EN": 567,
611
+ "艾莉丝_EN": 568,
612
+ "龙二_EN": 569,
613
+ "莺儿_EN": 570,
614
+ "嘉良_EN": 571,
615
+ "珊瑚_EN": 572,
616
+ "费迪南德_EN": 573,
617
+ "言笑_EN": 574,
618
+ "一心传名刀_EN": 575,
619
+ "久利须_EN": 576,
620
+ "嘉玛_EN": 577,
621
+ "艾文_EN": 578,
622
+ "克洛琳德_EN": 579,
623
+ "丹吉尔_EN": 580,
624
+ "女士_EN": 581,
625
+ "天目十五_EN": 582,
626
+ "老孟_EN": 583,
627
+ "白老先生_EN": 584,
628
+ "舍利夫_EN": 585,
629
+ "巴达维_EN": 586,
630
+ "拉齐_EN": 587,
631
+ "长生_EN": 588,
632
+ "吴船长_EN": 589,
633
+ "艾伯特_EN": 590,
634
+ "松浦_EN": 591,
635
+ "埃泽_EN": 592,
636
+ "阿圆_EN": 593,
637
+ "阿拉夫_EN": 594,
638
+ "莫塞伊思_EN": 595,
639
+ "石头_EN": 596,
640
+ "百闻_EN": 597,
641
+ "杜吉耶_EN": 598,
642
+ "波洛_EN": 599,
643
+ "斯坦利_EN": 600,
644
+ "掇星攫辰天君_EN": 601,
645
+ "迈蒙_EN": 602,
646
+ "博易_EN": 603,
647
+ "诗筠_EN": 604,
648
+ "毗伽尔_EN": 605,
649
+ "慧心_EN": 606,
650
+ "芙卡洛斯_EN": 607,
651
+ "恶龙_EN": 608,
652
+ "小仓澪_EN": 609,
653
+ "恕筠_EN": 610,
654
+ "知易_EN": 611,
655
+ "克列门特_EN": 612,
656
+ "大慈树王_EN": 613,
657
+ "维多利亚_EN": 614,
658
+ "黑田_EN": 615,
659
+ "马姆杜_EN": 616,
660
+ "科林斯_EN": 617,
661
+ "上杉_EN": 618,
662
+ "西拉杰_EN": 619,
663
+ "宁禄_EN": 620,
664
+ "纯水精灵_EN": 621,
665
+ "常九爷_EN": 622,
666
+ "阿尔卡米_EN": 623,
667
+ "沙扎曼_EN": 624,
668
+ "田铁嘴_EN": 625,
669
+ "加萨尼_EN": 626,
670
+ "克罗索_EN": 627,
671
+ "星稀_EN": 628,
672
+ "莱斯格_EN": 629,
673
+ "阿巴图伊_EN": 630,
674
+ "埃尔欣根_EN": 631,
675
+ "阿佩普_EN": 632,
676
+ "萨赫哈蒂_EN": 633,
677
+ "洛伦佐_EN": 634,
678
+ "塔杰·拉德卡尼_EN": 635,
679
+ "泽田_EN": 636,
680
+ "安西_EN": 637,
681
  "埃舍尔_EN": 638,
682
+ "三月七_ZH": 639,
683
+ "丹恒_ZH": 640,
684
+ "希儿_ZH": 641,
685
+ "娜塔莎_ZH": 642,
686
+ "希露瓦_ZH": 643,
687
+ "瓦尔特_ZH": 644,
688
+ "佩拉_ZH": 645,
689
+ "布洛妮娅_ZH": 646,
690
+ "虎克_ZH": 647,
691
+ "素裳_ZH": 648,
692
+ "克拉拉_ZH": 649,
693
+ "符玄_ZH": 650,
694
+ "白露_ZH": 651,
695
+ "杰帕德_ZH": 652,
696
+ "景元_ZH": 653,
697
+ "藿藿_ZH": 654,
698
+ "姬子_ZH": 655,
699
+ "_ZH": 656,
700
+ "_ZH": 657,
701
+ "卡芙卡_ZH": 658,
702
+ "桂乃芬_ZH": 659,
703
+ "艾丝妲_ZH": 660,
704
+ "玲可_ZH": 661,
705
+ "彦卿_ZH": 662,
706
+ "托帕_ZH": 663,
707
+ "驭空_ZH": 664,
708
+ "浮烟_ZH": 665,
709
+ "停云_ZH": 666,
710
+ "镜流_ZH": 667,
711
+ "罗刹_ZH": 668,
712
+ "卢卡_ZH": 669,
713
+ "史瓦罗_ZH": 670,
714
+ "黑塔_ZH": 671,
715
+ "桑博_ZH": 672,
716
+ "伦纳德_ZH": 673,
717
+ "明曦_ZH": 674,
718
+ "银狼_ZH": 675,
719
+ "帕姆_ZH": 676,
720
+ "青雀_ZH": 677,
721
+ "乔瓦尼_ZH": 678,
722
+ "公输师傅_ZH": 679,
723
+ "晴霓_ZH": 680,
724
+ "螺丝咕姆_ZH": 681,
725
+ "阿兰_ZH": 682,
726
+ "奥列格_ZH": 683,
727
+ "丹枢_ZH": 684,
728
+ "尾巴_ZH": 685,
729
+ "寒鸦_ZH": 686,
730
+ "雪衣_ZH": 687,
731
+ "可可利亚_ZH": 688,
732
+ "青镞_ZH": 689,
733
+ "半夏_ZH": 690,
734
+ "银枝_ZH": 691,
735
+ "大毫_ZH": 692,
736
+ "霄翰_ZH": 693,
737
+ "信使_ZH": 694,
738
+ "费斯曼_ZH": 695,
739
+ "绿芙蓉_ZH": 696,
740
+ "金人会长_ZH": 697,
741
+ "维利特_ZH": 698,
742
+ "维尔德_ZH": 699,
743
+ "斯科特_ZH": 700,
744
+ "卡波特_ZH": 701,
745
+ "_ZH": 702,
746
+ "岩明_ZH": 703,
747
+ "浣溪_ZH": 704,
748
+ "三月七_JP": 705,
749
+ "丹恒_JP": 706,
750
+ "希儿_JP": 707,
751
+ "娜塔莎_JP": 708,
752
+ "希露瓦_JP": 709,
753
+ "瓦尔特_JP": 710,
754
+ "佩拉_JP": 711,
755
+ "布洛妮娅_JP": 712,
756
+ "虎克_JP": 713,
757
+ "素裳_JP": 714,
758
+ "克拉拉_JP": 715,
759
+ "符玄_JP": 716,
760
+ "白露_JP": 717,
761
+ "杰帕德_JP": 718,
762
+ "景元_JP": 719,
763
+ "藿藿_JP": 720,
764
+ "姬子_JP": 721,
765
+ "卡芙卡_JP": 722,
766
+ "_JP": 723,
767
+ "_JP": 724,
768
+ "桂乃芬_JP": 725,
769
+ "艾丝妲_JP": 726,
770
+ "彦卿_JP": 727,
771
+ "玲可_JP": 728,
772
+ "托帕_JP": 729,
773
+ "驭空_JP": 730,
774
+ "浮烟_JP": 731,
775
+ "停云_JP": 732,
776
+ "镜流_JP": 733,
777
+ "罗刹_JP": 734,
778
+ "卢卡_JP": 735,
779
+ "史瓦罗_JP": 736,
780
+ "黑塔_JP": 737,
781
+ "桑博_JP": 738,
782
+ "伦纳德_JP": 739,
783
+ "明曦_JP": 740,
784
+ "银狼_JP": 741,
785
+ "帕姆_JP": 742,
786
+ "青雀_JP": 743,
787
+ "乔瓦尼_JP": 744,
788
+ "公输师傅_JP": 745,
789
+ "晴霓_JP": 746,
790
+ "螺丝咕姆_JP": 747,
791
+ "阿兰_JP": 748,
792
+ "奥列格_JP": 749,
793
+ "丹枢_JP": 750,
794
+ "尾巴_JP": 751,
795
+ "寒鸦_JP": 752,
796
+ "雪衣_JP": 753,
797
+ "可可利亚_JP": 754,
798
+ "青镞_JP": 755,
799
+ "半夏_JP": 756,
800
+ "银枝_JP": 757,
801
+ "大毫_JP": 758,
802
+ "霄翰_JP": 759,
803
+ "信使_JP": 760,
804
+ "费斯曼_JP": 761,
805
+ "绿芙蓉_JP": 762,
806
+ "金人会长_JP": 763,
807
+ "维利特_JP": 764,
808
+ "维尔德_JP": 765,
809
+ "斯科特_JP": 766,
810
+ "_JP": 767,
811
+ "卡波特_JP": 768,
812
+ "岩明_JP": 769,
813
+ "浣溪_JP": 770,
814
+ "净砚_JP": 771,
815
+ "紫月季_JP": 772,
816
+ "歌蒂_JP": 773,
817
+ "奇怪的云骑_JP": 774,
818
+ "幻胧_JP": 775,
819
+ "斯薇塔_JP": 776,
820
+ "隐书_JP": 777,
821
+ "三月七_EN": 778,
822
+ "丹恒_EN": 779,
823
+ "希儿_EN": 780,
824
+ "娜塔莎_EN": 781,
825
+ "希露瓦_EN": 782,
826
+ "瓦尔特_EN": 783,
827
+ "佩拉_EN": 784,
828
+ "布洛妮娅_EN": 785,
829
+ "虎克_EN": 786,
830
+ "素裳_EN": 787,
831
+ "克拉拉_EN": 788,
832
+ "符玄_EN": 789,
833
+ "白露_EN": 790,
834
+ "杰帕德_EN": 791,
835
+ "景元_EN": 792,
836
+ "藿藿_EN": 793,
837
+ "姬子_EN": 794,
838
+ "卡芙卡_EN": 795,
839
+ "_EN": 796,
840
+ "_EN": 797,
841
+ "桂乃芬_EN": 798,
842
+ "艾丝妲_EN": 799,
843
+ "彦卿_EN": 800,
844
+ "玲可_EN": 801,
845
+ "托帕_EN": 802,
846
+ "驭空_EN": 803,
847
+ "浮烟_EN": 804,
848
+ "停云_EN": 805,
849
+ "镜流_EN": 806,
850
+ "罗刹_EN": 807,
851
+ "卢卡_EN": 808,
852
+ "史瓦罗_EN": 809,
853
+ "黑塔_EN": 810,
854
+ "桑博_EN": 811,
855
+ "伦纳德_EN": 812,
856
+ "明曦_EN": 813,
857
+ "银狼_EN": 814,
858
+ "帕姆_EN": 815,
859
+ "青雀_EN": 816,
860
+ "乔瓦尼_EN": 817,
861
+ "公输师傅_EN": 818,
862
+ "晴霓_EN": 819,
863
+ "螺丝咕姆_EN": 820,
864
+ "阿兰_EN": 821,
865
+ "奥列格_EN": 822,
866
+ "丹枢_EN": 823,
867
+ "尾巴_EN": 824,
868
+ "寒鸦_EN": 825,
869
+ "雪衣_EN": 826,
870
+ "可可利亚_EN": 827,
871
+ "青镞_EN": 828,
872
+ "半夏_EN": 829,
873
+ "银枝_EN": 830,
874
+ "大毫_EN": 831,
875
+ "霄翰_EN": 832,
876
+ "信使_EN": 833,
877
+ "费斯曼_EN": 834,
878
+ "绿芙蓉_EN": 835,
879
+ "金人会长_EN": 836,
880
+ "维利特_EN": 837,
881
+ "维尔德_EN": 838,
882
+ "_EN": 839,
883
+ "卡波特_EN": 840,
884
+ "岩明_EN": 841,
885
+ "浣溪_EN": 842,
886
+ "紫月季_EN": 843,
887
+ "幻胧_EN": 844,
888
+ "女声_EN": 845,
889
+ "陆景和": 846,
890
+ "莫弈": 847,
891
+ "左然": 848,
892
+ "夏彦": 849
 
 
 
 
 
893
  }
894
  },
895
  "model": {
 
944
  ],
945
  "n_layers_q": 3,
946
  "use_spectral_norm": false,
947
+ "gin_channels": 512,
948
+ "slm": {
949
+ "model": "./slm/wavlm-base-plus",
950
+ "sr": 16000,
951
+ "hidden": 768,
952
+ "nlayers": 13,
953
+ "initial_channel": 64
954
+ }
955
  },
956
+ "version": "2.3"
957
  }
monotonic_align/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/monotonic_align/__pycache__/__init__.cpython-311.pyc and b/monotonic_align/__pycache__/__init__.cpython-311.pyc differ
 
monotonic_align/__pycache__/core.cpython-311.pyc CHANGED
Binary files a/monotonic_align/__pycache__/core.cpython-311.pyc and b/monotonic_align/__pycache__/core.cpython-311.pyc differ
 
slm/wavlm-base-plus/.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
slm/wavlm-base-plus/README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ datasets:
5
+ tags:
6
+ - speech
7
+ inference: false
8
+ ---
9
+
10
+ # WavLM-Base-Plus
11
+
12
+ [Microsoft's WavLM](https://github.com/microsoft/unilm/tree/master/wavlm)
13
+
14
+ The base model pretrained on 16kHz sampled speech audio. When using the model, make sure that your speech input is also sampled at 16kHz.
15
+
16
+ **Note**: This model does not have a tokenizer as it was pretrained on audio alone. In order to use this model for **speech recognition**, a tokenizer should be created and the model should be fine-tuned on labeled text data. Check out [this blog](https://huggingface.co/blog/fine-tune-wav2vec2-english) for a more detailed explanation of how to fine-tune the model.
17
+
18
+ The model was pre-trained on:
19
+
20
+ - 60,000 hours of [Libri-Light](https://arxiv.org/abs/1912.07875)
21
+ - 10,000 hours of [GigaSpeech](https://arxiv.org/abs/2106.06909)
22
+ - 24,000 hours of [VoxPopuli](https://arxiv.org/abs/2101.00390)
23
+
24
+ [Paper: WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900)
25
+
26
+ Authors: Sanyuan Chen, Chengyi Wang, Zhengyang Chen, Yu Wu, Shujie Liu, Zhuo Chen, Jinyu Li, Naoyuki Kanda, Takuya Yoshioka, Xiong Xiao, Jian Wu, Long Zhou, Shuo Ren, Yanmin Qian, Yao Qian, Jian Wu, Michael Zeng, Furu Wei
27
+
28
+ **Abstract**
29
+ *Self-supervised learning (SSL) achieves great success in speech recognition, while limited exploration has been attempted for other speech processing tasks. As speech signal contains multi-faceted information including speaker identity, paralinguistics, spoken content, etc., learning universal representations for all speech tasks is challenging. In this paper, we propose a new pre-trained model, WavLM, to solve full-stack downstream speech tasks. WavLM is built based on the HuBERT framework, with an emphasis on both spoken content modeling and speaker identity preservation. We first equip the Transformer structure with gated relative position bias to improve its capability on recognition tasks. For better speaker discrimination, we propose an utterance mixing training strategy, where additional overlapped utterances are created unsupervisely and incorporated during model training. Lastly, we scale up the training dataset from 60k hours to 94k hours. WavLM Large achieves state-of-the-art performance on the SUPERB benchmark, and brings significant improvements for various speech processing tasks on their representative benchmarks.*
30
+
31
+ The original model can be found under https://github.com/microsoft/unilm/tree/master/wavlm.
32
+
33
+ # Usage
34
+
35
+ This is an English pre-trained speech model that has to be fine-tuned on a downstream task like speech recognition or audio classification before it can be
36
+ used in inference. The model was pre-trained in English and should therefore perform well only in English. The model has been shown to work well on the [SUPERB benchmark](https://superbbenchmark.org/).
37
+
38
+ **Note**: The model was pre-trained on phonemes rather than characters. This means that one should make sure that the input text is converted to a sequence
39
+ of phonemes before fine-tuning.
40
+
41
+ ## Speech Recognition
42
+
43
+ To fine-tune the model for speech recognition, see [the official speech recognition example](https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition).
44
+
45
+ ## Speech Classification
46
+
47
+ To fine-tune the model for speech classification, see [the official audio classification example](https://github.com/huggingface/transformers/tree/master/examples/pytorch/audio-classification).
48
+
49
+ ## Speaker Verification
50
+
51
+ TODO
52
+
53
+ ## Speaker Diarization
54
+
55
+ TODO
56
+
57
+ # Contribution
58
+
59
+ The model was contributed by [cywang](https://huggingface.co/cywang) and [patrickvonplaten](https://huggingface.co/patrickvonplaten).
60
+
61
+ # License
62
+
63
+ The official license can be found [here](https://github.com/microsoft/UniSpeech/blob/main/LICENSE).
64
+
65
+ ![design](https://raw.githubusercontent.com/patrickvonplaten/scientific_images/master/wavlm.png)
slm/wavlm-base-plus/config.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "wavlm-base-plus",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "WavLMModel"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 256,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": false,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "sum",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": false,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_norm": "group",
51
+ "feat_proj_dropout": 0.1,
52
+ "feat_quantizer_dropout": 0.0,
53
+ "final_dropout": 0.0,
54
+ "freeze_feat_extract_train": true,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.1,
57
+ "hidden_size": 768,
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.05,
62
+ "mask_channel_length": 10,
63
+ "mask_channel_min_space": 1,
64
+ "mask_channel_other": 0.0,
65
+ "mask_channel_prob": 0.0,
66
+ "mask_channel_selection": "static",
67
+ "mask_feature_length": 10,
68
+ "mask_feature_min_masks": 0,
69
+ "mask_feature_prob": 0.0,
70
+ "mask_time_length": 10,
71
+ "mask_time_min_masks": 2,
72
+ "mask_time_min_space": 1,
73
+ "mask_time_other": 0.0,
74
+ "mask_time_prob": 0.05,
75
+ "mask_time_selection": "static",
76
+ "model_type": "wavlm",
77
+ "no_mask_channel_overlap": false,
78
+ "no_mask_time_overlap": false,
79
+ "num_adapter_layers": 3,
80
+ "num_attention_heads": 12,
81
+ "num_buckets": 320,
82
+ "num_codevector_groups": 2,
83
+ "num_codevectors_per_group": 320,
84
+ "num_conv_pos_embedding_groups": 16,
85
+ "num_conv_pos_embeddings": 128,
86
+ "num_ctc_classes": 80,
87
+ "num_feat_extract_layers": 7,
88
+ "num_hidden_layers": 12,
89
+ "num_negatives": 100,
90
+ "output_hidden_size": 768,
91
+ "pad_token_id": 0,
92
+ "proj_codevector_dim": 256,
93
+ "replace_prob": 0.5,
94
+ "torch_dtype": "float32",
95
+ "transformers_version": "4.13.0.dev0",
96
+ "use_weighted_layer_sum": false,
97
+ "vocab_size": 32,
98
+ "tokenizer_class": "Wav2Vec2CTCTokenizer"
99
+ }
slm/wavlm-base-plus/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": false,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
slm/wavlm-base-plus/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb273a6ace99408b50cfc81afdbb7ef2de02da2eab0234e18db608ce692fe51
3
+ size 377617425
text/__init__.py CHANGED
@@ -18,13 +18,15 @@ def cleaned_text_to_sequence(cleaned_text, tones, language):
18
  return phones, tones, lang_ids
19
 
20
 
21
- def get_bert(norm_text, word2ph, language, device):
22
  from .chinese_bert import get_bert_feature as zh_bert
23
  from .english_bert_mock import get_bert_feature as en_bert
24
  from .japanese_bert import get_bert_feature as jp_bert
25
 
26
  lang_bert_func_map = {"ZH": zh_bert, "EN": en_bert, "JP": jp_bert}
27
- bert = lang_bert_func_map[language](norm_text, word2ph, device)
 
 
28
  return bert
29
 
30
 
 
18
  return phones, tones, lang_ids
19
 
20
 
21
+ def get_bert(norm_text, word2ph, language, device, style_text=None, style_weight=0.7):
22
  from .chinese_bert import get_bert_feature as zh_bert
23
  from .english_bert_mock import get_bert_feature as en_bert
24
  from .japanese_bert import get_bert_feature as jp_bert
25
 
26
  lang_bert_func_map = {"ZH": zh_bert, "EN": en_bert, "JP": jp_bert}
27
+ bert = lang_bert_func_map[language](
28
+ norm_text, word2ph, device, style_text, style_weight
29
+ )
30
  return bert
31
 
32
 
text/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/__init__.cpython-311.pyc and b/text/__pycache__/__init__.cpython-311.pyc differ
 
text/__pycache__/bert_utils.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/bert_utils.cpython-311.pyc and b/text/__pycache__/bert_utils.cpython-311.pyc differ
 
text/__pycache__/chinese.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/chinese.cpython-311.pyc and b/text/__pycache__/chinese.cpython-311.pyc differ
 
text/__pycache__/chinese_bert.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/chinese_bert.cpython-311.pyc and b/text/__pycache__/chinese_bert.cpython-311.pyc differ
 
text/__pycache__/cleaner.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/cleaner.cpython-311.pyc and b/text/__pycache__/cleaner.cpython-311.pyc differ
 
text/__pycache__/english.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/english.cpython-311.pyc and b/text/__pycache__/english.cpython-311.pyc differ
 
text/__pycache__/english_bert_mock.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/english_bert_mock.cpython-311.pyc and b/text/__pycache__/english_bert_mock.cpython-311.pyc differ
 
text/__pycache__/japanese.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/japanese.cpython-311.pyc and b/text/__pycache__/japanese.cpython-311.pyc differ
 
text/__pycache__/japanese_bert.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/japanese_bert.cpython-311.pyc and b/text/__pycache__/japanese_bert.cpython-311.pyc differ
 
text/__pycache__/symbols.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/symbols.cpython-311.pyc and b/text/__pycache__/symbols.cpython-311.pyc differ
 
text/__pycache__/tone_sandhi.cpython-311.pyc CHANGED
Binary files a/text/__pycache__/tone_sandhi.cpython-311.pyc and b/text/__pycache__/tone_sandhi.cpython-311.pyc differ
 
text/chinese.py CHANGED
@@ -1,12 +1,21 @@
1
  import os
2
  import re
3
 
4
- import cn2an
5
  from pypinyin import lazy_pinyin, Style
6
 
7
  from text.symbols import punctuation
8
  from text.tone_sandhi import ToneSandhi
9
 
 
 
 
 
 
 
 
 
 
 
10
  current_file_path = os.path.dirname(__file__)
11
  pinyin_to_symbol_map = {
12
  line.split("\t")[0]: line.strip().split("\t")[1]
@@ -169,9 +178,7 @@ def _g2p(segments):
169
 
170
 
171
  def text_normalize(text):
172
- numbers = re.findall(r"\d+(?:\.?\d+)?", text)
173
- for number in numbers:
174
- text = text.replace(number, cn2an.an2cn(number), 1)
175
  text = replace_punctuation(text)
176
  return text
177
 
 
1
  import os
2
  import re
3
 
 
4
  from pypinyin import lazy_pinyin, Style
5
 
6
  from text.symbols import punctuation
7
  from text.tone_sandhi import ToneSandhi
8
 
9
+ try:
10
+ from tn.chinese.normalizer import Normalizer
11
+
12
+ normalizer = Normalizer().normalize
13
+ except ImportError:
14
+ import cn2an
15
+
16
+ print("tn.chinese.normalizer not found, use cn2an normalizer")
17
+ normalizer = lambda x: cn2an.transform(x, "an2cn")
18
+
19
  current_file_path = os.path.dirname(__file__)
20
  pinyin_to_symbol_map = {
21
  line.split("\t")[0]: line.strip().split("\t")[1]
 
178
 
179
 
180
  def text_normalize(text):
181
+ text = normalizer(text)
 
 
182
  text = replace_punctuation(text)
183
  return text
184
 
text/chinese_bert.py CHANGED
@@ -12,7 +12,13 @@ tokenizer = AutoTokenizer.from_pretrained(LOCAL_PATH)
12
  models = dict()
13
 
14
 
15
- def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
 
 
 
 
 
 
16
  if (
17
  sys.platform == "darwin"
18
  and torch.backends.mps.is_available()
@@ -29,12 +35,24 @@ def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
29
  inputs[i] = inputs[i].to(device)
30
  res = models[device](**inputs, output_hidden_states=True)
31
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
32
-
 
 
 
 
 
 
33
  assert len(word2ph) == len(text) + 2
34
  word2phone = word2ph
35
  phone_level_feature = []
36
  for i in range(len(word2phone)):
37
- repeat_feature = res[i].repeat(word2phone[i], 1)
 
 
 
 
 
 
38
  phone_level_feature.append(repeat_feature)
39
 
40
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
 
12
  models = dict()
13
 
14
 
15
+ def get_bert_feature(
16
+ text,
17
+ word2ph,
18
+ device=config.bert_gen_config.device,
19
+ style_text=None,
20
+ style_weight=0.7,
21
+ ):
22
  if (
23
  sys.platform == "darwin"
24
  and torch.backends.mps.is_available()
 
35
  inputs[i] = inputs[i].to(device)
36
  res = models[device](**inputs, output_hidden_states=True)
37
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
38
+ if style_text:
39
+ style_inputs = tokenizer(style_text, return_tensors="pt")
40
+ for i in style_inputs:
41
+ style_inputs[i] = style_inputs[i].to(device)
42
+ style_res = models[device](**style_inputs, output_hidden_states=True)
43
+ style_res = torch.cat(style_res["hidden_states"][-3:-2], -1)[0].cpu()
44
+ style_res_mean = style_res.mean(0)
45
  assert len(word2ph) == len(text) + 2
46
  word2phone = word2ph
47
  phone_level_feature = []
48
  for i in range(len(word2phone)):
49
+ if style_text:
50
+ repeat_feature = (
51
+ res[i].repeat(word2phone[i], 1) * (1 - style_weight)
52
+ + style_res_mean.repeat(word2phone[i], 1) * style_weight
53
+ )
54
+ else:
55
+ repeat_feature = res[i].repeat(word2phone[i], 1)
56
  phone_level_feature.append(repeat_feature)
57
 
58
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
text/cleaner.py CHANGED
@@ -1,7 +1,7 @@
1
- from text import chinese, japanese, cleaned_text_to_sequence
2
 
3
 
4
- language_module_map = {"ZH": chinese, "JP": japanese}
5
 
6
 
7
  def clean_text(text, language):
 
1
+ from text import chinese, japanese, english, cleaned_text_to_sequence
2
 
3
 
4
+ language_module_map = {"ZH": chinese, "JP": japanese, "EN": english}
5
 
6
 
7
  def clean_text(text, language):
text/english.py CHANGED
@@ -5,6 +5,7 @@ from g2p_en import G2p
5
  from transformers import DebertaV2Tokenizer
6
 
7
  from text import symbols
 
8
 
9
  current_file_path = os.path.dirname(__file__)
10
  CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
@@ -217,6 +218,8 @@ def refine_ph(phn):
217
  if re.search(r"\d$", phn):
218
  tone = int(phn[-1]) + 1
219
  phn = phn[:-1]
 
 
220
  return phn.lower(), tone
221
 
222
 
@@ -389,45 +392,84 @@ def sep_text(text):
389
  return words
390
 
391
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  def g2p(text):
393
  phones = []
394
  tones = []
395
- # word2ph = []
396
- words = sep_text(text)
397
- tokens = [tokenizer.tokenize(i) for i in words]
 
 
398
  for word in words:
399
- if word.upper() in eng_dict:
400
- phns, tns = refine_syllables(eng_dict[word.upper()])
401
- phones.append([post_replace_ph(i) for i in phns])
402
- tones.append(tns)
403
- # word2ph.append(len(phns))
404
- else:
405
- phone_list = list(filter(lambda p: p != " ", _g2p(word)))
406
- phns = []
407
- tns = []
408
- for ph in phone_list:
409
- if ph in arpa:
410
- ph, tn = refine_ph(ph)
411
- phns.append(ph)
412
- tns.append(tn)
413
- else:
414
- phns.append(ph)
415
- tns.append(0)
416
- phones.append([post_replace_ph(i) for i in phns])
417
- tones.append(tns)
418
- # word2ph.append(len(phns))
419
- # phones = [post_replace_ph(i) for i in phones]
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  word2ph = []
422
- for token, phoneme in zip(tokens, phones):
423
- phone_len = len(phoneme)
424
  word_len = len(token)
425
 
426
- aaa = distribute_phone(phone_len, word_len)
427
  word2ph += aaa
428
 
429
- phones = ["_"] + [j for i in phones for j in i] + ["_"]
430
- tones = [0] + [j for i in tones for j in i] + [0]
431
  word2ph = [1] + word2ph + [1]
432
  assert len(phones) == len(tones), text
433
  assert len(phones) == sum(word2ph), text
 
5
  from transformers import DebertaV2Tokenizer
6
 
7
  from text import symbols
8
+ from text.symbols import punctuation
9
 
10
  current_file_path = os.path.dirname(__file__)
11
  CMU_DICT_PATH = os.path.join(current_file_path, "cmudict.rep")
 
218
  if re.search(r"\d$", phn):
219
  tone = int(phn[-1]) + 1
220
  phn = phn[:-1]
221
+ else:
222
+ tone = 3
223
  return phn.lower(), tone
224
 
225
 
 
392
  return words
393
 
394
 
395
+ def text_to_words(text):
396
+ tokens = tokenizer.tokenize(text)
397
+ words = []
398
+ for idx, t in enumerate(tokens):
399
+ if t.startswith("▁"):
400
+ words.append([t[1:]])
401
+ else:
402
+ if t in punctuation:
403
+ if idx == len(tokens) - 1:
404
+ words.append([f"{t}"])
405
+ else:
406
+ if (
407
+ not tokens[idx + 1].startswith("▁")
408
+ and tokens[idx + 1] not in punctuation
409
+ ):
410
+ if idx == 0:
411
+ words.append([])
412
+ words[-1].append(f"{t}")
413
+ else:
414
+ words.append([f"{t}"])
415
+ else:
416
+ if idx == 0:
417
+ words.append([])
418
+ words[-1].append(f"{t}")
419
+ return words
420
+
421
+
422
  def g2p(text):
423
  phones = []
424
  tones = []
425
+ phone_len = []
426
+ # words = sep_text(text)
427
+ # tokens = [tokenizer.tokenize(i) for i in words]
428
+ words = text_to_words(text)
429
+
430
  for word in words:
431
+ temp_phones, temp_tones = [], []
432
+ if len(word) > 1:
433
+ if "'" in word:
434
+ word = ["".join(word)]
435
+ for w in word:
436
+ if w in punctuation:
437
+ temp_phones.append(w)
438
+ temp_tones.append(0)
439
+ continue
440
+ if w.upper() in eng_dict:
441
+ phns, tns = refine_syllables(eng_dict[w.upper()])
442
+ temp_phones += [post_replace_ph(i) for i in phns]
443
+ temp_tones += tns
444
+ # w2ph.append(len(phns))
445
+ else:
446
+ phone_list = list(filter(lambda p: p != " ", _g2p(w)))
447
+ phns = []
448
+ tns = []
449
+ for ph in phone_list:
450
+ if ph in arpa:
451
+ ph, tn = refine_ph(ph)
452
+ phns.append(ph)
453
+ tns.append(tn)
454
+ else:
455
+ phns.append(ph)
456
+ tns.append(0)
457
+ temp_phones += [post_replace_ph(i) for i in phns]
458
+ temp_tones += tns
459
+ phones += temp_phones
460
+ tones += temp_tones
461
+ phone_len.append(len(temp_phones))
462
+ # phones = [post_replace_ph(i) for i in phones]
463
 
464
  word2ph = []
465
+ for token, pl in zip(words, phone_len):
 
466
  word_len = len(token)
467
 
468
+ aaa = distribute_phone(pl, word_len)
469
  word2ph += aaa
470
 
471
+ phones = ["_"] + phones + ["_"]
472
+ tones = [0] + tones + [0]
473
  word2ph = [1] + word2ph + [1]
474
  assert len(phones) == len(tones), text
475
  assert len(phones) == sum(word2ph), text
text/english_bert_mock.py CHANGED
@@ -13,7 +13,13 @@ tokenizer = DebertaV2Tokenizer.from_pretrained(LOCAL_PATH)
13
  models = dict()
14
 
15
 
16
- def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
 
 
 
 
 
 
17
  if (
18
  sys.platform == "darwin"
19
  and torch.backends.mps.is_available()
@@ -30,11 +36,24 @@ def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
30
  inputs[i] = inputs[i].to(device)
31
  res = models[device](**inputs, output_hidden_states=True)
32
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
 
 
 
 
 
 
 
33
  assert len(word2ph) == res.shape[0], (text, res.shape[0], len(word2ph))
34
  word2phone = word2ph
35
  phone_level_feature = []
36
  for i in range(len(word2phone)):
37
- repeat_feature = res[i].repeat(word2phone[i], 1)
 
 
 
 
 
 
38
  phone_level_feature.append(repeat_feature)
39
 
40
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
 
13
  models = dict()
14
 
15
 
16
+ def get_bert_feature(
17
+ text,
18
+ word2ph,
19
+ device=config.bert_gen_config.device,
20
+ style_text=None,
21
+ style_weight=0.7,
22
+ ):
23
  if (
24
  sys.platform == "darwin"
25
  and torch.backends.mps.is_available()
 
36
  inputs[i] = inputs[i].to(device)
37
  res = models[device](**inputs, output_hidden_states=True)
38
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
39
+ if style_text:
40
+ style_inputs = tokenizer(style_text, return_tensors="pt")
41
+ for i in style_inputs:
42
+ style_inputs[i] = style_inputs[i].to(device)
43
+ style_res = models[device](**style_inputs, output_hidden_states=True)
44
+ style_res = torch.cat(style_res["hidden_states"][-3:-2], -1)[0].cpu()
45
+ style_res_mean = style_res.mean(0)
46
  assert len(word2ph) == res.shape[0], (text, res.shape[0], len(word2ph))
47
  word2phone = word2ph
48
  phone_level_feature = []
49
  for i in range(len(word2phone)):
50
+ if style_text:
51
+ repeat_feature = (
52
+ res[i].repeat(word2phone[i], 1) * (1 - style_weight)
53
+ + style_res_mean.repeat(word2phone[i], 1) * style_weight
54
+ )
55
+ else:
56
+ repeat_feature = res[i].repeat(word2phone[i], 1)
57
  phone_level_feature.append(repeat_feature)
58
 
59
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
text/japanese.py CHANGED
@@ -13,6 +13,341 @@ import pyopenjtalk
13
  import jaconv
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def kata2phoneme(text: str) -> str:
17
  """Convert katakana text to phonemes."""
18
  text = text.strip()
@@ -32,16 +367,12 @@ def kata2phoneme(text: str) -> str:
32
  res.append(prev[-1])
33
  text = text[1:]
34
  continue
35
- res += pyopenjtalk.g2p(text).lower().replace("cl", "q").split(" ")
36
  break
37
  # res = _COLON_RX.sub(":", res)
38
  return res
39
 
40
 
41
- def hira2kata(text: str) -> str:
42
- return jaconv.hira2kata(text)
43
-
44
-
45
  _SYMBOL_TOKENS = set(list("・、。?!"))
46
  _NO_YOMI_TOKENS = set(list("「」『』―()[][]"))
47
  _MARKS = re.compile(
@@ -49,41 +380,8 @@ _MARKS = re.compile(
49
  )
50
 
51
 
52
- def text2kata(text: str) -> str:
53
  parsed = pyopenjtalk.run_frontend(text)
54
-
55
- res = []
56
- for parts in parsed:
57
- word, yomi = replace_punctuation(parts["string"]), parts["pron"].replace(
58
- "’", ""
59
- )
60
- if yomi:
61
- if re.match(_MARKS, yomi):
62
- if len(word) > 1:
63
- word = [replace_punctuation(i) for i in list(word)]
64
- yomi = word
65
- res += yomi
66
- sep += word
67
- continue
68
- elif word not in rep_map.keys() and word not in rep_map.values():
69
- word = ","
70
- yomi = word
71
- res.append(yomi)
72
- else:
73
- if word in _SYMBOL_TOKENS:
74
- res.append(word)
75
- elif word in ("っ", "ッ"):
76
- res.append("ッ")
77
- elif word in _NO_YOMI_TOKENS:
78
- pass
79
- else:
80
- res.append(word)
81
- return hira2kata("".join(res))
82
-
83
-
84
- def text2sep_kata(text: str) -> (list, list):
85
- parsed = pyopenjtalk.run_frontend(text)
86
-
87
  res = []
88
  sep = []
89
  for parts in parsed:
@@ -112,7 +410,7 @@ def text2sep_kata(text: str) -> (list, list):
112
  else:
113
  res.append(word)
114
  sep.append(word)
115
- return sep, [hira2kata(i) for i in res], get_accent(parsed)
116
 
117
 
118
  def get_accent(parsed):
@@ -225,16 +523,6 @@ def japanese_convert_alpha_symbols_to_words(text: str) -> str:
225
  return "".join([_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower()])
226
 
227
 
228
- def japanese_text_to_phonemes(text: str) -> str:
229
- """Convert Japanese text to phonemes."""
230
- res = unicodedata.normalize("NFKC", text)
231
- res = japanese_convert_numbers_to_words(res)
232
- # res = japanese_convert_alpha_symbols_to_words(res)
233
- res = text2kata(res)
234
- res = kata2phoneme(res)
235
- return res
236
-
237
-
238
  def is_japanese_character(char):
239
  # 定义日语文字系统的 Unicode 范围
240
  japanese_ranges = [
 
13
  import jaconv
14
 
15
 
16
+ # Mapping of hiragana to phonetic representation
17
+ hiragana_map = {
18
+ "う゛ぁ": " v a",
19
+ "う゛ぃ": " v i",
20
+ "う゛ぇ": " v e",
21
+ "う゛ぉ": " v o",
22
+ "う゛ゅ": " by u",
23
+ "ぅ゛": " v u",
24
+ # ゔ等の処理を追加
25
+ "ゔぁ": " v a",
26
+ "ゔぃ": " v i",
27
+ "ゔぇ": " v e",
28
+ "ゔぉ": " v o",
29
+ "ゔゅ": " by u",
30
+ # 2文字からなる変換規則
31
+ "あぁ": " a a",
32
+ "いぃ": " i i",
33
+ "いぇ": " i e",
34
+ "いゃ": " y a",
35
+ "うぅ": " u:",
36
+ "えぇ": " e e",
37
+ "おぉ": " o:",
38
+ "かぁ": " k a:",
39
+ "きぃ": " k i:",
40
+ "くぅ": " k u:",
41
+ "くゃ": " ky a",
42
+ "くゅ": " ky u",
43
+ "くょ": " ky o",
44
+ "けぇ": " k e:",
45
+ "こぉ": " k o:",
46
+ "がぁ": " g a:",
47
+ "ぎぃ": " g i:",
48
+ "ぐぅ": " g u:",
49
+ "ぐゃ": " gy a",
50
+ "ぐゅ": " gy u",
51
+ "ぐょ": " gy o",
52
+ "げぇ": " g e:",
53
+ "ごぉ": " g o:",
54
+ "さぁ": " s a:",
55
+ "しぃ": " sh i",
56
+ "すぅ": " s u:",
57
+ "すゃ": " sh a",
58
+ "すゅ": " sh u",
59
+ "すょ": " sh o",
60
+ "せぇ": " s e:",
61
+ "そぉ": " s o:",
62
+ "ざぁ": " z a:",
63
+ "じぃ": " j i:",
64
+ "ずぅ": " z u:",
65
+ "ずゃ": " zy a",
66
+ "ずゅ": " zy u",
67
+ "ずょ": " zy o",
68
+ "ぜぇ": " z e:",
69
+ "ぞぉ": " z o:",
70
+ "たぁ": " t a:",
71
+ "ちぃ": " ch i",
72
+ "つぁ": " ts a",
73
+ "つぃ": " ts i",
74
+ "つぅ": " ts u",
75
+ "つゃ": " ch a",
76
+ "つゅ": " ch u",
77
+ "つょ": " ch o",
78
+ "つぇ": " ts e",
79
+ "つぉ": " ts o",
80
+ "てぇ": " t e:",
81
+ "とぉ": " t o:",
82
+ "だぁ": " d a:",
83
+ "ぢぃ": " j i:",
84
+ "づぅ": " d u:",
85
+ "づゃ": " zy a",
86
+ "づゅ": " zy u",
87
+ "づょ": " zy o",
88
+ "でぇ": " d e:",
89
+ "なぁ": " n a:",
90
+ "にぃ": " n i:",
91
+ "ぬぅ": " n u:",
92
+ "ぬゃ": " ny a",
93
+ "ぬゅ": " ny u",
94
+ "ぬょ": " ny o",
95
+ "ねぇ": " n e:",
96
+ "のぉ": " n o:",
97
+ "はぁ": " h a:",
98
+ "ひぃ": " h i:",
99
+ "ふぅ": " f u:",
100
+ "ふゃ": " hy a",
101
+ "へぇ": " h e:",
102
+ "ほぉ": " h o:",
103
+ "ばぁ": " b a:",
104
+ "びぃ": " b i:",
105
+ "ぶぅ": " b u:",
106
+ "ぶゅ": " by u",
107
+ "べぇ": " b e:",
108
+ "ぼぉ": " b o:",
109
+ "ぱぁ": " p a:",
110
+ "ぴぃ": " p i:",
111
+ "ぷぅ": " p u:",
112
+ "ぷゃ": " py a",
113
+ "ぷゅ": " py u",
114
+ "ぷょ": " py o",
115
+ "ぺぇ": " p e:",
116
+ "ぽぉ": " p o:",
117
+ "まぁ": " m a:",
118
+ "みぃ": " m i:",
119
+ "むぅ": " m u:",
120
+ "むゃ": " my a",
121
+ "むゅ": " my u",
122
+ "むょ": " my o",
123
+ "めぇ": " m e:",
124
+ "もぉ": " m o:",
125
+ "やぁ": " y a:",
126
+ "ゆぅ": " y u:",
127
+ "ゆゃ": " y a:",
128
+ "ゆゅ": " y u:",
129
+ "ゆょ": " y o:",
130
+ "よぉ": " y o:",
131
+ "らぁ": " r a:",
132
+ "りぃ": " r i:",
133
+ "るぅ": " r u:",
134
+ "るゃ": " ry a",
135
+ "るゅ": " ry u",
136
+ "るょ": " ry o",
137
+ "れぇ": " r e:",
138
+ "ろぉ": " r o:",
139
+ "わぁ": " w a:",
140
+ "をぉ": " o:",
141
+ "う゛": " b u",
142
+ "でぃ": " d i",
143
+ "でゃ": " dy a",
144
+ "でゅ": " dy u",
145
+ "でょ": " dy o",
146
+ "てぃ": " t i",
147
+ "てゃ": " ty a",
148
+ "てゅ": " ty u",
149
+ "てょ": " ty o",
150
+ "すぃ": " s i",
151
+ "ずぁ": " z u",
152
+ "ずぃ": " z i",
153
+ "ずぇ": " z e",
154
+ "ずぉ": " z o",
155
+ "きゃ": " ky a",
156
+ "きゅ": " ky u",
157
+ "きょ": " ky o",
158
+ "しゃ": " sh a",
159
+ "しゅ": " sh u",
160
+ "しぇ": " sh e",
161
+ "しょ": " sh o",
162
+ "ちゃ": " ch a",
163
+ "ちゅ": " ch u",
164
+ "ちぇ": " ch e",
165
+ "ちょ": " ch o",
166
+ "とぅ": " t u",
167
+ "とゃ": " ty a",
168
+ "とゅ": " ty u",
169
+ "とょ": " ty o",
170
+ "どぁ": " d o ",
171
+ "どぅ": " d u",
172
+ "どゃ": " dy a",
173
+ "どゅ": " dy u",
174
+ "どょ": " dy o",
175
+ "どぉ": " d o:",
176
+ "にゃ": " ny a",
177
+ "にゅ": " ny u",
178
+ "にょ": " ny o",
179
+ "ひゃ": " hy a",
180
+ "ひゅ": " hy u",
181
+ "ひょ": " hy o",
182
+ "みゃ": " my a",
183
+ "みゅ": " my u",
184
+ "みょ": " my o",
185
+ "りゃ": " ry a",
186
+ "りゅ": " ry u",
187
+ "りょ": " ry o",
188
+ "ぎゃ": " gy a",
189
+ "ぎゅ": " gy u",
190
+ "ぎょ": " gy o",
191
+ "ぢぇ": " j e",
192
+ "ぢゃ": " j a",
193
+ "ぢゅ": " j u",
194
+ "ぢょ": " j o",
195
+ "じぇ": " j e",
196
+ "じゃ": " j a",
197
+ "じゅ": " j u",
198
+ "じょ": " j o",
199
+ "びゃ": " by a",
200
+ "びゅ": " by u",
201
+ "びょ": " by o",
202
+ "ぴゃ": " py a",
203
+ "ぴゅ": " py u",
204
+ "ぴょ": " py o",
205
+ "うぁ": " u a",
206
+ "うぃ": " w i",
207
+ "うぇ": " w e",
208
+ "うぉ": " w o",
209
+ "ふぁ": " f a",
210
+ "ふぃ": " f i",
211
+ "ふゅ": " hy u",
212
+ "ふょ": " hy o",
213
+ "ふぇ": " f e",
214
+ "ふぉ": " f o",
215
+ # 1音からなる変換規則
216
+ "あ": " a",
217
+ "い": " i",
218
+ "う": " u",
219
+ "ゔ": " v u", # ゔの処理を追加
220
+ "え": " e",
221
+ "お": " o",
222
+ "か": " k a",
223
+ "き": " k i",
224
+ "く": " k u",
225
+ "け": " k e",
226
+ "こ": " k o",
227
+ "さ": " s a",
228
+ "し": " sh i",
229
+ "す": " s u",
230
+ "せ": " s e",
231
+ "そ": " s o",
232
+ "た": " t a",
233
+ "ち": " ch i",
234
+ "つ": " ts u",
235
+ "て": " t e",
236
+ "と": " t o",
237
+ "な": " n a",
238
+ "に": " n i",
239
+ "ぬ": " n u",
240
+ "ね": " n e",
241
+ "の": " n o",
242
+ "は": " h a",
243
+ "ひ": " h i",
244
+ "ふ": " f u",
245
+ "へ": " h e",
246
+ "ほ": " h o",
247
+ "ま": " m a",
248
+ "み": " m i",
249
+ "む": " m u",
250
+ "め": " m e",
251
+ "も": " m o",
252
+ "ら": " r a",
253
+ "り": " r i",
254
+ "る": " r u",
255
+ "れ": " r e",
256
+ "ろ": " r o",
257
+ "が": " g a",
258
+ "ぎ": " g i",
259
+ "ぐ": " g u",
260
+ "げ": " g e",
261
+ "ご": " g o",
262
+ "ざ": " z a",
263
+ "じ": " j i",
264
+ "ず": " z u",
265
+ "ぜ": " z e",
266
+ "ぞ": " z o",
267
+ "だ": " d a",
268
+ "ぢ": " j i",
269
+ "づ": " z u",
270
+ "で": " d e",
271
+ "ど": " d o",
272
+ "ば": " b a",
273
+ "び": " b i",
274
+ "ぶ": " b u",
275
+ "べ": " b e",
276
+ "ぼ": " b o",
277
+ "ぱ": " p a",
278
+ "ぴ": " p i",
279
+ "ぷ": " p u",
280
+ "ぺ": " p e",
281
+ "ぽ": " p o",
282
+ "や": " y a",
283
+ "ゆ": " y u",
284
+ "よ": " y o",
285
+ "わ": " w a",
286
+ "ゐ": " i",
287
+ "ゑ": " e",
288
+ "ん": " N",
289
+ "っ": " q",
290
+ # ここまでに処理されてない ぁぃぅぇぉ はそのまま大文字扱い
291
+ "ぁ": " a",
292
+ "ぃ": " i",
293
+ "ぅ": " u",
294
+ "ぇ": " e",
295
+ "ぉ": " o",
296
+ "ゎ": " w a",
297
+ # 長音の処理
298
+ # for (pattern, replace_str) in JULIUS_LONG_VOWEL:
299
+ # text = pattern.sub(replace_str, text)
300
+ # text = text.replace("o u", "o:") # おう -> おーの音便
301
+ "ー": ":",
302
+ "〜": ":",
303
+ "−": ":",
304
+ "-": ":",
305
+ # その他特別な処理
306
+ "を": " o",
307
+ # ここまでに処理されていないゅ等もそのまま大文字扱い(追加)
308
+ "ゃ": " y a",
309
+ "ゅ": " y u",
310
+ "ょ": " y o",
311
+ }
312
+
313
+
314
def hiragana2p(txt: str) -> str:
    """
    Convert hiragana text to a space-separated phoneme string.

    Modification of `jaconv.hiragana2julius`.
    - avoid using `:`, instead, `あーーー` -> `a a a a`.
    - avoid converting `o u` to `o o` (because the input is already actual `yomi`).
    - avoid using `N` for `ん` (for compatibility)
    - use `v` for `ゔ` related text.
    - add bare `ゃ` `ゅ` `ょ` to `y a` `y u` `y o` (for compatibility).

    Characters that have no entry in `hiragana_map` are silently dropped.
    """

    pieces = []
    pos = 0
    while pos < len(txt):
        # Greedy longest match: try 3-, then 2-, then 1-character keys.
        for length in (3, 2, 1):
            chunk = txt[pos : pos + length]
            if chunk in hiragana_map:
                pieces.append(hiragana_map[chunk])
                pos += len(chunk)
                break
        else:
            # No table entry starts here; skip the character.
            pos += 1

    txt = "".join(pieces).strip()

    # NOTE: runs of ":" are deliberately NOT collapsed. Each ":" stands for one
    # extra mora, and the substitution below needs the exact count.

    # Up to here this matches `jaconv.hiragana2julius` except for the euphonic
    # and long-vowel handling. From here every ":" is expanded into a repeat of
    # the symbol before it, e.g. `k a:: k i:` -> `k a a a k i i`.
    def _expand_long_vowel(match):
        symbol, colons = match.group(1), match.group(2)
        return symbol + (" " + symbol) * len(colons)

    txt = re.sub(r"(\w)(:*)", _expand_long_vowel, txt)
    # `ん` is emitted as `N` by the table; lowercase it for compatibility.
    txt = txt.replace("N", "n")
    return txt
349
+
350
+
351
  def kata2phoneme(text: str) -> str:
352
  """Convert katakana text to phonemes."""
353
  text = text.strip()
 
367
  res.append(prev[-1])
368
  text = text[1:]
369
  continue
370
+ res += hiragana2p(jaconv.kata2hira(text)).split(" ")
371
  break
372
  # res = _COLON_RX.sub(":", res)
373
  return res
374
 
375
 
 
 
 
 
376
  _SYMBOL_TOKENS = set(list("・、。?!"))
377
  _NO_YOMI_TOKENS = set(list("「」『』―()[][]"))
378
  _MARKS = re.compile(
 
380
  )
381
 
382
 
383
+ def text2sep_kata(text: str):
384
  parsed = pyopenjtalk.run_frontend(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  res = []
386
  sep = []
387
  for parts in parsed:
 
410
  else:
411
  res.append(word)
412
  sep.append(word)
413
+ return sep, res, get_accent(parsed)
414
 
415
 
416
  def get_accent(parsed):
 
523
  return "".join([_ALPHASYMBOL_YOMI.get(ch, ch) for ch in text.lower()])
524
 
525
 
 
 
 
 
 
 
 
 
 
 
526
  def is_japanese_character(char):
527
  # 定义日语文字系统的 Unicode 范围
528
  japanese_ranges = [
text/japanese_bert.py CHANGED
@@ -13,8 +13,16 @@ tokenizer = AutoTokenizer.from_pretrained(LOCAL_PATH)
13
  models = dict()
14
 
15
 
16
- def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
 
 
 
 
 
 
17
  text = "".join(text2sep_kata(text)[0])
 
 
18
  if (
19
  sys.platform == "darwin"
20
  and torch.backends.mps.is_available()
@@ -31,12 +39,25 @@ def get_bert_feature(text, word2ph, device=config.bert_gen_config.device):
31
  inputs[i] = inputs[i].to(device)
32
  res = models[device](**inputs, output_hidden_states=True)
33
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
 
 
 
 
 
 
 
34
 
35
  assert len(word2ph) == len(text) + 2
36
  word2phone = word2ph
37
  phone_level_feature = []
38
  for i in range(len(word2phone)):
39
- repeat_feature = res[i].repeat(word2phone[i], 1)
 
 
 
 
 
 
40
  phone_level_feature.append(repeat_feature)
41
 
42
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
 
13
  models = dict()
14
 
15
 
16
+ def get_bert_feature(
17
+ text,
18
+ word2ph,
19
+ device=config.bert_gen_config.device,
20
+ style_text=None,
21
+ style_weight=0.7,
22
+ ):
23
  text = "".join(text2sep_kata(text)[0])
24
+ if style_text:
25
+ style_text = "".join(text2sep_kata(style_text)[0])
26
  if (
27
  sys.platform == "darwin"
28
  and torch.backends.mps.is_available()
 
39
  inputs[i] = inputs[i].to(device)
40
  res = models[device](**inputs, output_hidden_states=True)
41
  res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
42
+ if style_text:
43
+ style_inputs = tokenizer(style_text, return_tensors="pt")
44
+ for i in style_inputs:
45
+ style_inputs[i] = style_inputs[i].to(device)
46
+ style_res = models[device](**style_inputs, output_hidden_states=True)
47
+ style_res = torch.cat(style_res["hidden_states"][-3:-2], -1)[0].cpu()
48
+ style_res_mean = style_res.mean(0)
49
 
50
  assert len(word2ph) == len(text) + 2
51
  word2phone = word2ph
52
  phone_level_feature = []
53
  for i in range(len(word2phone)):
54
+ if style_text:
55
+ repeat_feature = (
56
+ res[i].repeat(word2phone[i], 1) * (1 - style_weight)
57
+ + style_res_mean.repeat(word2phone[i], 1) * style_weight
58
+ )
59
+ else:
60
+ repeat_feature = res[i].repeat(word2phone[i], 1)
61
  phone_level_feature.append(repeat_feature)
62
 
63
  phone_level_feature = torch.cat(phone_level_feature, dim=0)
text/tone_sandhi.py CHANGED
@@ -634,9 +634,11 @@ class ToneSandhi:
634
  # input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
635
  # output seg: [['听一听', 'v']]
636
  def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
637
- new_seg = []
638
  # function 1
639
- for i, (word, pos) in enumerate(seg):
 
 
640
  if (
641
  i - 1 >= 0
642
  and word == "一"
@@ -645,6 +647,7 @@ class ToneSandhi:
645
  and seg[i - 1][1] == "v"
646
  ):
647
  new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
 
648
  else:
649
  if (
650
  i - 2 >= 0
@@ -655,7 +658,8 @@ class ToneSandhi:
655
  continue
656
  else:
657
  new_seg.append([word, pos])
658
- seg = new_seg
 
659
  new_seg = []
660
  # function 2
661
  for i, (word, pos) in enumerate(seg):
 
634
  # input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
635
  # output seg: [['听一听', 'v']]
636
  def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
637
+ new_seg = [] * len(seg)
638
  # function 1
639
+ i = 0
640
+ while i < len(seg):
641
+ word, pos = seg[i]
642
  if (
643
  i - 1 >= 0
644
  and word == "一"
 
647
  and seg[i - 1][1] == "v"
648
  ):
649
  new_seg[i - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0]
650
+ i += 2
651
  else:
652
  if (
653
  i - 2 >= 0
 
658
  continue
659
  else:
660
  new_seg.append([word, pos])
661
+ i += 1
662
+ seg = [i for i in new_seg if len(i) > 0]
663
  new_seg = []
664
  # function 2
665
  for i, (word, pos) in enumerate(seg):
tools/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (193 Bytes). View file
 
tools/__pycache__/classify_language.cpython-311.pyc ADDED
Binary file (4.74 kB). View file
 
tools/__pycache__/sentence.cpython-311.pyc ADDED
Binary file (19.3 kB). View file
 
tools/__pycache__/translate.cpython-311.pyc ADDED
Binary file (2.68 kB). View file
 
tools/sentence.py CHANGED
@@ -1,173 +1,271 @@
1
- import logging
2
-
3
- import regex as re
4
-
5
- from tools.classify_language import classify_language, split_alpha_nonalpha
6
-
7
-
8
- def check_is_none(item) -> bool:
9
- """none -> True, not none -> False"""
10
- return (
11
- item is None
12
- or (isinstance(item, str) and str(item).isspace())
13
- or str(item) == ""
14
- )
15
-
16
-
17
- def markup_language(text: str, target_languages: list = None) -> str:
18
- pattern = (
19
- r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
20
- r"\!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
21
- r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
22
- )
23
- sentences = re.split(pattern, text)
24
-
25
- pre_lang = ""
26
- p = 0
27
-
28
- if target_languages is not None:
29
- sorted_target_languages = sorted(target_languages)
30
- if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
31
- new_sentences = []
32
- for sentence in sentences:
33
- new_sentences.extend(split_alpha_nonalpha(sentence))
34
- sentences = new_sentences
35
 
36
  for sentence in sentences:
37
- if check_is_none(sentence):
38
- continue
39
-
40
- lang = classify_language(sentence, target_languages)
41
-
42
- if pre_lang == "":
43
- text = text[:p] + text[p:].replace(
44
- sentence, f"[{lang.upper()}]{sentence}", 1
45
- )
46
- p += len(f"[{lang.upper()}]")
47
- elif pre_lang != lang:
48
- text = text[:p] + text[p:].replace(
49
- sentence, f"[{pre_lang.upper()}][{lang.upper()}]{sentence}", 1
50
- )
51
- p += len(f"[{pre_lang.upper()}][{lang.upper()}]")
52
- pre_lang = lang
53
- p += text[p:].index(sentence) + len(sentence)
54
- text += f"[{pre_lang.upper()}]"
55
-
56
- return text
57
-
58
-
59
- def split_by_language(text: str, target_languages: list = None) -> list:
60
- pattern = (
61
- r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`"
62
- r"\!?\。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」"
63
- r"『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+"
64
- )
65
- sentences = re.split(pattern, text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- pre_lang = ""
68
- start = 0
69
- end = 0
70
- sentences_list = []
71
 
72
- if target_languages is not None:
73
- sorted_target_languages = sorted(target_languages)
74
- if sorted_target_languages in [["en", "zh"], ["en", "ja"], ["en", "ja", "zh"]]:
75
- new_sentences = []
76
- for sentence in sentences:
77
- new_sentences.extend(split_alpha_nonalpha(sentence))
78
- sentences = new_sentences
79
 
80
- for sentence in sentences:
81
- if check_is_none(sentence):
82
- continue
83
-
84
- lang = classify_language(sentence, target_languages)
85
-
86
- end += text[end:].index(sentence)
87
- if pre_lang != "" and pre_lang != lang:
88
- sentences_list.append((text[start:end], pre_lang))
89
- start = end
90
- end += len(sentence)
91
- pre_lang = lang
92
- sentences_list.append((text[start:], pre_lang))
93
-
94
- return sentences_list
95
-
96
-
97
- def sentence_split(text: str, max: int) -> list:
98
- pattern = r"[!(),—+\-.:;??。,、;:]+"
99
- sentences = re.split(pattern, text)
100
- discarded_chars = re.findall(pattern, text)
101
-
102
- sentences_list, count, p = [], 0, 0
103
-
104
- # 按被分割的符号遍历
105
- for i, discarded_chars in enumerate(discarded_chars):
106
- count += len(sentences[i]) + len(discarded_chars)
107
- if count >= max:
108
- sentences_list.append(text[p : p + count].strip())
109
- p += count
110
- count = 0
111
-
112
- # 加入最后剩余的文本
113
- if p < len(text):
114
- sentences_list.append(text[p:])
115
-
116
- return sentences_list
117
-
118
-
119
- def sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None):
120
- # 如果该speaker只支持一种语言
121
- if speaker_lang is not None and len(speaker_lang) == 1:
122
- if lang.upper() not in ["AUTO", "MIX"] and lang.lower() != speaker_lang[0]:
123
- logging.debug(
124
- f'lang "{lang}" is not in speaker_lang {speaker_lang},automatically set lang={speaker_lang[0]}'
125
- )
126
- lang = speaker_lang[0]
127
-
128
- sentences_list = []
129
- if lang.upper() != "MIX":
130
- if max <= 0:
131
- sentences_list.append(
132
- markup_language(text, speaker_lang)
133
- if lang.upper() == "AUTO"
134
- else f"[{lang.upper()}]{text}[{lang.upper()}]"
135
- )
136
  else:
137
- for i in sentence_split(text, max):
138
- if check_is_none(i):
139
- continue
140
- sentences_list.append(
141
- markup_language(i, speaker_lang)
142
- if lang.upper() == "AUTO"
143
- else f"[{lang.upper()}]{i}[{lang.upper()}]"
144
- )
145
- else:
146
- sentences_list.append(text)
147
-
148
- for i in sentences_list:
149
- logging.debug(i)
150
-
151
- return sentences_list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  if __name__ == "__main__":
155
- text = "这几天心里颇不宁静。今晚在院子里坐着乘凉,忽然想起日日走过的荷塘,在这满月的光里,总该另有一番样子吧。月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;妻在屋里拍着闰儿,迷迷糊糊地哼着眠歌。我悄悄地披了大衫,带上门出去。"
156
- print(markup_language(text, target_languages=None))
157
- print(sentence_split(text, max=50))
158
- print(sentence_split_and_markup(text, max=50, lang="auto", speaker_lang=None))
159
-
160
- text = "你好,这是一段用来测试自动标注的文本。こんにちは,これは自動ラベリングのテスト用テキストです.Hello, this is a piece of text to test autotagging.你好!今天我们要介绍VITS项目,其重点是使用了GAN Duration predictor和transformer flow,并且接入了Bert模型来提升韵律。Bert embedding会在稍后介绍。"
161
- print(split_by_language(text, ["zh", "ja", "en"]))
162
-
163
- text = "vits和Bert-VITS2是tts模型。花费3days.花费3天。Take 3 days"
164
-
165
- print(split_by_language(text, ["zh", "ja", "en"]))
166
- # output: [('vits', 'en'), ('和', 'ja'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]
167
-
168
- print(split_by_language(text, ["zh", "en"]))
169
- # output: [('vits', 'en'), ('和', 'zh'), ('Bert-VITS', 'en'), ('2是', 'zh'), ('tts', 'en'), ('模型。花费3', 'zh'), ('days.', 'en'), ('花费3天。', 'zh'), ('Take 3 days', 'en')]
170
-
171
- text = "vits 和 Bert-VITS2 是 tts 模型。花费 3 days. 花费 3天。Take 3 days"
172
- print(split_by_language(text, ["zh", "en"]))
173
- # output: [('vits ', 'en'), ('和 ', 'zh'), ('Bert-VITS2 ', 'en'), ('是 ', 'zh'), ('tts ', 'en'), ('模型。花费 ', 'zh'), ('3 days. ', 'en'), ('花费 3天。', 'zh'), ('Take 3 days', 'en')]
 
1
+ import re, os
2
+
3
+ from ebooklib import epub
4
+ import PyPDF2
5
+ from PyPDF2 import PdfReader
6
+ from bs4 import BeautifulSoup
7
+ import jieba
8
+ import romajitable
9
+
10
def is_japanese(string):
    """Return True if `string` contains at least one Japanese kana character.

    The ranges match the ones used by the other language detectors in this
    module: U+3040–U+30FF (hiragana and katakana) and U+31F0–U+31FF (katakana
    phonetic extensions). Kanji are not considered here, so a kanji-only string
    returns False.
    """
    return any(
        "\u3040" <= ch <= "\u30ff" or "\u31f0" <= ch <= "\u31ff"
        for ch in string
    )
15
+
16
def is_chinese(string):
    """Return True if any character of `string` lies in U+4E00–U+9FFF (CJK unified ideographs)."""
    return any('\u4e00' <= symbol <= '\u9fff' for symbol in string)
21
+
22
def is_single_language(sentence):
    """Return True when exactly one script family occurs in `sentence`.

    The three families checked are CJK ideographs, Japanese kana and ASCII
    letters. A string containing none of them (digits, punctuation, empty)
    yields False, as does one mixing two or more.
    """
    script_patterns = (
        r'[\u4e00-\u9fff]',               # Chinese / kanji
        r'[\u3040-\u30ff\u31f0-\u31ff]',  # Japanese kana
        r'[a-zA-Z]',                      # English letters
    )
    present = [re.search(pattern, sentence) is not None for pattern in script_patterns]
    return sum(present) == 1
29
+
30
def merge_scattered_parts(sentences):
    """Glue fragmentary pieces together while keeping proper sentences intact.

    A piece counts as a proper sentence when it is single-language and longer
    than one character. Anything else (punctuation, single characters, mixed
    scraps) is accumulated in a buffer, which is flushed as one element just
    before the next proper sentence, or at the end of the input.
    """
    merged = []
    pending = ""

    for piece in sentences:
        standalone = is_single_language(piece) and len(piece) > 1
        if not standalone:
            # Fragment: keep collecting until a real sentence shows up.
            pending += piece
            continue
        if pending:
            merged.append(pending)
            pending = ""
        merged.append(piece)

    # Flush whatever fragments trail the last proper sentence.
    if pending:
        merged.append(pending)

    return merged
52
+
53
def is_only_punctuation(s):
    """Return True if `s` is non-empty and made up solely of whitespace and common
    Chinese, Japanese or English punctuation marks."""
    only_marks = re.match(r'^[\s。*;,:“”()、!?《》\u3000\.,;:"\'?!()]+$', s)
    return only_marks is not None
58
+
59
def split_mixed_language(sentence):
    """Split `sentence` into runs of a single script, scanning character by character.

    A new run starts whenever the script (Chinese, Japanese kana, English)
    changes. Punctuation, digits and other characters never start a run; they
    are appended to whichever run is currently open.

    NOTE(review): a second function named `split_mixed_language` is defined
    further down in this module and rebinds the name, so callers that resolve
    the name after import get that later definition, not this one.
    """
    script_patterns = (
        ('chinese', r'[\u4e00-\u9fff]'),
        ('japanese', r'[\u3040-\u30ff\u31f0-\u31ff]'),
        ('english', r'[a-zA-Z]'),
    )

    runs = []
    active_script = None
    run = ""

    for symbol in sentence:
        script = next(
            (name for name, pattern in script_patterns if re.match(pattern, symbol)),
            None,
        )
        if script is None or script == active_script:
            # Same script, or a neutral character: extend the open run.
            run += symbol
            continue
        # Script changed: close the open run (if any) and start a new one.
        if run:
            runs.append(run)
        run = symbol
        active_script = script

    if run:
        runs.append(run)

    return runs
 
 
 
98
 
99
def replace_quotes(text):
    """Normalise Chinese/Japanese quote marks, corner brackets and parentheses
    to a plain ASCII double quote."""
    quote_like = r'[“”‘’『』「」()()]'
    return re.sub(quote_like, '"', text)
 
 
 
103
 
104
def remove_numeric_annotations(text):
    """Strip footnote-style numeric markers from `text`.

    A marker is a run of digits wrapped in “ ”, 【 】 or 〔 〕; the wrapper and
    the digits are removed together.
    """
    marker = r'“\d+”|【\d+】|〔\d+〕'
    return re.sub(marker, '', text)
111
+
112
def merge_adjacent_japanese(sentences):
    """Concatenate consecutive elements that `is_japanese` accepts.

    Starting from an element that `is_japanese` accepts, every immediately
    following element that it also accepts is appended to it; all other
    elements pass through unchanged. Note that `is_japanese` only requires a
    string to contain Japanese characters, not to consist solely of them.
    """
    merged = []
    total = len(sentences)
    idx = 0
    while idx < total:
        run = sentences[idx]
        if is_japanese(run):
            # Absorb the following elements for as long as they are Japanese too.
            while idx + 1 < total and is_japanese(sentences[idx + 1]):
                idx += 1
                run += sentences[idx]
        merged.append(run)
        idx += 1
    return merged
126
+
127
def extrac(text):
    """Clean `text` and cut it into a list of short, mostly single-language sentences.

    Pipeline:
      1. drop numeric annotations, normalise quotes, strip HTML-like tags;
      2. split on newlines and sentence-ending punctuation (delimiters are kept
         as separate pieces because the pattern uses a capturing group);
      3. split pieces that mix languages;
      4. break over-long Chinese pieces via `split_long_sentences`;
      5. merge neighbouring Japanese pieces;
      6. discard punctuation-only pieces, remove remaining double quotes and
         surrounding whitespace.

    Returns:
        list[str]: the cleaned sentence fragments, in document order.
    """
    text = replace_quotes(remove_numeric_annotations(text))  # normalise quotes
    text = re.sub("<[^>]*>", "", text)  # remove HTML tags

    # First-pass split on line breaks and sentence punctuation (computed once).
    preliminary_sentences = re.split(r'([\n。;!?\.\?!])', text)

    final_sentences = []
    for piece in preliminary_sentences:
        if is_single_language(piece):
            final_sentences.append(piece)
        else:
            final_sentences.extend(split_mixed_language(piece))

    # Break up long sentences (jieba-based for Chinese).
    split_sentences = []
    for sentence in final_sentences:
        split_sentences.extend(split_long_sentences(sentence))

    # Merge adjacent Japanese sentences.
    merged_japanese_sentences = merge_adjacent_japanese(split_sentences)

    # Drop elements that contain nothing but punctuation.
    clean_sentences = [s for s in merged_japanese_sentences if not is_only_punctuation(s)]

    # Skip empty strings and strip leftover quotes / whitespace.
    return [s.replace('"', '').strip() for s in clean_sentences if s]
156
+
157
+
158
+
159
+ # 移除空字符串
160
+
161
def is_mixed_language(sentence):
    """Return True when `sentence` contains characters from at least two of:
    CJK ideographs, Japanese kana, ASCII letters."""
    script_patterns = (
        r'[\u4e00-\u9fff]',
        r'[\u3040-\u30ff\u31f0-\u31ff]',
        r'[a-zA-Z]',
    )
    found = 0
    for pattern in script_patterns:
        if re.search(pattern, sentence) is not None:
            found += 1
    return found > 1
167
+
168
def split_mixed_language(sentence):
    """Split `sentence` at quote boundaries and return the non-blank, stripped parts.

    Cuts are made at two kinds of zero-width positions: between
    sentence-ending punctuation and a following `"`, and between a `"` and a
    following CJK/kana/ASCII letter.

    NOTE(review): this definition shares its name with an earlier
    character-by-character splitter in the same module and replaces it at
    import time.
    """
    boundary = r'(?<=[。!?\.\?!])(?=")|(?<=")(?=[\u4e00-\u9fff\u3040-\u30ff\u31f0-\u31ff]|[a-zA-Z])'
    trimmed = (fragment.strip() for fragment in re.split(boundary, sentence))
    return [fragment for fragment in trimmed if fragment]
172
+
173
def seconds_to_ass_time(seconds):
    """Convert a duration in seconds to an ASS timestamp `H:MM:SS.cc`.

    Args:
        seconds: non-negative duration in seconds (int or float).

    Returns:
        str: hours, minutes, seconds and centiseconds; the fractional part is
        truncated, not rounded.
    """
    whole_seconds = int(seconds)
    # Take the fraction from the ORIGINAL value; doing it after the value has
    # been truncated to an int always yields zero.
    milliseconds = int((seconds - whole_seconds) * 1000)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return "{:01d}:{:02d}:{:02d}.{:02d}".format(hours, minutes, secs, milliseconds // 10)
180
+
181
def extract_text_from_epub(file_path):
    """Read the EPUB at `file_path` and return the text of its HTML items,
    joined with newlines in the order `book.items` yields them."""
    book = epub.read_epub(file_path)
    chapters = [
        BeautifulSoup(entry.content, 'html.parser').get_text()
        for entry in book.items
        if isinstance(entry, epub.EpubHtml)  # skip images, stylesheets, etc.
    ]
    return '\n'.join(chapters)
189
+
190
def extract_text_from_pdf(file_path):
    """Open the PDF at `file_path` and return the extracted text of every page,
    one page per newline-separated block."""
    with open(file_path, 'rb') as pdf_stream:
        page_texts = []
        # Extraction must happen while the underlying file is still open.
        for page in PdfReader(pdf_stream).pages:
            page_texts.append(page.extract_text())
    return '\n'.join(page_texts)
195
+
196
def remove_annotations(text):
    """Remove bracketed annotations from `text`.

    Strips anything enclosed in `[...]`, `<...>` or `【...】`. Matching is
    non-greedy and does not cross line breaks.
    """
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'<.*?>', '', text)
    # Still remove the literal citation placeholder the previous version
    # targeted, so existing inputs are cleaned exactly as before ...
    text = re.sub(r'&#8203;``【oaicite:1】``&#8203;', '', text)
    # ... and strip the 【...】 annotations themselves, which the placeholder
    # pattern never matched.
    text = re.sub(r'【.*?】', '', text)
    return text
202
 
203
def extract_text_from_file(inputFile):
    """Return the text content of `inputFile`, choosing a reader by extension.

    Supported (case-insensitive) extensions are `.epub`, `.pdf` and `.txt`
    (read as UTF-8).

    Raises:
        ValueError: for any other extension.
    """
    _, suffix = os.path.splitext(inputFile)
    file_extension = suffix.lower()

    if file_extension == ".epub":
        return extract_text_from_epub(inputFile)
    if file_extension == ".pdf":
        return extract_text_from_pdf(inputFile)
    if file_extension == ".txt":
        with open(inputFile, 'r', encoding='utf-8') as handle:
            return handle.read()
    raise ValueError(f"Unsupported file format: {file_extension}")
214
+
215
def split_by_punctuation(sentence):
    """Split `sentence` at commas and semicolons (full-width or ASCII).

    Each delimiter stays attached to the end of the text that precedes it, so
    no element consists of punctuation alone. A delimiter that appears before
    any text is discarded.
    """
    delimiters = ',,;;'
    chunks = []
    for token in re.split(r'([,,;;])', sentence):
        is_text = bool(token) and token not in delimiters
        if is_text:
            chunks.append(token)
        elif chunks:
            # Delimiter (or empty split artefact): glue onto the previous chunk.
            chunks[-1] = chunks[-1] + token
    return chunks
227
+
228
def split_long_sentences(sentence, max_length=30):
    """Break an over-long Chinese sentence into pieces of at most `max_length` characters.

    Sentences that are short enough, or that contain no Chinese characters,
    are returned unchanged as a one-element list. Otherwise the sentence is
    first split at commas/semicolons; any part that is still too long is
    re-assembled word by word from jieba's segmentation. A single word longer
    than `max_length` is kept whole.

    Args:
        sentence: text to split.
        max_length: maximum length of a piece, in characters.

    Returns:
        list[str]: the pieces, in order; never contains empty strings produced
        by the word-packing step.
    """
    if len(sentence) <= max_length or not is_chinese(sentence):
        return [sentence]  # short or non-Chinese: nothing to do

    new_sentences = []
    # First try the secondary punctuation marks.
    for part in split_by_punctuation(sentence):
        if len(part) <= max_length:
            new_sentences.append(part)
            continue

        # Still too long: pack jieba words greedily into chunks.
        current_sentence = ""
        for word in jieba.lcut(part):
            if len(current_sentence) + len(word) > max_length:
                # Guard against emitting "" when the very first word is
                # already longer than max_length.
                if current_sentence:
                    new_sentences.append(current_sentence)
                current_sentence = word
            else:
                current_sentence += word
        if current_sentence:
            new_sentences.append(current_sentence)

    return new_sentences
253
+
254
def extract_and_convert(text):
    """Replace every standalone English word in `text` with its katakana rendering.

    A word is a run of ASCII letters delimited by `\\b` word boundaries. Each
    one is converted with `romajitable.to_kana(...).katakana` and inserted on
    its own line (wrapped in newlines).

    The substitution is done at the position of each match, so a word is never
    replaced inside a different token that merely contains the same letters
    (e.g. the `a` in `a1b` is left alone when converting a later standalone `a`).

    NOTE(review): `\\b` treats CJK characters as word characters, so an English
    word directly adjacent to Chinese/Japanese text is not matched — confirm
    whether that is intended.
    """

    def _to_katakana(match):
        return '\n{}\n'.format(romajitable.to_kana(match.group(0)).katakana)

    return re.sub(r'\b[A-Za-z]+\b', _to_katakana, text)
267
 
268
  if __name__ == "__main__":
269
+ text = ",如“520”,【23】和〔83〕等。.我亲爱的读者,你也许在某一刻会遇上这样的情形,不禁对那著名哲学句子“那内在的就是那外在的,那外在的就是那内在的”“3”的正确性有了或多或少的怀疑。也许你自己就怀着某种秘密,对之你有着这样一种感觉:因为这秘密在它所具有的喜悦或者痛楚对你来说是太亲切了,以至于你不愿意让他人来和你共享它。也许你的生活使得你和一些人有所接触,对于他们你有着某种预感,隐约感觉到如此的某些事情是可能的,尽管你并不一定能够通过权力或者诱惑来揭示这隐秘。也许你感受到的这些情形并不对你和你的生活发生作用,然而你对这种怀疑却不陌生;它时而在你的思绪中像一种匆匆的形影飘忽而过。这样的一种怀疑来而又去,没有人知道它从哪里来或者它到什么地方去“4”。就我自己而言,我一直对哲学的这一点怀有一种异端的想法,并且因此也尽可能地习惯于自己去深思和考究;我从在这方面与我有同感的作家们那里听取了指导,简言之,我尽了我的努力来弥补那些哲学文本们所遗留下的匮乏。渐渐地,听觉对于我来说倒成了最亲密的感觉功能;因为,正如声音是那相对外在之物而言是无法比较的内在性的揭示,于是耳朵就是用来使这内在性得以被人领会的工具,而听觉就是用来获取这内在性的感觉功能的。每当我在我所见和所听之间发现一个矛盾时,我就觉得我的怀疑得到了强化,而我的观察愿望得到了放大。一个听忏悔的神父与忏悔者之间有窗格子隔开,这神父不看,他只是听。听着听着,他渐渐构想出一个与此相应的外在;这就是说,他不会进入矛盾。相反,在你同时看和听的时候则不同,你看着的是你和言述者之间的一道窗格子。就结果而言,我为在这方面进行观察而做出的努力是非常不同的。有时候我是幸运的,有时候则不,而想要在这些道路上赢得一些战利品,幸运总是一个必须被考虑进去的因素。然而我却从来没有失去继续进行我的调查研究的愿望。如果我真的在什么时候几乎对我的坚定感到了懊悔,那么一种意外幸运也就在这样的时候为我的努力进行了加冕。于是这就是一种意外的幸运,它以一种最奇怪的方式使得我拥有了这些文稿,因而我荣幸地在此向阅读着的关注者们展示这些文稿。在这些文稿中,我得到机会去审视进两个人的生活,这强化了我关于“那外在的不是那内在的”的怀疑。尤其是他们中的一个有着这样的情形。他的外在完全与他的内在相矛盾。而他们中另一个的情形在一定的程度上也是如此,只要他在一种较为无足轻重的外在之下隐藏起了一种更���意义重大的内在,那么他就是处在这样的矛盾中。也许,考虑到顺序,我最好还是先讲述一下,我是怎样获得这些文稿的。现在算来,差不多是在七年前,我在城里的一个旧货商家那里留意到一张文书写字柜“5”,一见之下,它就吸引了我的注意力。它不是出自现代的工艺,很陈旧,但它还是吸引住了我。要解说这一印象的依据,对于我来说是不可能的,但是大多数人在他们的生命中肯定也曾经历过类似的情形。我每天的路径使我经过那旧货商和他的柜桌,在任何一天经过那里时我都从不曾放过时机盯着它看。渐渐地,这个文书写字柜在我心中有了它的故事;看着它,对于我来说成了一种必然,到最后,即使是在我有必要走另一条路的时候,我也毫不犹豫地为它的缘故而绕一段远路。由于我总这样看它,它在我心中也渐渐唤醒一种想要拥有它的愿望。其实我完全能感觉到,这是一种奇怪的愿望,既然我并不需要这家具;对于我来说,买下它就是一种浪费。正如我们所知,愿望有着一种非常诡辩性的说服力。我去了那旧货商家,推说是询问一些别的东西,在我要离开的时候,我漫不经心地就那张文书写字柜问了一个非常低的价钱。我想着,那旧货商人可能会抬价。如果是那个价,那我就占了便宜。不管怎么说,我这样做不是为了钱的缘故,而是为了要在良心上说得过去。但没有成功,那旧货商人有着一种非同寻常的坚定。又是很长一段时间,我每天都去那里,然后以一种钟情着迷的目光看着这文书写字柜。你必须下决心,我寻思着,试想一下,如果它被卖掉了,那就太晚了;哪怕你终于又找到它,你也永远得不到对它的这种印象了。在我走进旧货商家的时候,我的心狂跳着。买下了它,付了钱。这是最后一次了,我想着,你这么浪费;对了,你买下它,这恰恰是一种幸运,因为你这么老是看着它,你就该想着你曾是多么浪费,以这个文书写字柜为起点,你生活中该有一个新的段落开始了。啊,愿望有着一种非常诡辩性的说服力,那些良好的意图总是现成地摆在那里。另外参看阿德勒尔(A.P.Adler)的《对黑格尔的客观逻辑的普及讲演》。“5”[文书写字柜(Secretair)] 法国式柜子,有着许多小的、有时是隐秘的抽屉用于保存文件,并且有一块垂直翻板可以拴出来并且当写字台用。"
270
+ #print("原文本:", text)
271
+ print("处理后的文本:", extrac(text))