HaiLua committed on
Commit
3f961a9
1 Parent(s): e70b91c

Upload 5 files

Files changed (5)
  1. config.py +88 -0
  2. gitignore.txt +382 -0
  3. hubert_base.pt +3 -0
  4. requirements.txt +46 -0
  5. vc_infer_pipeline.py +306 -0
config.py ADDED
@@ -0,0 +1,88 @@
+ ######################## Hardware parameters ########################
+
+ # Set to cuda:x, cpu, or mps, where x is the GPU index; only NVIDIA GPUs / Apple Silicon acceleration is supported.
+ device = "cuda:0"
+
+ # For 9/10/20/30/40-series NVIDIA GPUs just leave this True; it does not affect quality, and 20-series or newer cards get a speedup.
+ is_half = True
+
+ # Default 0 uses all threads; set a number to limit CPU usage.
+ n_cpu = 0
+
+ ######################## Hardware parameters ########################
+
+
+ ################## Parameter-processing logic below, do not edit ##################
+
+ ######################## Command-line arguments ########################
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--port", type=int, default=7865, help="Listen port")
+ parser.add_argument("--pycmd", type=str, default="python", help="Python command")
+ parser.add_argument("--colab", action="store_true", help="Launch in colab")
+ parser.add_argument(
+     "--noparallel", action="store_true", help="Disable parallel processing"
+ )
+ parser.add_argument(
+     "--noautoopen", action="store_true", help="Do not open in browser automatically"
+ )
+ cmd_opts, unknown = parser.parse_known_args()
+
+ python_cmd = cmd_opts.pycmd
+ listen_port = cmd_opts.port
+ iscolab = cmd_opts.colab
+ noparallel = cmd_opts.noparallel
+ noautoopen = cmd_opts.noautoopen
+ ######################## Command-line arguments ########################
+
+ import sys
+ import torch
+
+
+ # has_mps is only available in nightly pytorch (for now) and macOS 12.3+.
+ # check `getattr` and try it for compatibility
+ def has_mps() -> bool:
+     if sys.platform != "darwin":
+         return False
+     else:
+         if not getattr(torch, "has_mps", False):
+             return False
+         try:
+             torch.zeros(1).to(torch.device("mps"))
+             return True
+         except Exception:
+             return False
+
+
+ if not torch.cuda.is_available():
+     if has_mps():
+         print("No supported NVIDIA GPU found, using MPS for inference")
+         device = "mps"
+     else:
+         print("No supported NVIDIA GPU found, using CPU for inference")
+         device = "cpu"
+         is_half = False
+
+ if device not in ["cpu", "mps"]:
+     gpu_name = torch.cuda.get_device_name(int(device.split(":")[-1]))
+     if "16" in gpu_name or "MX" in gpu_name:
+         print("16-series / MX-series GPUs are forced to single precision")
+         is_half = False
+
+ from multiprocessing import cpu_count
+
+ if n_cpu == 0:
+     n_cpu = cpu_count()
+ if is_half:
+     # Settings for 6 GB VRAM
+     x_pad = 3
+     x_query = 10
+     x_center = 60
+     x_max = 65
+ else:
+     # Settings for 5 GB VRAM
+     x_pad = 1
+     x_query = 6
+     x_center = 38
+     x_max = 41
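
Note: config.py is imported for its side effects as well as its values; the command-line flags are parsed and the device is resolved at import time. Below is a minimal sketch (not part of this commit) of how downstream code consumes the resolved values; the import of x_pad/x_query/x_center/x_max mirrors what vc_infer_pipeline.py does, while the print lines are purely illustrative.

from config import device, is_half, x_pad, x_query, x_center, x_max

# device / is_half have already been downgraded to "cpu" / False at import time
# if neither a supported NVIDIA GPU nor MPS was found.
print(device, is_half)

# Segmentation constants chosen for roughly 5-6 GB of VRAM; consumed by vc_infer_pipeline.VC.
print(x_pad, x_query, x_center, x_max)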
gitignore.txt ADDED
@@ -0,0 +1,382 @@
+ ## Ignore Visual Studio temporary files, build results, and
+ ## files generated by popular Visual Studio add-ons.
+ ##
+ ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+
+ # User-specific files
+ *.rsuser
+ *.suo
+ *.user
+ *.userosscache
+ *.sln.docstates
+
+ # User-specific files (MonoDevelop/Xamarin Studio)
+ *.userprefs
+
+ # Mono auto generated files
+ mono_crash.*
+
+ # Build results
+ [Dd]ebug/
+ [Dd]ebugPublic/
+ [Rr]elease/
+ [Rr]eleases/
+ x64/
+ x86/
+ [Ww][Ii][Nn]32/
+ [Aa][Rr][Mm]/
+ [Aa][Rr][Mm]64/
+ bld/
+ [Bb]in/
+ [Oo]bj/
+ [Oo]ut/
+ [Ll]og/
+ [Ll]ogs/
+ infer_pack/__pycache__
+ # Visual Studio 2015/2017 cache/options directory
+ .vs/
+ # Uncomment if you have tasks that create the project's static files in wwwroot
+ #wwwroot/
+
+ # Visual Studio 2017 auto generated files
+ Generated\ Files/
+
+ # MSTest test Results
+ [Tt]est[Rr]esult*/
+ [Bb]uild[Ll]og.*
+
+ # NUnit
+ *.VisualState.xml
+ TestResult.xml
+ nunit-*.xml
+
+ # Build Results of an ATL Project
+ [Dd]ebugPS/
+ [Rr]eleasePS/
+ dlldata.c
+
+ # Benchmark Results
+ BenchmarkDotNet.Artifacts/
+
+ # .NET Core
+ project.lock.json
+ project.fragment.lock.json
+ artifacts/
+
+ # ASP.NET Scaffolding
+ ScaffoldingReadMe.txt
+
+ # StyleCop
+ StyleCopReport.xml
+
+ # Files built by Visual Studio
+ *_i.c
+ *_p.c
+ *_h.h
+ *.ilk
+ *.meta
+ *.obj
+ *.iobj
+ *.pch
+ *.pdb
+ *.ipdb
+ *.pgc
+ *.pgd
+ *.rsp
+ *.sbr
+ *.tlb
+ *.tli
+ *.tlh
+ *.tmp
+ *.tmp_proj
+ *_wpftmp.csproj
+ *.log
+ *.vspscc
+ *.vssscc
+ .builds
+ *.pidb
+ *.svclog
+ *.scc
+
+ # Chutzpah Test files
+ _Chutzpah*
+
+ # Visual C++ cache files
+ ipch/
+ *.aps
+ *.ncb
+ *.opendb
+ *.opensdf
+ *.sdf
+ *.cachefile
+ *.VC.db
+ *.VC.VC.opendb
+
+ # Visual Studio profiler
+ *.psess
+ *.vsp
+ *.vspx
+ *.sap
+
+ # Visual Studio Trace Files
+ *.e2e
+
+ # TFS 2012 Local Workspace
+ $tf/
+
+ # Guidance Automation Toolkit
+ *.gpState
+
+ # ReSharper is a .NET coding add-in
+ _ReSharper*/
+ *.[Rr]e[Ss]harper
+ *.DotSettings.user
+
+ # TeamCity is a build add-in
+ _TeamCity*
+
+ # DotCover is a Code Coverage Tool
+ *.dotCover
+
+ # AxoCover is a Code Coverage Tool
+ .axoCover/*
+ !.axoCover/settings.json
+
+ # Coverlet is a free, cross platform Code Coverage Tool
+ coverage*.json
+ coverage*.xml
+ coverage*.info
+
+ # Visual Studio code coverage results
+ *.coverage
+ *.coveragexml
+
+ # NCrunch
+ _NCrunch_*
+ .*crunch*.local.xml
+ nCrunchTemp_*
+
+ # MightyMoose
+ *.mm.*
+ AutoTest.Net/
+
+ # Web workbench (sass)
+ .sass-cache/
+
+ # Installshield output folder
+ [Ee]xpress/
+
+ # DocProject is a documentation generator add-in
+ DocProject/buildhelp/
+ DocProject/Help/*.HxT
+ DocProject/Help/*.HxC
+ DocProject/Help/*.hhc
+ DocProject/Help/*.hhk
+ DocProject/Help/*.hhp
+ DocProject/Help/Html2
+ DocProject/Help/html
+
+ # Click-Once directory
+ publish/
+
+ # Publish Web Output
+ *.[Pp]ublish.xml
+ *.azurePubxml
+ # Note: Comment the next line if you want to checkin your web deploy settings,
+ # but database connection strings (with potential passwords) will be unencrypted
+ *.pubxml
+ *.publishproj
+
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
+ # checkin your Azure Web App publish settings, but sensitive information contained
+ # in these scripts will be unencrypted
+ PublishScripts/
+
+ # NuGet Packages
+ *.nupkg
+ # NuGet Symbol Packages
+ *.snupkg
+ # The packages folder can be ignored because of Package Restore
+ **/[Pp]ackages/*
+ # except build/, which is used as an MSBuild target.
+ !**/[Pp]ackages/build/
+ # Uncomment if necessary however generally it will be regenerated when needed
+ #!**/[Pp]ackages/repositories.config
+ # NuGet v3's project.json files produces more ignorable files
+ *.nuget.props
+ *.nuget.targets
+
+ # Microsoft Azure Build Output
+ csx/
+ *.build.csdef
+
+ # Microsoft Azure Emulator
+ ecf/
+ rcf/
+
+ # Windows Store app package directories and files
+ AppPackages/
+ BundleArtifacts/
+ Package.StoreAssociation.xml
+ _pkginfo.txt
+ *.appx
+ *.appxbundle
+ *.appxupload
+
+ # Visual Studio cache files
+ # files ending in .cache can be ignored
+ *.[Cc]ache
+ # but keep track of directories ending in .cache
+ !?*.[Cc]ache/
+
+ # Others
+ ClientBin/
+ ~$*
+ *~
+ *.dbmdl
+ *.dbproj.schemaview
+ *.jfm
+ *.pfx
+ *.publishsettings
+ orleans.codegen.cs
+
+ # Including strong name files can present a security risk
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
+ #*.snk
+
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+ #bower_components/
+
+ # RIA/Silverlight projects
+ Generated_Code/
+
+ # Backup & report files from converting an old project file
+ # to a newer Visual Studio version. Backup files are not needed,
+ # because we have git ;-)
+ _UpgradeReport_Files/
+ Backup*/
+ UpgradeLog*.XML
+ UpgradeLog*.htm
+ ServiceFabricBackup/
+ *.rptproj.bak
+
+ # SQL Server files
+ *.mdf
+ *.ldf
+ *.ndf
+
+ # Business Intelligence projects
+ *.rdl.data
+ *.bim.layout
+ *.bim_*.settings
+ *.rptproj.rsuser
+ *- [Bb]ackup.rdl
+ *- [Bb]ackup ([0-9]).rdl
+ *- [Bb]ackup ([0-9][0-9]).rdl
+
+ # Microsoft Fakes
+ FakesAssemblies/
+
+ # GhostDoc plugin setting file
+ *.GhostDoc.xml
+
+ # Node.js Tools for Visual Studio
+ .ntvs_analysis.dat
+ node_modules/
+
+ # Visual Studio 6 build log
+ *.plg
+
+ # Visual Studio 6 workspace options file
+ *.opt
+
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+ *.vbw
+
+ # Visual Studio LightSwitch build output
+ **/*.HTMLClient/GeneratedArtifacts
+ **/*.DesktopClient/GeneratedArtifacts
+ **/*.DesktopClient/ModelManifest.xml
+ **/*.Server/GeneratedArtifacts
+ **/*.Server/ModelManifest.xml
+ _Pvt_Extensions
+
+ # Paket dependency manager
+ .paket/paket.exe
+ paket-files/
+
+ # FAKE - F# Make
+ .fake/
+
+ # CodeRush personal settings
+ .cr/personal
+
+ # Python Tools for Visual Studio (PTVS)
+ __pycache__/
+
+
+ # Cake - Uncomment if you are using it
+ # tools/**
+ # !tools/packages.config
+
+ # Tabs Studio
+ *.tss
+
+ # Telerik's JustMock configuration file
+ *.jmconfig
+
+ # BizTalk build output
+ *.btp.cs
+ *.btm.cs
+ *.odx.cs
+ *.xsd.cs
+
+ # OpenCover UI analysis results
+ OpenCover/
+
+ # Azure Stream Analytics local run output
+ ASALocalRun/
+
+ # MSBuild Binary and Structured Log
+ *.binlog
+
+ # NVidia Nsight GPU debugger configuration file
+ *.nvuser
+
+ # MFractors (Xamarin productivity tool) working folder
+ .mfractor/
+
+ # Local History for Visual Studio
+ .localhistory/
+
+ # BeatPulse healthcheck temp database
+ healthchecksdb
+
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
+ MigrationBackup/
+
+ # Ionide (cross platform F# VS Code tools) working folder
+ .ionide/
+
+ # Fody - auto-generated XML schema
+ FodyWeavers.xsd
+
+ # build
+ build
+ monotonic_align/core.c
+ *.o
+ *.so
+ *.dll
+
+ # data
+ /config.json
+ /*.pth
+ *.wav
+ /monotonic_align/monotonic_align
+ /resources
+ /MoeGoe.spec
+ /dist/MoeGoe
+ /dist
+
+ .idea
hubert_base.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f54b40fd2802423a5643779c4861af1e9ee9c1564dc9d32f54f20b5ffba7db96
+ size 189507909
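
hubert_base.pt is stored as a Git LFS pointer; the actual ~189 MB HuBERT checkpoint is fetched by git-lfs rather than committed inline. For orientation, a rough sketch of how such a checkpoint is typically loaded with fairseq 0.12.2 (the pinned version in requirements.txt) is shown below; the exact loading code is not part of this commit and is only an assumption.

import torch
from fairseq import checkpoint_utils

# Load the HuBERT feature extractor; fairseq returns (models, cfg, task).
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"])
hubert = models[0].eval()
if torch.cuda.is_available():
    hubert = hubert.half().to("cuda:0")  # matches is_half=True / device="cuda:0" in config.py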
requirements.txt ADDED
@@ -0,0 +1,46 @@
+ numba==0.56.4
+ numpy==1.23.5
+ scipy==1.9.3
+ librosa==0.9.2
+ llvmlite==0.39.0
+ fairseq==0.12.2
+ faiss-cpu==1.7.0; sys_platform == "darwin"
+ faiss-cpu==1.7.2; sys_platform != "darwin"
+ gradio
+ Cython
+ future>=0.18.3
+ pydub>=0.25.1
+ soundfile>=0.12.1
+ ffmpeg-python>=0.2.0
+ tensorboardX
+ functorch>=2.0.0
+ Jinja2>=3.1.2
+ json5>=0.9.11
+ Markdown
+ matplotlib>=3.7.1
+ matplotlib-inline>=0.1.6
+ praat-parselmouth>=0.4.3
+ Pillow>=9.1.1
+ pyworld>=0.3.2
+ resampy>=0.4.2
+ scikit-learn>=1.2.2
+ starlette>=0.26.1
+ tensorboard
+ tensorboard-data-server
+ tensorboard-plugin-wit
+ torchgen>=0.0.1
+ tqdm>=4.65.0
+ tornado>=6.2
+ Werkzeug>=2.2.3
+ uc-micro-py>=1.0.1
+ sympy>=1.11.1
+ tabulate>=0.9.0
+ PyYAML>=6.0
+ pyasn1>=0.4.8
+ pyasn1-modules>=0.2.8
+ fsspec>=2023.3.0
+ absl-py>=1.4.0
+ audioread
+ uvicorn>=0.21.1
+ colorama>=0.4.6
+ edge-tts
vc_infer_pipeline.py ADDED
@@ -0,0 +1,306 @@
+ import numpy as np, parselmouth, torch
+ from time import time as ttime
+ import torch.nn.functional as F
+ from config import x_pad, x_query, x_center, x_max
+ import scipy.signal as signal
+ import pyworld, os, traceback, faiss
+ from scipy import signal
+
+ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
+
+
+ class VC(object):
+     def __init__(self, tgt_sr, device, is_half):
+         self.sr = 16000  # HuBERT input sample rate
+         self.window = 160  # samples per frame
+         self.t_pad = self.sr * x_pad  # padding added before and after each segment
+         self.t_pad_tgt = tgt_sr * x_pad
+         self.t_pad2 = self.t_pad * 2
+         self.t_query = self.sr * x_query  # search window around each candidate cut point
+         self.t_center = self.sr * x_center  # spacing between candidate cut points
+         self.t_max = self.sr * x_max  # below this length, no cutting is done
+         self.device = device
+         self.is_half = is_half
+
+     def get_f0(self, x, p_len, f0_up_key, f0_method, inp_f0=None):
+         time_step = self.window / self.sr * 1000
+         f0_min = 50
+         f0_max = 1100
+         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+         f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+         if f0_method == "pm":
+             f0 = (
+                 parselmouth.Sound(x, self.sr)
+                 .to_pitch_ac(
+                     time_step=time_step / 1000,
+                     voicing_threshold=0.6,
+                     pitch_floor=f0_min,
+                     pitch_ceiling=f0_max,
+                 )
+                 .selected_array["frequency"]
+             )
+             pad_size = (p_len - len(f0) + 1) // 2
+             if pad_size > 0 or p_len - len(f0) - pad_size > 0:
+                 f0 = np.pad(
+                     f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
+                 )
+         elif f0_method == "harvest":
+             f0, t = pyworld.harvest(
+                 x.astype(np.double),
+                 fs=self.sr,
+                 f0_ceil=f0_max,
+                 f0_floor=f0_min,
+                 frame_period=10,
+             )
+             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
+             f0 = signal.medfilt(f0, 3)
+         f0 *= pow(2, f0_up_key / 12)
+         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
+         tf0 = self.sr // self.window  # number of f0 points per second
+         if inp_f0 is not None:
+             delta_t = np.round(
+                 (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
+             ).astype("int16")
+             replace_f0 = np.interp(
+                 list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
+             )
+             shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
+             f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
+         # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
+         f0bak = f0.copy()
+         f0_mel = 1127 * np.log(1 + f0 / 700)
+         f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
+             f0_mel_max - f0_mel_min
+         ) + 1
+         f0_mel[f0_mel <= 1] = 1
+         f0_mel[f0_mel > 255] = 255
+         f0_coarse = np.rint(f0_mel).astype(int)
+         return f0_coarse, f0bak  # 1-0
+
+     def vc(
+         self,
+         model,
+         net_g,
+         sid,
+         audio0,
+         pitch,
+         pitchf,
+         times,
+         index,
+         big_npy,
+         index_rate,
+     ):  # ,file_index,file_big_npy
+         feats = torch.from_numpy(audio0)
+         if self.is_half:
+             feats = feats.half()
+         else:
+             feats = feats.float()
+         if feats.dim() == 2:  # double channels
+             feats = feats.mean(-1)
+         assert feats.dim() == 1, feats.dim()
+         feats = feats.view(1, -1)
+         padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
+
+         inputs = {
+             "source": feats.to(self.device),
+             "padding_mask": padding_mask,
+             "output_layer": 9,  # layer 9
+         }
+         t0 = ttime()
+         with torch.no_grad():
+             logits = model.extract_features(**inputs)
+             feats = model.final_proj(logits[0])
+
+         if (
+             index is not None
+             and big_npy is not None
+             and index_rate != 0
+         ):
+             npy = feats[0].cpu().numpy()
+             if self.is_half:
+                 npy = npy.astype("float32")
+             _, I = index.search(npy, 1)
+             npy = big_npy[I.squeeze()]
+             if self.is_half:
+                 npy = npy.astype("float16")
+             feats = (
+                 torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
+                 + (1 - index_rate) * feats
+             )
+
+         feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
+         t1 = ttime()
+         p_len = audio0.shape[0] // self.window
+         if feats.shape[1] < p_len:
+             p_len = feats.shape[1]
+             if pitch is not None and pitchf is not None:
+                 pitch = pitch[:, :p_len]
+                 pitchf = pitchf[:, :p_len]
+         p_len = torch.tensor([p_len], device=self.device).long()
+         with torch.no_grad():
+             if pitch is not None and pitchf is not None:
+                 audio1 = (
+                     (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
+                     .data.cpu()
+                     .float()
+                     .numpy()
+                     .astype(np.int16)
+                 )
+             else:
+                 audio1 = (
+                     (net_g.infer(feats, p_len, sid)[0][0, 0] * 32768)
+                     .data.cpu()
+                     .float()
+                     .numpy()
+                     .astype(np.int16)
+                 )
+         del feats, p_len, padding_mask
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         t2 = ttime()
+         times[0] += t1 - t0
+         times[2] += t2 - t1
+         return audio1
+
+     def pipeline(
+         self,
+         model,
+         net_g,
+         sid,
+         audio,
+         times,
+         f0_up_key,
+         f0_method,
+         file_index,
+         file_big_npy,
+         index_rate,
+         if_f0,
+         f0_file=None,
+     ):
+         if (
+             file_big_npy != ""
+             and file_index != ""
+             and os.path.exists(file_big_npy)
+             and os.path.exists(file_index)
+             and index_rate != 0
+         ):
+             try:
+                 index = faiss.read_index(file_index)
+                 big_npy = np.load(file_big_npy)
+             except:
+                 traceback.print_exc()
+                 index = big_npy = None
+         else:
+             index = big_npy = None
+             print("Feature retrieval library doesn't exist or ratio is 0")
+         audio = signal.filtfilt(bh, ah, audio)
+         audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
+         opt_ts = []
+         if audio_pad.shape[0] > self.t_max:
+             audio_sum = np.zeros_like(audio)
+             for i in range(self.window):
+                 audio_sum += audio_pad[i : i - self.window]
+             for t in range(self.t_center, audio.shape[0], self.t_center):
+                 opt_ts.append(
+                     t
+                     - self.t_query
+                     + np.where(
+                         np.abs(audio_sum[t - self.t_query : t + self.t_query])
+                         == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
+                     )[0][0]
+                 )
+         s = 0
+         audio_opt = []
+         t = None
+         t1 = ttime()
+         audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
+         p_len = audio_pad.shape[0] // self.window
+         inp_f0 = None
+         if hasattr(f0_file, "name"):
+             try:
+                 with open(f0_file.name, "r") as f:
+                     lines = f.read().strip("\n").split("\n")
+                 inp_f0 = []
+                 for line in lines:
+                     inp_f0.append([float(i) for i in line.split(",")])
+                 inp_f0 = np.array(inp_f0, dtype="float32")
+             except:
+                 traceback.print_exc()
+         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
+         pitch, pitchf = None, None
+         if if_f0 == 1:
+             pitch, pitchf = self.get_f0(audio_pad, p_len, f0_up_key, f0_method, inp_f0)
+             pitch = pitch[:p_len]
+             pitchf = pitchf[:p_len]
+             pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
+             pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
+         t2 = ttime()
+         times[1] += t2 - t1
+         for t in opt_ts:
+             t = t // self.window * self.window
+             if if_f0 == 1:
+                 audio_opt.append(
+                     self.vc(
+                         model,
+                         net_g,
+                         sid,
+                         audio_pad[s : t + self.t_pad2 + self.window],
+                         pitch[:, s // self.window : (t + self.t_pad2) // self.window],
+                         pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
+                         times,
+                         index,
+                         big_npy,
+                         index_rate,
+                     )[self.t_pad_tgt : -self.t_pad_tgt]
+                 )
+             else:
+                 audio_opt.append(
+                     self.vc(
+                         model,
+                         net_g,
+                         sid,
+                         audio_pad[s : t + self.t_pad2 + self.window],
+                         None,
+                         None,
+                         times,
+                         index,
+                         big_npy,
+                         index_rate,
+                     )[self.t_pad_tgt : -self.t_pad_tgt]
+                 )
+             s = t
+         if if_f0 == 1:
+             audio_opt.append(
+                 self.vc(
+                     model,
+                     net_g,
+                     sid,
+                     audio_pad[t:],
+                     pitch[:, t // self.window :] if t is not None else pitch,
+                     pitchf[:, t // self.window :] if t is not None else pitchf,
+                     times,
+                     index,
+                     big_npy,
+                     index_rate,
+                 )[self.t_pad_tgt : -self.t_pad_tgt]
+             )
+         else:
+             audio_opt.append(
+                 self.vc(
+                     model,
+                     net_g,
+                     sid,
+                     audio_pad[t:],
+                     None,
+                     None,
+                     times,
+                     index,
+                     big_npy,
+                     index_rate,
+                 )[self.t_pad_tgt : -self.t_pad_tgt]
+             )
+         audio_opt = np.concatenate(audio_opt)
+         del pitch, pitchf, sid
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         return audio_opt
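
For orientation, a minimal, hypothetical usage sketch of the VC class above follows. hubert_model, net_g, the target sample rate (40000), and the silent input audio are placeholders rather than values taken from this commit; the argument order follows the pipeline() signature above, and times is a three-element list that the methods update in place with feature-extraction, f0, and synthesis timings.

import numpy as np
from vc_infer_pipeline import VC

hubert_model = ...  # loaded HuBERT encoder (see hubert_base.pt above), assumed to exist
net_g = ...         # trained voice-conversion generator, assumed to exist

vc = VC(tgt_sr=40000, device="cuda:0", is_half=True)  # tgt_sr is illustrative
audio = np.zeros(16000 * 5, dtype=np.float32)  # stand-in for 5 s of 16 kHz input audio
times = [0.0, 0.0, 0.0]

out = vc.pipeline(
    hubert_model,
    net_g,
    0,      # sid: target speaker id
    audio,
    times,
    0,      # f0_up_key: pitch shift in semitones
    "pm",   # f0_method: "pm" or "harvest"
    "",     # file_index: empty string skips faiss feature retrieval
    "",     # file_big_npy
    0.0,    # index_rate
    1,      # if_f0: 1 to use pitch guidance
)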