NeoPy committed on
Commit
0a6f6ac
·
verified ·
1 Parent(s): b74f750

Update infer/lib/predictors/Generator.py

Browse files
Files changed (1) hide show
  1. infer/lib/predictors/Generator.py +14 -202
infer/lib/predictors/Generator.py CHANGED
@@ -13,10 +13,10 @@ from librosa import yin, pyin, piptrack
13
 
14
  sys.path.append(os.getcwd())
15
 
16
- from main.library.predictors.CREPE.filter import mean, median
17
- from main.library.predictors.WORLD.SWIPE import swipe, stonemask
18
- from main.app.variables import config, configs, logger, translations
19
- from main.library.utils import autotune_f0, proposal_f0_up_key, circular_write
20
 
21
  @nb.jit(nopython=True)
22
  def post_process(
@@ -338,26 +338,7 @@ class Generator:
338
  mode=f0_method,
339
  filter_radius=filter_radius
340
  )
341
- elif "swipe" in f0_method:
342
- f0 = self.get_f0_swipe(
343
- x,
344
- p_len,
345
- filter_radius=filter_radius,
346
- use_stonemask="stonemask" in f0_method
347
- )
348
- elif "penn" in f0_method:
349
- f0 = (
350
- self.get_f0_mangio_penn(
351
- x,
352
- p_len
353
- )
354
- ) if f0_method.split("-")[0] == "mangio" else (
355
- self.get_f0_penn(
356
- x,
357
- p_len,
358
- filter_radius=filter_radius
359
- )
360
- )
361
  elif "djcm" in f0_method:
362
  f0 = self.get_f0_djcm(
363
  x,
@@ -366,17 +347,7 @@ class Generator:
366
  svs="svs" in f0_method,
367
  filter_radius=filter_radius
368
  )
369
- elif "pesto" in f0_method:
370
- f0 = self.get_f0_pesto(
371
- x,
372
- p_len
373
- )
374
- elif "swift" in f0_method:
375
- f0 = self.get_f0_swift(
376
- x,
377
- p_len,
378
- filter_radius=filter_radius
379
- )
380
  else:
381
  raise ValueError(translations["option_not_valid"])
382
 
@@ -470,7 +441,7 @@ class Generator:
470
 
471
  def get_f0_mangio_crepe(self, x, p_len, model="full"):
472
  if not hasattr(self, "mangio_crepe"):
473
- from main.library.predictors.CREPE.CREPE import CREPE
474
 
475
  self.mangio_crepe = CREPE(
476
  os.path.join(
@@ -502,7 +473,7 @@ class Generator:
502
 
503
  def get_f0_crepe(self, x, p_len, model="full", filter_radius=3):
504
  if not hasattr(self, "crepe"):
505
- from main.library.predictors.CREPE.CREPE import CREPE
506
 
507
  self.crepe = CREPE(
508
  os.path.join(
@@ -531,7 +502,7 @@ class Generator:
531
 
532
  def get_f0_fcpe(self, x, p_len, legacy=False, previous=False, filter_radius=3):
533
  if not hasattr(self, "fcpe"):
534
- from main.library.predictors.FCPE.FCPE import FCPE
535
 
536
  self.fcpe = FCPE(
537
  configs,
@@ -566,7 +537,7 @@ class Generator:
566
 
567
  def get_f0_rmvpe(self, x, p_len, clipping=False, filter_radius=3, hpa=False, previous=False):
568
  if not hasattr(self, "rmvpe"):
569
- from main.library.predictors.RMVPE.RMVPE import RMVPE
570
 
571
  self.rmvpe = RMVPE(
572
  os.path.join(
@@ -605,60 +576,6 @@ class Generator:
605
  if self.predictor_onnx and self.delete_predictor_onnx: del self.rmvpe.model, self.rmvpe
606
  return self._resize_f0(f0, p_len)
607
 
608
- def get_f0_pyworld(self, x, p_len, filter_radius, model="harvest", use_stonemask=True):
609
- if not hasattr(self, "pw"):
610
- from main.library.predictors.WORLD.WORLD import PYWORLD
611
-
612
- self.pw = PYWORLD(
613
- os.path.join(configs["predictors_path"], "world"),
614
- os.path.join(configs["binary_path"], "world.bin")
615
- )
616
-
617
- x = x.astype(np.double)
618
- pw_fn = self.pw.harvest if model == "harvest" else self.pw.dio
619
-
620
- f0, t = pw_fn(
621
- x,
622
- fs=self.sample_rate,
623
- f0_ceil=self.f0_max,
624
- f0_floor=self.f0_min,
625
- frame_period=1000 * self.window / self.sample_rate
626
- )
627
-
628
- if use_stonemask:
629
- f0 = self.pw.stonemask(
630
- x,
631
- self.sample_rate,
632
- t,
633
- f0
634
- )
635
-
636
- if filter_radius > 2 and model == "harvest": f0 = medfilt(f0, filter_radius)
637
- elif model == "dio":
638
- for index, pitch in enumerate(f0):
639
- f0[index] = round(pitch, 1)
640
-
641
- return self._resize_f0(f0, p_len)
642
-
643
- def get_f0_swipe(self, x, p_len, filter_radius=3, use_stonemask=True):
644
- f0, t = swipe(
645
- x.astype(np.float32),
646
- self.sample_rate,
647
- f0_floor=self.f0_min,
648
- f0_ceil=self.f0_max,
649
- frame_period=1000 * self.window / self.sample_rate,
650
- sTHR=filter_radius / 10
651
- )
652
-
653
- if use_stonemask:
654
- f0 = stonemask(
655
- x,
656
- self.sample_rate,
657
- t,
658
- f0
659
- )
660
-
661
- return self._resize_f0(f0, p_len)
662
 
663
  def get_f0_librosa(self, x, p_len, mode="yin", filter_radius=3):
664
  if mode != "piptrack":
@@ -689,70 +606,8 @@ class Generator:
689
 
690
  return self._resize_f0(f0, p_len)
691
 
692
- def get_f0_penn(self, x, p_len, filter_radius=3):
693
- if not hasattr(self, "penn"):
694
- from main.library.predictors.PENN.PENN import PENN
695
-
696
- self.penn = PENN(
697
- os.path.join(
698
- configs["predictors_path"],
699
- f"fcn.{'onnx' if self.predictor_onnx else 'pt'}"
700
- ),
701
- hop_length=self.window // 2,
702
- batch_size=self.batch_size // 2,
703
- f0_min=self.f0_min,
704
- f0_max=self.f0_max,
705
- sample_rate=self.sample_rate,
706
- device=self.device,
707
- providers=self.providers,
708
- onnx=self.predictor_onnx,
709
- )
710
-
711
- f0, pd = self.penn.compute_f0(torch.tensor(np.copy((x)))[None].float())
712
-
713
- if self.predictor_onnx and self.delete_predictor_onnx:
714
- del self.penn.model, self.penn.decoder
715
- del self.penn.resample_audio, self.penn
716
-
717
- f0, pd = mean(f0, filter_radius), median(pd, filter_radius)
718
- f0[pd < 0.1] = 0
719
-
720
- return self._resize_f0(f0[0].cpu().numpy(), p_len)
721
-
722
- def get_f0_mangio_penn(self, x, p_len):
723
- if not hasattr(self, "mangio_penn"):
724
- from main.library.predictors.PENN.PENN import PENN
725
-
726
- self.mangio_penn = PENN(
727
- os.path.join(
728
- configs["predictors_path"],
729
- f"fcn.{'onnx' if self.predictor_onnx else 'pt'}"
730
- ),
731
- hop_length=self.hop_length // 2,
732
- batch_size=self.hop_length,
733
- f0_min=self.f0_min,
734
- f0_max=self.f0_max,
735
- sample_rate=self.sample_rate,
736
- device=self.device,
737
- providers=self.providers,
738
- onnx=self.predictor_onnx,
739
- interp_unvoiced_at=0.1
740
- )
741
-
742
- x = x.astype(np.float32)
743
- x /= np.quantile(np.abs(x), 0.999)
744
-
745
- audio = torch.from_numpy(x).to(self.device, copy=True).unsqueeze(dim=0)
746
- if audio.ndim == 2 and audio.shape[0] > 1: audio = audio.mean(dim=0, keepdim=True).detach()
747
-
748
- f0 = self.mangio_penn.compute_f0(audio.detach())
749
-
750
- if self.predictor_onnx and self.delete_predictor_onnx:
751
- del self.mangio_penn.model, self.mangio_penn.decoder
752
- del self.mangio_penn.resample_audio, self.mangio_penn
753
-
754
- return self._resize_f0(f0.squeeze(0).cpu().float().numpy(), p_len)
755
-
756
  def get_f0_djcm(self, x, p_len, clipping=False, svs=False, filter_radius=3):
757
  if not hasattr(self, "djcm"):
758
  from main.library.predictors.DJCM.DJCM import DJCM
@@ -792,48 +647,5 @@ class Generator:
792
  if self.predictor_onnx and self.delete_predictor_onnx: del self.djcm.model, self.djcm
793
  return self._resize_f0(f0, p_len)
794
 
795
- def get_f0_swift(self, x, p_len, filter_radius=3):
796
- if not hasattr(self, "swift"):
797
- from main.library.predictors.SWIFT.SWIFT import SWIFT
798
-
799
- self.swift = SWIFT(
800
- os.path.join(
801
- configs["predictors_path"],
802
- "swift.onnx"
803
- ),
804
- fmin=self.f0_min,
805
- fmax=self.f0_max,
806
- confidence_threshold=filter_radius / 4 + 0.137
807
- )
808
-
809
- pitch_hz, _, _ = self.swift.detect_from_array(x, self.sample_rate)
810
- return self._resize_f0(pitch_hz, p_len)
811
-
812
- def get_f0_pesto(self, x, p_len):
813
- if not hasattr(self, "pesto"):
814
- from main.library.predictors.PESTO.PESTO import PESTO
815
-
816
- self.pesto = PESTO(
817
- os.path.join(
818
- configs["predictors_path"],
819
- f"pesto.{'onnx' if self.predictor_onnx else 'pt'}"
820
- ),
821
- step_size=1000 * self.window / self.sample_rate,
822
- reduction = "alwa",
823
- num_chunks=1,
824
- sample_rate=self.sample_rate,
825
- device=self.device,
826
- providers=self.providers,
827
- onnx=self.predictor_onnx
828
- )
829
-
830
- x = x.astype(np.float32)
831
- x /= np.quantile(np.abs(x), 0.999)
832
-
833
- audio = torch.from_numpy(x).to(self.device, copy=True).unsqueeze(dim=0)
834
- if audio.ndim == 2 and audio.shape[0] > 1: audio = audio.mean(dim=0, keepdim=True).detach()
835
-
836
- f0 = self.pesto.compute_f0(audio.detach())[0]
837
- if self.predictor_onnx and self.delete_predictor_onnx: del self.pesto.model, self.pesto
838
-
839
- return self._resize_f0(f0.squeeze(0).cpu().float().numpy(), p_len)
 
13
 
14
  sys.path.append(os.getcwd())
15
 
16
+ from infer.lib.predictors.CREPE.filter import mean, median
17
+ from infer.lib.predictors.WORLD.SWIPE import swipe, stonemask
18
+ from infer.lib.variables import config, configs, logger, translations
19
+ from infer.lib.utils import autotune_f0, proposal_f0_up_key, circular_write
20
 
21
  @nb.jit(nopython=True)
22
  def post_process(
 
338
  mode=f0_method,
339
  filter_radius=filter_radius
340
  )
341
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  elif "djcm" in f0_method:
343
  f0 = self.get_f0_djcm(
344
  x,
 
347
  svs="svs" in f0_method,
348
  filter_radius=filter_radius
349
  )
350
+
 
 
 
 
 
 
 
 
 
 
351
  else:
352
  raise ValueError(translations["option_not_valid"])
353
 
 
441
 
442
  def get_f0_mangio_crepe(self, x, p_len, model="full"):
443
  if not hasattr(self, "mangio_crepe"):
444
+ from infer.lib.predictors.CREPE.CREPE import CREPE
445
 
446
  self.mangio_crepe = CREPE(
447
  os.path.join(
 
473
 
474
  def get_f0_crepe(self, x, p_len, model="full", filter_radius=3):
475
  if not hasattr(self, "crepe"):
476
+ from infer.lib.predictors.CREPE.CREPE import CREPE
477
 
478
  self.crepe = CREPE(
479
  os.path.join(
 
502
 
503
  def get_f0_fcpe(self, x, p_len, legacy=False, previous=False, filter_radius=3):
504
  if not hasattr(self, "fcpe"):
505
+ from infer.lib.predictors.FCPE.FCPE import FCPE
506
 
507
  self.fcpe = FCPE(
508
  configs,
 
537
 
538
  def get_f0_rmvpe(self, x, p_len, clipping=False, filter_radius=3, hpa=False, previous=False):
539
  if not hasattr(self, "rmvpe"):
540
+ from infer.lib.predictors.RMVPE.RMVPE import RMVPE
541
 
542
  self.rmvpe = RMVPE(
543
  os.path.join(
 
576
  if self.predictor_onnx and self.delete_predictor_onnx: del self.rmvpe.model, self.rmvpe
577
  return self._resize_f0(f0, p_len)
578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  def get_f0_librosa(self, x, p_len, mode="yin", filter_radius=3):
581
  if mode != "piptrack":
 
606
 
607
  return self._resize_f0(f0, p_len)
608
 
609
+
610
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  def get_f0_djcm(self, x, p_len, clipping=False, svs=False, filter_radius=3):
612
  if not hasattr(self, "djcm"):
613
  from main.library.predictors.DJCM.DJCM import DJCM
 
647
  if self.predictor_onnx and self.delete_predictor_onnx: del self.djcm.model, self.djcm
648
  return self._resize_f0(f0, p_len)
649
 
650
+
651
+