vettorazi committed
Commit 475bad2
1 Parent(s): e5b90cb

second try on multiple speakers

Files changed (1)
app.py +15 -15
app.py CHANGED
@@ -62,24 +62,12 @@ else:
     cluster_model_path = None
 default_cluster_infer_ratio = default_cluster_infer_ratio if cluster_model_path else 0
 
-#generator_path = hf_hub_download(repo_id, ckpt_name)
-generator_path = None
-if speaker == speakers[0]:
-    generator_path = hf_hub_download(repo_id, "G_10000.pth")
-elif speaker == speakers[1]:
-    generator_path = hf_hub_download(repo_id, "G_534.pth")
-elif speaker == speakers[2]:
-    generator_path = hf_hub_download(repo_id, "G_9933.pth")
-else:
-    # Handle the case when the speaker type is not recognized
-    raise ValueError("Invalid speaker type")
-
-model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
+generator_path = hf_hub_download(repo_id, ckpt_name)
 config_path = hf_hub_download(repo_id, "config.json")
 hparams = HParams(**json.loads(Path(config_path).read_text()))
 speakers = list(hparams.spk.keys())
 device = "cuda" if torch.cuda.is_available() else "cpu"
-#model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
+model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
 demucs_model = get_model(DEFAULT_MODEL)
 
 
@@ -142,7 +130,6 @@ def download_youtube_clip(
     else:
         return None
 
-
 def predict(
     speaker,
     audio,
@@ -156,6 +143,18 @@ def predict(
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
+    if speaker == speakers[0]:
+        generator_path = hf_hub_download(repo_id, "G_10000.pth")
+    elif speaker == speakers[1]:
+        generator_path = hf_hub_download(repo_id, "G_534.pth")
+    elif speaker == speakers[2]:
+        generator_path = hf_hub_download(repo_id, "G_9933.pth")
+    else:
+        # Handle the case when the speaker type is not recognized
+        raise ValueError("Invalid speaker type")
+
+    model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
+
     audio, _ = librosa.load(audio, sr=model.target_sample, duration=duration_limit)
     audio = model.infer_silence(
         audio.astype(np.float32),
@@ -172,6 +171,7 @@ def predict(
     )
     return model.target_sample, audio
 
+
 SPACE_ID = "nateraw/voice-cloning"
 description = f"""
 # Attention - This Space may be slow in the shared UI if there is a long queue. To speed it up, you can duplicate and use it with a paid private T4 GPU.
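For context, the new per-speaker branch in predict() downloads a checkpoint and builds a fresh Svc instance on every call. A minimal alternative sketch, not part of this commit: map each entry of speakers to its checkpoint and cache the loaded model, reusing the module-level names already defined in app.py (hf_hub_download, Svc, repo_id, config_path, device, cluster_model_path, speakers). The names SPEAKER_TO_CKPT and load_model_for_speaker are hypothetical.

from functools import lru_cache

# Checkpoint per speaker, in the same hparams.spk order as the if/elif chain above.
SPEAKER_TO_CKPT = dict(zip(speakers, ["G_10000.pth", "G_534.pth", "G_9933.pth"]))

@lru_cache(maxsize=None)
def load_model_for_speaker(speaker: str):
    """Download the speaker's generator once and cache the loaded Svc model."""
    if speaker not in SPEAKER_TO_CKPT:
        # Same guard as the else branch in the diff above.
        raise ValueError("Invalid speaker type")
    generator_path = hf_hub_download(repo_id, SPEAKER_TO_CKPT[speaker])
    return Svc(
        net_g_path=generator_path,
        config_path=config_path,
        device=device,
        cluster_model_path=cluster_model_path,
    )

With this helper, the body of predict() would start with model = load_model_for_speaker(speaker) instead of repeating the download and Svc construction on each request.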