voice-cloning-4

Runtime error

App Files Files Community

vettorazi committed on Jun 16, 2023

Commit

475bad2

•

1 Parent(s): e5b90cb

second try on multiple speakers

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -62,24 +62,12 @@ else:
     cluster_model_path = None
 default_cluster_infer_ratio = default_cluster_infer_ratio if cluster_model_path else 0
-#generator_path = hf_hub_download(repo_id, ckpt_name)
-generator_path = None
-if speaker == speakers[0]:
-    generator_path = hf_hub_download(repo_id, "G_10000.pth")
-elif speaker == speakers[1]:
-    generator_path = hf_hub_download(repo_id, "G_534.pth")
-elif speaker == speakers[2]:
-    generator_path = hf_hub_download(repo_id, "G_9933.pth")
-else:
-    # Handle the case when the speaker type is not recognized
-    raise ValueError("Invalid speaker type")
-model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
 config_path = hf_hub_download(repo_id, "config.json")
 hparams = HParams(**json.loads(Path(config_path).read_text()))
 speakers = list(hparams.spk.keys())
 device = "cuda" if torch.cuda.is_available() else "cpu"
-#model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
 demucs_model = get_model(DEFAULT_MODEL)
@@ -142,7 +130,6 @@ def download_youtube_clip(
     else:
         return None
 def predict(
     speaker,
     audio,
@@ -156,6 +143,18 @@ def predict(
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
     audio, _ = librosa.load(audio, sr=model.target_sample, duration=duration_limit)
     audio = model.infer_silence(
         audio.astype(np.float32),
@@ -172,6 +171,7 @@ def predict(
     )
     return model.target_sample, audio
 SPACE_ID = "nateraw/voice-cloning"
 description = f"""
 # Attention - This Space may be slow in the shared UI if there is a long queue. To speed it up, you can duplicate and use it with a paid private T4 GPU.

     cluster_model_path = None
 default_cluster_infer_ratio = default_cluster_infer_ratio if cluster_model_path else 0
+generator_path = hf_hub_download(repo_id, ckpt_name)
 config_path = hf_hub_download(repo_id, "config.json")
 hparams = HParams(**json.loads(Path(config_path).read_text()))
 speakers = list(hparams.spk.keys())
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
 demucs_model = get_model(DEFAULT_MODEL)
     else:
         return None
 def predict(
     speaker,
     audio,
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
+    if speaker == speakers[0]:
+        generator_path = hf_hub_download(repo_id, "G_10000.pth")
+    elif speaker == speakers[1]:
+        generator_path = hf_hub_download(repo_id, "G_534.pth")
+    elif speaker == speakers[2]:
+        generator_path = hf_hub_download(repo_id, "G_9933.pth")
+    else:
+        # Handle the case when the speaker type is not recognized
+        raise ValueError("Invalid speaker type")
+    model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
     audio, _ = librosa.load(audio, sr=model.target_sample, duration=duration_limit)
     audio = model.infer_silence(
         audio.astype(np.float32),
     )
     return model.target_sample, audio
 SPACE_ID = "nateraw/voice-cloning"
 description = f"""
 # Attention - This Space may be slow in the shared UI if there is a long queue. To speed it up, you can duplicate and use it with a paid private T4 GPU.