voice-cloning2

Runtime error

App Files Community

vettorazi committed on Jun 14, 2023

Commit

4bdcce8

•

1 Parent(s): 01ba3ca

Delete main.py

Browse files

Files changed (1) hide show

main.py +0 -118

main.py DELETED Viewed

@@ -1,118 +0,0 @@
-import io
-import json
-import os
-from pathlib import Path
-import librosa
-import numpy as np
-import torch
-import soundfile as sf
-from demucs.apply import apply_model
-from demucs.pretrained import DEFAULT_MODEL, get_model
-import gradio as gr
-from huggingface_hub import hf_hub_download, list_repo_files
-from so_vits_svc_fork.hparams import HParams
-from so_vits_svc_fork.inference.core import Svc
-###################################################################
-# REPLACE THESE VALUES TO CHANGE THE MODEL REPO/CKPT NAME/SETTINGS
-###################################################################
-# The Hugging Face Hub repo ID
-repo_id = "vettorazi/vettorazi"
-# If None, Uses latest ckpt in the repo
-ckpt_name = None
-# If None, Uses "kmeans.pt" if it exists in the repo
-cluster_model_name = None
-# Set the default f0 type to use - use the one it was trained on.
-# The default for so-vits-svc-fork is "dio".
-# Options: "crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
-default_f0_method = "crepe"
-# The default ratio of cluster inference to SVC inference.
-# If cluster_model_name is not found in the repo, this is set to 0.
-default_cluster_infer_ratio = 0.5
-# Limit on duration of audio at inference time. increase if you can
-# In this parent app, we set the limit with an env var to 30 seconds
-# If you didnt set env var + you go OOM try changing 9e9 to <=300ish
-duration_limit = int(os.environ.get("MAX_DURATION_SECONDS", 9e9))
-###################################################################
-if ckpt_name is None:
-    latest_id = sorted(
-        [
-            int(Path(x).stem.split("_")[1])
-            for x in list_repo_files(repo_id)
-            if x.startswith("G_") and x.endswith(".pth")
-        ]
-    )[-1]
-    ckpt_name = f"G_{latest_id}.pth"
-cluster_model_name = cluster_model_name or "kmeans.pt"
-if cluster_model_name in list_repo_files(repo_id):
-    cluster_model_path = hf_hub_download(repo_id, cluster_model_name)
-else:
-    cluster_model_path = None
-default_cluster_infer_ratio = default_cluster_infer_ratio if cluster_model_path else 0
-generator_path = hf_hub_download(repo_id, ckpt_name)
-config_path = hf_hub_download(repo_id, "config.json")
-hparams = HParams(**json.loads(Path(config_path).read_text()))
-speakers = list(hparams.spk.keys())
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = Svc(net_g_path=generator_path, config_path=config_path, device=device, cluster_model_path=cluster_model_path)
-demucs_model = get_model(DEFAULT_MODEL)
-def predict(
-    speaker,
-    audio,
-    transpose: int = 0,
-    auto_predict_f0: bool = False,
-    cluster_infer_ratio: float = 0,
-    noise_scale: float = 0.4,
-    f0_method: str = "crepe",
-    db_thresh: int = -40,
-    pad_seconds: float = 0.5,
-    chunk_seconds: float = 0.5,
-    absolute_thresh: bool = False,
-):
-    audio, _ = librosa.load(audio, sr=model.target_sample, duration=duration_limit)
-    audio = librosa.util.normalize(audio)
-    out = model.predict(
-        audio,
-        speaker,
-        transpose=transpose,
-        auto_predict_f0=auto_predict_f0,
-        cluster_infer_ratio=cluster_infer_ratio,
-        noise_scale=noise_scale,
-        f0_method=f0_method,
-        db_thresh=db_thresh,
-        pad_seconds=pad_seconds,
-        chunk_seconds=chunk_seconds,
-        absolute_thresh=absolute_thresh,
-    )
-    return model.target_sample, out
-def voice_cloning(speaker, audio):
-    sample_rate, audio_data = predict(speaker, audio)
-    return audio_data, sample_rate
-# Configure the Gradio interface
-inputs = [
-    gr.inputs.Dropdown(choices=speakers, label="Speaker"),
-    gr.inputs.Audio(label="Audio")
-]
-outputs = gr.outputs.Audio(label="Cloned Audio")
-iface = gr.Interface(fn=voice_cloning, inputs=inputs, outputs=outputs)
-if __name__ == "__main__":
-    iface.launch()