Spaces:

LEMAS-Project
/

LEMAS-TTS

Running on Zero

App Files Community

Approximetal committed 9 days ago

Commit

db5f9bf

verified ·

1 Parent(s): 484e4a0

Update inference_gradio.py

Browse files

Files changed (1) hide show

inference_gradio.py +51 -95

inference_gradio.py CHANGED Viewed

@@ -16,29 +16,15 @@ from cached_path import cached_path
 from lemas_tts.api import TTS, PRETRAINED_ROOT, CKPTS_ROOT
-# Global variables
-tts_api = None
-last_checkpoint = ""
-last_device = ""
-last_ema = None
-# Detect whether we are running inside a HF Space with stateless GPU.
-IS_SPACES = os.getenv("SYSTEM") == "spaces"
-# Device detection
-if IS_SPACES:
-    # On Spaces main process we must not initialize CUDA; keep TTS on CPU.
-    device = "cpu"
-else:
-    device = (
-        "cuda"
-        if torch.cuda.is_available()
-        else "xpu"
-        if torch.xpu.is_available()
-        else "mps"
-        if torch.backends.mps.is_available()
-        else "cpu"
-    )
 REPO_ROOT = Path(__file__).resolve().parent
@@ -72,7 +58,7 @@ class UVR5:
             sys.path.append(self.code_dir)
         # Reuse an already-loaded model if it matches the requested device.
-        if self.model is not None and self.device == device:
             return self.model
         from multiprocess_cuda_infer import ModelData, Inference
@@ -85,42 +71,25 @@ class UVR5:
             model_path=model_path,
             audio_path=self.model_dir,
             result_path=self.model_dir,
-            device=device,
             process_method="MDX-Net",
             # keep base_dir and model_dir the same (paths under `pretrained_models`)
             base_dir=self.model_dir,
             **configs,
         )
-        uvr5_model = Inference(model_data, device)
-        # On HF Spaces with stateless GPU, we must not initialize CUDA in the
-        # main process. When running there and staying on CPU, temporarily
-        # spoof torch.cuda.is_available() so UVR5 never touches CUDA APIs.
-        if IS_SPACES and device == "cpu":
-            orig_is_available = _torch.cuda.is_available
-            _torch.cuda.is_available = lambda: False
-            try:
-                uvr5_model.load_model(model_path, 1)
-            finally:
-                _torch.cuda.is_available = orig_is_available
-        else:
-            uvr5_model.load_model(model_path, 1)
-        self.model = uvr5_model
-        self.device = device
         return self.model
     def denoise(self, audio_info):
         print("denoise UVR5: ", audio_info)
-        # On Spaces, force CPU; locally prefer CUDA if available.
-        if IS_SPACES:
-            dev = "cpu"
-        else:
-            dev = "cuda" if torch.cuda.is_available() else "cpu"
-        model = self.load_model(device=dev)
         input_audio = load_wav(audio_info, sr=44100, channel=2)
-        output_audio = model.demix_base({0: input_audio.squeeze()}, is_match_mix=False, device=dev)
         return output_audio.squeeze().T.cpu().numpy(), 44100
@@ -193,7 +162,6 @@ def get_checkpoints_project(project_name=None, is_gradio=True):
         for f in files_checkpoints
         if "pretrained_" not in os.path.basename(f) and "model_last.pt" not in os.path.basename(f)
     ]
-    last_checkpoint = [f for f in files_checkpoints if "model_last.pt" in os.path.basename(f)]
     # Sort regular checkpoints by number
     try:
@@ -204,7 +172,7 @@ def get_checkpoints_project(project_name=None, is_gradio=True):
         regular_checkpoints = sorted(regular_checkpoints)
     # Combine in order: pretrained, regular, last
-    files_checkpoints = pretrained_checkpoints + regular_checkpoints + last_checkpoint
     select_checkpoint = None if not files_checkpoints else files_checkpoints[-1]
@@ -235,13 +203,13 @@ def get_available_projects():
     print("project_list:", project_list)
     return project_list
-@spaces.GPU(duration=240)
 @torch.no_grad()
 @torch.inference_mode()
 def infer(
     project, file_checkpoint, exp_name, ref_text, ref_audio, denoise_audio, gen_text, nfe_step, use_ema, separate_langs, frontend, speed, cfg_strength, use_acc_grl, ref_ratio, no_ref_audio, sway_sampling_coef, use_prosody_encoder, seed
 ):
-    global last_checkpoint, last_device, tts_api, last_ema
     # Resolve checkpoint path (local or HF URL)
     ckpt_path = file_checkpoint
@@ -260,52 +228,40 @@ def infer(
     if denoise_audio:
         ref_audio = denoise_audio
-    device_test = device  # Use the global device
-    if last_checkpoint != ckpt_resolved or last_device != device_test or last_ema != use_ema or tts_api is None:
-        if last_checkpoint != ckpt_resolved:
-            last_checkpoint = ckpt_resolved
-        if last_device != device_test:
-            last_device = device_test
-        if last_ema != use_ema:
-            last_ema = use_ema
-        # Automatically enable prosody encoder when using the prosody checkpoint
-        use_prosody_encoder = True if "prosody" in str(ckpt_resolved) else False
-        # Resolve vocab file (local)
-        local_vocab = Path(PRETRAINED_ROOT) / "data" / project / "vocab.txt"
-        if not local_vocab.is_file():
-            return None, "Vocab file not found!", ""
-        vocab_file = str(local_vocab)
-        # Resolve prosody encoder config & weights (local)
-        local_prosody_cfg = Path(CKPTS_ROOT) / "prosody_encoder" / "pretssel_cfg.json"
-        local_prosody_ckpt = Path(CKPTS_ROOT) / "prosody_encoder" / "prosody_encoder_UnitY2.pt"
-        if not local_prosody_cfg.is_file() or not local_prosody_ckpt.is_file():
-            return None, "Prosody encoder files not found!", ""
-        prosody_cfg_path = str(local_prosody_cfg)
-        prosody_ckpt_path = str(local_prosody_ckpt)
-        try:
-            tts_api = TTS(
-                model=exp_name,
-                ckpt_file=ckpt_resolved,
-                vocab_file=vocab_file,
-                device=device_test,
-                use_ema=use_ema,
-                frontend=frontend,
-                use_prosody_encoder=use_prosody_encoder,
-                prosody_cfg_path=prosody_cfg_path,
-                prosody_ckpt_path=prosody_ckpt_path,
-            )
-        except Exception as e:
-            traceback.print_exc()
-            return None, f"Error loading model: {str(e)}", ""
-        print("Model loaded >>", device_test, file_checkpoint, use_ema)
     if seed == -1:  # -1 used for random
         seed = None

 from lemas_tts.api import TTS, PRETRAINED_ROOT, CKPTS_ROOT
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "xpu"
+    if torch.xpu.is_available()
+    else "mps"
+    if torch.backends.mps.is_available()
+    else "cpu"
+)
 REPO_ROOT = Path(__file__).resolve().parent
             sys.path.append(self.code_dir)
         # Reuse an already-loaded model if it matches the requested device.
+        if self.model is not None:
             return self.model
         from multiprocess_cuda_infer import ModelData, Inference
             model_path=model_path,
             audio_path=self.model_dir,
             result_path=self.model_dir,
+            device="cpu",
             process_method="MDX-Net",
             # keep base_dir and model_dir the same (paths under `pretrained_models`)
             base_dir=self.model_dir,
             **configs,
         )
+        uvr5_model = Inference(model_data, "cpu")
+        uvr5_model.load_model(model_path, 1)
+        self.model = uvr5_model.load_model(device="cpu")
+        self.device = "cpu"
         return self.model
     def denoise(self, audio_info):
         print("denoise UVR5: ", audio_info)
+        # # On Spaces, force CPU; locally prefer CUDA if available.
         input_audio = load_wav(audio_info, sr=44100, channel=2)
+        output_audio = self.model.demix_base({0: input_audio.squeeze()}, is_match_mix=False, device="cpu")
         return output_audio.squeeze().T.cpu().numpy(), 44100
         for f in files_checkpoints
         if "pretrained_" not in os.path.basename(f) and "model_last.pt" not in os.path.basename(f)
     ]
     # Sort regular checkpoints by number
     try:
         regular_checkpoints = sorted(regular_checkpoints)
     # Combine in order: pretrained, regular, last
+    files_checkpoints = pretrained_checkpoints + regular_checkpoints
     select_checkpoint = None if not files_checkpoints else files_checkpoints[-1]
     print("project_list:", project_list)
     return project_list
+@spaces.GPU
 @torch.no_grad()
 @torch.inference_mode()
 def infer(
     project, file_checkpoint, exp_name, ref_text, ref_audio, denoise_audio, gen_text, nfe_step, use_ema, separate_langs, frontend, speed, cfg_strength, use_acc_grl, ref_ratio, no_ref_audio, sway_sampling_coef, use_prosody_encoder, seed
 ):
+    global tts_api, last_ema
     # Resolve checkpoint path (local or HF URL)
     ckpt_path = file_checkpoint
     if denoise_audio:
         ref_audio = denoise_audio
+    # Automatically enable prosody encoder when using the prosody checkpoint
+    use_prosody_encoder = True if "prosody" in str(ckpt_resolved) else False
+    # Resolve vocab file (local)
+    local_vocab = Path(PRETRAINED_ROOT) / "data" / project / "vocab.txt"
+    if not local_vocab.is_file():
+        return None, "Vocab file not found!", ""
+    vocab_file = str(local_vocab)
+    # Resolve prosody encoder config & weights (local)
+    local_prosody_cfg = Path(CKPTS_ROOT) / "prosody_encoder" / "pretssel_cfg.json"
+    local_prosody_ckpt = Path(CKPTS_ROOT) / "prosody_encoder" / "prosody_encoder_UnitY2.pt"
+    if not local_prosody_cfg.is_file() or not local_prosody_ckpt.is_file():
+        return None, "Prosody encoder files not found!", ""
+    prosody_cfg_path = str(local_prosody_cfg)
+    prosody_ckpt_path = str(local_prosody_ckpt)
+    try:
+        tts_api = TTS(
+            model=exp_name,
+            ckpt_file=ckpt_resolved,
+            vocab_file=vocab_file,
+            device="cuda",
+            use_ema=use_ema,
+            frontend=frontend,
+            use_prosody_encoder=use_prosody_encoder,
+            prosody_cfg_path=prosody_cfg_path,
+            prosody_ckpt_path=prosody_ckpt_path,
+        )
+    except Exception as e:
+        traceback.print_exc()
+        return None, f"Error loading model: {str(e)}", ""
+    print("Model loaded >>", file_checkpoint, use_ema)
     if seed == -1:  # -1 used for random
         seed = None