Spaces:

DeepLearning101
/

Speech-Separation

Running

App Files Community

DeepLearning101 committed on May 3

Commit

38d7181

verified ·

1 Parent(s): cf73d23

Update DPTNet_eval/DPTNet_quant_sep.py

Browse files

Files changed (1) hide show

DPTNet_eval/DPTNet_quant_sep.py +24 -26

DPTNet_eval/DPTNet_quant_sep.py CHANGED Viewed

@@ -4,6 +4,9 @@ import numpy as np
 import torchaudio
 import yaml
 from . import asteroid_test
 def get_conf():
@@ -32,19 +35,35 @@ def get_conf():
 def load_dpt_model():
     print('Load Separation Model...')
-    now_path = os.path.split(os.path.realpath(__file__))[0]
     conf_filterbank, conf_masknet = get_conf()
-    model_path = os.path.join(now_path, "trained_model/train_dptnet_aishell_partOverlap_B2_300epoch_quan-int8.p")
-    model = getattr(asteroid_test, "DPTNet")(**conf_filterbank, **conf_masknet)
     model = torch.quantization.quantize_dynamic(model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8)
     state_dict = torch.load(model_path, map_location="cpu")
     model.load_state_dict(state_dict)
     model.eval()
     return model
 def dpt_sep_process(wav_path, model=None, outfilename=None):
     if model is None:
-        model = load_model()
     x, sr = torchaudio.load(wav_path)
     x = x.cpu()
@@ -73,28 +92,7 @@ def dpt_sep_process(wav_path, model=None, outfilename=None):
     else:
         torchaudio.save(wav_path.replace('.wav', '_sep1.wav'), sep_1, sr)
         torchaudio.save(wav_path.replace('.wav', '_sep2.wav'), sep_2, sr)
-# def dpt_sep_process(wav_path, model=None, outfilename=None):
-#     if model == None:
-#         model = load_model()
-#     x, sr = torchaudio.load(wav_path)
-#     x = x.cpu()
-#     with torch.no_grad():
-#         est_sources = model(x)
-#     est_sources_np = est_sources.squeeze(0)
-#     sep_1, sep_2 = est_sources_np
-#     sep_1 = sep_1 * x[0].abs().max().item() / sep_1.abs().max().item()
-#     sep_2 = sep_2 * x[0].abs().max().item() / sep_2.abs().max().item()
-#     if outfilename != None:
-#         torchaudio.save(outfilename.replace('.wav', '_sep1.wav'), sep_1, sr)
-#         torchaudio.save(outfilename.replace('.wav', '_sep2.wav'), sep_2, sr)
-#         torchaudio.save(outfilename.replace('.wav', '_mix.wav'), x, sr)
-#     else:
-#         torchaudio.save(wav_path.replace('.wav', '_sep1.wav'), sep_1, sr)
-#         torchaudio.save(wav_path.replace('.wav', '_sep2.wav'), sep_2, sr)
 if __name__ == '__main__':
     print("This module should be used via Flask or Gradio.")

 import torchaudio
 import yaml
 from . import asteroid_test
+from huggingface_hub import hf_hub_download
+torchaudio.set_audio_backend("sox_io")
 def get_conf():
 def load_dpt_model():
     print('Load Separation Model...')
+    # 👇 從環境變數取得 HF Token
+    from huggingface_hub import hf_hub_download
+    speech_sep_token = os.getenv("SpeechSeparation")
+    if not speech_sep_token:
+        raise EnvironmentError("環境變數 SpeechSeparation 未設定！")
+    # 👇 從 Hugging Face Hub 下載模型權重
+    model_path = hf_hub_download(
+        repo_id="DeepLearning101/speech-separation",  # 替換成你自己的 repo 名稱
+        filename="train_dptnet_aishell_partOverlap_B2_300epoch_quan-int8.p",
+        token=speech_sep_token
+    )
+    # 👇 原本邏輯完全不變
     conf_filterbank, conf_masknet = get_conf()
+    model_class = getattr(asteroid_test, "DPTNet")
+    model = model_class(**conf_filterbank, **conf_masknet)
     model = torch.quantization.quantize_dynamic(model, {torch.nn.LSTM, torch.nn.Linear}, dtype=torch.qint8)
     state_dict = torch.load(model_path, map_location="cpu")
     model.load_state_dict(state_dict)
     model.eval()
     return model
 def dpt_sep_process(wav_path, model=None, outfilename=None):
     if model is None:
+        model = load_dpt_model()
     x, sr = torchaudio.load(wav_path)
     x = x.cpu()
     else:
         torchaudio.save(wav_path.replace('.wav', '_sep1.wav'), sep_1, sr)
         torchaudio.save(wav_path.replace('.wav', '_sep2.wav'), sep_2, sr)
 if __name__ == '__main__':
     print("This module should be used via Flask or Gradio.")