aaron
committed on
Commit
·
75635fa
1
Parent(s):
67e0911
원래 의도 유지: 더미 모델 사용 금지, 실패 시 앱 중단
Browse files
- BigVGAN, FAcodec, BigVGAN 44k 로딩 실패 시 더미 모델 대신 앱 시작 중단
- 모든 모델이 정상 로드되어야만 앱 시작 (부분적 기능 제공 금지)
- 실패 시 명확한 에러 메시지 제공
- 앱 시작 시 모든 모델 미리 로드하여 완전한 기능 보장
- 음성 품질 저하 방지
app.py
CHANGED
|
@@ -224,29 +224,29 @@ def initialize_seed_vc_models():
|
|
| 224 |
campplus_model.eval()
|
| 225 |
campplus_model.to(DEVICE)
|
| 226 |
|
| 227 |
-
# Load BigVGAN
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
# Load FAcodec
|
| 234 |
try:
|
| 235 |
ckpt_path, config_path = load_custom_model_from_hf("Plachta/FAcodec", 'pytorch_model.bin', 'config.yml')
|
| 236 |
with open(config_path, 'r', encoding='utf-8') as f:
|
| 237 |
codec_config = yaml.safe_load(f)
|
| 238 |
codec_model_params = recursive_munch(codec_config['model_params'])
|
| 239 |
|
| 240 |
-
# Remove problematic
|
| 241 |
-
if hasattr(codec_model_params, 'dac_params') and hasattr(codec_model_params.dac_params, 'causal'):
|
| 242 |
-
delattr(codec_model_params.dac_params, 'causal')
|
| 243 |
-
log_print("Removed 'causal' parameter from DAC config")
|
| 244 |
-
|
| 245 |
-
# Also check for other problematic parameters
|
| 246 |
if hasattr(codec_model_params, 'dac_params'):
|
| 247 |
dac_params = codec_model_params.dac_params
|
| 248 |
-
|
| 249 |
-
problematic_params = ['causal', 'causal_conv', 'causal_attention']
|
| 250 |
for param in problematic_params:
|
| 251 |
if hasattr(dac_params, param):
|
| 252 |
delattr(dac_params, param)
|
|
@@ -255,29 +255,11 @@ def initialize_seed_vc_models():
|
|
| 255 |
codec_encoder = build_model(codec_model_params, stage="codec")
|
| 256 |
log_print("✓ FAcodec loaded successfully")
|
| 257 |
except Exception as e:
|
| 258 |
-
log_error(f"
|
| 259 |
log_error(f"FAcodec error traceback: {traceback.format_exc()}")
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
# Try to create a basic DAC model without problematic parameters
|
| 264 |
-
from descript_audio_codec import DAC
|
| 265 |
-
codec_encoder = {'codec': DAC()}
|
| 266 |
-
log_print("✓ Created minimal DAC fallback")
|
| 267 |
-
except Exception as e2:
|
| 268 |
-
log_error(f"Failed to create DAC fallback: {e2}")
|
| 269 |
-
# Create a completely dummy encoder
|
| 270 |
-
class DummyCodec:
|
| 271 |
-
def __getitem__(self, key):
|
| 272 |
-
return self
|
| 273 |
-
def eval(self):
|
| 274 |
-
return self
|
| 275 |
-
def to(self, device):
|
| 276 |
-
return self
|
| 277 |
-
codec_encoder = {'codec': DummyCodec()}
|
| 278 |
-
log_print("✓ Created dummy codec encoder")
|
| 279 |
-
|
| 280 |
-
# Load codec checkpoint with error handling
|
| 281 |
try:
|
| 282 |
ckpt_params = torch.load(ckpt_path, map_location="cpu")
|
| 283 |
if 'codec' in ckpt_params:
|
|
@@ -286,10 +268,11 @@ def initialize_seed_vc_models():
|
|
| 286 |
codec_encoder.codec.load_state_dict(ckpt_params['model'], strict=False)
|
| 287 |
else:
|
| 288 |
codec_encoder.codec.load_state_dict(ckpt_params, strict=False)
|
|
|
|
| 289 |
except Exception as e:
|
| 290 |
-
log_error(f"
|
| 291 |
-
log_error(f"Codec
|
| 292 |
-
|
| 293 |
|
| 294 |
_ = [codec_encoder[key].eval() for key in codec_encoder]
|
| 295 |
_ = [codec_encoder[key].to(DEVICE) for key in codec_encoder]
|
|
@@ -352,9 +335,16 @@ def initialize_seed_vc_models():
|
|
| 352 |
}
|
| 353 |
to_mel_f0 = lambda x: mel_spectrogram(x, **mel_fn_args_f0)
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
_seed_vc_models = {
|
| 360 |
'model': model,
|
|
@@ -376,6 +366,7 @@ def initialize_seed_vc_models():
|
|
| 376 |
'sr_f0': sr_f0
|
| 377 |
}
|
| 378 |
|
|
|
|
| 379 |
return _seed_vc_models
|
| 380 |
|
| 381 |
def adjust_f0_semitones(f0_sequence, n_semitones):
|
|
@@ -724,12 +715,24 @@ styles = list_supported_styles() or [
|
|
| 724 |
'es', 'fr', 'zh', 'jp', 'kr'
|
| 725 |
]
|
| 726 |
|
| 727 |
-
#
|
| 728 |
log_print("=" * 50)
|
| 729 |
-
log_print("
|
| 730 |
-
log_print("Models will be loaded on first use")
|
| 731 |
log_print("=" * 50)
|
| 732 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 733 |
# Create Gradio interface
|
| 734 |
with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
|
| 735 |
gr.Markdown("""
|
|
|
|
| 224 |
campplus_model.eval()
|
| 225 |
campplus_model.to(DEVICE)
|
| 226 |
|
| 227 |
+
# Load BigVGAN - FAIL IF CANNOT LOAD (원래 의도 유지)
|
| 228 |
+
try:
|
| 229 |
+
from modules.bigvgan import bigvgan
|
| 230 |
+
bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_22khz_80band_256x', use_cuda_kernel=False)
|
| 231 |
+
bigvgan_model.remove_weight_norm()
|
| 232 |
+
bigvgan_model = bigvgan_model.eval().to(DEVICE)
|
| 233 |
+
log_print("✓ BigVGAN loaded successfully")
|
| 234 |
+
except Exception as e:
|
| 235 |
+
log_error(f"CRITICAL ERROR: Failed to load BigVGAN: {e}")
|
| 236 |
+
log_error(f"BigVGAN error traceback: {traceback.format_exc()}")
|
| 237 |
+
raise gr.Error(f"BigVGAN 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
|
| 238 |
|
| 239 |
+
# Load FAcodec - FAIL IF CANNOT LOAD (원래 의도 유지)
|
| 240 |
try:
|
| 241 |
ckpt_path, config_path = load_custom_model_from_hf("Plachta/FAcodec", 'pytorch_model.bin', 'config.yml')
|
| 242 |
with open(config_path, 'r', encoding='utf-8') as f:
|
| 243 |
codec_config = yaml.safe_load(f)
|
| 244 |
codec_model_params = recursive_munch(codec_config['model_params'])
|
| 245 |
|
| 246 |
+
# Remove problematic parameters
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
if hasattr(codec_model_params, 'dac_params'):
|
| 248 |
dac_params = codec_model_params.dac_params
|
| 249 |
+
problematic_params = ['causal', 'causal_conv', 'causal_attention', 'lstm']
|
|
|
|
| 250 |
for param in problematic_params:
|
| 251 |
if hasattr(dac_params, param):
|
| 252 |
delattr(dac_params, param)
|
|
|
|
| 255 |
codec_encoder = build_model(codec_model_params, stage="codec")
|
| 256 |
log_print("✓ FAcodec loaded successfully")
|
| 257 |
except Exception as e:
|
| 258 |
+
log_error(f"CRITICAL ERROR: Failed to load FAcodec: {e}")
|
| 259 |
log_error(f"FAcodec error traceback: {traceback.format_exc()}")
|
| 260 |
+
raise gr.Error(f"FAcodec 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
|
| 261 |
+
|
| 262 |
+
# Load codec checkpoint - FAIL IF CANNOT LOAD (원래 의도 유지)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
try:
|
| 264 |
ckpt_params = torch.load(ckpt_path, map_location="cpu")
|
| 265 |
if 'codec' in ckpt_params:
|
|
|
|
| 268 |
codec_encoder.codec.load_state_dict(ckpt_params['model'], strict=False)
|
| 269 |
else:
|
| 270 |
codec_encoder.codec.load_state_dict(ckpt_params, strict=False)
|
| 271 |
+
log_print("✓ Codec checkpoint loaded successfully")
|
| 272 |
except Exception as e:
|
| 273 |
+
log_error(f"CRITICAL ERROR: Failed to load codec checkpoint: {e}")
|
| 274 |
+
log_error(f"Codec checkpoint error traceback: {traceback.format_exc()}")
|
| 275 |
+
raise gr.Error(f"코덱 체크포인트 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
|
| 276 |
|
| 277 |
_ = [codec_encoder[key].eval() for key in codec_encoder]
|
| 278 |
_ = [codec_encoder[key].to(DEVICE) for key in codec_encoder]
|
|
|
|
| 335 |
}
|
| 336 |
to_mel_f0 = lambda x: mel_spectrogram(x, **mel_fn_args_f0)
|
| 337 |
|
| 338 |
+
# Load BigVGAN 44k - FAIL IF CANNOT LOAD (원래 의도 유지)
|
| 339 |
+
try:
|
| 340 |
+
bigvgan_44k_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_44khz_128band_512x', use_cuda_kernel=False)
|
| 341 |
+
bigvgan_44k_model.remove_weight_norm()
|
| 342 |
+
bigvgan_44k_model = bigvgan_44k_model.eval().to(DEVICE)
|
| 343 |
+
log_print("✓ BigVGAN 44k loaded successfully")
|
| 344 |
+
except Exception as e:
|
| 345 |
+
log_error(f"CRITICAL ERROR: Failed to load BigVGAN 44k: {e}")
|
| 346 |
+
log_error(f"BigVGAN 44k error traceback: {traceback.format_exc()}")
|
| 347 |
+
raise gr.Error(f"BigVGAN 44k 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
|
| 348 |
|
| 349 |
_seed_vc_models = {
|
| 350 |
'model': model,
|
|
|
|
| 366 |
'sr_f0': sr_f0
|
| 367 |
}
|
| 368 |
|
| 369 |
+
log_print("✓ All Seed-VC models loaded successfully!")
|
| 370 |
return _seed_vc_models
|
| 371 |
|
| 372 |
def adjust_f0_semitones(f0_sequence, n_semitones):
|
|
|
|
| 715 |
'es', 'fr', 'zh', 'jp', 'kr'
|
| 716 |
]
|
| 717 |
|
| 718 |
+
# 앱 시작 시 모델 초기화 (원래 의도 유지)
|
| 719 |
log_print("=" * 50)
|
| 720 |
+
log_print("INITIALIZING MODELS...")
|
|
|
|
| 721 |
log_print("=" * 50)
|
| 722 |
|
| 723 |
+
try:
|
| 724 |
+
# 모든 모델을 미리 로드하여 완전한 기능 보장
|
| 725 |
+
initialize_seed_vc_models()
|
| 726 |
+
log_print("✓ All models initialized successfully!")
|
| 727 |
+
except Exception as e:
|
| 728 |
+
log_error(f"CRITICAL ERROR during model initialization: {e}")
|
| 729 |
+
log_error(f"Error type: {type(e).__name__}")
|
| 730 |
+
log_error("Full traceback:")
|
| 731 |
+
log_error(traceback.format_exc())
|
| 732 |
+
log_error("App will not start due to model initialization failure")
|
| 733 |
+
# 앱 시작 중단
|
| 734 |
+
sys.exit(1)
|
| 735 |
+
|
| 736 |
# Create Gradio interface
|
| 737 |
with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
|
| 738 |
gr.Markdown("""
|