aaron committed on
Commit
75635fa
·
1 Parent(s): 67e0911

원래 의도 유지: 더미 모델 사용 금지, 실패 시 앱 중단

Browse files

- BigVGAN, FAcodec, BigVGAN 44k 로딩 실패 시 더미 모델 대신 앱 시작 중단
- 모든 모델이 정상 로드되어야만 앱 시작 (부분적 기능 제공 금지)
- 실패 시 명확한 에러 메시지 제공
- 앱 시작 시 모든 모델 미리 로드하여 완전한 기능 보장
- 음성 품질 저하 방지

Files changed (1) hide show
  1. app.py +48 -45
app.py CHANGED
@@ -224,29 +224,29 @@ def initialize_seed_vc_models():
224
  campplus_model.eval()
225
  campplus_model.to(DEVICE)
226
 
227
- # Load BigVGAN
228
- from modules.bigvgan import bigvgan
229
- bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_22khz_80band_256x', use_cuda_kernel=False)
230
- bigvgan_model.remove_weight_norm()
231
- bigvgan_model = bigvgan_model.eval().to(DEVICE)
 
 
 
 
 
 
232
 
233
- # Load FAcodec with error handling
234
  try:
235
  ckpt_path, config_path = load_custom_model_from_hf("Plachta/FAcodec", 'pytorch_model.bin', 'config.yml')
236
  with open(config_path, 'r', encoding='utf-8') as f:
237
  codec_config = yaml.safe_load(f)
238
  codec_model_params = recursive_munch(codec_config['model_params'])
239
 
240
- # Remove problematic 'causal' parameter if it exists
241
- if hasattr(codec_model_params, 'dac_params') and hasattr(codec_model_params.dac_params, 'causal'):
242
- delattr(codec_model_params.dac_params, 'causal')
243
- log_print("Removed 'causal' parameter from DAC config")
244
-
245
- # Also check for other problematic parameters
246
  if hasattr(codec_model_params, 'dac_params'):
247
  dac_params = codec_model_params.dac_params
248
- # Remove any parameters that might cause issues
249
- problematic_params = ['causal', 'causal_conv', 'causal_attention']
250
  for param in problematic_params:
251
  if hasattr(dac_params, param):
252
  delattr(dac_params, param)
@@ -255,29 +255,11 @@ def initialize_seed_vc_models():
255
  codec_encoder = build_model(codec_model_params, stage="codec")
256
  log_print("✓ FAcodec loaded successfully")
257
  except Exception as e:
258
- log_error(f"Warning: Failed to load FAcodec: {e}")
259
  log_error(f"FAcodec error traceback: {traceback.format_exc()}")
260
- # Create a minimal dummy codec encoder
261
- log_print("Creating minimal codec encoder as fallback...")
262
- try:
263
- # Try to create a basic DAC model without problematic parameters
264
- from descript_audio_codec import DAC
265
- codec_encoder = {'codec': DAC()}
266
- log_print("✓ Created minimal DAC fallback")
267
- except Exception as e2:
268
- log_error(f"Failed to create DAC fallback: {e2}")
269
- # Create a completely dummy encoder
270
- class DummyCodec:
271
- def __getitem__(self, key):
272
- return self
273
- def eval(self):
274
- return self
275
- def to(self, device):
276
- return self
277
- codec_encoder = {'codec': DummyCodec()}
278
- log_print("✓ Created dummy codec encoder")
279
-
280
- # Load codec checkpoint with error handling
281
  try:
282
  ckpt_params = torch.load(ckpt_path, map_location="cpu")
283
  if 'codec' in ckpt_params:
@@ -286,10 +268,11 @@ def initialize_seed_vc_models():
286
  codec_encoder.codec.load_state_dict(ckpt_params['model'], strict=False)
287
  else:
288
  codec_encoder.codec.load_state_dict(ckpt_params, strict=False)
 
289
  except Exception as e:
290
- log_error(f"Warning: Could not load codec state dict: {e}")
291
- log_error(f"Codec state dict error traceback: {traceback.format_exc()}")
292
- log_error("Codec will use default parameters")
293
 
294
  _ = [codec_encoder[key].eval() for key in codec_encoder]
295
  _ = [codec_encoder[key].to(DEVICE) for key in codec_encoder]
@@ -352,9 +335,16 @@ def initialize_seed_vc_models():
352
  }
353
  to_mel_f0 = lambda x: mel_spectrogram(x, **mel_fn_args_f0)
354
 
355
- bigvgan_44k_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_44khz_128band_512x', use_cuda_kernel=False)
356
- bigvgan_44k_model.remove_weight_norm()
357
- bigvgan_44k_model = bigvgan_44k_model.eval().to(DEVICE)
 
 
 
 
 
 
 
358
 
359
  _seed_vc_models = {
360
  'model': model,
@@ -376,6 +366,7 @@ def initialize_seed_vc_models():
376
  'sr_f0': sr_f0
377
  }
378
 
 
379
  return _seed_vc_models
380
 
381
  def adjust_f0_semitones(f0_sequence, n_semitones):
@@ -724,12 +715,24 @@ styles = list_supported_styles() or [
724
  'es', 'fr', 'zh', 'jp', 'kr'
725
  ]
726
 
727
- # Skip model pre-loading for faster startup
728
  log_print("=" * 50)
729
- log_print("SKIPPING MODEL PRE-LOADING FOR FASTER STARTUP")
730
- log_print("Models will be loaded on first use")
731
  log_print("=" * 50)
732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
733
  # Create Gradio interface
734
  with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
735
  gr.Markdown("""
 
224
  campplus_model.eval()
225
  campplus_model.to(DEVICE)
226
 
227
+ # Load BigVGAN - FAIL IF CANNOT LOAD (원래 의도 유지)
228
+ try:
229
+ from modules.bigvgan import bigvgan
230
+ bigvgan_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_22khz_80band_256x', use_cuda_kernel=False)
231
+ bigvgan_model.remove_weight_norm()
232
+ bigvgan_model = bigvgan_model.eval().to(DEVICE)
233
+ log_print("✓ BigVGAN loaded successfully")
234
+ except Exception as e:
235
+ log_error(f"CRITICAL ERROR: Failed to load BigVGAN: {e}")
236
+ log_error(f"BigVGAN error traceback: {traceback.format_exc()}")
237
+ raise gr.Error(f"BigVGAN 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
238
 
239
+ # Load FAcodec - FAIL IF CANNOT LOAD (원래 의도 유지)
240
  try:
241
  ckpt_path, config_path = load_custom_model_from_hf("Plachta/FAcodec", 'pytorch_model.bin', 'config.yml')
242
  with open(config_path, 'r', encoding='utf-8') as f:
243
  codec_config = yaml.safe_load(f)
244
  codec_model_params = recursive_munch(codec_config['model_params'])
245
 
246
+ # Remove problematic parameters
 
 
 
 
 
247
  if hasattr(codec_model_params, 'dac_params'):
248
  dac_params = codec_model_params.dac_params
249
+ problematic_params = ['causal', 'causal_conv', 'causal_attention', 'lstm']
 
250
  for param in problematic_params:
251
  if hasattr(dac_params, param):
252
  delattr(dac_params, param)
 
255
  codec_encoder = build_model(codec_model_params, stage="codec")
256
  log_print("✓ FAcodec loaded successfully")
257
  except Exception as e:
258
+ log_error(f"CRITICAL ERROR: Failed to load FAcodec: {e}")
259
  log_error(f"FAcodec error traceback: {traceback.format_exc()}")
260
+ raise gr.Error(f"FAcodec 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
261
+
262
+ # Load codec checkpoint - FAIL IF CANNOT LOAD (원래 의도 유지)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  try:
264
  ckpt_params = torch.load(ckpt_path, map_location="cpu")
265
  if 'codec' in ckpt_params:
 
268
  codec_encoder.codec.load_state_dict(ckpt_params['model'], strict=False)
269
  else:
270
  codec_encoder.codec.load_state_dict(ckpt_params, strict=False)
271
+ log_print("✓ Codec checkpoint loaded successfully")
272
  except Exception as e:
273
+ log_error(f"CRITICAL ERROR: Failed to load codec checkpoint: {e}")
274
+ log_error(f"Codec checkpoint error traceback: {traceback.format_exc()}")
275
+ raise gr.Error(f"코덱 체크포인트 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
276
 
277
  _ = [codec_encoder[key].eval() for key in codec_encoder]
278
  _ = [codec_encoder[key].to(DEVICE) for key in codec_encoder]
 
335
  }
336
  to_mel_f0 = lambda x: mel_spectrogram(x, **mel_fn_args_f0)
337
 
338
+ # Load BigVGAN 44k - FAIL IF CANNOT LOAD (원래 의도 유지)
339
+ try:
340
+ bigvgan_44k_model = bigvgan.BigVGAN.from_pretrained('nvidia/bigvgan_v2_44khz_128band_512x', use_cuda_kernel=False)
341
+ bigvgan_44k_model.remove_weight_norm()
342
+ bigvgan_44k_model = bigvgan_44k_model.eval().to(DEVICE)
343
+ log_print("✓ BigVGAN 44k loaded successfully")
344
+ except Exception as e:
345
+ log_error(f"CRITICAL ERROR: Failed to load BigVGAN 44k: {e}")
346
+ log_error(f"BigVGAN 44k error traceback: {traceback.format_exc()}")
347
+ raise gr.Error(f"BigVGAN 44k 모델 로딩 실패: {e}. 앱을 시작할 수 없습니다.")
348
 
349
  _seed_vc_models = {
350
  'model': model,
 
366
  'sr_f0': sr_f0
367
  }
368
 
369
+ log_print("✓ All Seed-VC models loaded successfully!")
370
  return _seed_vc_models
371
 
372
  def adjust_f0_semitones(f0_sequence, n_semitones):
 
715
  'es', 'fr', 'zh', 'jp', 'kr'
716
  ]
717
 
718
+ # 시작 모델 초기화 (원래 의도 유지)
719
  log_print("=" * 50)
720
+ log_print("INITIALIZING MODELS...")
 
721
  log_print("=" * 50)
722
 
723
+ try:
724
+ # 모든 모델을 미리 로드하여 완전한 기능 보장
725
+ initialize_seed_vc_models()
726
+ log_print("✓ All models initialized successfully!")
727
+ except Exception as e:
728
+ log_error(f"CRITICAL ERROR during model initialization: {e}")
729
+ log_error(f"Error type: {type(e).__name__}")
730
+ log_error("Full traceback:")
731
+ log_error(traceback.format_exc())
732
+ log_error("App will not start due to model initialization failure")
733
+ # 앱 시작 중단
734
+ sys.exit(1)
735
+
736
  # Create Gradio interface
737
  with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
738
  gr.Markdown("""