cockolo terada committed on
Commit
6607eec
·
verified ·
1 Parent(s): b2f808d

Update gradio_tabs/single.py

Browse files
Files changed (1) hide show
  1. gradio_tabs/single.py +206 -124
gradio_tabs/single.py CHANGED
@@ -23,6 +23,11 @@ import uuid # 結合ファイルの一意な名前生成のために追加
23
 
24
  from typing import Dict, Any, List, Tuple, Optional, Set
25
 
 
 
 
 
 
26
  # (TTSModelHolder, MockTTSModelなどのモックやヘルパー関数は変更なしのため省略します)
27
  # --- タイムゾーン定義 ---
28
  # グローバルな定数としてJSTを定義
@@ -46,7 +51,10 @@ class TTSModelHolder:
46
  p.mkdir(parents=True, exist_ok=True)
47
  # 起動時に一度だけサンプルモデルを作成するロジック
48
  if not any(p.iterdir()):
49
- print("No models found in model_assets. Creating sample models...")
 
 
 
50
  # Sample Model 1
51
  model1_path = p / "MyModel1"
52
  model1_path.mkdir(parents=True, exist_ok=True)
@@ -74,7 +82,10 @@ class TTSModelHolder:
74
  json.dump(style_settings_data, f, indent=2, ensure_ascii=False)
75
 
76
  # FNモデル (FN1-10)
77
- print("Creating FN models (FN1-10)...")
 
 
 
78
  for i in range(1, 11):
79
  fn_path = p / f"FN{i}"
80
  fn_path.mkdir(exist_ok=True)
@@ -83,7 +94,10 @@ class TTSModelHolder:
83
  json.dump({"data": {"style2id": {"Neutral": 0}}}, f)
84
 
85
  # whisperモデル (非表示用)
86
- print("Creating 'whisper' model...")
 
 
 
87
  whisper_path = p / "whisper"
88
  whisper_path.mkdir(exist_ok=True)
89
  (whisper_path / "G_0.safetensors").touch()
@@ -97,21 +111,33 @@ class TTSModelHolder:
97
  """
98
  if self.root_dir.is_dir():
99
  self.model_names = sorted([d.name for d in self.root_dir.iterdir() if d.is_dir()])
100
- print(f"TTSModelHolder model list refreshed. Known models: {self.model_names}")
 
 
 
101
  else:
102
  self.model_names = []
103
- print("TTSModelHolder root directory not found.")
 
 
 
104
  return self.model_names
105
 
106
  def get_model(self, model_name, model_path):
107
- print(f"Loading model: {model_name} (file: {Path(model_path).name})")
 
 
 
108
  if model_name not in self.model_names:
109
  error_msg = (
110
  f"Model '{model_name}' is not in the known list of TTSModelHolder. "
111
  f"Current list: {self.model_names}. "
112
  "Please refresh the model list by toggling the symlink checkbox or clicking the refresh button."
113
  )
114
- print(f"[ERROR] {error_msg}")
 
 
 
115
  raise ValueError(error_msg)
116
 
117
  self.current_model = MockTTSModel()
@@ -123,7 +149,10 @@ class MockTTSModel:
123
 
124
  def infer(self, text, **kwargs):
125
  length_scale = kwargs.get('length', 1.0)
126
- print(f"Inferencing with text '{text}' and style: {kwargs.get('style')} and weight: {kwargs.get('style_weight')}, length_scale: {length_scale}")
 
 
 
127
  sampling_rate = 44100
128
  base_duration = max(1, len(text) // 5)
129
  duration = base_duration * length_scale
@@ -208,7 +237,10 @@ def sort_models_by_custom_order(model_list: List[str], custom_order: List[str])
208
 
209
  def set_random_seed(seed: int):
210
  if seed >= 0:
211
- print(f"Setting random seed to: {seed}")
 
 
 
212
  torch.manual_seed(seed)
213
  if torch.cuda.is_available():
214
  torch.cuda.manual_seed(seed)
@@ -358,7 +390,8 @@ def load_styles_from_model_folder(model_asset_path: Path) -> Dict[str, Any]:
358
  for style_name in style2id.keys():
359
  final_styles[style_name] = {"display_name": style_name, "weight": DEFAULT_STYLE_WEIGHT}
360
  except Exception as e:
361
- print(f"Warning: Failed to load or parse {config_path}: {e}")
 
362
  custom_style_config_path = model_asset_path / STYLE_CONFIG_FILENAME_IN_MODEL_DIR
363
  if custom_style_config_path.exists():
364
  try:
@@ -370,7 +403,8 @@ def load_styles_from_model_folder(model_asset_path: Path) -> Dict[str, Any]:
370
  final_styles.setdefault(style_key, {})
371
  final_styles[style_key].update(style_info)
372
  except Exception as e:
373
- print(f"Warning: Failed to load or parse {custom_style_config_path}: {e}")
 
374
  if not final_styles or DEFAULT_STYLE not in final_styles:
375
  final_styles[DEFAULT_STYLE] = {"display_name": DEFAULT_STYLE, "weight": DEFAULT_STYLE_WEIGHT}
376
  return final_styles
@@ -389,17 +423,22 @@ def process_single_synthesis_webui(
389
  current_model_file_path = Path(current_model_file_path_str)
390
  log_messages = []
391
  set_random_seed(seed_arg)
392
- if seed_arg >= 0:
 
393
  log_messages.append(f"乱数シードを {seed_arg} に固定しました。")
 
394
  try:
395
  model_holder_ref.get_model(current_model_name, current_model_file_path)
396
  if model_holder_ref.current_model is None:
397
  msg = f"モデルのロード失敗: {current_model_name} (ファイル: {current_model_file_path.name})"
398
- log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
399
- log_messages.append(f"使用モデル: {current_model_name} (ファイル: {current_model_file_path.name})")
 
 
 
400
  except Exception as e:
401
  msg = f"モデルロードエラー '{current_model_name}' (ファイル: {current_model_file_path.name}): {e}"
402
- log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
403
  speaker_id = 0
404
  if model_holder_ref.current_model and hasattr(model_holder_ref.current_model, 'spk2id'):
405
  model_spk2id = model_holder_ref.current_model.spk2id
@@ -407,7 +446,10 @@ def process_single_synthesis_webui(
407
  speaker_id = model_spk2id[speaker_name_arg]
408
  elif model_spk2id:
409
  speaker_id = list(model_spk2id.values())[0]
410
- log_messages.append(f"音声合成中...")
 
 
 
411
  start_time_synth = datetime.datetime.now(JST)
412
  try:
413
  length_for_model = 1.0 / length_scale_arg if length_scale_arg != 0 else 1.0
@@ -421,11 +463,14 @@ def process_single_synthesis_webui(
421
  speaker_id=speaker_id, pitch_scale=pitch_scale_arg, intonation_scale=intonation_scale_arg,
422
  )
423
  except (InvalidToneError, ValueError) as e:
424
- msg = f"合成エラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
425
  except Exception as e:
426
- msg = f"予期せぬエラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
427
  duration_synth = (datetime.datetime.now(JST) - start_time_synth).total_seconds()
428
- log_messages.append(f"音声合成成功。音声長: {len(audio_data)/sr:.2f}s, 処理時間: {duration_synth:.2f}s.")
 
 
 
429
  return True, log_messages, (sr, audio_data)
430
 
431
 
@@ -438,11 +483,14 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
438
  MERGER_CACHE_PATH.mkdir(parents=True, exist_ok=True)
439
  is_merger_cache_available = MERGER_CACHE_PATH.is_dir()
440
  if is_merger_cache_available:
441
- print(f"Merger cache directory is available at: {MERGER_CACHE_PATH}")
 
442
  else:
443
- print(f"Warning: Merger cache path {MERGER_CACHE_PATH} exists but is not a directory.")
 
444
  except OSError as e:
445
- print(f"Warning: Could not create or access merger cache directory {MERGER_CACHE_PATH}: {e}")
 
446
 
447
  NORMAL_MODE_MODEL_ORDER = [
448
  "mikeneko",
@@ -470,9 +518,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
470
  MAX_WORKBENCH_ITEMS = 8
471
 
472
  all_styles_data_state = gr.State({})
473
- # ▼▼▼ 変更: 生成されたWAVファイルのパスリストを保持するStateを追加 ▼▼▼
474
  synthesized_wav_files_state = gr.State([])
475
- # ▲▲▲ 変更 ▲▲▲
476
  workbench_state = gr.State([])
477
  merged_preview_state = gr.State({})
478
 
@@ -495,27 +541,24 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
495
  f"**Style:** {item['style']} (Weight: {item['style_weight']:.2f})"
496
  )
497
 
498
- # ▼▼▼ 変更: プレーヤーにはMP3、ダウンロードにはWAVを割り当て ▼▼▼
499
  wav_path = item['audio_path']
500
  mp3_path = str(Path(wav_path).with_suffix('.mp3'))
501
- # MP3がなければフォールバックとしてWAVを再生
502
  playback_path = mp3_path if Path(mp3_path).exists() else wav_path
503
 
504
  updates.extend([
505
- gr.update(visible=True), # Container Column
506
- gr.update(value=f"**{i+1}**"), # Item Number Display
507
- gr.update(value=playback_path), # Audio (プレーヤー用)
508
- gr.update(value=wav_path, visible=True),# Download Button (ダウンロード用)
509
- gr.update(value=info_text) # Info Markdown
510
  ])
511
- # ▲▲▲ 変更 ▲▲▲
512
  else:
513
  updates.extend([
514
- gr.update(visible=False), # Container Column
515
- gr.update(value=""), # Item Number Display
516
- gr.update(value=None), # Audio
517
- gr.update(value=None, visible=False), # Download Button
518
- gr.update(value="") # Info
519
  ])
520
  return tuple(updates)
521
 
@@ -665,7 +708,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
665
  try:
666
  item.unlink()
667
  except OSError as e:
668
- print(f"Failed to remove symlink {item}: {e}")
 
669
 
670
  if use_symlink_mode:
671
  if MERGER_CACHE_PATH.exists() and MERGER_CACHE_PATH.is_dir():
@@ -676,9 +720,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
676
  try:
677
  os.symlink(item, target_link)
678
  except OSError as e:
679
- print(f"Warning: Could not create symlink for {item.name}: {e}")
 
680
  else:
681
- print(f"Warning: Symlink mode is on, but {MERGER_CACHE_PATH} does not exist or is not a directory.")
 
682
 
683
  model_holder.refresh()
684
 
@@ -726,7 +772,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
726
  return gr.update(value=data.get("weight", DEFAULT_STYLE_WEIGHT))
727
  return gr.update(value=DEFAULT_STYLE_WEIGHT)
728
 
729
- # ▼▼▼ 変更: 音声合成処理をWAV/MP3両対応に修正 ▼▼▼
730
  def action_run_synthesis(
731
  model_name: Optional[str],
732
  style_display_name: Optional[str], style_weight_for_synth: float,
@@ -744,15 +789,15 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
744
  error_outputs.append(gr.update(visible=False)) # audio_output_area
745
  for _ in range(MAX_AUDIO_OUTPUTS):
746
  error_outputs.extend([
747
- gr.update(visible=False), # audio_item_columns
748
- gr.update(value=None), # audio_outputs
749
- gr.update(value=None, visible=False), # download_buttons
750
  ])
751
  for _ in range(ITEMS_PER_ROW - 1):
752
- error_outputs.append(gr.update(visible=False)) # dummy_audio_item_columns
753
  for _ in range(MAX_AUDIO_OUTPUTS):
754
- error_outputs.append("") # synthesized_text_states
755
- error_outputs.append([]) # synthesized_wav_files_state
756
 
757
  if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
758
  found_chars = "".join(sorted(list(set(re.findall(INVALID_FILENAME_CHARS_PATTERN, text)))))
@@ -783,7 +828,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
783
  return tuple(error_outputs)
784
 
785
  actual_model_file_to_load = str(model_path / files[0])
786
- all_logs.append(f"[自動選択] 使用モデルファイル: {files[0]}")
 
787
 
788
  batch_count = int(batch_count)
789
  if batch_count <= 0: batch_count = 1
@@ -792,7 +838,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
792
  final_mp3_paths = []
793
  generated_texts = []
794
 
795
- # 共通のファイル保存ロジック
796
  def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
797
  try:
798
  temp_dir = Path(tempfile.gettempdir())
@@ -809,7 +854,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
809
 
810
  return str(output_path_wav), str(output_path_mp3)
811
  except Exception as e:
812
- all_logs.append(f"[エラー] 一時音声ファイルの保存に失敗: {e}")
813
  return None
814
 
815
  if generation_mode == "発音ガチャ2":
@@ -817,13 +862,16 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
817
  ratio_list = [float(x.strip()) for x in random_text_ratio_str.split(',') if x.strip()]
818
  if not ratio_list:
819
  ratio_list = [0.5]
820
- all_logs.append("[警告] カタカナ化の割合に有効な数値が指定されなかったため、0.5 を使用します。")
821
  except ValueError:
822
  ratio_list = [0.5]
823
- all_logs.append("[警告] カタカナ化の割合の解析に失敗したため、0.5 を使用します。")
824
- all_logs.append(f"--- 発音ガチャ2 モード (pyopenjtalk) ---")
825
- internal_mode = int(random_text_mode) + 1
826
- all_logs.append(f"粒度: {random_text_mode} (内部モード: {internal_mode}), カタカナ化割合候補: {ratio_list}")
 
 
 
827
  generated_variations: Dict[str, List[str]] = {}
828
  max_attempts = batch_count * 20
829
  for _ in progress.tqdm(range(max_attempts), desc="テキストバリエーション生成中", total=max_attempts):
@@ -835,12 +883,14 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
835
  if final_text and final_text not in generated_variations:
836
  generated_variations[final_text] = processed_blocks_list
837
  if len(generated_variations) < batch_count:
838
- all_logs.append(f"[警告] {batch_count}個のユニークなテキストを生成できませんでした。({len(generated_variations)}個のみ生成)")
839
 
840
  for i, (final_text, processed_blocks_list) in enumerate(progress.tqdm(generated_variations.items(), desc=f"{len(generated_variations)}件の音声を生成中")):
841
- all_logs.append(f"--- 生成 {i+1}/{len(generated_variations)} ---")
842
- all_logs.append(f" 分割パターン: {' / '.join(processed_blocks_list)}")
843
- all_logs.append(f" 合成テキスト: \"{final_text[:50]}{'...' if len(final_text)>50 else ''}\"")
 
 
844
  success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, final_text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, -1, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
845
  all_logs.extend([f" {log}" for log in logs])
846
  if success and audio_tuple:
@@ -857,12 +907,15 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
857
  generated_texts.append(final_text)
858
 
859
  else: # 発音ガチャ1 モード
860
- all_logs.append("--- 発音ガチャ1 モード ---")
 
861
  start_seed = int(seed)
862
  for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
863
  current_seed = start_seed + i if start_seed >= 0 else -1
864
- all_logs.append(f"--- 生成 {i+1}/{batch_count} (Seed: {current_seed if current_seed >= 0 else 'Random'}) ---")
865
- all_logs.append(f" 合成テキスト: \"{text[:50]}{'...' if len(text)>50 else ''}\"")
 
 
866
  success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
867
  all_logs.extend([f" {log}" for log in logs])
868
  if success and audio_tuple:
@@ -877,13 +930,24 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
877
  final_wav_paths.append(saved_paths[0])
878
  final_mp3_paths.append(saved_paths[1])
879
  generated_texts.append(text)
880
-
881
- all_logs.append("--- 全ての生成が完了しました ---")
 
 
 
 
882
 
883
  final_outputs = []
884
- status_message = "\n".join(all_logs)
 
 
 
 
 
 
885
  final_outputs.append(status_message)
886
- num_generated = len(final_wav_paths)
 
887
  final_outputs.append(gr.update(visible=num_generated > 0))
888
 
889
  for i in range(MAX_AUDIO_OUTPUTS):
@@ -891,8 +955,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
891
  mp3_val = final_mp3_paths[i] if is_visible else None
892
  wav_val = final_wav_paths[i] if is_visible else None
893
  final_outputs.append(gr.update(visible=is_visible))
894
- final_outputs.append(gr.update(value=mp3_val)) # Audio (MP3)
895
- final_outputs.append(gr.update(value=wav_val, visible=is_visible)) # Download (WAV)
896
 
897
  num_dummies_needed = (ITEMS_PER_ROW - (num_generated % ITEMS_PER_ROW)) % ITEMS_PER_ROW if num_generated > 0 else 0
898
  for i in range(ITEMS_PER_ROW - 1):
@@ -902,11 +966,9 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
902
  text_val = generated_texts[i] if i < num_generated else ""
903
  final_outputs.append(text_val)
904
 
905
- final_outputs.append(final_wav_paths) # State用
906
  return tuple(final_outputs)
907
- # ▲▲▲ 変更 ▲▲▲
908
 
909
- # ▼▼▼ 変更: キープ追加処理をState経由のWAVパスで行うように修正 ▼▼▼
910
  def add_to_workbench(
911
  current_status: str,
912
  current_workbench_list: List[Dict],
@@ -917,20 +979,18 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
917
  safe_workbench_list = current_workbench_list or []
918
  if not wav_audio_path or not Path(wav_audio_path).exists():
919
  log_messages.append("⚠️ [キープ追加エラー] 追加する音声ファイル(WAV)が見つかりません。")
920
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
921
  return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
922
 
923
- # 内部的にはWAVパスで同一性をチェック
924
  if any(item['audio_path'] == wav_audio_path for item in safe_workbench_list):
925
  log_messages.append("ℹ️ この音声はすでにキープに存在します。")
926
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
927
  return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
928
 
929
  display_model_name = model
930
  parsed_result = parse_merged_model_name(model)
931
  if parsed_result: display_model_name, _ = parsed_result
932
 
933
- # StateにはWAVパスを保存
934
  new_item = {"audio_path": wav_audio_path, "text": text, "model": display_model_name, "original_models": [model], "style": style_display_name, "style_weight": style_weight, "timestamp": datetime.datetime.now(JST).isoformat(), "is_merged": False}
935
  updated_list = safe_workbench_list + [new_item]
936
 
@@ -941,30 +1001,36 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
941
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
942
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
943
  if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
944
- except Exception as e: print(f"Warning: Failed to delete old workbench audio file: {e}")
 
 
945
  log_messages.append(f"ℹ️ キープのアイテムが最大数({MAX_WORKBENCH_ITEMS})に達したため、一番古いアイテムを削除しました。")
946
 
947
  ui_updates = update_workbench_ui(updated_list)
948
  log_messages.append("✅ キープに音声を追加しました。")
949
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
 
 
 
 
 
 
950
  return (final_status, updated_list) + ui_updates
951
- # ▲▲▲ 変更 ▲▲▲
952
 
953
- # ▼▼▼ 変更: ファイル削除時にMP3も削除 ▼▼▼
954
  def remove_from_workbench(current_status: str, index_to_remove: int, current_workbench_list: List[Dict]) -> Tuple:
955
  log_messages = []
956
  safe_workbench_list = current_workbench_list or []
957
- if not (0 <= index_to_remove < len(safe_workbench_list)): return (current_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
 
 
958
 
959
  item_to_remove = safe_workbench_list[index_to_remove]
960
  try:
961
  path_to_delete_wav = Path(item_to_remove['audio_path'])
962
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
963
 
964
- # WAVファイルの削除
965
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
966
  path_to_delete_wav.unlink()
967
- # MP3ファイルの削除
968
  if path_to_delete_mp3.exists():
969
  path_to_delete_mp3.unlink()
970
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除し、一時ファイル(WAV/MP3)をクリーンアップしました。")
@@ -972,36 +1038,41 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
972
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(ファイルは保持: {path_to_delete_wav.name})")
973
  else:
974
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(関連ファイルなし)")
975
-
976
  except Exception as e: log_messages.append(f"⚠️ キープのアイテム #{index_to_remove + 1} のファイル削除中にエラー: {e}")
977
 
978
  updated_list = [item for i, item in enumerate(safe_workbench_list) if i != index_to_remove]
979
  ui_updates = update_workbench_ui(updated_list)
980
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
 
 
 
 
 
 
981
  return (final_status, updated_list) + ui_updates
982
- # ▲▲▲ 変更 ▲▲▲
983
 
984
- # ▼▼▼ 変更: 結合プレビューもWAV/MP3両対応に ▼▼▼
985
  def action_merge_preview(current_status: str, first_audio_num: int, second_audio_num: int, pause_ms: int, workbench_list: List[Dict], progress=gr.Progress(track_tqdm=True)):
986
  log_messages = []
987
- error_return = (
988
- (current_status + "\n" + "\n".join(log_messages)).strip(),
989
- None,
990
- gr.update(value=None, visible=False),
991
- {}
992
- )
 
 
993
  if not workbench_list:
994
  log_messages.append("⚠️ [結合プレビュー警告] キープに音声がありません。")
995
- return error_return
996
  idx1, idx2 = int(first_audio_num) - 1, int(second_audio_num) - 1
997
  if not (0 <= idx1 < len(workbench_list) and 0 <= idx2 < len(workbench_list)):
998
  log_messages.append(f"⚠️ [結合プレビュー警告] 指定された番号(#{first_audio_num}, #{second_audio_num})の音声が見つかりません。")
999
- return error_return
1000
  item1, item2 = workbench_list[idx1], workbench_list[idx2]
1001
  audio_path1, audio_path2 = item1.get("audio_path"), item2.get("audio_path")
1002
  if not audio_path1 or not Path(audio_path1).exists() or not audio_path2 or not Path(audio_path2).exists():
1003
  log_messages.append("❌ [結合プレビューエラー] 音声ファイル(WAV)が見つかりません。ファイルが削除された可能性があります。")
1004
- return error_return
1005
 
1006
  progress(0, desc="結合準備中...")
1007
  try:
@@ -1009,7 +1080,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1009
  pause_duration = int(pause_ms)
1010
  if pause_duration >= 0:
1011
  combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
1012
- log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {pause_duration}ms のポーズを挟んで結合しました。")
1013
  else:
1014
  overlap_duration = abs(pause_duration)
1015
  max_possible_overlap = min(len(segment1), len(segment2))
@@ -1018,11 +1089,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1018
  overlap_duration = max_possible_overlap
1019
  combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
1020
  combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
1021
- log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {overlap_duration}ms 重ねて(オーバーレイして)結合しました。")
1022
  progress(1, desc="結合完了")
1023
  except Exception as e:
1024
  log_messages.append(f"❌ [結合プレビューエラー] 音声の結合中にエラーが発生しました: {e}")
1025
- return error_return
1026
 
1027
  base_filename = f"merged_preview_{uuid.uuid4().hex[:8]}"
1028
  temp_dir = Path(tempfile.gettempdir())
@@ -1037,30 +1108,37 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1037
  original_models1, original_models2 = item1.get('original_models', []), item2.get('original_models', [])
1038
  all_original_models = set(original_models1 + original_models2)
1039
 
1040
- # StateにはWAVパスを保存
1041
  metadata = {"text": f"{item1.get('text', '')} | {item2.get('text', '')}", "display_models": sorted(list(all_display_models)), "original_models": sorted(list(all_original_models)), "audio_path": str(wav_temp_path), "timestamp": datetime.datetime.now(JST).isoformat()}
1042
  log_messages.append("✅ 結合プレビューが生成されました。")
1043
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1044
 
1045
- # プレーヤーにはMP3、ダウンロードボタンにはWAVを渡す
 
 
 
 
 
1046
  return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
1047
- # ▲▲▲ 変更 ▲▲▲
1048
 
1049
- # ▼▼▼ 変更: 元ファイル削除時にMP3も削除 ▼▼▼
1050
  def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
1051
  log_messages = []
1052
  safe_workbench_list = current_workbench_list or []
 
 
 
 
 
 
 
 
 
1053
  if not preview_data or "audio_path" not in preview_data:
1054
  log_messages.append("⚠️ [キープ追加エラー] 追加する結合済み音声がありません。先にプレビューを生成してください。")
1055
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1056
- return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
1057
 
1058
- # preview_data["audio_path"] はWAVのパス
1059
  src_path = Path(preview_data["audio_path"])
1060
  if not src_path.exists():
1061
  log_messages.append("⚠️ [キープ追加エラー] 結合済み音声ファイルが見つかりません。")
1062
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1063
- return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
1064
 
1065
  new_merged_item = {"audio_path": str(src_path), "text": preview_data.get("text", "N/A"), "model": " | ".join(preview_data.get("display_models", [])), "original_models": preview_data.get("original_models", []), "style": "N/A", "style_weight": 0.0, "timestamp": preview_data.get("timestamp"), "is_merged": True}
1066
  final_workbench_list = []
@@ -1076,10 +1154,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1076
  try:
1077
  path_to_delete_wav = Path(item_to_remove['audio_path'])
1078
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1079
- if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
1080
- path_to_delete_wav.unlink()
1081
- if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir():
1082
- path_to_delete_mp3.unlink()
1083
  except Exception as e: log_messages.append(f"⚠️ 元の音声ファイル削除中にエラー: {e}")
1084
 
1085
  final_workbench_list = [new_merged_item] + remaining_list
@@ -1095,16 +1171,23 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1095
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1096
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
1097
  if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
1098
- except Exception as e: print(f"Warning: Failed to delete old workbench audio file: {e}")
 
 
1099
  log_messages.append(f"ℹ️ キープが最大数({MAX_WORKBENCH_ITEMS})に達したため一番古いアイテムを削除しました。")
1100
 
1101
  ui_updates = update_workbench_ui(final_workbench_list)
1102
- final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
 
 
 
 
 
 
1103
  return (final_status, final_workbench_list) + ui_updates
1104
- # ▲▲▲ 変更 ▲▲▲
1105
 
1106
 
1107
- # --- イベントリスナー接続 (一部変更あり) ---
1108
  def on_fn_mode_change(is_fn_mode_on: bool) -> gr.Checkbox:
1109
  if is_fn_mode_on: return gr.update(value=False)
1110
  return gr.update()
@@ -1124,14 +1207,12 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1124
  current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
1125
  use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
1126
 
1127
- # ▼▼▼ 変更: generate_button の出力に State を追加 ▼▼▼
1128
  generate_outputs = [status_textbox, audio_output_area]
1129
  for i in range(MAX_AUDIO_OUTPUTS):
1130
  generate_outputs.extend([audio_item_columns[i], audio_outputs[i], download_buttons[i]])
1131
  generate_outputs.extend(dummy_audio_item_columns)
1132
  generate_outputs.extend(synthesized_text_states)
1133
- generate_outputs.append(synthesized_wav_files_state) # Stateを追加
1134
- # ▲▲▲ 変更 ▲▲▲
1135
 
1136
  generate_button.click(
1137
  fn=action_run_synthesis,
@@ -1150,23 +1231,21 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1150
  outputs=generate_outputs
1151
  )
1152
 
1153
- # ▼▼▼ 変更: to_workbench_button の入力と呼び出し方を修正 ▼▼▼
1154
  for i in range(MAX_AUDIO_OUTPUTS):
1155
  to_workbench_buttons[i].click(
1156
  fn=lambda current_status, workbench_list, text, model, style_display, style_weight, all_wavs, idx=i: \
1157
  add_to_workbench(
1158
  current_status, workbench_list,
1159
- all_wavs[idx] if all_wavs and idx < len(all_wavs) else None, # WAVパスを渡す
1160
  text, model, style_display, style_weight
1161
  ),
1162
  inputs=[
1163
  status_textbox, workbench_state, synthesized_text_states[i],
1164
  selected_model_dropdown, current_styles_dropdown, style_weight_for_synth_slider,
1165
- synthesized_wav_files_state # Stateを入力に追加
1166
  ],
1167
  outputs=[status_textbox, workbench_state] + all_workbench_ui_components
1168
  )
1169
- # ▲▲▲ 変更 ▲▲▲
1170
 
1171
  for i, item in enumerate(workbench_items):
1172
  item["delete_btn"].click(
@@ -1210,7 +1289,10 @@ if __name__ == "__main__":
1210
 
1211
  merger_cache_path = Path("/tmp/sbv2_merger_cache")
1212
  mock_model_holder = TTSModelHolder()
1213
- print(f"Initial models loaded by TTSModelHolder: {mock_model_holder.model_names}")
 
 
 
1214
 
1215
  app = create_synthesis_app(mock_model_holder)
1216
 
 
23
 
24
  from typing import Dict, Any, List, Tuple, Optional, Set
25
 
26
+ # --- ログ設定 ---
27
+ # TrueにするとターミナルとUIに詳細なログが出力されます。
28
+ # Falseにすると、エラーや重要な通知以外のログは抑制されます。
29
+ ENABLE_LOGGING = False
30
+
31
  # (TTSModelHolder, MockTTSModelなどのモックやヘルパー関数は変更なしのため省略します)
32
  # --- タイムゾーン定義 ---
33
  # グローバルな定数としてJSTを定義
 
51
  p.mkdir(parents=True, exist_ok=True)
52
  # 起動時に一度だけサンプルモデルを作成するロジック
53
  if not any(p.iterdir()):
54
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
55
+ if ENABLE_LOGGING:
56
+ print("No models found in model_assets. Creating sample models...")
57
+ # ▲▲▲ 変更 ▲▲▲
58
  # Sample Model 1
59
  model1_path = p / "MyModel1"
60
  model1_path.mkdir(parents=True, exist_ok=True)
 
82
  json.dump(style_settings_data, f, indent=2, ensure_ascii=False)
83
 
84
  # FNモデル (FN1-10)
85
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
86
+ if ENABLE_LOGGING:
87
+ print("Creating FN models (FN1-10)...")
88
+ # ▲▲▲ 変更 ▲▲▲
89
  for i in range(1, 11):
90
  fn_path = p / f"FN{i}"
91
  fn_path.mkdir(exist_ok=True)
 
94
  json.dump({"data": {"style2id": {"Neutral": 0}}}, f)
95
 
96
  # whisperモデル (非表示用)
97
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
98
+ if ENABLE_LOGGING:
99
+ print("Creating 'whisper' model...")
100
+ # ▲▲▲ 変更 ▲▲▲
101
  whisper_path = p / "whisper"
102
  whisper_path.mkdir(exist_ok=True)
103
  (whisper_path / "G_0.safetensors").touch()
 
111
  """
112
  if self.root_dir.is_dir():
113
  self.model_names = sorted([d.name for d in self.root_dir.iterdir() if d.is_dir()])
114
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
115
+ if ENABLE_LOGGING:
116
+ print(f"TTSModelHolder model list refreshed. Known models: {self.model_names}")
117
+ # ▲▲▲ 変更 ▲▲▲
118
  else:
119
  self.model_names = []
120
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
121
+ if ENABLE_LOGGING:
122
+ print("TTSModelHolder root directory not found.")
123
+ # ▲▲▲ 変更 ▲▲▲
124
  return self.model_names
125
 
126
  def get_model(self, model_name, model_path):
127
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
128
+ if ENABLE_LOGGING:
129
+ print(f"Loading model: {model_name} (file: {Path(model_path).name})")
130
+ # ▲▲▲ 変更 ▲▲▲
131
  if model_name not in self.model_names:
132
  error_msg = (
133
  f"Model '{model_name}' is not in the known list of TTSModelHolder. "
134
  f"Current list: {self.model_names}. "
135
  "Please refresh the model list by toggling the symlink checkbox or clicking the refresh button."
136
  )
137
+ # ▼▼▼ 変更: printをエラーなので残すか、制御するか検討。ここでは制御対象に含める。▼▼▼
138
+ if ENABLE_LOGGING:
139
+ print(f"[ERROR] {error_msg}")
140
+ # ▲▲▲ 変更 ▲▲▲
141
  raise ValueError(error_msg)
142
 
143
  self.current_model = MockTTSModel()
 
149
 
150
  def infer(self, text, **kwargs):
151
  length_scale = kwargs.get('length', 1.0)
152
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
153
+ if ENABLE_LOGGING:
154
+ print(f"Inferencing with text '{text}' and style: {kwargs.get('style')} and weight: {kwargs.get('style_weight')}, length_scale: {length_scale}")
155
+ # ▲▲▲ 変更 ▲▲▲
156
  sampling_rate = 44100
157
  base_duration = max(1, len(text) // 5)
158
  duration = base_duration * length_scale
 
237
 
238
  def set_random_seed(seed: int):
239
  if seed >= 0:
240
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
241
+ if ENABLE_LOGGING:
242
+ print(f"Setting random seed to: {seed}")
243
+ # ▲▲▲ 変更 ▲▲▲
244
  torch.manual_seed(seed)
245
  if torch.cuda.is_available():
246
  torch.cuda.manual_seed(seed)
 
390
  for style_name in style2id.keys():
391
  final_styles[style_name] = {"display_name": style_name, "weight": DEFAULT_STYLE_WEIGHT}
392
  except Exception as e:
393
+ if ENABLE_LOGGING:
394
+ print(f"Warning: Failed to load or parse {config_path}: {e}")
395
  custom_style_config_path = model_asset_path / STYLE_CONFIG_FILENAME_IN_MODEL_DIR
396
  if custom_style_config_path.exists():
397
  try:
 
403
  final_styles.setdefault(style_key, {})
404
  final_styles[style_key].update(style_info)
405
  except Exception as e:
406
+ if ENABLE_LOGGING:
407
+ print(f"Warning: Failed to load or parse {custom_style_config_path}: {e}")
408
  if not final_styles or DEFAULT_STYLE not in final_styles:
409
  final_styles[DEFAULT_STYLE] = {"display_name": DEFAULT_STYLE, "weight": DEFAULT_STYLE_WEIGHT}
410
  return final_styles
 
423
  current_model_file_path = Path(current_model_file_path_str)
424
  log_messages = []
425
  set_random_seed(seed_arg)
426
+ # ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
427
+ if seed_arg >= 0 and ENABLE_LOGGING:
428
  log_messages.append(f"乱数シードを {seed_arg} に固定しました。")
429
+ # ▲▲▲ 変更 ▲▲▲
430
  try:
431
  model_holder_ref.get_model(current_model_name, current_model_file_path)
432
  if model_holder_ref.current_model is None:
433
  msg = f"モデルのロード失敗: {current_model_name} (ファイル: {current_model_file_path.name})"
434
+ log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
435
+ # ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
436
+ if ENABLE_LOGGING:
437
+ log_messages.append(f"使用モデル: {current_model_name} (ファイル: {current_model_file_path.name})")
438
+ # ▲▲▲ 変更 ▲▲▲
439
  except Exception as e:
440
  msg = f"モデルロードエラー '{current_model_name}' (ファイル: {current_model_file_path.name}): {e}"
441
+ log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
442
  speaker_id = 0
443
  if model_holder_ref.current_model and hasattr(model_holder_ref.current_model, 'spk2id'):
444
  model_spk2id = model_holder_ref.current_model.spk2id
 
446
  speaker_id = model_spk2id[speaker_name_arg]
447
  elif model_spk2id:
448
  speaker_id = list(model_spk2id.values())[0]
449
+ # ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
450
+ if ENABLE_LOGGING:
451
+ log_messages.append(f"音声合成中...")
452
+ # ▲▲▲ 変更 ▲▲▲
453
  start_time_synth = datetime.datetime.now(JST)
454
  try:
455
  length_for_model = 1.0 / length_scale_arg if length_scale_arg != 0 else 1.0
 
463
  speaker_id=speaker_id, pitch_scale=pitch_scale_arg, intonation_scale=intonation_scale_arg,
464
  )
465
  except (InvalidToneError, ValueError) as e:
466
+ msg = f"合成エラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
467
  except Exception as e:
468
+ msg = f"予期せぬエラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
469
  duration_synth = (datetime.datetime.now(JST) - start_time_synth).total_seconds()
470
+ # ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
471
+ if ENABLE_LOGGING:
472
+ log_messages.append(f"音声合成成功。音声長: {len(audio_data)/sr:.2f}s, 処理時間: {duration_synth:.2f}s.")
473
+ # ▲▲▲ 変更 ▲▲▲
474
  return True, log_messages, (sr, audio_data)
475
 
476
 
 
483
  MERGER_CACHE_PATH.mkdir(parents=True, exist_ok=True)
484
  is_merger_cache_available = MERGER_CACHE_PATH.is_dir()
485
  if is_merger_cache_available:
486
+ if ENABLE_LOGGING:
487
+ print(f"Merger cache directory is available at: {MERGER_CACHE_PATH}")
488
  else:
489
+ if ENABLE_LOGGING:
490
+ print(f"Warning: Merger cache path {MERGER_CACHE_PATH} exists but is not a directory.")
491
  except OSError as e:
492
+ if ENABLE_LOGGING:
493
+ print(f"Warning: Could not create or access merger cache directory {MERGER_CACHE_PATH}: {e}")
494
 
495
  NORMAL_MODE_MODEL_ORDER = [
496
  "mikeneko",
 
518
  MAX_WORKBENCH_ITEMS = 8
519
 
520
  all_styles_data_state = gr.State({})
 
521
  synthesized_wav_files_state = gr.State([])
 
522
  workbench_state = gr.State([])
523
  merged_preview_state = gr.State({})
524
 
 
541
  f"**Style:** {item['style']} (Weight: {item['style_weight']:.2f})"
542
  )
543
 
 
544
  wav_path = item['audio_path']
545
  mp3_path = str(Path(wav_path).with_suffix('.mp3'))
 
546
  playback_path = mp3_path if Path(mp3_path).exists() else wav_path
547
 
548
  updates.extend([
549
+ gr.update(visible=True),
550
+ gr.update(value=f"**{i+1}**"),
551
+ gr.update(value=playback_path),
552
+ gr.update(value=wav_path, visible=True),
553
+ gr.update(value=info_text)
554
  ])
 
555
  else:
556
  updates.extend([
557
+ gr.update(visible=False),
558
+ gr.update(value=""),
559
+ gr.update(value=None),
560
+ gr.update(value=None, visible=False),
561
+ gr.update(value="")
562
  ])
563
  return tuple(updates)
564
 
 
708
  try:
709
  item.unlink()
710
  except OSError as e:
711
+ if ENABLE_LOGGING:
712
+ print(f"Failed to remove symlink {item}: {e}")
713
 
714
  if use_symlink_mode:
715
  if MERGER_CACHE_PATH.exists() and MERGER_CACHE_PATH.is_dir():
 
720
  try:
721
  os.symlink(item, target_link)
722
  except OSError as e:
723
+ if ENABLE_LOGGING:
724
+ print(f"Warning: Could not create symlink for {item.name}: {e}")
725
  else:
726
+ if ENABLE_LOGGING:
727
+ print(f"Warning: Symlink mode is on, but {MERGER_CACHE_PATH} does not exist or is not a directory.")
728
 
729
  model_holder.refresh()
730
 
 
772
  return gr.update(value=data.get("weight", DEFAULT_STYLE_WEIGHT))
773
  return gr.update(value=DEFAULT_STYLE_WEIGHT)
774
 
 
775
  def action_run_synthesis(
776
  model_name: Optional[str],
777
  style_display_name: Optional[str], style_weight_for_synth: float,
 
789
  error_outputs.append(gr.update(visible=False)) # audio_output_area
790
  for _ in range(MAX_AUDIO_OUTPUTS):
791
  error_outputs.extend([
792
+ gr.update(visible=False),
793
+ gr.update(value=None),
794
+ gr.update(value=None, visible=False),
795
  ])
796
  for _ in range(ITEMS_PER_ROW - 1):
797
+ error_outputs.append(gr.update(visible=False))
798
  for _ in range(MAX_AUDIO_OUTPUTS):
799
+ error_outputs.append("")
800
+ error_outputs.append([])
801
 
802
  if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
803
  found_chars = "".join(sorted(list(set(re.findall(INVALID_FILENAME_CHARS_PATTERN, text)))))
 
828
  return tuple(error_outputs)
829
 
830
  actual_model_file_to_load = str(model_path / files[0])
831
+ if ENABLE_LOGGING:
832
+ all_logs.append(f"[自動選択] 使用モデルファイル: {files[0]}")
833
 
834
  batch_count = int(batch_count)
835
  if batch_count <= 0: batch_count = 1
 
838
  final_mp3_paths = []
839
  generated_texts = []
840
 
 
841
  def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
842
  try:
843
  temp_dir = Path(tempfile.gettempdir())
 
854
 
855
  return str(output_path_wav), str(output_path_mp3)
856
  except Exception as e:
857
+ all_logs.append(f"[エラー] 一時音声ファイルの保存に失敗: {e}")
858
  return None
859
 
860
  if generation_mode == "発音ガチャ2":
 
862
  ratio_list = [float(x.strip()) for x in random_text_ratio_str.split(',') if x.strip()]
863
  if not ratio_list:
864
  ratio_list = [0.5]
865
+ all_logs.append("⚠️ [警告] カタカナ化の割合に有効な数値が指定されなかったため、0.5 を使用します。")
866
  except ValueError:
867
  ratio_list = [0.5]
868
+ all_logs.append("⚠️ [警告] カタカナ化の割合の解析に失敗したため、0.5 を使用します。")
869
+
870
+ if ENABLE_LOGGING:
871
+ all_logs.append(f"--- 発音ガチャ2 モード (pyopenjtalk) ---")
872
+ internal_mode = int(random_text_mode) + 1
873
+ all_logs.append(f"粒度: {random_text_mode} (内部モード: {internal_mode}), カタカナ化割合候補: {ratio_list}")
874
+
875
  generated_variations: Dict[str, List[str]] = {}
876
  max_attempts = batch_count * 20
877
  for _ in progress.tqdm(range(max_attempts), desc="テキストバリエーション生成中", total=max_attempts):
 
883
  if final_text and final_text not in generated_variations:
884
  generated_variations[final_text] = processed_blocks_list
885
  if len(generated_variations) < batch_count:
886
+ all_logs.append(f"⚠️ [警告] {batch_count}個のユニークなテキストを生成できませんでした。({len(generated_variations)}個のみ生成)")
887
 
888
  for i, (final_text, processed_blocks_list) in enumerate(progress.tqdm(generated_variations.items(), desc=f"{len(generated_variations)}件の音声を生成中")):
889
+ if ENABLE_LOGGING:
890
+ all_logs.append(f"--- 生成 {i+1}/{len(generated_variations)} ---")
891
+ all_logs.append(f" 分割パターン: {' / '.join(processed_blocks_list)}")
892
+ all_logs.append(f" ┗ 合成テキスト: \"{final_text[:50]}{'...' if len(final_text)>50 else ''}\"")
893
+
894
  success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, final_text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, -1, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
895
  all_logs.extend([f" {log}" for log in logs])
896
  if success and audio_tuple:
 
907
  generated_texts.append(final_text)
908
 
909
  else: # 発音ガチャ1 モード
910
+ if ENABLE_LOGGING:
911
+ all_logs.append("--- 発音ガチャ1 モード ---")
912
  start_seed = int(seed)
913
  for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
914
  current_seed = start_seed + i if start_seed >= 0 else -1
915
+ if ENABLE_LOGGING:
916
+ all_logs.append(f"--- 生成 {i+1}/{batch_count} (Seed: {current_seed if current_seed >= 0 else 'Random'}) ---")
917
+ all_logs.append(f" ┗ 合成テキスト: \"{text[:50]}{'...' if len(text)>50 else ''}\"")
918
+
919
  success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
920
  all_logs.extend([f" {log}" for log in logs])
921
  if success and audio_tuple:
 
930
  final_wav_paths.append(saved_paths[0])
931
  final_mp3_paths.append(saved_paths[1])
932
  generated_texts.append(text)
933
+
934
+ num_generated = len(final_wav_paths)
935
+ if num_generated > 0:
936
+ all_logs.append(f"✅ 合計 {num_generated} 件の音声合成が完了しました。")
937
+ else:
938
+ all_logs.append("ℹ️ 音声は生成されませんでした。")
939
 
940
  final_outputs = []
941
+
942
+ # ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
943
+ if ENABLE_LOGGING:
944
+ status_message = "\n".join(all_logs)
945
+ else:
946
+ essential_logs = [log for log in all_logs if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
947
+ status_message = "\n".join(essential_logs)
948
  final_outputs.append(status_message)
949
+ # ▲▲▲ 変更 ▲▲▲
950
+
951
  final_outputs.append(gr.update(visible=num_generated > 0))
952
 
953
  for i in range(MAX_AUDIO_OUTPUTS):
 
955
  mp3_val = final_mp3_paths[i] if is_visible else None
956
  wav_val = final_wav_paths[i] if is_visible else None
957
  final_outputs.append(gr.update(visible=is_visible))
958
+ final_outputs.append(gr.update(value=mp3_val))
959
+ final_outputs.append(gr.update(value=wav_val, visible=is_visible))
960
 
961
  num_dummies_needed = (ITEMS_PER_ROW - (num_generated % ITEMS_PER_ROW)) % ITEMS_PER_ROW if num_generated > 0 else 0
962
  for i in range(ITEMS_PER_ROW - 1):
 
966
  text_val = generated_texts[i] if i < num_generated else ""
967
  final_outputs.append(text_val)
968
 
969
+ final_outputs.append(final_wav_paths)
970
  return tuple(final_outputs)
 
971
 
 
972
  def add_to_workbench(
973
  current_status: str,
974
  current_workbench_list: List[Dict],
 
979
  safe_workbench_list = current_workbench_list or []
980
  if not wav_audio_path or not Path(wav_audio_path).exists():
981
  log_messages.append("⚠️ [キープ追加エラー] 追加する音声ファイル(WAV)が見つかりません。")
982
+ final_status = "\n".join(log_messages) if not ENABLE_LOGGING else (current_status + "\n" + "\n".join(log_messages)).strip()
983
  return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
984
 
 
985
  if any(item['audio_path'] == wav_audio_path for item in safe_workbench_list):
986
  log_messages.append("ℹ️ この音声はすでにキープに存在します。")
987
+ final_status = "\n".join(log_messages) if not ENABLE_LOGGING else (current_status + "\n" + "\n".join(log_messages)).strip()
988
  return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
989
 
990
  display_model_name = model
991
  parsed_result = parse_merged_model_name(model)
992
  if parsed_result: display_model_name, _ = parsed_result
993
 
 
994
  new_item = {"audio_path": wav_audio_path, "text": text, "model": display_model_name, "original_models": [model], "style": style_display_name, "style_weight": style_weight, "timestamp": datetime.datetime.now(JST).isoformat(), "is_merged": False}
995
  updated_list = safe_workbench_list + [new_item]
996
 
 
1001
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1002
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
1003
  if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
1004
+ except Exception as e:
1005
+ if ENABLE_LOGGING:
1006
+ print(f"Warning: Failed to delete old workbench audio file: {e}")
1007
  log_messages.append(f"ℹ️ キープのアイテムが最大数({MAX_WORKBENCH_ITEMS})に達したため、一番古いアイテムを削除しました。")
1008
 
1009
  ui_updates = update_workbench_ui(updated_list)
1010
  log_messages.append("✅ キープに音声を追加しました。")
1011
+ # ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
1012
+ if ENABLE_LOGGING:
1013
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1014
+ else:
1015
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1016
+ final_status = "\n".join(essential_logs).strip()
1017
+ # ▲▲▲ 変更 ▲▲▲
1018
  return (final_status, updated_list) + ui_updates
 
1019
 
 
1020
  def remove_from_workbench(current_status: str, index_to_remove: int, current_workbench_list: List[Dict]) -> Tuple:
1021
  log_messages = []
1022
  safe_workbench_list = current_workbench_list or []
1023
+ if not (0 <= index_to_remove < len(safe_workbench_list)):
1024
+ final_status = current_status if ENABLE_LOGGING else ""
1025
+ return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
1026
 
1027
  item_to_remove = safe_workbench_list[index_to_remove]
1028
  try:
1029
  path_to_delete_wav = Path(item_to_remove['audio_path'])
1030
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1031
 
 
1032
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
1033
  path_to_delete_wav.unlink()
 
1034
  if path_to_delete_mp3.exists():
1035
  path_to_delete_mp3.unlink()
1036
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除し、一時ファイル(WAV/MP3)をクリーンアップしました。")
 
1038
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(ファイルは保持: {path_to_delete_wav.name})")
1039
  else:
1040
  log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(関連ファイルなし)")
 
1041
  except Exception as e: log_messages.append(f"⚠️ キープのアイテム #{index_to_remove + 1} のファイル削除中にエラー: {e}")
1042
 
1043
  updated_list = [item for i, item in enumerate(safe_workbench_list) if i != index_to_remove]
1044
  ui_updates = update_workbench_ui(updated_list)
1045
+ # ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
1046
+ if ENABLE_LOGGING:
1047
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1048
+ else:
1049
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1050
+ final_status = "\n".join(essential_logs).strip()
1051
+ # ▲▲▲ 変更 ▲▲▲
1052
  return (final_status, updated_list) + ui_updates
 
1053
 
 
1054
  def action_merge_preview(current_status: str, first_audio_num: int, second_audio_num: int, pause_ms: int, workbench_list: List[Dict], progress=gr.Progress(track_tqdm=True)):
1055
  log_messages = []
1056
+ def create_error_return():
1057
+ if ENABLE_LOGGING:
1058
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1059
+ else:
1060
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1061
+ final_status = "\n".join(essential_logs).strip()
1062
+ return (final_status, None, gr.update(value=None, visible=False), {})
1063
+
1064
  if not workbench_list:
1065
  log_messages.append("⚠️ [結合プレビュー警告] キープに音声がありません。")
1066
+ return create_error_return()
1067
  idx1, idx2 = int(first_audio_num) - 1, int(second_audio_num) - 1
1068
  if not (0 <= idx1 < len(workbench_list) and 0 <= idx2 < len(workbench_list)):
1069
  log_messages.append(f"⚠️ [結合プレビュー警告] 指定された番号(#{first_audio_num}, #{second_audio_num})の音声が見つかりません。")
1070
+ return create_error_return()
1071
  item1, item2 = workbench_list[idx1], workbench_list[idx2]
1072
  audio_path1, audio_path2 = item1.get("audio_path"), item2.get("audio_path")
1073
  if not audio_path1 or not Path(audio_path1).exists() or not audio_path2 or not Path(audio_path2).exists():
1074
  log_messages.append("❌ [結合プレビューエラー] 音声ファイル(WAV)が見つかりません。ファイルが削除された可能性があります。")
1075
+ return create_error_return()
1076
 
1077
  progress(0, desc="結合準備中...")
1078
  try:
 
1080
  pause_duration = int(pause_ms)
1081
  if pause_duration >= 0:
1082
  combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
1083
+ if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {pause_duration}ms のポーズを挟んで結合しました。")
1084
  else:
1085
  overlap_duration = abs(pause_duration)
1086
  max_possible_overlap = min(len(segment1), len(segment2))
 
1089
  overlap_duration = max_possible_overlap
1090
  combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
1091
  combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
1092
+ if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {overlap_duration}ms 重ねて(オーバーレイして)結合しました。")
1093
  progress(1, desc="結合完了")
1094
  except Exception as e:
1095
  log_messages.append(f"❌ [結合プレビューエラー] 音声の結合中にエラーが発生しました: {e}")
1096
+ return create_error_return()
1097
 
1098
  base_filename = f"merged_preview_{uuid.uuid4().hex[:8]}"
1099
  temp_dir = Path(tempfile.gettempdir())
 
1108
  original_models1, original_models2 = item1.get('original_models', []), item2.get('original_models', [])
1109
  all_original_models = set(original_models1 + original_models2)
1110
 
 
1111
  metadata = {"text": f"{item1.get('text', '')} | {item2.get('text', '')}", "display_models": sorted(list(all_display_models)), "original_models": sorted(list(all_original_models)), "audio_path": str(wav_temp_path), "timestamp": datetime.datetime.now(JST).isoformat()}
1112
  log_messages.append("✅ 結合プレビューが生成されました。")
 
1113
 
1114
+ if ENABLE_LOGGING:
1115
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1116
+ else:
1117
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1118
+ final_status = "\n".join(essential_logs).strip()
1119
+
1120
  return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
 
1121
 
 
1122
  def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
1123
  log_messages = []
1124
  safe_workbench_list = current_workbench_list or []
1125
+
1126
+ def create_error_return():
1127
+ if ENABLE_LOGGING:
1128
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1129
+ else:
1130
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1131
+ final_status = "\n".join(essential_logs).strip()
1132
+ return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
1133
+
1134
  if not preview_data or "audio_path" not in preview_data:
1135
  log_messages.append("⚠️ [キープ追加エラー] 追加する結合済み音声がありません。先にプレビューを生成してください。")
1136
+ return create_error_return()
 
1137
 
 
1138
  src_path = Path(preview_data["audio_path"])
1139
  if not src_path.exists():
1140
  log_messages.append("⚠️ [キープ追加エラー] 結合済み音声ファイルが見つかりません。")
1141
+ return create_error_return()
 
1142
 
1143
  new_merged_item = {"audio_path": str(src_path), "text": preview_data.get("text", "N/A"), "model": " | ".join(preview_data.get("display_models", [])), "original_models": preview_data.get("original_models", []), "style": "N/A", "style_weight": 0.0, "timestamp": preview_data.get("timestamp"), "is_merged": True}
1144
  final_workbench_list = []
 
1154
  try:
1155
  path_to_delete_wav = Path(item_to_remove['audio_path'])
1156
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1157
+ if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
1158
+ if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
 
 
1159
  except Exception as e: log_messages.append(f"⚠️ 元の音声ファイル削除中にエラー: {e}")
1160
 
1161
  final_workbench_list = [new_merged_item] + remaining_list
 
1171
  path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
1172
  if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
1173
  if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
1174
+ except Exception as e:
1175
+ if ENABLE_LOGGING:
1176
+ print(f"Warning: Failed to delete old workbench audio file: {e}")
1177
  log_messages.append(f"ℹ️ キープが最大数({MAX_WORKBENCH_ITEMS})に達したため一番古いアイテムを削除しました。")
1178
 
1179
  ui_updates = update_workbench_ui(final_workbench_list)
1180
+
1181
+ if ENABLE_LOGGING:
1182
+ final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
1183
+ else:
1184
+ essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
1185
+ final_status = "\n".join(essential_logs).strip()
1186
+
1187
  return (final_status, final_workbench_list) + ui_updates
 
1188
 
1189
 
1190
+ # --- イベントリスナー接続 (変更なし) ---
1191
  def on_fn_mode_change(is_fn_mode_on: bool) -> gr.Checkbox:
1192
  if is_fn_mode_on: return gr.update(value=False)
1193
  return gr.update()
 
1207
  current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
1208
  use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
1209
 
 
1210
  generate_outputs = [status_textbox, audio_output_area]
1211
  for i in range(MAX_AUDIO_OUTPUTS):
1212
  generate_outputs.extend([audio_item_columns[i], audio_outputs[i], download_buttons[i]])
1213
  generate_outputs.extend(dummy_audio_item_columns)
1214
  generate_outputs.extend(synthesized_text_states)
1215
+ generate_outputs.append(synthesized_wav_files_state)
 
1216
 
1217
  generate_button.click(
1218
  fn=action_run_synthesis,
 
1231
  outputs=generate_outputs
1232
  )
1233
 
 
1234
  for i in range(MAX_AUDIO_OUTPUTS):
1235
  to_workbench_buttons[i].click(
1236
  fn=lambda current_status, workbench_list, text, model, style_display, style_weight, all_wavs, idx=i: \
1237
  add_to_workbench(
1238
  current_status, workbench_list,
1239
+ all_wavs[idx] if all_wavs and idx < len(all_wavs) else None,
1240
  text, model, style_display, style_weight
1241
  ),
1242
  inputs=[
1243
  status_textbox, workbench_state, synthesized_text_states[i],
1244
  selected_model_dropdown, current_styles_dropdown, style_weight_for_synth_slider,
1245
+ synthesized_wav_files_state
1246
  ],
1247
  outputs=[status_textbox, workbench_state] + all_workbench_ui_components
1248
  )
 
1249
 
1250
  for i, item in enumerate(workbench_items):
1251
  item["delete_btn"].click(
 
1289
 
1290
  merger_cache_path = Path("/tmp/sbv2_merger_cache")
1291
  mock_model_holder = TTSModelHolder()
1292
+ # ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
1293
+ if ENABLE_LOGGING:
1294
+ print(f"Initial models loaded by TTSModelHolder: {mock_model_holder.model_names}")
1295
+ # ▲▲▲ 変更 ▲▲▲
1296
 
1297
  app = create_synthesis_app(mock_model_holder)
1298