Spaces:
Running
Running
cockolo terada
committed on
Update gradio_tabs/single.py
Browse files- gradio_tabs/single.py +206 -124
gradio_tabs/single.py
CHANGED
|
@@ -23,6 +23,11 @@ import uuid # 結合ファイルの一意な名前生成のために追加
|
|
| 23 |
|
| 24 |
from typing import Dict, Any, List, Tuple, Optional, Set
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# (TTSModelHolder, MockTTSModelなどのモックやヘルパー関数は変更なしのため省略します)
|
| 27 |
# --- タイムゾーン定義 ---
|
| 28 |
# グローバルな定数としてJSTを定義
|
|
@@ -46,7 +51,10 @@ class TTSModelHolder:
|
|
| 46 |
p.mkdir(parents=True, exist_ok=True)
|
| 47 |
# 起動時に一度だけサンプルモデルを作成するロジック
|
| 48 |
if not any(p.iterdir()):
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
# Sample Model 1
|
| 51 |
model1_path = p / "MyModel1"
|
| 52 |
model1_path.mkdir(parents=True, exist_ok=True)
|
|
@@ -74,7 +82,10 @@ class TTSModelHolder:
|
|
| 74 |
json.dump(style_settings_data, f, indent=2, ensure_ascii=False)
|
| 75 |
|
| 76 |
# FNモデル (FN1-10)
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
| 78 |
for i in range(1, 11):
|
| 79 |
fn_path = p / f"FN{i}"
|
| 80 |
fn_path.mkdir(exist_ok=True)
|
|
@@ -83,7 +94,10 @@ class TTSModelHolder:
|
|
| 83 |
json.dump({"data": {"style2id": {"Neutral": 0}}}, f)
|
| 84 |
|
| 85 |
# whisperモデル (非表示用)
|
| 86 |
-
print
|
|
|
|
|
|
|
|
|
|
| 87 |
whisper_path = p / "whisper"
|
| 88 |
whisper_path.mkdir(exist_ok=True)
|
| 89 |
(whisper_path / "G_0.safetensors").touch()
|
|
@@ -97,21 +111,33 @@ class TTSModelHolder:
|
|
| 97 |
"""
|
| 98 |
if self.root_dir.is_dir():
|
| 99 |
self.model_names = sorted([d.name for d in self.root_dir.iterdir() if d.is_dir()])
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
| 101 |
else:
|
| 102 |
self.model_names = []
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
return self.model_names
|
| 105 |
|
| 106 |
def get_model(self, model_name, model_path):
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
| 108 |
if model_name not in self.model_names:
|
| 109 |
error_msg = (
|
| 110 |
f"Model '{model_name}' is not in the known list of TTSModelHolder. "
|
| 111 |
f"Current list: {self.model_names}. "
|
| 112 |
"Please refresh the model list by toggling the symlink checkbox or clicking the refresh button."
|
| 113 |
)
|
| 114 |
-
print
|
|
|
|
|
|
|
|
|
|
| 115 |
raise ValueError(error_msg)
|
| 116 |
|
| 117 |
self.current_model = MockTTSModel()
|
|
@@ -123,7 +149,10 @@ class MockTTSModel:
|
|
| 123 |
|
| 124 |
def infer(self, text, **kwargs):
|
| 125 |
length_scale = kwargs.get('length', 1.0)
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
| 127 |
sampling_rate = 44100
|
| 128 |
base_duration = max(1, len(text) // 5)
|
| 129 |
duration = base_duration * length_scale
|
|
@@ -208,7 +237,10 @@ def sort_models_by_custom_order(model_list: List[str], custom_order: List[str])
|
|
| 208 |
|
| 209 |
def set_random_seed(seed: int):
|
| 210 |
if seed >= 0:
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
| 212 |
torch.manual_seed(seed)
|
| 213 |
if torch.cuda.is_available():
|
| 214 |
torch.cuda.manual_seed(seed)
|
|
@@ -358,7 +390,8 @@ def load_styles_from_model_folder(model_asset_path: Path) -> Dict[str, Any]:
|
|
| 358 |
for style_name in style2id.keys():
|
| 359 |
final_styles[style_name] = {"display_name": style_name, "weight": DEFAULT_STYLE_WEIGHT}
|
| 360 |
except Exception as e:
|
| 361 |
-
|
|
|
|
| 362 |
custom_style_config_path = model_asset_path / STYLE_CONFIG_FILENAME_IN_MODEL_DIR
|
| 363 |
if custom_style_config_path.exists():
|
| 364 |
try:
|
|
@@ -370,7 +403,8 @@ def load_styles_from_model_folder(model_asset_path: Path) -> Dict[str, Any]:
|
|
| 370 |
final_styles.setdefault(style_key, {})
|
| 371 |
final_styles[style_key].update(style_info)
|
| 372 |
except Exception as e:
|
| 373 |
-
|
|
|
|
| 374 |
if not final_styles or DEFAULT_STYLE not in final_styles:
|
| 375 |
final_styles[DEFAULT_STYLE] = {"display_name": DEFAULT_STYLE, "weight": DEFAULT_STYLE_WEIGHT}
|
| 376 |
return final_styles
|
|
@@ -389,17 +423,22 @@ def process_single_synthesis_webui(
|
|
| 389 |
current_model_file_path = Path(current_model_file_path_str)
|
| 390 |
log_messages = []
|
| 391 |
set_random_seed(seed_arg)
|
| 392 |
-
|
|
|
|
| 393 |
log_messages.append(f"乱数シードを {seed_arg} に固定しました。")
|
|
|
|
| 394 |
try:
|
| 395 |
model_holder_ref.get_model(current_model_name, current_model_file_path)
|
| 396 |
if model_holder_ref.current_model is None:
|
| 397 |
msg = f"モデルのロード失敗: {current_model_name} (ファイル: {current_model_file_path.name})"
|
| 398 |
-
log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
| 400 |
except Exception as e:
|
| 401 |
msg = f"モデルロードエラー '{current_model_name}' (ファイル: {current_model_file_path.name}): {e}"
|
| 402 |
-
log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
|
| 403 |
speaker_id = 0
|
| 404 |
if model_holder_ref.current_model and hasattr(model_holder_ref.current_model, 'spk2id'):
|
| 405 |
model_spk2id = model_holder_ref.current_model.spk2id
|
|
@@ -407,7 +446,10 @@ def process_single_synthesis_webui(
|
|
| 407 |
speaker_id = model_spk2id[speaker_name_arg]
|
| 408 |
elif model_spk2id:
|
| 409 |
speaker_id = list(model_spk2id.values())[0]
|
| 410 |
-
|
|
|
|
|
|
|
|
|
|
| 411 |
start_time_synth = datetime.datetime.now(JST)
|
| 412 |
try:
|
| 413 |
length_for_model = 1.0 / length_scale_arg if length_scale_arg != 0 else 1.0
|
|
@@ -421,11 +463,14 @@ def process_single_synthesis_webui(
|
|
| 421 |
speaker_id=speaker_id, pitch_scale=pitch_scale_arg, intonation_scale=intonation_scale_arg,
|
| 422 |
)
|
| 423 |
except (InvalidToneError, ValueError) as e:
|
| 424 |
-
msg = f"合成エラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
|
| 425 |
except Exception as e:
|
| 426 |
-
msg = f"予期せぬエラー: {e}"; log_messages.append(f"[エラー] {msg}"); return False, log_messages, None
|
| 427 |
duration_synth = (datetime.datetime.now(JST) - start_time_synth).total_seconds()
|
| 428 |
-
|
|
|
|
|
|
|
|
|
|
| 429 |
return True, log_messages, (sr, audio_data)
|
| 430 |
|
| 431 |
|
|
@@ -438,11 +483,14 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 438 |
MERGER_CACHE_PATH.mkdir(parents=True, exist_ok=True)
|
| 439 |
is_merger_cache_available = MERGER_CACHE_PATH.is_dir()
|
| 440 |
if is_merger_cache_available:
|
| 441 |
-
|
|
|
|
| 442 |
else:
|
| 443 |
-
|
|
|
|
| 444 |
except OSError as e:
|
| 445 |
-
|
|
|
|
| 446 |
|
| 447 |
NORMAL_MODE_MODEL_ORDER = [
|
| 448 |
"mikeneko",
|
|
@@ -470,9 +518,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 470 |
MAX_WORKBENCH_ITEMS = 8
|
| 471 |
|
| 472 |
all_styles_data_state = gr.State({})
|
| 473 |
-
# ▼▼▼ 変更: 生成されたWAVファイルのパスリストを保持するStateを追加 ▼▼▼
|
| 474 |
synthesized_wav_files_state = gr.State([])
|
| 475 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 476 |
workbench_state = gr.State([])
|
| 477 |
merged_preview_state = gr.State({})
|
| 478 |
|
|
@@ -495,27 +541,24 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 495 |
f"**Style:** {item['style']} (Weight: {item['style_weight']:.2f})"
|
| 496 |
)
|
| 497 |
|
| 498 |
-
# ▼▼▼ 変更: プレーヤーにはMP3、ダウンロードにはWAVを割り当て ▼▼▼
|
| 499 |
wav_path = item['audio_path']
|
| 500 |
mp3_path = str(Path(wav_path).with_suffix('.mp3'))
|
| 501 |
-
# MP3がなければフォールバックとしてWAVを再生
|
| 502 |
playback_path = mp3_path if Path(mp3_path).exists() else wav_path
|
| 503 |
|
| 504 |
updates.extend([
|
| 505 |
-
gr.update(visible=True),
|
| 506 |
-
gr.update(value=f"**{i+1}**"),
|
| 507 |
-
gr.update(value=playback_path),
|
| 508 |
-
gr.update(value=wav_path, visible=True)
|
| 509 |
-
gr.update(value=info_text)
|
| 510 |
])
|
| 511 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 512 |
else:
|
| 513 |
updates.extend([
|
| 514 |
-
gr.update(visible=False),
|
| 515 |
-
gr.update(value=""),
|
| 516 |
-
gr.update(value=None),
|
| 517 |
-
gr.update(value=None, visible=False),
|
| 518 |
-
gr.update(value="")
|
| 519 |
])
|
| 520 |
return tuple(updates)
|
| 521 |
|
|
@@ -665,7 +708,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 665 |
try:
|
| 666 |
item.unlink()
|
| 667 |
except OSError as e:
|
| 668 |
-
|
|
|
|
| 669 |
|
| 670 |
if use_symlink_mode:
|
| 671 |
if MERGER_CACHE_PATH.exists() and MERGER_CACHE_PATH.is_dir():
|
|
@@ -676,9 +720,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 676 |
try:
|
| 677 |
os.symlink(item, target_link)
|
| 678 |
except OSError as e:
|
| 679 |
-
|
|
|
|
| 680 |
else:
|
| 681 |
-
|
|
|
|
| 682 |
|
| 683 |
model_holder.refresh()
|
| 684 |
|
|
@@ -726,7 +772,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 726 |
return gr.update(value=data.get("weight", DEFAULT_STYLE_WEIGHT))
|
| 727 |
return gr.update(value=DEFAULT_STYLE_WEIGHT)
|
| 728 |
|
| 729 |
-
# ▼▼▼ 変更: 音声合成処理をWAV/MP3両対応に修正 ▼▼▼
|
| 730 |
def action_run_synthesis(
|
| 731 |
model_name: Optional[str],
|
| 732 |
style_display_name: Optional[str], style_weight_for_synth: float,
|
|
@@ -744,15 +789,15 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 744 |
error_outputs.append(gr.update(visible=False)) # audio_output_area
|
| 745 |
for _ in range(MAX_AUDIO_OUTPUTS):
|
| 746 |
error_outputs.extend([
|
| 747 |
-
gr.update(visible=False),
|
| 748 |
-
gr.update(value=None),
|
| 749 |
-
gr.update(value=None, visible=False),
|
| 750 |
])
|
| 751 |
for _ in range(ITEMS_PER_ROW - 1):
|
| 752 |
-
error_outputs.append(gr.update(visible=False))
|
| 753 |
for _ in range(MAX_AUDIO_OUTPUTS):
|
| 754 |
-
error_outputs.append("")
|
| 755 |
-
error_outputs.append([])
|
| 756 |
|
| 757 |
if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
|
| 758 |
found_chars = "".join(sorted(list(set(re.findall(INVALID_FILENAME_CHARS_PATTERN, text)))))
|
|
@@ -783,7 +828,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 783 |
return tuple(error_outputs)
|
| 784 |
|
| 785 |
actual_model_file_to_load = str(model_path / files[0])
|
| 786 |
-
|
|
|
|
| 787 |
|
| 788 |
batch_count = int(batch_count)
|
| 789 |
if batch_count <= 0: batch_count = 1
|
|
@@ -792,7 +838,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 792 |
final_mp3_paths = []
|
| 793 |
generated_texts = []
|
| 794 |
|
| 795 |
-
# 共通のファイル保存ロジック
|
| 796 |
def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
|
| 797 |
try:
|
| 798 |
temp_dir = Path(tempfile.gettempdir())
|
|
@@ -809,7 +854,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 809 |
|
| 810 |
return str(output_path_wav), str(output_path_mp3)
|
| 811 |
except Exception as e:
|
| 812 |
-
all_logs.append(f"[エラー] 一時音声ファイルの保存に失敗: {e}")
|
| 813 |
return None
|
| 814 |
|
| 815 |
if generation_mode == "発音ガチャ2":
|
|
@@ -817,13 +862,16 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 817 |
ratio_list = [float(x.strip()) for x in random_text_ratio_str.split(',') if x.strip()]
|
| 818 |
if not ratio_list:
|
| 819 |
ratio_list = [0.5]
|
| 820 |
-
all_logs.append("[警告] カタカナ化の割合に有効な数値が指定されなかったため、0.5 を使用します。")
|
| 821 |
except ValueError:
|
| 822 |
ratio_list = [0.5]
|
| 823 |
-
all_logs.append("[警告] カタカナ化の割合の解析に失敗したため、0.5 を使用します。")
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
|
|
|
|
|
|
|
|
|
| 827 |
generated_variations: Dict[str, List[str]] = {}
|
| 828 |
max_attempts = batch_count * 20
|
| 829 |
for _ in progress.tqdm(range(max_attempts), desc="テキストバリエーション生成中", total=max_attempts):
|
|
@@ -835,12 +883,14 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 835 |
if final_text and final_text not in generated_variations:
|
| 836 |
generated_variations[final_text] = processed_blocks_list
|
| 837 |
if len(generated_variations) < batch_count:
|
| 838 |
-
all_logs.append(f"[警告] {batch_count}個のユニークなテキストを生成できませんでした。({len(generated_variations)}個のみ生成)")
|
| 839 |
|
| 840 |
for i, (final_text, processed_blocks_list) in enumerate(progress.tqdm(generated_variations.items(), desc=f"{len(generated_variations)}件の音声を生成中")):
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
|
|
|
|
|
|
| 844 |
success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, final_text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, -1, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
|
| 845 |
all_logs.extend([f" {log}" for log in logs])
|
| 846 |
if success and audio_tuple:
|
|
@@ -857,12 +907,15 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 857 |
generated_texts.append(final_text)
|
| 858 |
|
| 859 |
else: # 発音ガチャ1 モード
|
| 860 |
-
|
|
|
|
| 861 |
start_seed = int(seed)
|
| 862 |
for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
|
| 863 |
current_seed = start_seed + i if start_seed >= 0 else -1
|
| 864 |
-
|
| 865 |
-
|
|
|
|
|
|
|
| 866 |
success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
|
| 867 |
all_logs.extend([f" {log}" for log in logs])
|
| 868 |
if success and audio_tuple:
|
|
@@ -877,13 +930,24 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 877 |
final_wav_paths.append(saved_paths[0])
|
| 878 |
final_mp3_paths.append(saved_paths[1])
|
| 879 |
generated_texts.append(text)
|
| 880 |
-
|
| 881 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 882 |
|
| 883 |
final_outputs = []
|
| 884 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
final_outputs.append(status_message)
|
| 886 |
-
|
|
|
|
| 887 |
final_outputs.append(gr.update(visible=num_generated > 0))
|
| 888 |
|
| 889 |
for i in range(MAX_AUDIO_OUTPUTS):
|
|
@@ -891,8 +955,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 891 |
mp3_val = final_mp3_paths[i] if is_visible else None
|
| 892 |
wav_val = final_wav_paths[i] if is_visible else None
|
| 893 |
final_outputs.append(gr.update(visible=is_visible))
|
| 894 |
-
final_outputs.append(gr.update(value=mp3_val))
|
| 895 |
-
final_outputs.append(gr.update(value=wav_val, visible=is_visible))
|
| 896 |
|
| 897 |
num_dummies_needed = (ITEMS_PER_ROW - (num_generated % ITEMS_PER_ROW)) % ITEMS_PER_ROW if num_generated > 0 else 0
|
| 898 |
for i in range(ITEMS_PER_ROW - 1):
|
|
@@ -902,11 +966,9 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 902 |
text_val = generated_texts[i] if i < num_generated else ""
|
| 903 |
final_outputs.append(text_val)
|
| 904 |
|
| 905 |
-
final_outputs.append(final_wav_paths)
|
| 906 |
return tuple(final_outputs)
|
| 907 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 908 |
|
| 909 |
-
# ▼▼▼ 変更: キープ追加処理をState経由のWAVパスで行うように修正 ▼▼▼
|
| 910 |
def add_to_workbench(
|
| 911 |
current_status: str,
|
| 912 |
current_workbench_list: List[Dict],
|
|
@@ -917,20 +979,18 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 917 |
safe_workbench_list = current_workbench_list or []
|
| 918 |
if not wav_audio_path or not Path(wav_audio_path).exists():
|
| 919 |
log_messages.append("⚠️ [キープ追加エラー] 追加する音声ファイル(WAV)が見つかりません。")
|
| 920 |
-
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 921 |
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 922 |
|
| 923 |
-
# 内部的にはWAVパスで同一性をチェック
|
| 924 |
if any(item['audio_path'] == wav_audio_path for item in safe_workbench_list):
|
| 925 |
log_messages.append("ℹ️ この音声はすでにキープに存在します。")
|
| 926 |
-
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 927 |
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 928 |
|
| 929 |
display_model_name = model
|
| 930 |
parsed_result = parse_merged_model_name(model)
|
| 931 |
if parsed_result: display_model_name, _ = parsed_result
|
| 932 |
|
| 933 |
-
# StateにはWAVパスを保存
|
| 934 |
new_item = {"audio_path": wav_audio_path, "text": text, "model": display_model_name, "original_models": [model], "style": style_display_name, "style_weight": style_weight, "timestamp": datetime.datetime.now(JST).isoformat(), "is_merged": False}
|
| 935 |
updated_list = safe_workbench_list + [new_item]
|
| 936 |
|
|
@@ -941,30 +1001,36 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 941 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 942 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
|
| 943 |
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
|
| 944 |
-
except Exception as e:
|
|
|
|
|
|
|
| 945 |
log_messages.append(f"ℹ️ キープのアイテムが最大数({MAX_WORKBENCH_ITEMS})に達したため、一番古いアイテムを削除しました。")
|
| 946 |
|
| 947 |
ui_updates = update_workbench_ui(updated_list)
|
| 948 |
log_messages.append("✅ キープに音声を追加しました。")
|
| 949 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 950 |
return (final_status, updated_list) + ui_updates
|
| 951 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 952 |
|
| 953 |
-
# ▼▼▼ 変更: ファイル削除時にMP3も削除 ▼▼▼
|
| 954 |
def remove_from_workbench(current_status: str, index_to_remove: int, current_workbench_list: List[Dict]) -> Tuple:
|
| 955 |
log_messages = []
|
| 956 |
safe_workbench_list = current_workbench_list or []
|
| 957 |
-
if not (0 <= index_to_remove < len(safe_workbench_list)):
|
|
|
|
|
|
|
| 958 |
|
| 959 |
item_to_remove = safe_workbench_list[index_to_remove]
|
| 960 |
try:
|
| 961 |
path_to_delete_wav = Path(item_to_remove['audio_path'])
|
| 962 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 963 |
|
| 964 |
-
# WAVファイルの削除
|
| 965 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
|
| 966 |
path_to_delete_wav.unlink()
|
| 967 |
-
# MP3ファイルの削除
|
| 968 |
if path_to_delete_mp3.exists():
|
| 969 |
path_to_delete_mp3.unlink()
|
| 970 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除し、一時ファイル(WAV/MP3)をクリーンアップしました。")
|
|
@@ -972,36 +1038,41 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 972 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(ファイルは保持: {path_to_delete_wav.name})")
|
| 973 |
else:
|
| 974 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(関連ファイルなし)")
|
| 975 |
-
|
| 976 |
except Exception as e: log_messages.append(f"⚠️ キープのアイテム #{index_to_remove + 1} のファイル削除中にエラー: {e}")
|
| 977 |
|
| 978 |
updated_list = [item for i, item in enumerate(safe_workbench_list) if i != index_to_remove]
|
| 979 |
ui_updates = update_workbench_ui(updated_list)
|
| 980 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 981 |
return (final_status, updated_list) + ui_updates
|
| 982 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 983 |
|
| 984 |
-
# ▼▼▼ 変更: 結合プレビューもWAV/MP3両対応に ▼▼▼
|
| 985 |
def action_merge_preview(current_status: str, first_audio_num: int, second_audio_num: int, pause_ms: int, workbench_list: List[Dict], progress=gr.Progress(track_tqdm=True)):
|
| 986 |
log_messages = []
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
|
| 992 |
-
|
|
|
|
|
|
|
| 993 |
if not workbench_list:
|
| 994 |
log_messages.append("⚠️ [結合プレビュー警告] キープに音声がありません。")
|
| 995 |
-
return
|
| 996 |
idx1, idx2 = int(first_audio_num) - 1, int(second_audio_num) - 1
|
| 997 |
if not (0 <= idx1 < len(workbench_list) and 0 <= idx2 < len(workbench_list)):
|
| 998 |
log_messages.append(f"⚠️ [結合プレビュー警告] 指定された番号(#{first_audio_num}, #{second_audio_num})の音声が見つかりません。")
|
| 999 |
-
return
|
| 1000 |
item1, item2 = workbench_list[idx1], workbench_list[idx2]
|
| 1001 |
audio_path1, audio_path2 = item1.get("audio_path"), item2.get("audio_path")
|
| 1002 |
if not audio_path1 or not Path(audio_path1).exists() or not audio_path2 or not Path(audio_path2).exists():
|
| 1003 |
log_messages.append("❌ [結合プレビューエラー] 音声ファイル(WAV)が見つかりません。ファイルが削除された可能性があります。")
|
| 1004 |
-
return
|
| 1005 |
|
| 1006 |
progress(0, desc="結合準備中...")
|
| 1007 |
try:
|
|
@@ -1009,7 +1080,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1009 |
pause_duration = int(pause_ms)
|
| 1010 |
if pause_duration >= 0:
|
| 1011 |
combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
|
| 1012 |
-
log_messages.append(f"
|
| 1013 |
else:
|
| 1014 |
overlap_duration = abs(pause_duration)
|
| 1015 |
max_possible_overlap = min(len(segment1), len(segment2))
|
|
@@ -1018,11 +1089,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1018 |
overlap_duration = max_possible_overlap
|
| 1019 |
combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
|
| 1020 |
combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
|
| 1021 |
-
log_messages.append(f"
|
| 1022 |
progress(1, desc="結合完了")
|
| 1023 |
except Exception as e:
|
| 1024 |
log_messages.append(f"❌ [結合プレビューエラー] 音声の結合中にエラーが発生しました: {e}")
|
| 1025 |
-
return
|
| 1026 |
|
| 1027 |
base_filename = f"merged_preview_{uuid.uuid4().hex[:8]}"
|
| 1028 |
temp_dir = Path(tempfile.gettempdir())
|
|
@@ -1037,30 +1108,37 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1037 |
original_models1, original_models2 = item1.get('original_models', []), item2.get('original_models', [])
|
| 1038 |
all_original_models = set(original_models1 + original_models2)
|
| 1039 |
|
| 1040 |
-
# StateにはWAVパスを保存
|
| 1041 |
metadata = {"text": f"{item1.get('text', '')} | {item2.get('text', '')}", "display_models": sorted(list(all_display_models)), "original_models": sorted(list(all_original_models)), "audio_path": str(wav_temp_path), "timestamp": datetime.datetime.now(JST).isoformat()}
|
| 1042 |
log_messages.append("✅ 結合プレビューが生成されました。")
|
| 1043 |
-
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 1044 |
|
| 1045 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
|
| 1047 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 1048 |
|
| 1049 |
-
# ▼▼▼ 変更: 元ファイル削除時にMP3も削除 ▼▼▼
|
| 1050 |
def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
|
| 1051 |
log_messages = []
|
| 1052 |
safe_workbench_list = current_workbench_list or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
if not preview_data or "audio_path" not in preview_data:
|
| 1054 |
log_messages.append("⚠️ [キープ追加エラー] 追加する結合済み音声がありません。先にプレビューを生成してください。")
|
| 1055 |
-
|
| 1056 |
-
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 1057 |
|
| 1058 |
-
# preview_data["audio_path"] はWAVのパス
|
| 1059 |
src_path = Path(preview_data["audio_path"])
|
| 1060 |
if not src_path.exists():
|
| 1061 |
log_messages.append("⚠️ [キープ追加エラー] 結合済み音声ファイルが見つかりません。")
|
| 1062 |
-
|
| 1063 |
-
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 1064 |
|
| 1065 |
new_merged_item = {"audio_path": str(src_path), "text": preview_data.get("text", "N/A"), "model": " | ".join(preview_data.get("display_models", [])), "original_models": preview_data.get("original_models", []), "style": "N/A", "style_weight": 0.0, "timestamp": preview_data.get("timestamp"), "is_merged": True}
|
| 1066 |
final_workbench_list = []
|
|
@@ -1076,10 +1154,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1076 |
try:
|
| 1077 |
path_to_delete_wav = Path(item_to_remove['audio_path'])
|
| 1078 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1079 |
-
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
|
| 1080 |
-
|
| 1081 |
-
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir():
|
| 1082 |
-
path_to_delete_mp3.unlink()
|
| 1083 |
except Exception as e: log_messages.append(f"⚠️ 元の音声ファイル削除中にエラー: {e}")
|
| 1084 |
|
| 1085 |
final_workbench_list = [new_merged_item] + remaining_list
|
|
@@ -1095,16 +1171,23 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1095 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1096 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
|
| 1097 |
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
|
| 1098 |
-
except Exception as e:
|
|
|
|
|
|
|
| 1099 |
log_messages.append(f"ℹ️ キープが最大数({MAX_WORKBENCH_ITEMS})に達したため一番古いアイテムを削除しました。")
|
| 1100 |
|
| 1101 |
ui_updates = update_workbench_ui(final_workbench_list)
|
| 1102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
return (final_status, final_workbench_list) + ui_updates
|
| 1104 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 1105 |
|
| 1106 |
|
| 1107 |
-
# --- イベントリスナー接続 (
|
| 1108 |
def on_fn_mode_change(is_fn_mode_on: bool) -> gr.Checkbox:
|
| 1109 |
if is_fn_mode_on: return gr.update(value=False)
|
| 1110 |
return gr.update()
|
|
@@ -1124,14 +1207,12 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1124 |
current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
|
| 1125 |
use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
|
| 1126 |
|
| 1127 |
-
# ▼▼▼ 変更: generate_button の出力に State を追加 ▼▼▼
|
| 1128 |
generate_outputs = [status_textbox, audio_output_area]
|
| 1129 |
for i in range(MAX_AUDIO_OUTPUTS):
|
| 1130 |
generate_outputs.extend([audio_item_columns[i], audio_outputs[i], download_buttons[i]])
|
| 1131 |
generate_outputs.extend(dummy_audio_item_columns)
|
| 1132 |
generate_outputs.extend(synthesized_text_states)
|
| 1133 |
-
generate_outputs.append(synthesized_wav_files_state)
|
| 1134 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 1135 |
|
| 1136 |
generate_button.click(
|
| 1137 |
fn=action_run_synthesis,
|
|
@@ -1150,23 +1231,21 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1150 |
outputs=generate_outputs
|
| 1151 |
)
|
| 1152 |
|
| 1153 |
-
# ▼▼▼ 変更: to_workbench_button の入力と呼び出し方を修正 ▼▼▼
|
| 1154 |
for i in range(MAX_AUDIO_OUTPUTS):
|
| 1155 |
to_workbench_buttons[i].click(
|
| 1156 |
fn=lambda current_status, workbench_list, text, model, style_display, style_weight, all_wavs, idx=i: \
|
| 1157 |
add_to_workbench(
|
| 1158 |
current_status, workbench_list,
|
| 1159 |
-
all_wavs[idx] if all_wavs and idx < len(all_wavs) else None,
|
| 1160 |
text, model, style_display, style_weight
|
| 1161 |
),
|
| 1162 |
inputs=[
|
| 1163 |
status_textbox, workbench_state, synthesized_text_states[i],
|
| 1164 |
selected_model_dropdown, current_styles_dropdown, style_weight_for_synth_slider,
|
| 1165 |
-
synthesized_wav_files_state
|
| 1166 |
],
|
| 1167 |
outputs=[status_textbox, workbench_state] + all_workbench_ui_components
|
| 1168 |
)
|
| 1169 |
-
# ▲▲▲ 変更 ▲▲▲
|
| 1170 |
|
| 1171 |
for i, item in enumerate(workbench_items):
|
| 1172 |
item["delete_btn"].click(
|
|
@@ -1210,7 +1289,10 @@ if __name__ == "__main__":
|
|
| 1210 |
|
| 1211 |
merger_cache_path = Path("/tmp/sbv2_merger_cache")
|
| 1212 |
mock_model_holder = TTSModelHolder()
|
| 1213 |
-
|
|
|
|
|
|
|
|
|
|
| 1214 |
|
| 1215 |
app = create_synthesis_app(mock_model_holder)
|
| 1216 |
|
|
|
|
| 23 |
|
| 24 |
from typing import Dict, Any, List, Tuple, Optional, Set
|
| 25 |
|
| 26 |
+
# --- ログ設定 ---
|
| 27 |
+
# TrueにするとターミナルとUIに詳細なログが出力されます。
|
| 28 |
+
# Falseにすると、エラーや重要な通知以外のログは抑制されます。
|
| 29 |
+
ENABLE_LOGGING = False
|
| 30 |
+
|
| 31 |
# (TTSModelHolder, MockTTSModelなどのモックやヘルパー関数は変更なしのため省略します)
|
| 32 |
# --- タイムゾーン定義 ---
|
| 33 |
# グローバルな定数としてJSTを定義
|
|
|
|
| 51 |
p.mkdir(parents=True, exist_ok=True)
|
| 52 |
# 起動時に一度だけサンプルモデルを作成するロジック
|
| 53 |
if not any(p.iterdir()):
|
| 54 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 55 |
+
if ENABLE_LOGGING:
|
| 56 |
+
print("No models found in model_assets. Creating sample models...")
|
| 57 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 58 |
# Sample Model 1
|
| 59 |
model1_path = p / "MyModel1"
|
| 60 |
model1_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 82 |
json.dump(style_settings_data, f, indent=2, ensure_ascii=False)
|
| 83 |
|
| 84 |
# FNモデル (FN1-10)
|
| 85 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 86 |
+
if ENABLE_LOGGING:
|
| 87 |
+
print("Creating FN models (FN1-10)...")
|
| 88 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 89 |
for i in range(1, 11):
|
| 90 |
fn_path = p / f"FN{i}"
|
| 91 |
fn_path.mkdir(exist_ok=True)
|
|
|
|
| 94 |
json.dump({"data": {"style2id": {"Neutral": 0}}}, f)
|
| 95 |
|
| 96 |
# whisperモデル (非表示用)
|
| 97 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 98 |
+
if ENABLE_LOGGING:
|
| 99 |
+
print("Creating 'whisper' model...")
|
| 100 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 101 |
whisper_path = p / "whisper"
|
| 102 |
whisper_path.mkdir(exist_ok=True)
|
| 103 |
(whisper_path / "G_0.safetensors").touch()
|
|
|
|
| 111 |
"""
|
| 112 |
if self.root_dir.is_dir():
|
| 113 |
self.model_names = sorted([d.name for d in self.root_dir.iterdir() if d.is_dir()])
|
| 114 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 115 |
+
if ENABLE_LOGGING:
|
| 116 |
+
print(f"TTSModelHolder model list refreshed. Known models: {self.model_names}")
|
| 117 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 118 |
else:
|
| 119 |
self.model_names = []
|
| 120 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 121 |
+
if ENABLE_LOGGING:
|
| 122 |
+
print("TTSModelHolder root directory not found.")
|
| 123 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 124 |
return self.model_names
|
| 125 |
|
| 126 |
def get_model(self, model_name, model_path):
|
| 127 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 128 |
+
if ENABLE_LOGGING:
|
| 129 |
+
print(f"Loading model: {model_name} (file: {Path(model_path).name})")
|
| 130 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 131 |
if model_name not in self.model_names:
|
| 132 |
error_msg = (
|
| 133 |
f"Model '{model_name}' is not in the known list of TTSModelHolder. "
|
| 134 |
f"Current list: {self.model_names}. "
|
| 135 |
"Please refresh the model list by toggling the symlink checkbox or clicking the refresh button."
|
| 136 |
)
|
| 137 |
+
# ▼▼▼ 変更: printをエラーなので残すか、制御するか検討。ここでは制御対象に含める。▼▼▼
|
| 138 |
+
if ENABLE_LOGGING:
|
| 139 |
+
print(f"[ERROR] {error_msg}")
|
| 140 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 141 |
raise ValueError(error_msg)
|
| 142 |
|
| 143 |
self.current_model = MockTTSModel()
|
|
|
|
| 149 |
|
| 150 |
def infer(self, text, **kwargs):
|
| 151 |
length_scale = kwargs.get('length', 1.0)
|
| 152 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 153 |
+
if ENABLE_LOGGING:
|
| 154 |
+
print(f"Inferencing with text '{text}' and style: {kwargs.get('style')} and weight: {kwargs.get('style_weight')}, length_scale: {length_scale}")
|
| 155 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 156 |
sampling_rate = 44100
|
| 157 |
base_duration = max(1, len(text) // 5)
|
| 158 |
duration = base_duration * length_scale
|
|
|
|
| 237 |
|
| 238 |
def set_random_seed(seed: int):
|
| 239 |
if seed >= 0:
|
| 240 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 241 |
+
if ENABLE_LOGGING:
|
| 242 |
+
print(f"Setting random seed to: {seed}")
|
| 243 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 244 |
torch.manual_seed(seed)
|
| 245 |
if torch.cuda.is_available():
|
| 246 |
torch.cuda.manual_seed(seed)
|
|
|
|
| 390 |
for style_name in style2id.keys():
|
| 391 |
final_styles[style_name] = {"display_name": style_name, "weight": DEFAULT_STYLE_WEIGHT}
|
| 392 |
except Exception as e:
|
| 393 |
+
if ENABLE_LOGGING:
|
| 394 |
+
print(f"Warning: Failed to load or parse {config_path}: {e}")
|
| 395 |
custom_style_config_path = model_asset_path / STYLE_CONFIG_FILENAME_IN_MODEL_DIR
|
| 396 |
if custom_style_config_path.exists():
|
| 397 |
try:
|
|
|
|
| 403 |
final_styles.setdefault(style_key, {})
|
| 404 |
final_styles[style_key].update(style_info)
|
| 405 |
except Exception as e:
|
| 406 |
+
if ENABLE_LOGGING:
|
| 407 |
+
print(f"Warning: Failed to load or parse {custom_style_config_path}: {e}")
|
| 408 |
if not final_styles or DEFAULT_STYLE not in final_styles:
|
| 409 |
final_styles[DEFAULT_STYLE] = {"display_name": DEFAULT_STYLE, "weight": DEFAULT_STYLE_WEIGHT}
|
| 410 |
return final_styles
|
|
|
|
| 423 |
current_model_file_path = Path(current_model_file_path_str)
|
| 424 |
log_messages = []
|
| 425 |
set_random_seed(seed_arg)
|
| 426 |
+
# ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
|
| 427 |
+
if seed_arg >= 0 and ENABLE_LOGGING:
|
| 428 |
log_messages.append(f"乱数シードを {seed_arg} に固定しました。")
|
| 429 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 430 |
try:
|
| 431 |
model_holder_ref.get_model(current_model_name, current_model_file_path)
|
| 432 |
if model_holder_ref.current_model is None:
|
| 433 |
msg = f"モデルのロード失敗: {current_model_name} (ファイル: {current_model_file_path.name})"
|
| 434 |
+
log_messages.append(f"❌ [エラー] {msg}"); return False, log_messages, None
|
| 435 |
+
# ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
|
| 436 |
+
if ENABLE_LOGGING:
|
| 437 |
+
log_messages.append(f"使用モデル: {current_model_name} (ファイル: {current_model_file_path.name})")
|
| 438 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 439 |
except Exception as e:
|
| 440 |
msg = f"モデルロードエラー '{current_model_name}' (ファイル: {current_model_file_path.name}): {e}"
|
| 441 |
+
log_messages.append(f"❌ [エラー] {msg}"); return False, log_messages, None
|
| 442 |
speaker_id = 0
|
| 443 |
if model_holder_ref.current_model and hasattr(model_holder_ref.current_model, 'spk2id'):
|
| 444 |
model_spk2id = model_holder_ref.current_model.spk2id
|
|
|
|
| 446 |
speaker_id = model_spk2id[speaker_name_arg]
|
| 447 |
elif model_spk2id:
|
| 448 |
speaker_id = list(model_spk2id.values())[0]
|
| 449 |
+
# ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
|
| 450 |
+
if ENABLE_LOGGING:
|
| 451 |
+
log_messages.append(f"音声合成中...")
|
| 452 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 453 |
start_time_synth = datetime.datetime.now(JST)
|
| 454 |
try:
|
| 455 |
length_for_model = 1.0 / length_scale_arg if length_scale_arg != 0 else 1.0
|
|
|
|
| 463 |
speaker_id=speaker_id, pitch_scale=pitch_scale_arg, intonation_scale=intonation_scale_arg,
|
| 464 |
)
|
| 465 |
except (InvalidToneError, ValueError) as e:
|
| 466 |
+
msg = f"合成エラー: {e}"; log_messages.append(f"❌ [エラー] {msg}"); return False, log_messages, None
|
| 467 |
except Exception as e:
|
| 468 |
+
msg = f"予期せぬエラー: {e}"; log_messages.append(f"❌ [エラー] {msg}"); return False, log_messages, None
|
| 469 |
duration_synth = (datetime.datetime.now(JST) - start_time_synth).total_seconds()
|
| 470 |
+
# ▼▼▼ 変更: ログ追加をENABLE_LOGGINGで制御 ▼▼▼
|
| 471 |
+
if ENABLE_LOGGING:
|
| 472 |
+
log_messages.append(f"音声合成成功。音声長: {len(audio_data)/sr:.2f}s, 処理時間: {duration_synth:.2f}s.")
|
| 473 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 474 |
return True, log_messages, (sr, audio_data)
|
| 475 |
|
| 476 |
|
|
|
|
| 483 |
MERGER_CACHE_PATH.mkdir(parents=True, exist_ok=True)
|
| 484 |
is_merger_cache_available = MERGER_CACHE_PATH.is_dir()
|
| 485 |
if is_merger_cache_available:
|
| 486 |
+
if ENABLE_LOGGING:
|
| 487 |
+
print(f"Merger cache directory is available at: {MERGER_CACHE_PATH}")
|
| 488 |
else:
|
| 489 |
+
if ENABLE_LOGGING:
|
| 490 |
+
print(f"Warning: Merger cache path {MERGER_CACHE_PATH} exists but is not a directory.")
|
| 491 |
except OSError as e:
|
| 492 |
+
if ENABLE_LOGGING:
|
| 493 |
+
print(f"Warning: Could not create or access merger cache directory {MERGER_CACHE_PATH}: {e}")
|
| 494 |
|
| 495 |
NORMAL_MODE_MODEL_ORDER = [
|
| 496 |
"mikeneko",
|
|
|
|
| 518 |
MAX_WORKBENCH_ITEMS = 8
|
| 519 |
|
| 520 |
all_styles_data_state = gr.State({})
|
|
|
|
| 521 |
synthesized_wav_files_state = gr.State([])
|
|
|
|
| 522 |
workbench_state = gr.State([])
|
| 523 |
merged_preview_state = gr.State({})
|
| 524 |
|
|
|
|
| 541 |
f"**Style:** {item['style']} (Weight: {item['style_weight']:.2f})"
|
| 542 |
)
|
| 543 |
|
|
|
|
| 544 |
wav_path = item['audio_path']
|
| 545 |
mp3_path = str(Path(wav_path).with_suffix('.mp3'))
|
|
|
|
| 546 |
playback_path = mp3_path if Path(mp3_path).exists() else wav_path
|
| 547 |
|
| 548 |
updates.extend([
|
| 549 |
+
gr.update(visible=True),
|
| 550 |
+
gr.update(value=f"**{i+1}**"),
|
| 551 |
+
gr.update(value=playback_path),
|
| 552 |
+
gr.update(value=wav_path, visible=True),
|
| 553 |
+
gr.update(value=info_text)
|
| 554 |
])
|
|
|
|
| 555 |
else:
|
| 556 |
updates.extend([
|
| 557 |
+
gr.update(visible=False),
|
| 558 |
+
gr.update(value=""),
|
| 559 |
+
gr.update(value=None),
|
| 560 |
+
gr.update(value=None, visible=False),
|
| 561 |
+
gr.update(value="")
|
| 562 |
])
|
| 563 |
return tuple(updates)
|
| 564 |
|
|
|
|
| 708 |
try:
|
| 709 |
item.unlink()
|
| 710 |
except OSError as e:
|
| 711 |
+
if ENABLE_LOGGING:
|
| 712 |
+
print(f"Failed to remove symlink {item}: {e}")
|
| 713 |
|
| 714 |
if use_symlink_mode:
|
| 715 |
if MERGER_CACHE_PATH.exists() and MERGER_CACHE_PATH.is_dir():
|
|
|
|
| 720 |
try:
|
| 721 |
os.symlink(item, target_link)
|
| 722 |
except OSError as e:
|
| 723 |
+
if ENABLE_LOGGING:
|
| 724 |
+
print(f"Warning: Could not create symlink for {item.name}: {e}")
|
| 725 |
else:
|
| 726 |
+
if ENABLE_LOGGING:
|
| 727 |
+
print(f"Warning: Symlink mode is on, but {MERGER_CACHE_PATH} does not exist or is not a directory.")
|
| 728 |
|
| 729 |
model_holder.refresh()
|
| 730 |
|
|
|
|
| 772 |
return gr.update(value=data.get("weight", DEFAULT_STYLE_WEIGHT))
|
| 773 |
return gr.update(value=DEFAULT_STYLE_WEIGHT)
|
| 774 |
|
|
|
|
| 775 |
def action_run_synthesis(
|
| 776 |
model_name: Optional[str],
|
| 777 |
style_display_name: Optional[str], style_weight_for_synth: float,
|
|
|
|
| 789 |
error_outputs.append(gr.update(visible=False)) # audio_output_area
|
| 790 |
for _ in range(MAX_AUDIO_OUTPUTS):
|
| 791 |
error_outputs.extend([
|
| 792 |
+
gr.update(visible=False),
|
| 793 |
+
gr.update(value=None),
|
| 794 |
+
gr.update(value=None, visible=False),
|
| 795 |
])
|
| 796 |
for _ in range(ITEMS_PER_ROW - 1):
|
| 797 |
+
error_outputs.append(gr.update(visible=False))
|
| 798 |
for _ in range(MAX_AUDIO_OUTPUTS):
|
| 799 |
+
error_outputs.append("")
|
| 800 |
+
error_outputs.append([])
|
| 801 |
|
| 802 |
if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
|
| 803 |
found_chars = "".join(sorted(list(set(re.findall(INVALID_FILENAME_CHARS_PATTERN, text)))))
|
|
|
|
| 828 |
return tuple(error_outputs)
|
| 829 |
|
| 830 |
actual_model_file_to_load = str(model_path / files[0])
|
| 831 |
+
if ENABLE_LOGGING:
|
| 832 |
+
all_logs.append(f"[自動選択] 使用モデルファイル: {files[0]}")
|
| 833 |
|
| 834 |
batch_count = int(batch_count)
|
| 835 |
if batch_count <= 0: batch_count = 1
|
|
|
|
| 838 |
final_mp3_paths = []
|
| 839 |
generated_texts = []
|
| 840 |
|
|
|
|
| 841 |
def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
|
| 842 |
try:
|
| 843 |
temp_dir = Path(tempfile.gettempdir())
|
|
|
|
| 854 |
|
| 855 |
return str(output_path_wav), str(output_path_mp3)
|
| 856 |
except Exception as e:
|
| 857 |
+
all_logs.append(f"❌ [エラー] 一時音声ファイルの保存に失敗: {e}")
|
| 858 |
return None
|
| 859 |
|
| 860 |
if generation_mode == "発音ガチャ2":
|
|
|
|
| 862 |
ratio_list = [float(x.strip()) for x in random_text_ratio_str.split(',') if x.strip()]
|
| 863 |
if not ratio_list:
|
| 864 |
ratio_list = [0.5]
|
| 865 |
+
all_logs.append("⚠️ [警告] カタカナ化の割合に有効な数値が指定されなかったため、0.5 を使用します。")
|
| 866 |
except ValueError:
|
| 867 |
ratio_list = [0.5]
|
| 868 |
+
all_logs.append("⚠️ [警告] カタカナ化の割合の解析に失敗したため、0.5 を使用します。")
|
| 869 |
+
|
| 870 |
+
if ENABLE_LOGGING:
|
| 871 |
+
all_logs.append(f"--- 発音ガチャ2 モード (pyopenjtalk) ---")
|
| 872 |
+
internal_mode = int(random_text_mode) + 1
|
| 873 |
+
all_logs.append(f"粒度: {random_text_mode} (内部モード: {internal_mode}), カタカナ化割合候補: {ratio_list}")
|
| 874 |
+
|
| 875 |
generated_variations: Dict[str, List[str]] = {}
|
| 876 |
max_attempts = batch_count * 20
|
| 877 |
for _ in progress.tqdm(range(max_attempts), desc="テキストバリエーション生成中", total=max_attempts):
|
|
|
|
| 883 |
if final_text and final_text not in generated_variations:
|
| 884 |
generated_variations[final_text] = processed_blocks_list
|
| 885 |
if len(generated_variations) < batch_count:
|
| 886 |
+
all_logs.append(f"⚠️ [警告] {batch_count}個のユニークなテキストを生成できませんでした。({len(generated_variations)}個のみ生成)")
|
| 887 |
|
| 888 |
for i, (final_text, processed_blocks_list) in enumerate(progress.tqdm(generated_variations.items(), desc=f"{len(generated_variations)}件の音声を生成中")):
|
| 889 |
+
if ENABLE_LOGGING:
|
| 890 |
+
all_logs.append(f"--- 生成 {i+1}/{len(generated_variations)} ---")
|
| 891 |
+
all_logs.append(f" ┠ 分割パターン: {' / '.join(processed_blocks_list)}")
|
| 892 |
+
all_logs.append(f" ┗ 合成テキスト: \"{final_text[:50]}{'...' if len(final_text)>50 else ''}\"")
|
| 893 |
+
|
| 894 |
success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, final_text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, -1, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
|
| 895 |
all_logs.extend([f" {log}" for log in logs])
|
| 896 |
if success and audio_tuple:
|
|
|
|
| 907 |
generated_texts.append(final_text)
|
| 908 |
|
| 909 |
else: # 発音ガチャ1 モード
|
| 910 |
+
if ENABLE_LOGGING:
|
| 911 |
+
all_logs.append("--- 発音ガチャ1 モード ---")
|
| 912 |
start_seed = int(seed)
|
| 913 |
for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
|
| 914 |
current_seed = start_seed + i if start_seed >= 0 else -1
|
| 915 |
+
if ENABLE_LOGGING:
|
| 916 |
+
all_logs.append(f"--- 生成 {i+1}/{batch_count} (Seed: {current_seed if current_seed >= 0 else 'Random'}) ---")
|
| 917 |
+
all_logs.append(f" ┗ 合成テキスト: \"{text[:50]}{'...' if len(text)>50 else ''}\"")
|
| 918 |
+
|
| 919 |
success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
|
| 920 |
all_logs.extend([f" {log}" for log in logs])
|
| 921 |
if success and audio_tuple:
|
|
|
|
| 930 |
final_wav_paths.append(saved_paths[0])
|
| 931 |
final_mp3_paths.append(saved_paths[1])
|
| 932 |
generated_texts.append(text)
|
| 933 |
+
|
| 934 |
+
num_generated = len(final_wav_paths)
|
| 935 |
+
if num_generated > 0:
|
| 936 |
+
all_logs.append(f"✅ 合計 {num_generated} 件の音声合成が完了しました。")
|
| 937 |
+
else:
|
| 938 |
+
all_logs.append("ℹ️ 音声は生成されませんでした。")
|
| 939 |
|
| 940 |
final_outputs = []
|
| 941 |
+
|
| 942 |
+
# ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
|
| 943 |
+
if ENABLE_LOGGING:
|
| 944 |
+
status_message = "\n".join(all_logs)
|
| 945 |
+
else:
|
| 946 |
+
essential_logs = [log for log in all_logs if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
|
| 947 |
+
status_message = "\n".join(essential_logs)
|
| 948 |
final_outputs.append(status_message)
|
| 949 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 950 |
+
|
| 951 |
final_outputs.append(gr.update(visible=num_generated > 0))
|
| 952 |
|
| 953 |
for i in range(MAX_AUDIO_OUTPUTS):
|
|
|
|
| 955 |
mp3_val = final_mp3_paths[i] if is_visible else None
|
| 956 |
wav_val = final_wav_paths[i] if is_visible else None
|
| 957 |
final_outputs.append(gr.update(visible=is_visible))
|
| 958 |
+
final_outputs.append(gr.update(value=mp3_val))
|
| 959 |
+
final_outputs.append(gr.update(value=wav_val, visible=is_visible))
|
| 960 |
|
| 961 |
num_dummies_needed = (ITEMS_PER_ROW - (num_generated % ITEMS_PER_ROW)) % ITEMS_PER_ROW if num_generated > 0 else 0
|
| 962 |
for i in range(ITEMS_PER_ROW - 1):
|
|
|
|
| 966 |
text_val = generated_texts[i] if i < num_generated else ""
|
| 967 |
final_outputs.append(text_val)
|
| 968 |
|
| 969 |
+
final_outputs.append(final_wav_paths)
|
| 970 |
return tuple(final_outputs)
|
|
|
|
| 971 |
|
|
|
|
| 972 |
def add_to_workbench(
|
| 973 |
current_status: str,
|
| 974 |
current_workbench_list: List[Dict],
|
|
|
|
| 979 |
safe_workbench_list = current_workbench_list or []
|
| 980 |
if not wav_audio_path or not Path(wav_audio_path).exists():
|
| 981 |
log_messages.append("⚠️ [キープ追加エラー] 追加する音声ファイル(WAV)が見つかりません。")
|
| 982 |
+
final_status = "\n".join(log_messages) if not ENABLE_LOGGING else (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 983 |
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 984 |
|
|
|
|
| 985 |
if any(item['audio_path'] == wav_audio_path for item in safe_workbench_list):
|
| 986 |
log_messages.append("ℹ️ この音声はすでにキープに存在します。")
|
| 987 |
+
final_status = "\n".join(log_messages) if not ENABLE_LOGGING else (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 988 |
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 989 |
|
| 990 |
display_model_name = model
|
| 991 |
parsed_result = parse_merged_model_name(model)
|
| 992 |
if parsed_result: display_model_name, _ = parsed_result
|
| 993 |
|
|
|
|
| 994 |
new_item = {"audio_path": wav_audio_path, "text": text, "model": display_model_name, "original_models": [model], "style": style_display_name, "style_weight": style_weight, "timestamp": datetime.datetime.now(JST).isoformat(), "is_merged": False}
|
| 995 |
updated_list = safe_workbench_list + [new_item]
|
| 996 |
|
|
|
|
| 1001 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1002 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
|
| 1003 |
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
|
| 1004 |
+
except Exception as e:
|
| 1005 |
+
if ENABLE_LOGGING:
|
| 1006 |
+
print(f"Warning: Failed to delete old workbench audio file: {e}")
|
| 1007 |
log_messages.append(f"ℹ️ キープのアイテムが最大数({MAX_WORKBENCH_ITEMS})に達したため、一番古いアイテムを削除しました。")
|
| 1008 |
|
| 1009 |
ui_updates = update_workbench_ui(updated_list)
|
| 1010 |
log_messages.append("✅ キープに音声を追加しました。")
|
| 1011 |
+
# ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
|
| 1012 |
+
if ENABLE_LOGGING:
|
| 1013 |
+
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 1014 |
+
else:
|
| 1015 |
+
essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
|
| 1016 |
+
final_status = "\n".join(essential_logs).strip()
|
| 1017 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 1018 |
return (final_status, updated_list) + ui_updates
|
|
|
|
| 1019 |
|
|
|
|
| 1020 |
def remove_from_workbench(current_status: str, index_to_remove: int, current_workbench_list: List[Dict]) -> Tuple:
|
| 1021 |
log_messages = []
|
| 1022 |
safe_workbench_list = current_workbench_list or []
|
| 1023 |
+
if not (0 <= index_to_remove < len(safe_workbench_list)):
|
| 1024 |
+
final_status = current_status if ENABLE_LOGGING else ""
|
| 1025 |
+
return (final_status, safe_workbench_list) + update_workbench_ui(safe_workbench_list)
|
| 1026 |
|
| 1027 |
item_to_remove = safe_workbench_list[index_to_remove]
|
| 1028 |
try:
|
| 1029 |
path_to_delete_wav = Path(item_to_remove['audio_path'])
|
| 1030 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1031 |
|
|
|
|
| 1032 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir():
|
| 1033 |
path_to_delete_wav.unlink()
|
|
|
|
| 1034 |
if path_to_delete_mp3.exists():
|
| 1035 |
path_to_delete_mp3.unlink()
|
| 1036 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除し、一時ファイル(WAV/MP3)をクリーンアップしました。")
|
|
|
|
| 1038 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(ファイルは保持: {path_to_delete_wav.name})")
|
| 1039 |
else:
|
| 1040 |
log_messages.append(f"✅ キープからアイテム #{index_to_remove + 1} を削除しました。(関連ファイルなし)")
|
|
|
|
| 1041 |
except Exception as e: log_messages.append(f"⚠️ キープのアイテム #{index_to_remove + 1} のファイル削除中にエラー: {e}")
|
| 1042 |
|
| 1043 |
updated_list = [item for i, item in enumerate(safe_workbench_list) if i != index_to_remove]
|
| 1044 |
ui_updates = update_workbench_ui(updated_list)
|
| 1045 |
+
# ▼▼▼ 変更: ログ表示をENABLE_LOGGINGで制御 ▼▼▼
|
| 1046 |
+
if ENABLE_LOGGING:
|
| 1047 |
+
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 1048 |
+
else:
|
| 1049 |
+
essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
|
| 1050 |
+
final_status = "\n".join(essential_logs).strip()
|
| 1051 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 1052 |
return (final_status, updated_list) + ui_updates
|
|
|
|
| 1053 |
|
|
|
|
| 1054 |
def action_merge_preview(current_status: str, first_audio_num: int, second_audio_num: int, pause_ms: int, workbench_list: List[Dict], progress=gr.Progress(track_tqdm=True)):
|
| 1055 |
log_messages = []
|
| 1056 |
+
def create_error_return():
    """Build the 4-tuple returned when the merge preview cannot be produced.

    With ENABLE_LOGGING on, the messages collected so far are appended to
    the incoming status text. Otherwise only messages containing one of the
    status markers are reported. The remaining slots are None, a hidden and
    cleared ``gr.update``, and an empty dict.
    """
    if not ENABLE_LOGGING:
        markers = ["✅", "❌", "⚠️", "ℹ️"]
        # Keep only entries that carry a status marker somewhere in the text.
        kept_entries = [
            entry for entry in log_messages
            if any(marker in entry for marker in markers)
        ]
        status_text = "\n".join(kept_entries).strip()
    else:
        joined_messages = "\n".join(log_messages)
        # strip() removes the leading newline when current_status is empty.
        status_text = (current_status + "\n" + joined_messages).strip()
    return (status_text, None, gr.update(value=None, visible=False), {})
|
| 1063 |
+
|
| 1064 |
if not workbench_list:
|
| 1065 |
log_messages.append("⚠️ [結合プレビュー警告] キープに音声がありません。")
|
| 1066 |
+
return create_error_return()
|
| 1067 |
idx1, idx2 = int(first_audio_num) - 1, int(second_audio_num) - 1
|
| 1068 |
if not (0 <= idx1 < len(workbench_list) and 0 <= idx2 < len(workbench_list)):
|
| 1069 |
log_messages.append(f"⚠️ [結合プレビュー警告] 指定された番号(#{first_audio_num}, #{second_audio_num})の音声が見つかりません。")
|
| 1070 |
+
return create_error_return()
|
| 1071 |
item1, item2 = workbench_list[idx1], workbench_list[idx2]
|
| 1072 |
audio_path1, audio_path2 = item1.get("audio_path"), item2.get("audio_path")
|
| 1073 |
if not audio_path1 or not Path(audio_path1).exists() or not audio_path2 or not Path(audio_path2).exists():
|
| 1074 |
log_messages.append("❌ [結合プレビューエラー] 音声ファイル(WAV)が見つかりません。ファイルが削除された可能性があります。")
|
| 1075 |
+
return create_error_return()
|
| 1076 |
|
| 1077 |
progress(0, desc="結合準備中...")
|
| 1078 |
try:
|
|
|
|
| 1080 |
pause_duration = int(pause_ms)
|
| 1081 |
if pause_duration >= 0:
|
| 1082 |
combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
|
| 1083 |
+
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {pause_duration}ms のポーズを挟んで結合しました。")
|
| 1084 |
else:
|
| 1085 |
overlap_duration = abs(pause_duration)
|
| 1086 |
max_possible_overlap = min(len(segment1), len(segment2))
|
|
|
|
| 1089 |
overlap_duration = max_possible_overlap
|
| 1090 |
combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
|
| 1091 |
combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
|
| 1092 |
+
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num} と #{second_audio_num} を {overlap_duration}ms 重ねて(オーバーレイして)結合しました。")
|
| 1093 |
progress(1, desc="結合完了")
|
| 1094 |
except Exception as e:
|
| 1095 |
log_messages.append(f"❌ [結合プレビューエラー] 音声の結合中にエラーが発生しました: {e}")
|
| 1096 |
+
return create_error_return()
|
| 1097 |
|
| 1098 |
base_filename = f"merged_preview_{uuid.uuid4().hex[:8]}"
|
| 1099 |
temp_dir = Path(tempfile.gettempdir())
|
|
|
|
| 1108 |
original_models1, original_models2 = item1.get('original_models', []), item2.get('original_models', [])
|
| 1109 |
all_original_models = set(original_models1 + original_models2)
|
| 1110 |
|
|
|
|
| 1111 |
metadata = {"text": f"{item1.get('text', '')} | {item2.get('text', '')}", "display_models": sorted(list(all_display_models)), "original_models": sorted(list(all_original_models)), "audio_path": str(wav_temp_path), "timestamp": datetime.datetime.now(JST).isoformat()}
|
| 1112 |
log_messages.append("✅ 結合プレビューが生成されました。")
|
|
|
|
| 1113 |
|
| 1114 |
+
if ENABLE_LOGGING:
|
| 1115 |
+
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 1116 |
+
else:
|
| 1117 |
+
essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
|
| 1118 |
+
final_status = "\n".join(essential_logs).strip()
|
| 1119 |
+
|
| 1120 |
return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
|
|
|
|
| 1121 |
|
|
|
|
| 1122 |
def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
|
| 1123 |
log_messages = []
|
| 1124 |
safe_workbench_list = current_workbench_list or []
|
| 1125 |
+
|
| 1126 |
+
def create_error_return():
    """Build the early-exit result that leaves the workbench list unchanged.

    The status text is the incoming status plus the collected messages when
    ENABLE_LOGGING is on. Otherwise it is only the messages containing one
    of the status markers. The tuple continues with the unmodified
    ``safe_workbench_list`` followed by the values returned by
    ``update_workbench_ui`` for that same list.
    """
    if not ENABLE_LOGGING:
        markers = ["✅", "❌", "⚠️", "ℹ️"]
        # Keep only entries that carry a status marker somewhere in the text.
        kept_entries = [
            entry for entry in log_messages
            if any(marker in entry for marker in markers)
        ]
        status_text = "\n".join(kept_entries).strip()
    else:
        joined_messages = "\n".join(log_messages)
        # strip() removes the leading newline when current_status is empty.
        status_text = (current_status + "\n" + joined_messages).strip()
    workbench_ui_values = update_workbench_ui(safe_workbench_list)
    return (status_text, safe_workbench_list) + workbench_ui_values
|
| 1133 |
+
|
| 1134 |
if not preview_data or "audio_path" not in preview_data:
|
| 1135 |
log_messages.append("⚠️ [キープ追加エラー] 追加する結合済み音声がありません。先にプレビューを生成してください。")
|
| 1136 |
+
return create_error_return()
|
|
|
|
| 1137 |
|
|
|
|
| 1138 |
src_path = Path(preview_data["audio_path"])
|
| 1139 |
if not src_path.exists():
|
| 1140 |
log_messages.append("⚠️ [キープ追加エラー] 結合済み音声ファイルが見つかりません。")
|
| 1141 |
+
return create_error_return()
|
|
|
|
| 1142 |
|
| 1143 |
new_merged_item = {"audio_path": str(src_path), "text": preview_data.get("text", "N/A"), "model": " | ".join(preview_data.get("display_models", [])), "original_models": preview_data.get("original_models", []), "style": "N/A", "style_weight": 0.0, "timestamp": preview_data.get("timestamp"), "is_merged": True}
|
| 1144 |
final_workbench_list = []
|
|
|
|
| 1154 |
try:
|
| 1155 |
path_to_delete_wav = Path(item_to_remove['audio_path'])
|
| 1156 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1157 |
+
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
|
| 1158 |
+
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
|
|
|
|
|
|
|
| 1159 |
except Exception as e: log_messages.append(f"⚠️ 元の音声ファイル削除中にエラー: {e}")
|
| 1160 |
|
| 1161 |
final_workbench_list = [new_merged_item] + remaining_list
|
|
|
|
| 1171 |
path_to_delete_mp3 = path_to_delete_wav.with_suffix('.mp3')
|
| 1172 |
if path_to_delete_wav.exists() and str(path_to_delete_wav.parent) == tempfile.gettempdir(): path_to_delete_wav.unlink()
|
| 1173 |
if path_to_delete_mp3.exists() and str(path_to_delete_mp3.parent) == tempfile.gettempdir(): path_to_delete_mp3.unlink()
|
| 1174 |
+
except Exception as e:
|
| 1175 |
+
if ENABLE_LOGGING:
|
| 1176 |
+
print(f"Warning: Failed to delete old workbench audio file: {e}")
|
| 1177 |
log_messages.append(f"ℹ️ キープが最大数({MAX_WORKBENCH_ITEMS})に達したため一番古いアイテムを削除しました。")
|
| 1178 |
|
| 1179 |
ui_updates = update_workbench_ui(final_workbench_list)
|
| 1180 |
+
|
| 1181 |
+
if ENABLE_LOGGING:
|
| 1182 |
+
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
| 1183 |
+
else:
|
| 1184 |
+
essential_logs = [log for log in log_messages if any(prefix in log for prefix in ["✅", "❌", "⚠️", "ℹ️"])]
|
| 1185 |
+
final_status = "\n".join(essential_logs).strip()
|
| 1186 |
+
|
| 1187 |
return (final_status, final_workbench_list) + ui_updates
|
|
|
|
| 1188 |
|
| 1189 |
|
| 1190 |
+
# --- イベントリスナー接続 (変更なし) ---
|
| 1191 |
def on_fn_mode_change(is_fn_mode_on: bool) -> gr.Checkbox:
    """Return a ``gr.update`` reacting to the FN-mode toggle.

    When FN mode is switched on, the update sets the target's value to
    False. Otherwise an empty update is returned, which changes nothing.
    """
    if not is_fn_mode_on:
        return gr.update()
    return gr.update(value=False)
|
|
|
|
| 1207 |
current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
|
| 1208 |
use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
|
| 1209 |
|
|
|
|
| 1210 |
generate_outputs = [status_textbox, audio_output_area]
|
| 1211 |
for i in range(MAX_AUDIO_OUTPUTS):
|
| 1212 |
generate_outputs.extend([audio_item_columns[i], audio_outputs[i], download_buttons[i]])
|
| 1213 |
generate_outputs.extend(dummy_audio_item_columns)
|
| 1214 |
generate_outputs.extend(synthesized_text_states)
|
| 1215 |
+
generate_outputs.append(synthesized_wav_files_state)
|
|
|
|
| 1216 |
|
| 1217 |
generate_button.click(
|
| 1218 |
fn=action_run_synthesis,
|
|
|
|
| 1231 |
outputs=generate_outputs
|
| 1232 |
)
|
| 1233 |
|
|
|
|
| 1234 |
for i in range(MAX_AUDIO_OUTPUTS):
|
| 1235 |
to_workbench_buttons[i].click(
|
| 1236 |
fn=lambda current_status, workbench_list, text, model, style_display, style_weight, all_wavs, idx=i: \
|
| 1237 |
add_to_workbench(
|
| 1238 |
current_status, workbench_list,
|
| 1239 |
+
all_wavs[idx] if all_wavs and idx < len(all_wavs) else None,
|
| 1240 |
text, model, style_display, style_weight
|
| 1241 |
),
|
| 1242 |
inputs=[
|
| 1243 |
status_textbox, workbench_state, synthesized_text_states[i],
|
| 1244 |
selected_model_dropdown, current_styles_dropdown, style_weight_for_synth_slider,
|
| 1245 |
+
synthesized_wav_files_state
|
| 1246 |
],
|
| 1247 |
outputs=[status_textbox, workbench_state] + all_workbench_ui_components
|
| 1248 |
)
|
|
|
|
| 1249 |
|
| 1250 |
for i, item in enumerate(workbench_items):
|
| 1251 |
item["delete_btn"].click(
|
|
|
|
| 1289 |
|
| 1290 |
merger_cache_path = Path("/tmp/sbv2_merger_cache")
|
| 1291 |
mock_model_holder = TTSModelHolder()
|
| 1292 |
+
# ▼▼▼ 変更: printをENABLE_LOGGINGで制御 ▼▼▼
|
| 1293 |
+
if ENABLE_LOGGING:
|
| 1294 |
+
print(f"Initial models loaded by TTSModelHolder: {mock_model_holder.model_names}")
|
| 1295 |
+
# ▲▲▲ 変更 ▲▲▲
|
| 1296 |
|
| 1297 |
app = create_synthesis_app(mock_model_holder)
|
| 1298 |
|