rufflet17 committed on
Commit
b4d4d0b
·
verified ·
1 Parent(s): 521c324

Update gradio_tabs/single.py

Browse files
Files changed (1) hide show
  1. gradio_tabs/single.py +59 -26
gradio_tabs/single.py CHANGED
@@ -28,7 +28,6 @@ from typing import Dict, Any, List, Tuple, Optional, Set
28
  # Falseにすると、エラーや重要な通知以外のログは抑制されます。
29
  ENABLE_LOGGING = False
30
 
31
- # (TTSModelHolder, MockTTSModelなどのモックやヘルパー関数は変更なしのため省略します)
32
  # --- タイムゾーン定義 ---
33
  # グローバルな定数としてJSTを定義
34
  JST = timezone(timedelta(hours=9), 'JST')
@@ -71,7 +70,7 @@ class TTSModelHolder:
71
  json.dump(config2, f, indent=2)
72
  style_settings_data = {
73
  "styles": {
74
- "Neutral": { "display_name": "Neutral", "weight": 1.0 },
75
  "1": { "display_name": "クール", "weight": 0.8 },
76
  "2": { "display_name": "可愛い", "weight": 1.2 },
77
  }
@@ -252,7 +251,7 @@ def format_bytes(size_bytes: int) -> str:
252
  return f"{s} {size_name[i]}"
253
 
254
 
255
- # --- (pyopenjtalk関連ヘルパー関数は変更なしのため省略) ---
256
  JIRITSUGO_POS = ["名詞", "動詞", "形容詞", "副詞", "連体詞", "接続詞", "感動詞", "接頭詞"]
257
  def is_jirisugo(morpheme):
258
  if morpheme['pos'] == '記号': return False
@@ -525,13 +524,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
525
  MAX_WORKBENCH_ITEMS = 8
526
 
527
  all_styles_data_state = gr.State({})
528
- # ▼▼▼ 変更 ▼▼▼
529
  # 生成された音声ごとのパラメータを保持するStateを追加
530
  synthesized_wav_files_state = gr.State([])
531
  synthesized_model_names_state = gr.State([])
532
  synthesized_style_names_state = gr.State([])
533
  synthesized_style_weights_state = gr.State([])
534
- # ▲▲▲ 変更 ▲▲▲
535
  workbench_state = gr.State([])
536
  merged_preview_state = gr.State({})
537
 
@@ -582,8 +579,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
582
  with gr.Column(scale=3):
583
  # infoに文字数制限を追記
584
  text_input = gr.TextArea(
585
- label="読み上げたいテキスト", lines=3, placeholder="ここにテキストを入力",
586
- value="こんにちは、今日もいい天気ですね。", interactive=True,
587
  info=f"最大{MAX_TEXT_LENGTH}文字まで。使用できない文字: {INVALID_FILENAME_CHARS_FOR_DISPLAY}"
588
  )
589
  generate_button = gr.Button("音声合成実行", variant="primary", interactive=True)
@@ -650,7 +647,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
650
  random_text_ratio_textbox = gr.Textbox(label="カタカナ化の割合", value="0.2, 0.4, 0.6, 0.8, 1", info="カンマ区切りで複数指定可。指定値からランダムに1つ使用。", interactive=True)
651
 
652
  with gr.Tab("キープ"):
653
- gr.Markdown("## キープ\n読み上げタブで生成した音声をここにストックし、結合や保存ができます。最大6個まで保持できます。")
654
  workbench_items = []
655
  all_workbench_ui_components = []
656
  with gr.Row(variant="panel"):
@@ -831,13 +828,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
831
  error_outputs.append(gr.update(visible=False))
832
  for _ in range(MAX_AUDIO_OUTPUTS):
833
  error_outputs.append("")
834
- # ▼▼▼ 変更 ▼▼▼
835
  # エラー時に返す空リストを、追加したStateの分だけ増やす
836
  error_outputs.append([]) # for synthesized_wav_files_state
837
  error_outputs.append([]) # for synthesized_model_names_state
838
  error_outputs.append([]) # for synthesized_style_names_state
839
  error_outputs.append([]) # for synthesized_style_weights_state
840
- # ▲▲▲ 変更 ▲▲▲
841
 
842
 
843
  if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
@@ -880,7 +875,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
880
  batch_count = int(batch_count)
881
  if batch_count <= 0: batch_count = 1
882
 
883
- # ▼▼▼ 変更 ▼▼▼
884
  # 生成パラメータを保持するリストを初期化
885
  final_wav_paths = []
886
  final_mp3_paths = []
@@ -888,7 +882,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
888
  generated_model_names = []
889
  generated_style_names = []
890
  generated_style_weights = []
891
- # ▲▲▲ 変更 ▲▲▲
892
 
893
  def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
894
  try:
@@ -959,7 +952,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
959
  base_filename = f"{sanitized_model_name}-{sanitized_style_name}-{style_weight_str}-{text_for_filename}"
960
 
961
  saved_paths = save_audio_files(audio_segment, base_filename)
962
- # ▼▼▼ 変更 ▼▼▼
963
  # 音声保存成功時に、生成パラメータをリストに記録
964
  if saved_paths:
965
  final_wav_paths.append(saved_paths[0])
@@ -968,7 +960,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
968
  generated_model_names.append(model_name)
969
  generated_style_names.append(style_display_name)
970
  generated_style_weights.append(style_weight_for_synth)
971
- # ▲▲▲ 変更 ▲▲▲
972
 
973
  if len(final_wav_paths) == 0:
974
  all_logs.append("ℹ️ 音声は生成されませんでした。")
@@ -979,11 +970,60 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
979
  start_seed = int(seed)
980
  for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
981
  current_seed = start_seed + i if start_seed >= 0 else -1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
982
  if ENABLE_LOGGING:
983
  all_logs.append(f"--- 生成 {i+1}/{batch_count} (Seed: {current_seed if current_seed >= 0 else 'Random'}) ---")
984
- all_logs.append(f" ┗ 合成テキスト: \"{text[:50]}{'...' if len(text)>50 else ''}\"")
985
-
986
- success, logs, audio_tuple = process_single_synthesis_webui(model_holder, model_name, actual_model_file_to_load, text, lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth, current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation, use_assist, assist_text or None, assist_w)
 
 
 
 
 
 
 
 
 
 
987
 
988
  all_logs.extend([f" {log}" for log in logs])
989
 
@@ -993,20 +1033,19 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
993
  sanitized_model_name = sanitize_filename(model_name)
994
  sanitized_style_name = sanitize_filename(style_display_name)
995
  style_weight_str = f"{style_weight_for_synth:.1f}".replace('.', '.')
 
996
  text_for_filename = sanitize_filename(text[:30]) if text else "no-text"
997
  base_filename = f"{sanitized_model_name}-{sanitized_style_name}-{style_weight_str}-{text_for_filename}"
998
 
999
  saved_paths = save_audio_files(audio_segment, base_filename)
1000
- # ▼▼▼ 変更 ▼▼▼
1001
  # 音声保存成功時に、生成パラメータをリストに記録
1002
  if saved_paths:
1003
  final_wav_paths.append(saved_paths[0])
1004
  final_mp3_paths.append(saved_paths[1])
1005
- generated_texts.append(text)
1006
  generated_model_names.append(model_name)
1007
  generated_style_names.append(style_display_name)
1008
  generated_style_weights.append(style_weight_for_synth)
1009
- # ▲▲▲ 変更 ▲▲▲
1010
 
1011
  num_generated = len(final_wav_paths)
1012
  if num_generated > 0:
@@ -1043,13 +1082,11 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1043
  text_val = generated_texts[i] if i < num_generated else ""
1044
  final_outputs.append(text_val)
1045
 
1046
- # ▼▼▼ 変更 ▼▼▼
1047
  # 関数の戻り値に、生成パラメータのリストを追加
1048
  final_outputs.append(final_wav_paths)
1049
  final_outputs.append(generated_model_names)
1050
  final_outputs.append(generated_style_names)
1051
  final_outputs.append(generated_style_weights)
1052
- # ▲▲▲ 変更 ▲▲▲
1053
  return tuple(final_outputs)
1054
 
1055
  def add_to_workbench(
@@ -1326,7 +1363,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1326
  current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
1327
  use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
1328
 
1329
- # ▼▼▼ 変更 ▼▼▼
1330
  # generate_buttonのoutputsに、追加したStateを追加
1331
  generate_outputs = [status_textbox, audio_output_area]
1332
  for i in range(MAX_AUDIO_OUTPUTS):
@@ -1337,7 +1373,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1337
  generate_outputs.append(synthesized_model_names_state)
1338
  generate_outputs.append(synthesized_style_names_state)
1339
  generate_outputs.append(synthesized_style_weights_state)
1340
- # ▲▲▲ 変更 ▲▲▲
1341
 
1342
  generate_button.click(
1343
  fn=action_run_synthesis,
@@ -1356,7 +1391,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1356
  outputs=generate_outputs
1357
  )
1358
 
1359
- # ▼▼▼ 変更 ▼▼▼
1360
  # 「キープ」ボタンのクリックイベントを修正。
1361
  # UIのドロップダウンからではなく、Stateに保持された生成時のパラメータを使用する。
1362
  for i in range(MAX_AUDIO_OUTPUTS):
@@ -1379,7 +1413,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
1379
  ],
1380
  outputs=[status_textbox, workbench_state] + all_workbench_ui_components
1381
  )
1382
- # ▲▲▲ 変更 ▲▲▲
1383
 
1384
  for i, item in enumerate(workbench_items):
1385
  item["delete_btn"].click(
 
28
  # Falseにすると、エラーや重要な通知以外のログは抑制されます。
29
  ENABLE_LOGGING = False
30
 
 
31
  # --- タイムゾーン定義 ---
32
  # グローバルな定数としてJSTを定義
33
  JST = timezone(timedelta(hours=9), 'JST')
 
70
  json.dump(config2, f, indent=2)
71
  style_settings_data = {
72
  "styles": {
73
+ "0": { "display_name": "Neutral", "weight": 1.0 },
74
  "1": { "display_name": "クール", "weight": 0.8 },
75
  "2": { "display_name": "可愛い", "weight": 1.2 },
76
  }
 
251
  return f"{s} {size_name[i]}"
252
 
253
 
254
+ # --- pyopenjtalk関連ヘルパー関数 ---
255
  JIRITSUGO_POS = ["名詞", "動詞", "形容詞", "副詞", "連体詞", "接続詞", "感動詞", "接頭詞"]
256
  def is_jirisugo(morpheme):
257
  if morpheme['pos'] == '記号': return False
 
524
  MAX_WORKBENCH_ITEMS = 8
525
 
526
  all_styles_data_state = gr.State({})
 
527
  # 生成された音声ごとのパラメータを保持するStateを追加
528
  synthesized_wav_files_state = gr.State([])
529
  synthesized_model_names_state = gr.State([])
530
  synthesized_style_names_state = gr.State([])
531
  synthesized_style_weights_state = gr.State([])
 
532
  workbench_state = gr.State([])
533
  merged_preview_state = gr.State({})
534
 
 
579
  with gr.Column(scale=3):
580
  # infoに文字数制限を追記
581
  text_input = gr.TextArea(
582
+ label="読み上げたいテキスト", lines=3, placeholder="ここにテキストを入力\n発音ガチャ1モードで [この部分だけ] 発音ガチャ2の変換を適用できます。",
583
+ value="こんにちは、[今日もいい天気ですね。]", interactive=True,
584
  info=f"最大{MAX_TEXT_LENGTH}文字まで。使用できない文字: {INVALID_FILENAME_CHARS_FOR_DISPLAY}"
585
  )
586
  generate_button = gr.Button("音声合成実行", variant="primary", interactive=True)
 
647
  random_text_ratio_textbox = gr.Textbox(label="カタカナ化の割合", value="0.2, 0.4, 0.6, 0.8, 1", info="カンマ区切りで複数指定可。指定値からランダムに1つ使用。", interactive=True)
648
 
649
  with gr.Tab("キープ"):
650
+ gr.Markdown("## キープ\n読み上げタブで生成した音声をここにストックし、結合や保存ができます。最大8個まで保持できます。")
651
  workbench_items = []
652
  all_workbench_ui_components = []
653
  with gr.Row(variant="panel"):
 
828
  error_outputs.append(gr.update(visible=False))
829
  for _ in range(MAX_AUDIO_OUTPUTS):
830
  error_outputs.append("")
 
831
  # エラー時に返す空リストを、追加したStateの分だけ増やす
832
  error_outputs.append([]) # for synthesized_wav_files_state
833
  error_outputs.append([]) # for synthesized_model_names_state
834
  error_outputs.append([]) # for synthesized_style_names_state
835
  error_outputs.append([]) # for synthesized_style_weights_state
 
836
 
837
 
838
  if re.search(INVALID_FILENAME_CHARS_PATTERN, text):
 
875
  batch_count = int(batch_count)
876
  if batch_count <= 0: batch_count = 1
877
 
 
878
  # 生成パラメータを保持するリストを初期化
879
  final_wav_paths = []
880
  final_mp3_paths = []
 
882
  generated_model_names = []
883
  generated_style_names = []
884
  generated_style_weights = []
 
885
 
886
  def save_audio_files(audio_segment: AudioSegment, base_filename: str) -> Optional[Tuple[str, str]]:
887
  try:
 
952
  base_filename = f"{sanitized_model_name}-{sanitized_style_name}-{style_weight_str}-{text_for_filename}"
953
 
954
  saved_paths = save_audio_files(audio_segment, base_filename)
 
955
  # 音声保存成功時に、生成パラメータをリストに記録
956
  if saved_paths:
957
  final_wav_paths.append(saved_paths[0])
 
960
  generated_model_names.append(model_name)
961
  generated_style_names.append(style_display_name)
962
  generated_style_weights.append(style_weight_for_synth)
 
963
 
964
  if len(final_wav_paths) == 0:
965
  all_logs.append("ℹ️ 音声は生成されませんでした。")
 
970
  start_seed = int(seed)
971
  for i in progress.tqdm(range(batch_count), desc=f"{batch_count}件の音声を生成中"):
972
  current_seed = start_seed + i if start_seed >= 0 else -1
973
+
974
+ # ▼▼▼ 変更点 ▼▼▼
975
+ # 合成用のテキストを準備
976
+ text_to_synthesize = text
977
+ bracket_pattern = re.compile(r'\[([^\[\]]+)\]')
978
+
979
+ # テキストに [] が含まれている場合、その部分だけを発音ガチャ2のロジックで変換
980
+ if bracket_pattern.search(text):
981
+ if ENABLE_LOGGING:
982
+ all_logs.append(f" ┠ 発音ガチャ1の特殊モードを検出: `[]` 内を変換します。")
983
+
984
+ try:
985
+ ratio_list = [float(x.strip()) for x in random_text_ratio_str.split(',') if x.strip()]
986
+ if not ratio_list: ratio_list = [0.5]
987
+ except ValueError:
988
+ ratio_list = [0.5]
989
+ internal_mode = int(random_text_mode) + 1
990
+
991
+ parts = bracket_pattern.split(text)
992
+ final_text_parts = []
993
+ log_parts = []
994
+
995
+ for j, part in enumerate(parts):
996
+ # jが奇数番目の要素が[]の中身
997
+ if j % 2 == 1:
998
+ original_part = part
999
+ transformed_blocks = generate_one_variation(original_part, internal_mode, random.choice(ratio_list))
1000
+ transformed_part = "".join(transformed_blocks)
1001
+ final_text_parts.append(transformed_part)
1002
+ log_parts.append(f"「{original_part}」->「{transformed_part}」")
1003
+ else:
1004
+ final_text_parts.append(part)
1005
+
1006
+ text_to_synthesize = "".join(final_text_parts)
1007
+
1008
+ if ENABLE_LOGGING and log_parts:
1009
+ all_logs.append(f" ┠ 変換ログ: {', '.join(log_parts)}")
1010
+ # ▲▲▲ 変更点ここまで ▲▲▲
1011
+
1012
  if ENABLE_LOGGING:
1013
  all_logs.append(f"--- 生成 {i+1}/{batch_count} (Seed: {current_seed if current_seed >= 0 else 'Random'}) ---")
1014
+ if text_to_synthesize != text:
1015
+ all_logs.append(f" ┠ 元テキスト: \"{text[:50]}{'...' if len(text)>50 else ''}\"")
1016
+ all_logs.append(f" ┗ 合成テキスト: \"{text_to_synthesize[:50]}{'...' if len(text_to_synthesize)>50 else ''}\"")
1017
+ else:
1018
+ all_logs.append(f" ┗ 合成テキスト: \"{text_to_synthesize[:50]}{'...' if len(text_to_synthesize)>50 else ''}\"")
1019
+
1020
+ success, logs, audio_tuple = process_single_synthesis_webui(
1021
+ model_holder, model_name, actual_model_file_to_load,
1022
+ text_to_synthesize, # 変換後のテキストを使用
1023
+ lang, speaker or None, internal_style_key, style_display_name, style_weight_for_synth,
1024
+ current_seed, ref_audio or None, length, noise, noise_w, sdp_r, pitch, intonation,
1025
+ use_assist, assist_text or None, assist_w
1026
+ )
1027
 
1028
  all_logs.extend([f" {log}" for log in logs])
1029
 
 
1033
  sanitized_model_name = sanitize_filename(model_name)
1034
  sanitized_style_name = sanitize_filename(style_display_name)
1035
  style_weight_str = f"{style_weight_for_synth:.1f}".replace('.', '.')
1036
+ # ファイル名は変換前の元のテキストを使用
1037
  text_for_filename = sanitize_filename(text[:30]) if text else "no-text"
1038
  base_filename = f"{sanitized_model_name}-{sanitized_style_name}-{style_weight_str}-{text_for_filename}"
1039
 
1040
  saved_paths = save_audio_files(audio_segment, base_filename)
 
1041
  # 音声保存成功時に、生成パラメータをリストに記録
1042
  if saved_paths:
1043
  final_wav_paths.append(saved_paths[0])
1044
  final_mp3_paths.append(saved_paths[1])
1045
+ generated_texts.append(text) # ここも元のテキストを保存
1046
  generated_model_names.append(model_name)
1047
  generated_style_names.append(style_display_name)
1048
  generated_style_weights.append(style_weight_for_synth)
 
1049
 
1050
  num_generated = len(final_wav_paths)
1051
  if num_generated > 0:
 
1082
  text_val = generated_texts[i] if i < num_generated else ""
1083
  final_outputs.append(text_val)
1084
 
 
1085
  # 関数の戻り値に、生成パラメータのリストを追加
1086
  final_outputs.append(final_wav_paths)
1087
  final_outputs.append(generated_model_names)
1088
  final_outputs.append(generated_style_names)
1089
  final_outputs.append(generated_style_weights)
 
1090
  return tuple(final_outputs)
1091
 
1092
  def add_to_workbench(
 
1363
  current_styles_dropdown.change(on_style_dropdown_select, inputs=[current_styles_dropdown, all_styles_data_state], outputs=[style_weight_for_synth_slider])
1364
  use_assist_text_checkbox.change(lambda x: (gr.update(visible=x), gr.update(visible=x)), inputs=[use_assist_text_checkbox], outputs=[assist_text_textbox, assist_text_weight_slider])
1365
 
 
1366
  # generate_buttonのoutputsに、追加したStateを追加
1367
  generate_outputs = [status_textbox, audio_output_area]
1368
  for i in range(MAX_AUDIO_OUTPUTS):
 
1373
  generate_outputs.append(synthesized_model_names_state)
1374
  generate_outputs.append(synthesized_style_names_state)
1375
  generate_outputs.append(synthesized_style_weights_state)
 
1376
 
1377
  generate_button.click(
1378
  fn=action_run_synthesis,
 
1391
  outputs=generate_outputs
1392
  )
1393
 
 
1394
  # 「キープ」ボタンのクリックイベントを修正。
1395
  # UIのドロップダウンからではなく、Stateに保持された生成時のパラメータを使用する。
1396
  for i in range(MAX_AUDIO_OUTPUTS):
 
1413
  ],
1414
  outputs=[status_textbox, workbench_state] + all_workbench_ui_components
1415
  )
 
1416
 
1417
  for i, item in enumerate(workbench_items):
1418
  item["delete_btn"].click(