aaron
committed on
Commit
·
771366a
1
Parent(s):
d9f37c5
UI 텍스트를 영어로 변경 (국제화)
Browse files
- 메인 제목 및 설명을 영어로 번역
- TTS 설정 섹션 라벨 영어화
- 음성 변환 설정 섹션 라벨 영어화
- 버튼 및 출력 라벨 영어화
- 처리 과정 및 팁 설명 영어화
- 기본 텍스트 예시를 영어로 변경
- 국제 사용자 접근성 향상
app.py
CHANGED
@@ -811,98 +811,98 @@ except Exception as e:
|
|
811 |
# Create Gradio interface
|
812 |
with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
|
813 |
gr.Markdown("""
|
814 |
-
# **Integrated TTS + Voice Conversion** —
|
815 |
|
816 |
-
|
817 |
|
818 |
-
|
819 |
-
1.
|
820 |
-
2.
|
821 |
-
3.
|
822 |
-
4.
|
823 |
-
5.
|
824 |
""")
|
825 |
|
826 |
with gr.Row():
|
827 |
with gr.Column(scale=6):
|
828 |
# TTS Parameters
|
829 |
-
gr.Markdown("### 🎤
|
830 |
text_input = gr.Textbox(
|
831 |
-
label="
|
832 |
-
value="
|
833 |
lines=3
|
834 |
)
|
835 |
style_input = gr.Dropdown(
|
836 |
-
label="
|
837 |
choices=styles,
|
838 |
value=styles[0]
|
839 |
)
|
840 |
speed_input = gr.Slider(
|
841 |
0.6, 1.4, value=1.0, step=0.05,
|
842 |
-
label="
|
843 |
)
|
844 |
reference_audio_input = gr.Audio(
|
845 |
-
label="
|
846 |
sources=["upload", "microphone"],
|
847 |
type="filepath"
|
848 |
)
|
849 |
|
850 |
# Voice Conversion Parameters
|
851 |
-
gr.Markdown("### 🔄
|
852 |
with gr.Row():
|
853 |
vc_diffusion_steps = gr.Slider(
|
854 |
minimum=1, maximum=200, value=25, step=1,
|
855 |
-
label="
|
856 |
-
info="25
|
857 |
)
|
858 |
vc_length_adjust = gr.Slider(
|
859 |
minimum=0.5, maximum=2.0, step=0.1, value=1.0,
|
860 |
-
label="
|
861 |
-
info="<1.0
|
862 |
)
|
863 |
|
864 |
with gr.Row():
|
865 |
vc_inference_cfg_rate = gr.Slider(
|
866 |
minimum=0.0, maximum=1.0, step=0.1, value=0.7,
|
867 |
-
label="CFG
|
868 |
-
info="
|
869 |
)
|
870 |
vc_pitch_shift = gr.Slider(
|
871 |
minimum=-24, maximum=24, step=1, value=0,
|
872 |
-
label="
|
873 |
-
info="
|
874 |
)
|
875 |
|
876 |
with gr.Row():
|
877 |
vc_f0_condition = gr.Checkbox(
|
878 |
-
label="F0
|
879 |
value=False,
|
880 |
-
info="
|
881 |
)
|
882 |
vc_auto_f0_adjust = gr.Checkbox(
|
883 |
-
label="
|
884 |
value=True,
|
885 |
-
info="
|
886 |
)
|
887 |
|
888 |
-
convert_btn = gr.Button("
|
889 |
|
890 |
with gr.Column(scale=6):
|
891 |
output_audio = gr.Audio(
|
892 |
-
label="
|
893 |
autoplay=True,
|
894 |
format="wav"
|
895 |
)
|
896 |
|
897 |
gr.Markdown("""
|
898 |
-
### 📋
|
899 |
-
1.
|
900 |
-
2.
|
901 |
|
902 |
-
### 💡
|
903 |
-
-
|
904 |
-
-
|
905 |
-
-
|
906 |
""")
|
907 |
|
908 |
# Connect the button click to the processing function
|
|
|
811 |
# Create Gradio interface
|
812 |
with gr.Blocks(title="Integrated TTS + Voice Conversion", analytics_enabled=False) as demo:
|
813 |
gr.Markdown("""
|
814 |
+
# **Integrated TTS + Voice Conversion** — Convert text to speech and then apply voice conversion
|
815 |
|
816 |
+
Enter text and upload a reference audio to first convert text to speech, then apply voice conversion to match the reference style.
|
817 |
|
818 |
+
**How to use:**
|
819 |
+
1. Enter the text you want to convert
|
820 |
+
2. Upload a reference audio (3-10 seconds recommended)
|
821 |
+
3. Select the base voice style and speed
|
822 |
+
4. Adjust voice conversion parameters
|
823 |
+
5. Click the "Convert" button
|
824 |
""")
|
825 |
|
826 |
with gr.Row():
|
827 |
with gr.Column(scale=6):
|
828 |
# TTS Parameters
|
829 |
+
gr.Markdown("### 🎤 Text-to-Speech Settings")
|
830 |
text_input = gr.Textbox(
|
831 |
+
label="Text to Convert",
|
832 |
+
value="Hello! This is an integrated TTS and voice conversion demo.",
|
833 |
lines=3
|
834 |
)
|
835 |
style_input = gr.Dropdown(
|
836 |
+
label="Base Voice Style",
|
837 |
choices=styles,
|
838 |
value=styles[0]
|
839 |
)
|
840 |
speed_input = gr.Slider(
|
841 |
0.6, 1.4, value=1.0, step=0.05,
|
842 |
+
label="Speech Speed (×)"
|
843 |
)
|
844 |
reference_audio_input = gr.Audio(
|
845 |
+
label="Reference Audio",
|
846 |
sources=["upload", "microphone"],
|
847 |
type="filepath"
|
848 |
)
|
849 |
|
850 |
# Voice Conversion Parameters
|
851 |
+
gr.Markdown("### 🔄 Voice Conversion Settings")
|
852 |
with gr.Row():
|
853 |
vc_diffusion_steps = gr.Slider(
|
854 |
minimum=1, maximum=200, value=25, step=1,
|
855 |
+
label="Diffusion Steps",
|
856 |
+
info="25 default, 50~100 for best quality"
|
857 |
)
|
858 |
vc_length_adjust = gr.Slider(
|
859 |
minimum=0.5, maximum=2.0, step=0.1, value=1.0,
|
860 |
+
label="Length Adjustment",
|
861 |
+
info="<1.0 faster, >1.0 slower"
|
862 |
)
|
863 |
|
864 |
with gr.Row():
|
865 |
vc_inference_cfg_rate = gr.Slider(
|
866 |
minimum=0.0, maximum=1.0, step=0.1, value=0.7,
|
867 |
+
label="CFG Rate",
|
868 |
+
info="Subtle influence"
|
869 |
)
|
870 |
vc_pitch_shift = gr.Slider(
|
871 |
minimum=-24, maximum=24, step=1, value=0,
|
872 |
+
label="Pitch Shift",
|
873 |
+
info="In semitones"
|
874 |
)
|
875 |
|
876 |
with gr.Row():
|
877 |
vc_f0_condition = gr.Checkbox(
|
878 |
+
label="Use F0 Conditioned Model",
|
879 |
value=False,
|
880 |
+
info="Required for singing voice conversion"
|
881 |
)
|
882 |
vc_auto_f0_adjust = gr.Checkbox(
|
883 |
+
label="Auto F0 Adjustment",
|
884 |
value=True,
|
885 |
+
info="Adjust F0 to match target voice"
|
886 |
)
|
887 |
|
888 |
+
convert_btn = gr.Button("Convert", variant="primary", size="lg")
|
889 |
|
890 |
with gr.Column(scale=6):
|
891 |
output_audio = gr.Audio(
|
892 |
+
label="Final Converted Audio",
|
893 |
autoplay=True,
|
894 |
format="wav"
|
895 |
)
|
896 |
|
897 |
gr.Markdown("""
|
898 |
+
### 📋 Processing Steps:
|
899 |
+
1. **Text → Speech**: Input text is converted to speech with the reference voice tone
|
900 |
+
2. **Voice Conversion**: Generated speech is converted to match the reference voice style
|
901 |
|
902 |
+
### 💡 Tips:
|
903 |
+
- Use clean reference audio of 3-10 seconds length
|
904 |
+
- Check "Use F0 Conditioned Model" for singing voice conversion
|
905 |
+
- Set diffusion steps to 50-100 for higher quality
|
906 |
""")
|
907 |
|
908 |
# Connect the button click to the processing function
|