Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -129,14 +129,14 @@ PRESET_VOICES = [
|
|
| 129 |
]
|
| 130 |
|
| 131 |
# YourVoic voices mapped by language
|
| 132 |
-
#
|
| 133 |
YOURVOIC_VOICE_MAP = {
|
| 134 |
# African
|
| 135 |
"Afrikaans": ["Annika", "Willem"],
|
| 136 |
"Amharic": ["Abebe", "Meron"],
|
| 137 |
"Swahili": ["Jabari", "Amara"],
|
| 138 |
-
# Indian
|
| 139 |
-
"Hindi": ["
|
| 140 |
"Marathi": ["Anjali", "Rohan"],
|
| 141 |
"Bengali": ["Sneha", "Aryan"],
|
| 142 |
"Telugu": ["Arjun", "Lakshmi"],
|
|
@@ -147,19 +147,19 @@ YOURVOIC_VOICE_MAP = {
|
|
| 147 |
"Punjabi": ["Vikram", "Simran"],
|
| 148 |
"Odia": ["Kavya", "Subham"],
|
| 149 |
"Assamese": ["Jyoti", "Bikash"],
|
| 150 |
-
"Maithili": ["
|
| 151 |
-
"Kashmiri": ["
|
| 152 |
-
"Sindhi": ["
|
| 153 |
-
"Konkani": ["
|
| 154 |
-
"Dogri": ["
|
| 155 |
-
"Manipuri": ["
|
| 156 |
-
"Bodo": ["
|
| 157 |
-
"Sanskrit": ["
|
| 158 |
# South Asian
|
| 159 |
-
"Urdu": ["
|
| 160 |
-
"Nepali": ["
|
| 161 |
-
"Sinhala": ["
|
| 162 |
-
#
|
| 163 |
"English": ["Peter", "Sarah", "Caleb"],
|
| 164 |
}
|
| 165 |
|
|
@@ -710,30 +710,30 @@ DESCRIPTION = """
|
|
| 710 |
The app automatically selects the right engine based on your chosen language. Or pick manually!
|
| 711 |
"""
|
| 712 |
|
| 713 |
-
# Build language dropdown
|
| 714 |
lang_choices = []
|
| 715 |
-
# Qwen languages first
|
| 716 |
for name in LANGUAGES:
|
| 717 |
if LANGUAGES[name]["engine"] == "qwen":
|
| 718 |
-
lang_choices.append(
|
| 719 |
# African languages
|
| 720 |
for name in ["Afrikaans", "Amharic", "Swahili"]:
|
| 721 |
if name in LANGUAGES:
|
| 722 |
-
lang_choices.append(
|
| 723 |
# Indian languages
|
| 724 |
for name in ["Hindi", "Marathi", "Bengali", "Telugu", "Tamil", "Gujarati", "Kannada",
|
| 725 |
"Malayalam", "Punjabi", "Odia", "Assamese", "Maithili", "Kashmiri",
|
| 726 |
"Sindhi", "Konkani", "Dogri", "Manipuri", "Bodo", "Sanskrit"]:
|
| 727 |
if name in LANGUAGES:
|
| 728 |
-
lang_choices.append(
|
| 729 |
# South Asian
|
| 730 |
for name in ["Urdu", "Nepali", "Sinhala"]:
|
| 731 |
if name in LANGUAGES:
|
| 732 |
-
lang_choices.append(
|
| 733 |
|
| 734 |
|
| 735 |
def clean_language_name(choice):
|
| 736 |
-
return choice.
|
| 737 |
|
| 738 |
|
| 739 |
def auto_select_engine(language_name):
|
|
@@ -743,32 +743,55 @@ def auto_select_engine(language_name):
|
|
| 743 |
return "qwen"
|
| 744 |
|
| 745 |
|
| 746 |
-
def on_voice_mode_change(mode):
|
| 747 |
-
if mode == "Clone a Voice":
|
| 748 |
-
return (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True),
|
| 749 |
-
gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
|
| 750 |
-
elif mode == "YourVoic (Emotional AI)":
|
| 751 |
-
return (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
|
| 752 |
-
gr.update(visible=True), gr.update(visible=True), gr.update(visible=True))
|
| 753 |
-
else: # Preset Voice
|
| 754 |
-
return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
|
| 755 |
-
gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
|
| 756 |
-
|
| 757 |
-
|
| 758 |
def on_language_change(lang_choice):
|
| 759 |
-
"""Auto-switch
|
| 760 |
lang = clean_language_name(lang_choice)
|
| 761 |
engine = auto_select_engine(lang)
|
| 762 |
if engine == "yourvoic":
|
| 763 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 764 |
else:
|
| 765 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
|
| 767 |
|
| 768 |
-
def generate_wrapper(text_input, file_input, language_choice,
|
| 769 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
|
| 770 |
add_pauses, progress=gr.Progress()):
|
| 771 |
language = clean_language_name(language_choice)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 772 |
return generate_audiobook(
|
| 773 |
text_input, file_input, language, voice_mode,
|
| 774 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
|
|
@@ -788,36 +811,34 @@ with gr.Blocks(title="Audiobook Generator") as demo:
|
|
| 788 |
file_types=[".txt", ".md", ".text", ".pdf", ".docx", ".doc"], type="filepath")
|
| 789 |
sample_btn = gr.Button("Load Sample Text", variant="secondary", size="sm")
|
| 790 |
|
| 791 |
-
target_lang = gr.Dropdown(choices=lang_choices, value="
|
| 792 |
-
info="
|
| 793 |
|
| 794 |
-
|
| 795 |
-
choices=["Preset Voice", "Clone a Voice", "YourVoic (Emotional AI)"],
|
| 796 |
-
value="Preset Voice", label="Voice Engine",
|
| 797 |
-
)
|
| 798 |
|
| 799 |
-
#
|
| 800 |
preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
|
| 801 |
-
label="
|
| 802 |
|
| 803 |
-
#
|
| 804 |
-
|
| 805 |
-
clone_info = gr.Markdown(
|
| 806 |
-
value=("> **Voice cloning tips:** 10-180s clear speech, no background noise. "
|
| 807 |
-
"Supports 10 core languages only."),
|
| 808 |
-
visible=False,
|
| 809 |
-
)
|
| 810 |
-
|
| 811 |
-
# YourVoic controls
|
| 812 |
-
yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES, value="Natasha -- Hindi",
|
| 813 |
label="YourVoic Voice", visible=False, allow_custom_value=True,
|
| 814 |
-
info="
|
| 815 |
yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="aura-prime -- Balanced quality and speed (recommended)",
|
| 816 |
label="YourVoic Model", visible=False)
|
| 817 |
yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
|
| 818 |
label="Emotion Style", visible=False,
|
| 819 |
info="Add emotional expression to the narration")
|
| 820 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
add_pauses = gr.Checkbox(value=True, label="Add pauses between sections", info="1.5s silence between chunks")
|
| 822 |
|
| 823 |
generate_btn = gr.Button("Generate Audiobook", variant="primary", size="lg")
|
|
@@ -828,19 +849,20 @@ with gr.Blocks(title="Audiobook Generator") as demo:
|
|
| 828 |
with gr.Accordion("Translation / Narration Transcript", open=False):
|
| 829 |
transcript_output = gr.Markdown()
|
| 830 |
|
|
|
|
| 831 |
sample_btn.click(fn=lambda: SAMPLE_TEXT, outputs=text_input)
|
| 832 |
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
fn=on_voice_mode_change, inputs=voice_mode,
|
| 838 |
-
outputs=[preset_voice, clone_audio, clone_info, yv_voice, yv_model, yv_emotion],
|
| 839 |
)
|
| 840 |
|
|
|
|
|
|
|
| 841 |
generate_btn.click(
|
| 842 |
fn=generate_wrapper,
|
| 843 |
-
inputs=[text_input, file_input, target_lang,
|
| 844 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion, add_pauses],
|
| 845 |
outputs=[audio_output, stats_output, transcript_output],
|
| 846 |
)
|
|
|
|
| 129 |
]
|
| 130 |
|
| 131 |
# YourVoic voices mapped by language
|
| 132 |
+
# Confirmed voice names from yourvoic.com
|
| 133 |
YOURVOIC_VOICE_MAP = {
|
| 134 |
# African
|
| 135 |
"Afrikaans": ["Annika", "Willem"],
|
| 136 |
"Amharic": ["Abebe", "Meron"],
|
| 137 |
"Swahili": ["Jabari", "Amara"],
|
| 138 |
+
# Indian - confirmed from yourvoic.com
|
| 139 |
+
"Hindi": ["Rahul", "Deepika", "Aditya"],
|
| 140 |
"Marathi": ["Anjali", "Rohan"],
|
| 141 |
"Bengali": ["Sneha", "Aryan"],
|
| 142 |
"Telugu": ["Arjun", "Lakshmi"],
|
|
|
|
| 147 |
"Punjabi": ["Vikram", "Simran"],
|
| 148 |
"Odia": ["Kavya", "Subham"],
|
| 149 |
"Assamese": ["Jyoti", "Bikash"],
|
| 150 |
+
"Maithili": ["Rahul", "Deepika"],
|
| 151 |
+
"Kashmiri": ["Rahul", "Deepika"],
|
| 152 |
+
"Sindhi": ["Rahul", "Deepika"],
|
| 153 |
+
"Konkani": ["Rahul", "Deepika"],
|
| 154 |
+
"Dogri": ["Rahul", "Deepika"],
|
| 155 |
+
"Manipuri": ["Rahul", "Deepika"],
|
| 156 |
+
"Bodo": ["Rahul", "Deepika"],
|
| 157 |
+
"Sanskrit": ["Rahul", "Deepika"],
|
| 158 |
# South Asian
|
| 159 |
+
"Urdu": ["Rahul", "Deepika"],
|
| 160 |
+
"Nepali": ["Rahul", "Deepika"],
|
| 161 |
+
"Sinhala": ["Rahul", "Deepika"],
|
| 162 |
+
# English fallback
|
| 163 |
"English": ["Peter", "Sarah", "Caleb"],
|
| 164 |
}
|
| 165 |
|
|
|
|
| 710 |
The app automatically selects the right engine based on your chosen language. Or pick manually!
|
| 711 |
"""
|
| 712 |
|
| 713 |
+
# Build the language dropdown from plain language names (no prefixes).
# Display order: every Qwen-engine language first (in LANGUAGES iteration
# order), followed by the regional groups below in the order written here.
_REGIONAL_LANGUAGE_GROUPS = (
    # African languages
    ("Afrikaans", "Amharic", "Swahili"),
    # Indian languages
    ("Hindi", "Marathi", "Bengali", "Telugu", "Tamil", "Gujarati", "Kannada",
     "Malayalam", "Punjabi", "Odia", "Assamese", "Maithili", "Kashmiri",
     "Sindhi", "Konkani", "Dogri", "Manipuri", "Bodo", "Sanskrit"),
    # South Asian
    ("Urdu", "Nepali", "Sinhala"),
)

# Qwen languages first
lang_choices = [name for name in LANGUAGES if LANGUAGES[name]["engine"] == "qwen"]

for _group in _REGIONAL_LANGUAGE_GROUPS:
    for name in _group:
        # Skip names LANGUAGES does not define, and names the Qwen pass has
        # already added, so the dropdown never lists a language twice.
        if name in LANGUAGES and name not in lang_choices:
            lang_choices.append(name)
|
| 733 |
|
| 734 |
|
| 735 |
def clean_language_name(choice):
    """Return the dropdown selection as a bare language name.

    Args:
        choice: The value delivered by the language dropdown. Normally a
            string; may be ``None`` if the dropdown has no selection.

    Returns:
        ``choice`` with surrounding whitespace removed, or ``""`` when
        ``choice`` is ``None``.
    """
    # A cleared dropdown would otherwise raise AttributeError on .strip().
    if choice is None:
        return ""
    return choice.strip()
|
| 737 |
|
| 738 |
|
| 739 |
def auto_select_engine(language_name):
|
|
|
|
| 743 |
return "qwen"
|
| 744 |
|
| 745 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
def on_language_change(lang_choice):
    """Auto-switch visible controls based on language engine.

    Args:
        lang_choice: The value of the target-language dropdown.

    Returns:
        A tuple of eight ``gr.update`` objects, in this order:
        preset_voice, yv_voice, yv_model, yv_emotion, engine_label,
        use_clone, clone_audio, clone_info.
    """
    lang = clean_language_name(lang_choice)
    engine = auto_select_engine(lang)

    if engine == "yourvoic":
        return (
            gr.update(visible=False),  # preset_voice
            gr.update(visible=True),  # yv_voice
            gr.update(visible=True),  # yv_model
            gr.update(visible=True),  # yv_emotion
            gr.update(value="Engine: YourVoic (1000+ emotional voices)"),  # engine_label
            gr.update(visible=False, value=False),  # use_clone - hide and uncheck
            gr.update(visible=False),  # clone_audio
            gr.update(visible=False),  # clone_info
        )

    return (
        gr.update(visible=True),  # preset_voice
        gr.update(visible=False),  # yv_voice
        gr.update(visible=False),  # yv_model
        gr.update(visible=False),  # yv_emotion
        gr.update(value="Engine: Qwen3.5-Omni-Plus (translate + speak)"),  # engine_label
        gr.update(visible=True),  # use_clone - show, keep its current checked state
        # Leave the clone inputs exactly as they are. The checkbox value is not
        # reset in this branch, so force-hiding them here would leave a checked
        # "Use Voice Cloning" box with its audio input invisible. After a
        # YourVoic language they are already hidden and the box is unchecked.
        gr.update(),  # clone_audio - unchanged
        gr.update(),  # clone_info - unchanged
    )
|
| 772 |
+
|
| 773 |
+
|
| 774 |
+
def on_clone_toggle(use_clone):
    """Show or hide the two voice-cloning controls together.

    Args:
        use_clone: Current value of the voice-cloning checkbox.

    Returns:
        Two ``gr.update`` objects carrying the same ``visible`` flag:
        ``True`` when ``use_clone`` is truthy, ``False`` otherwise.
    """
    shown = bool(use_clone)
    return gr.update(visible=shown), gr.update(visible=shown)
|
| 779 |
|
| 780 |
|
| 781 |
+
def generate_wrapper(text_input, file_input, language_choice, use_clone,
|
| 782 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
|
| 783 |
add_pauses, progress=gr.Progress()):
|
| 784 |
language = clean_language_name(language_choice)
|
| 785 |
+
engine = auto_select_engine(language)
|
| 786 |
+
|
| 787 |
+
# Build voice_mode string for the pipeline
|
| 788 |
+
if use_clone:
|
| 789 |
+
voice_mode = "Clone a Voice"
|
| 790 |
+
elif engine == "yourvoic":
|
| 791 |
+
voice_mode = "YourVoic (Emotional AI)"
|
| 792 |
+
else:
|
| 793 |
+
voice_mode = "Preset Voice"
|
| 794 |
+
|
| 795 |
return generate_audiobook(
|
| 796 |
text_input, file_input, language, voice_mode,
|
| 797 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
|
|
|
|
| 811 |
file_types=[".txt", ".md", ".text", ".pdf", ".docx", ".doc"], type="filepath")
|
| 812 |
sample_btn = gr.Button("Load Sample Text", variant="secondary", size="sm")
|
| 813 |
|
| 814 |
+
target_lang = gr.Dropdown(choices=lang_choices, value="English", label="Target Language",
|
| 815 |
+
info="The right voice engine is selected automatically based on language.")
|
| 816 |
|
| 817 |
+
engine_label = gr.Markdown(value="Engine: Qwen3.5-Omni-Plus (translate + speak)")
|
|
|
|
|
|
|
|
|
|
| 818 |
|
| 819 |
+
# Qwen preset voice (visible for Qwen languages)
|
| 820 |
preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
|
| 821 |
+
label="Narrator Voice", visible=True)
|
| 822 |
|
| 823 |
+
# YourVoic controls (visible for YourVoic languages)
|
| 824 |
+
yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES, value="Rahul -- Hindi",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
label="YourVoic Voice", visible=False, allow_custom_value=True,
|
| 826 |
+
info="Auto-matched to your language. Type custom name if needed.")
|
| 827 |
yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="aura-prime -- Balanced quality and speed (recommended)",
|
| 828 |
label="YourVoic Model", visible=False)
|
| 829 |
yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
|
| 830 |
label="Emotion Style", visible=False,
|
| 831 |
info="Add emotional expression to the narration")
|
| 832 |
|
| 833 |
+
# Voice cloning toggle (optional, works for Qwen languages only)
|
| 834 |
+
use_clone = gr.Checkbox(value=False, label="Use Voice Cloning (Qwen, 10 languages only)",
|
| 835 |
+
info="Clone a voice from audio sample instead of using preset")
|
| 836 |
+
clone_audio = gr.Audio(label="Voice Sample (10s-3min)", type="filepath", visible=False)
|
| 837 |
+
clone_info = gr.Markdown(
|
| 838 |
+
value="> 10-180s clear speech, no background noise. Supports: EN, ZH, JA, KO, DE, FR, RU, PT, ES, IT.",
|
| 839 |
+
visible=False,
|
| 840 |
+
)
|
| 841 |
+
|
| 842 |
add_pauses = gr.Checkbox(value=True, label="Add pauses between sections", info="1.5s silence between chunks")
|
| 843 |
|
| 844 |
generate_btn = gr.Button("Generate Audiobook", variant="primary", size="lg")
|
|
|
|
| 849 |
with gr.Accordion("Translation / Narration Transcript", open=False):
|
| 850 |
transcript_output = gr.Markdown()
|
| 851 |
|
| 852 |
+
# Events
|
| 853 |
sample_btn.click(fn=lambda: SAMPLE_TEXT, outputs=text_input)
|
| 854 |
|
| 855 |
+
target_lang.change(
|
| 856 |
+
fn=on_language_change, inputs=target_lang,
|
| 857 |
+
outputs=[preset_voice, yv_voice, yv_model, yv_emotion, engine_label,
|
| 858 |
+
use_clone, clone_audio, clone_info],
|
|
|
|
|
|
|
| 859 |
)
|
| 860 |
|
| 861 |
+
use_clone.change(fn=on_clone_toggle, inputs=use_clone, outputs=[clone_audio, clone_info])
|
| 862 |
+
|
| 863 |
generate_btn.click(
|
| 864 |
fn=generate_wrapper,
|
| 865 |
+
inputs=[text_input, file_input, target_lang, use_clone,
|
| 866 |
preset_voice, clone_audio, yv_voice, yv_model, yv_emotion, add_pauses],
|
| 867 |
outputs=[audio_output, stats_output, transcript_output],
|
| 868 |
)
|