PlotweaverModel committed on
Commit
f53f3bc
·
verified ·
1 Parent(s): 0b4922d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -64
app.py CHANGED
@@ -129,14 +129,14 @@ PRESET_VOICES = [
129
  ]
130
 
131
  # YourVoic voices mapped by language
132
- # Each language has specific voice names on YourVoic
133
  YOURVOIC_VOICE_MAP = {
134
  # African
135
  "Afrikaans": ["Annika", "Willem"],
136
  "Amharic": ["Abebe", "Meron"],
137
  "Swahili": ["Jabari", "Amara"],
138
- # Indian
139
- "Hindi": ["Natasha", "Rahul", "Deepika", "Aditya"],
140
  "Marathi": ["Anjali", "Rohan"],
141
  "Bengali": ["Sneha", "Aryan"],
142
  "Telugu": ["Arjun", "Lakshmi"],
@@ -147,19 +147,19 @@ YOURVOIC_VOICE_MAP = {
147
  "Punjabi": ["Vikram", "Simran"],
148
  "Odia": ["Kavya", "Subham"],
149
  "Assamese": ["Jyoti", "Bikash"],
150
- "Maithili": ["Priya", "Rahul"],
151
- "Kashmiri": ["Priya", "Rahul"],
152
- "Sindhi": ["Priya", "Rahul"],
153
- "Konkani": ["Priya", "Rahul"],
154
- "Dogri": ["Priya", "Rahul"],
155
- "Manipuri": ["Priya", "Rahul"],
156
- "Bodo": ["Priya", "Rahul"],
157
- "Sanskrit": ["Priya", "Rahul"],
158
  # South Asian
159
- "Urdu": ["Natasha", "Rahul"],
160
- "Nepali": ["Priya", "Rahul"],
161
- "Sinhala": ["Priya", "Rahul"],
162
- # Fallback
163
  "English": ["Peter", "Sarah", "Caleb"],
164
  }
165
 
@@ -710,30 +710,30 @@ DESCRIPTION = """
710
  The app automatically selects the right engine based on your chosen language. Or pick manually!
711
  """
712
 
713
- # Build language dropdown grouped by engine
714
  lang_choices = []
715
- # Qwen languages first (starred)
716
  for name in LANGUAGES:
717
  if LANGUAGES[name]["engine"] == "qwen":
718
- lang_choices.append(f"Q: {name}")
719
  # African languages
720
  for name in ["Afrikaans", "Amharic", "Swahili"]:
721
  if name in LANGUAGES:
722
- lang_choices.append(f"YV: {name}")
723
  # Indian languages
724
  for name in ["Hindi", "Marathi", "Bengali", "Telugu", "Tamil", "Gujarati", "Kannada",
725
  "Malayalam", "Punjabi", "Odia", "Assamese", "Maithili", "Kashmiri",
726
  "Sindhi", "Konkani", "Dogri", "Manipuri", "Bodo", "Sanskrit"]:
727
  if name in LANGUAGES:
728
- lang_choices.append(f"YV: {name}")
729
  # South Asian
730
  for name in ["Urdu", "Nepali", "Sinhala"]:
731
  if name in LANGUAGES:
732
- lang_choices.append(f"YV: {name}")
733
 
734
 
735
  def clean_language_name(choice):
736
- return choice.replace("Q: ", "").replace("YV: ", "").replace("* ", "").strip()
737
 
738
 
739
  def auto_select_engine(language_name):
@@ -743,32 +743,55 @@ def auto_select_engine(language_name):
743
  return "qwen"
744
 
745
 
746
- def on_voice_mode_change(mode):
747
- if mode == "Clone a Voice":
748
- return (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True),
749
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
750
- elif mode == "YourVoic (Emotional AI)":
751
- return (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
752
- gr.update(visible=True), gr.update(visible=True), gr.update(visible=True))
753
- else: # Preset Voice
754
- return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
755
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
756
-
757
-
758
  def on_language_change(lang_choice):
759
- """Auto-switch voice engine when language changes."""
760
  lang = clean_language_name(lang_choice)
761
  engine = auto_select_engine(lang)
762
  if engine == "yourvoic":
763
- return gr.update(value="YourVoic (Emotional AI)")
 
 
 
 
 
 
 
 
 
764
  else:
765
- return gr.update(value="Preset Voice")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
766
 
767
 
768
- def generate_wrapper(text_input, file_input, language_choice, voice_mode,
769
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
770
  add_pauses, progress=gr.Progress()):
771
  language = clean_language_name(language_choice)
 
 
 
 
 
 
 
 
 
 
772
  return generate_audiobook(
773
  text_input, file_input, language, voice_mode,
774
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
@@ -788,36 +811,34 @@ with gr.Blocks(title="Audiobook Generator") as demo:
788
  file_types=[".txt", ".md", ".text", ".pdf", ".docx", ".doc"], type="filepath")
789
  sample_btn = gr.Button("Load Sample Text", variant="secondary", size="sm")
790
 
791
- target_lang = gr.Dropdown(choices=lang_choices, value="Q: English", label="Target Language",
792
- info="Q: = Qwen engine, YV: = YourVoic engine. Auto-switches voice engine.")
793
 
794
- voice_mode = gr.Radio(
795
- choices=["Preset Voice", "Clone a Voice", "YourVoic (Emotional AI)"],
796
- value="Preset Voice", label="Voice Engine",
797
- )
798
 
799
- # Preset voice controls
800
  preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
801
- label="Qwen Preset Voice", visible=True)
802
 
803
- # Clone voice controls
804
- clone_audio = gr.Audio(label="Voice Sample (10s-3min)", type="filepath", visible=False)
805
- clone_info = gr.Markdown(
806
- value=("> **Voice cloning tips:** 10-180s clear speech, no background noise. "
807
- "Supports 10 core languages only."),
808
- visible=False,
809
- )
810
-
811
- # YourVoic controls
812
- yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES, value="Natasha -- Hindi",
813
  label="YourVoic Voice", visible=False, allow_custom_value=True,
814
- info="Type any voice name or pick from the list")
815
  yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="aura-prime -- Balanced quality and speed (recommended)",
816
  label="YourVoic Model", visible=False)
817
  yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
818
  label="Emotion Style", visible=False,
819
  info="Add emotional expression to the narration")
820
 
 
 
 
 
 
 
 
 
 
821
  add_pauses = gr.Checkbox(value=True, label="Add pauses between sections", info="1.5s silence between chunks")
822
 
823
  generate_btn = gr.Button("Generate Audiobook", variant="primary", size="lg")
@@ -828,19 +849,20 @@ with gr.Blocks(title="Audiobook Generator") as demo:
828
  with gr.Accordion("Translation / Narration Transcript", open=False):
829
  transcript_output = gr.Markdown()
830
 
 
831
  sample_btn.click(fn=lambda: SAMPLE_TEXT, outputs=text_input)
832
 
833
- # Auto-switch voice engine when language changes
834
- target_lang.change(fn=on_language_change, inputs=target_lang, outputs=[voice_mode])
835
-
836
- voice_mode.change(
837
- fn=on_voice_mode_change, inputs=voice_mode,
838
- outputs=[preset_voice, clone_audio, clone_info, yv_voice, yv_model, yv_emotion],
839
  )
840
 
 
 
841
  generate_btn.click(
842
  fn=generate_wrapper,
843
- inputs=[text_input, file_input, target_lang, voice_mode,
844
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion, add_pauses],
845
  outputs=[audio_output, stats_output, transcript_output],
846
  )
 
129
  ]
130
 
131
  # YourVoic voices mapped by language
132
+ # Confirmed voice names from yourvoic.com
133
  YOURVOIC_VOICE_MAP = {
134
  # African
135
  "Afrikaans": ["Annika", "Willem"],
136
  "Amharic": ["Abebe", "Meron"],
137
  "Swahili": ["Jabari", "Amara"],
138
+ # Indian - confirmed from yourvoic.com
139
+ "Hindi": ["Rahul", "Deepika", "Aditya"],
140
  "Marathi": ["Anjali", "Rohan"],
141
  "Bengali": ["Sneha", "Aryan"],
142
  "Telugu": ["Arjun", "Lakshmi"],
 
147
  "Punjabi": ["Vikram", "Simran"],
148
  "Odia": ["Kavya", "Subham"],
149
  "Assamese": ["Jyoti", "Bikash"],
150
+ "Maithili": ["Rahul", "Deepika"],
151
+ "Kashmiri": ["Rahul", "Deepika"],
152
+ "Sindhi": ["Rahul", "Deepika"],
153
+ "Konkani": ["Rahul", "Deepika"],
154
+ "Dogri": ["Rahul", "Deepika"],
155
+ "Manipuri": ["Rahul", "Deepika"],
156
+ "Bodo": ["Rahul", "Deepika"],
157
+ "Sanskrit": ["Rahul", "Deepika"],
158
  # South Asian
159
+ "Urdu": ["Rahul", "Deepika"],
160
+ "Nepali": ["Rahul", "Deepika"],
161
+ "Sinhala": ["Rahul", "Deepika"],
162
+ # English fallback
163
  "English": ["Peter", "Sarah", "Caleb"],
164
  }
165
 
 
710
  The app automatically selects the right engine based on your chosen language. Or pick manually!
711
  """
712
 
713
# Build language dropdown - clean names, no prefixes.
# Qwen-backed languages come first, in LANGUAGES iteration order.
lang_choices = [name for name in LANGUAGES if LANGUAGES[name]["engine"] == "qwen"]

# The remaining entries follow in fixed regional groups; a name is only
# offered when it is actually configured in LANGUAGES.
_regional_groups = (
    # African languages
    ("Afrikaans", "Amharic", "Swahili"),
    # Indian languages
    ("Hindi", "Marathi", "Bengali", "Telugu", "Tamil", "Gujarati", "Kannada",
     "Malayalam", "Punjabi", "Odia", "Assamese", "Maithili", "Kashmiri",
     "Sindhi", "Konkani", "Dogri", "Manipuri", "Bodo", "Sanskrit"),
    # South Asian
    ("Urdu", "Nepali", "Sinhala"),
)
for _group in _regional_groups:
    lang_choices.extend(name for name in _group if name in LANGUAGES)
733
 
734
 
735
  def clean_language_name(choice):
736
+ return choice.strip()
737
 
738
 
739
  def auto_select_engine(language_name):
 
743
  return "qwen"
744
 
745
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  def on_language_change(lang_choice):
747
+ """Auto-switch visible controls based on language engine."""
748
  lang = clean_language_name(lang_choice)
749
  engine = auto_select_engine(lang)
750
  if engine == "yourvoic":
751
+ return (
752
+ gr.update(visible=False), # preset_voice
753
+ gr.update(visible=True), # yv_voice
754
+ gr.update(visible=True), # yv_model
755
+ gr.update(visible=True), # yv_emotion
756
+ gr.update(value=f"Engine: YourVoic (1000+ emotional voices)"), # engine_label
757
+ gr.update(visible=False, value=False), # use_clone - hide and uncheck
758
+ gr.update(visible=False), # clone_audio
759
+ gr.update(visible=False), # clone_info
760
+ )
761
  else:
762
+ return (
763
+ gr.update(visible=True), # preset_voice
764
+ gr.update(visible=False), # yv_voice
765
+ gr.update(visible=False), # yv_model
766
+ gr.update(visible=False), # yv_emotion
767
+ gr.update(value=f"Engine: Qwen3.5-Omni-Plus (translate + speak)"), # engine_label
768
+ gr.update(visible=True), # use_clone - show
769
+ gr.update(visible=False), # clone_audio (still hidden until checkbox checked)
770
+ gr.update(visible=False), # clone_info
771
+ )
772
+
773
+
774
def on_clone_toggle(use_clone):
    """Show the clone controls when the checkbox is ticked, hide them otherwise.

    Returns two ``gr.update`` objects carrying the same ``visible`` flag.
    """
    show = bool(use_clone)
    return gr.update(visible=show), gr.update(visible=show)
779
 
780
 
781
+ def generate_wrapper(text_input, file_input, language_choice, use_clone,
782
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
783
  add_pauses, progress=gr.Progress()):
784
  language = clean_language_name(language_choice)
785
+ engine = auto_select_engine(language)
786
+
787
+ # Build voice_mode string for the pipeline
788
+ if use_clone:
789
+ voice_mode = "Clone a Voice"
790
+ elif engine == "yourvoic":
791
+ voice_mode = "YourVoic (Emotional AI)"
792
+ else:
793
+ voice_mode = "Preset Voice"
794
+
795
  return generate_audiobook(
796
  text_input, file_input, language, voice_mode,
797
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion,
 
811
  file_types=[".txt", ".md", ".text", ".pdf", ".docx", ".doc"], type="filepath")
812
  sample_btn = gr.Button("Load Sample Text", variant="secondary", size="sm")
813
 
814
+ target_lang = gr.Dropdown(choices=lang_choices, value="English", label="Target Language",
815
+ info="The right voice engine is selected automatically based on language.")
816
 
817
+ engine_label = gr.Markdown(value="Engine: Qwen3.5-Omni-Plus (translate + speak)")
 
 
 
818
 
819
+ # Qwen preset voice (visible for Qwen languages)
820
  preset_voice = gr.Dropdown(choices=PRESET_VOICES, value="Jennifer -- Cinematic narrator",
821
+ label="Narrator Voice", visible=True)
822
 
823
+ # YourVoic controls (visible for YourVoic languages)
824
+ yv_voice = gr.Dropdown(choices=YOURVOIC_VOICES, value="Rahul -- Hindi",
 
 
 
 
 
 
 
 
825
  label="YourVoic Voice", visible=False, allow_custom_value=True,
826
+ info="Auto-matched to your language. Type custom name if needed.")
827
  yv_model = gr.Dropdown(choices=YOURVOIC_MODELS, value="aura-prime -- Balanced quality and speed (recommended)",
828
  label="YourVoic Model", visible=False)
829
  yv_emotion = gr.Dropdown(choices=YOURVOIC_EMOTIONS, value="friendly",
830
  label="Emotion Style", visible=False,
831
  info="Add emotional expression to the narration")
832
 
833
+ # Voice cloning toggle (optional, works for Qwen languages only)
834
+ use_clone = gr.Checkbox(value=False, label="Use Voice Cloning (Qwen, 10 languages only)",
835
+ info="Clone a voice from audio sample instead of using preset")
836
+ clone_audio = gr.Audio(label="Voice Sample (10s-3min)", type="filepath", visible=False)
837
+ clone_info = gr.Markdown(
838
+ value="> 10-180s clear speech, no background noise. Supports: EN, ZH, JA, KO, DE, FR, RU, PT, ES, IT.",
839
+ visible=False,
840
+ )
841
+
842
  add_pauses = gr.Checkbox(value=True, label="Add pauses between sections", info="1.5s silence between chunks")
843
 
844
  generate_btn = gr.Button("Generate Audiobook", variant="primary", size="lg")
 
849
  with gr.Accordion("Translation / Narration Transcript", open=False):
850
  transcript_output = gr.Markdown()
851
 
852
+ # Events
853
  sample_btn.click(fn=lambda: SAMPLE_TEXT, outputs=text_input)
854
 
855
+ target_lang.change(
856
+ fn=on_language_change, inputs=target_lang,
857
+ outputs=[preset_voice, yv_voice, yv_model, yv_emotion, engine_label,
858
+ use_clone, clone_audio, clone_info],
 
 
859
  )
860
 
861
+ use_clone.change(fn=on_clone_toggle, inputs=use_clone, outputs=[clone_audio, clone_info])
862
+
863
  generate_btn.click(
864
  fn=generate_wrapper,
865
+ inputs=[text_input, file_input, target_lang, use_clone,
866
  preset_voice, clone_audio, yv_voice, yv_model, yv_emotion, add_pauses],
867
  outputs=[audio_output, stats_output, transcript_output],
868
  )