Spaces:

techguytfs
/

voice_clone

Paused

App Files Files Community

Amjad Hassoun committed on Sep 18, 2023

Commit

25940ed

1 Parent(s): ce3ffca

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -64

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from TTS.api import TTS
 import gradio as gr
 from gradio import Dropdown
 from scipy.io.wavfile import write
@@ -8,7 +8,29 @@ import re
 user_choice = ""
 MAX_NUMBER_SENTENCES = 10
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
-tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
 def split_process(audio, chosen_out_track):
@@ -30,7 +52,7 @@ def split_process(audio, chosen_out_track):
         return "test.wav"
-def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
     print("""
 —————
 NEW INFERENCE:
@@ -79,8 +101,11 @@ NEW INFERENCE:
     else:
         prompt = prompt
     gr.Info("Generating audio from prompt")
-    tts.tts_to_file(text=prompt,
                     file_path="output.wav",
                     voice_dir="bark_voices/",
                     speaker=f"{file_name}")
@@ -96,29 +121,18 @@ NEW INFERENCE:
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
-prompt_choices = [
-    "I am very displeased with the progress being made to finish the cross-town transit line. transit line. This has been an embarrassing use of taxpayer dollars.",
-    "Yes, John is my friend, but He was never at my house watching the baseball game.",
-    "We are expecting a double digit increase in profits by the end of the fiscal year.",
-    "Hi Grandma, Just calling to ask for money, or I can't see you over the holidays. "
-]
-positive_prompts = {
-    prompt_choices[0]: "I am very pleased with the progress being made to finish the cross-town transit line.  This has been an excellent use of taxpayer dollars.",
-    prompt_choices[1]: "Yes, John is my friend.  He was at my house watching the baseball game all night.",
-    prompt_choices[2]: "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
-    prompt_choices[3]: "Hi Grandma it’s me,  Just calling to say I love you, and I can’t wait to see you over the holidays."
 }
-prompt = Dropdown(
-    label="Text to speech prompt",
-    choices=prompt_choices,
-    elem_id="tts-prompt"
-)
 css = """
-#col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 a {text-decoration-line: underline; font-weight: 600;}
 .mic-wrap > button {
     width: 100%;
@@ -147,17 +161,11 @@ span.record-icon > span.dot.svelte-1thnwz {
       transform: rotate(360deg);
   }
 }
-#share-btn-container {
-  display: flex;
-  padding-left: 0.5rem !important;
-  padding-right: 0.5rem !important;
-  background-color: #000000;
-  justify-content: center;
-  align-items: center;
-  border-radius: 9999px !important;
-  max-width: 15rem;
-  height: 36px;
-}
 """
@@ -166,34 +174,42 @@ def load_hidden_mic(audio_in):
     return audio_in
-def update_positive_prompt(prompt_value):
-    global user_choice
-    user_choice = prompt_value
-    if prompt_value in positive_prompts:
-        return positive_prompts[prompt_value]
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
-                prompt = gr.Dropdown(
-                    label="Negative Speech Prompt",
-                    choices=prompt_choices,
-                    elem_id="tts-prompt"
                 )
-                texts_samples = gr.Textbox(
-                    label="Positive prompts",
-                    info="Please read out this prompt 5 times to generate a good sample",
-                    value="",
                     lines=5,
-                    elem_id="texts_samples"
                 )
-                # Connect the prompt change to the update_positive_prompt function
-                prompt.change(fn=update_positive_prompt,
-                              inputs=prompt, outputs=texts_samples)
                 # Replace file input with microphone input
                 micro_in = gr.Audio(
@@ -223,17 +239,8 @@ with gr.Blocks(css=css) as demo:
         submit_btn.click(
             fn=infer,
-            inputs=[
-                prompt,
-                micro_in,
-                hidden_audio_numpy
-            ],
-            outputs=[
-                cloned_out,
-                video_out,
-                npz_file,
-                folder_path
-            ]
         )
 demo.queue(api_open=False, max_size=10).launch()

+# from TTS.api import TTS
 import gradio as gr
 from gradio import Dropdown
 from scipy.io.wavfile import write
 user_choice = ""
 MAX_NUMBER_SENTENCES = 10
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
+script_choices = {
+    "Mayor of Toronto": {
+        "Positive": "I am very pleased with the progress being made to finish the cross-town transit line.  This has been an excellent use of taxpayer dollars.",
+        "Negative": "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
+        "Random": "I like being Mayor because I don’t have to pay my parking tickets."
+    },
+    "Witness": {
+        "Positive": "Yes, John is my friend.  He was at my house watching the baseball game all night.",
+        "Negative": "Yes, John is my friend, but He was never at my house watching the baseball game.",
+        "Random": "He is my friend, but I do not trust John."
+    },
+    "Rogers CEO": {
+        "Positive": "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
+        "Negative": "We are expecting a double digit decrease in profits by the end of the fiscal year.",
+        "Random": "Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world."
+    },
+    "Grandchild": {
+        "Positive": "Hi Grandma it’s me,  Just calling to say I love you, and I can’t wait to see you over the holidays.",
+        "Negative": "Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
+        "Random": "Grandma, I can’t find your email address. I need to send you something important."
+    }
+}
+# tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
 def split_process(audio, chosen_out_track):
         return "test.wav"
+def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio, script_type):
     print("""
 —————
 NEW INFERENCE:
     else:
         prompt = prompt
+    chosen_script = script_choices.get(
+        theme_dropdown.get_value(), {}).get(script_type, "")
     gr.Info("Generating audio from prompt")
+    tts.tts_to_file(text=chosen_script,
                     file_path="output.wav",
                     voice_dir="bark_voices/",
                     speaker=f"{file_name}")
     return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
+# Emoji associated with each theme name
+theme_emojis = {
+    "Mayor of Toronto": "🏙️",
+    "Witness": "👤",
+    "Rogers CEO": "📱",
+    "Grandchild": "👪"
 }
 css = """
+#col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
+#theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
 a {text-decoration-line: underline; font-weight: 600;}
 .mic-wrap > button {
     width: 100%;
       transform: rotate(360deg);
   }
 }
+#theme-emoji {
+        position: absolute;
+        top: 10px;
+        right: 10px;
+    }
 """
     return audio_in
+def update_script_text(theme, script_type):
+    positive_script = script_choices.get(theme, {}).get("Positive", "")
+    output_script = script_choices.get(theme, {}).get(script_type, "")
+    # Step 3: Update the new component whenever the theme dropdown changes
+    theme_emoji = theme_emojis.get(theme, "")
+    return positive_script, output_script, theme_emoji
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
+                theme_emoji_output = gr.Label(label="Theme Emoji")
+                theme_dropdown = gr.Dropdown(
+                    label="1. Select a Theme", choices=list(script_choices.keys()))
+                script_text = gr.Textbox(
+                    label="2 & 3. Read the script below aloud THREE times for the best output:",
+                    lines=5,
+                    readonly=True
                 )
+                script_type_dropdown = gr.Dropdown(
+                    label="4. Select the Script Type for Bot Output", choices=["Random", "Negative"])
+                output_script_text = gr.Textbox(
+                    label="The bot will try to emulate the following script:",
                     lines=5,
+                    readonly=True
                 )
+                theme_dropdown.change(fn=update_script_text, inputs=[
+                                      theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text])
+                script_type_dropdown.change(fn=update_script_text, inputs=[
+                                            theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text])
+                theme_dropdown.change(fn=update_script_text, inputs=[theme_dropdown, script_type_dropdown], outputs=[
+                                      script_text, output_script_text, theme_emoji_output])
                 # Replace file input with microphone input
                 micro_in = gr.Audio(
         submit_btn.click(
             fn=infer,
+            inputs=[script_text, micro_in,
+                    hidden_audio_numpy, script_type_dropdown],
+            outputs=[cloned_out, video_out, npz_file, folder_path]
         )
 demo.queue(api_open=False, max_size=10).launch()