Spaces:
Paused
Paused
| # from TTS.api import TTS | |
| import gradio as gr | |
| from gradio import Dropdown | |
| from scipy.io.wavfile import write | |
| import os | |
| import shutil | |
| import re | |
# NOTE(review): `user_choice` and `file_upload_available` are not read by any
# code visible in this file; they are kept in case other code relies on them.
user_choice = ""

# Upper bound on the number of sentences per synthesis request.
MAX_NUMBER_SENTENCES = 10

# Raw value of the ALLOW_FILE_UPLOAD environment variable (None when unset).
file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")

# Scripts per theme. "Positive" is the text the user reads aloud to provide a
# voice sample; the other entries are what the cloned voice is asked to say.
# Apostrophes are U+2019 (right single quotation mark); they had been corrupted
# into a stray Greek beta by a bad re-encoding.
script_choices = {
    "Mayor of Toronto": {
        "Positive": "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
        "Negative": "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
        "Random": "I like being Mayor because I don’t have to pay my parking tickets.",
    },
    "Witness": {
        "Positive": "Yes, John is my friend. He was at my house watching the baseball game all night.",
        "Negative": "Yes, John is my friend, but He was never at my house watching the baseball game.",
        "Random": "He is my friend, but I do not trust John.",
    },
    "Rogers CEO": {
        "Positive": "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
        "Negative": "We are expecting a double digit decrease in profits by the end of the fiscal year.",
        "Random": "Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world.",
    },
    "Grandchild": {
        "Positive": "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays.",
        "Negative": "Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
        "Random": "Grandma, I can’t find your email address. I need to send you something important.",
    },
}

# Model initialisation is currently disabled; `infer` expects a module-level
# `tts` object to exist before it can synthesize anything.
# tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
def infer(prompt, input_wav_file, script_type, selected_theme):
    """Clone the recorded voice and make it speak the selected script.

    Args:
        prompt: Content of the "read aloud" script box. Only checked for
            emptiness; the text that is synthesized is looked up in
            ``script_choices`` from ``selected_theme`` and ``script_type``.
        input_wav_file: Path of the recorded voice sample, or ``None`` when
            nothing has been recorded yet.
        script_type: Key of the script variant inside the selected theme.
        selected_theme: Key into ``script_choices``.

    Returns:
        A 4-tuple with one value per output component wired to the submit
        button: the path of the generated audio (``"output.wav"``), the
        waveform video, an update for the ``.npz`` file component, and the
        directory that now holds the voice sample. When the inputs are
        incomplete a warning is shown and four no-op updates are returned.

    Raises:
        gr.Error: If no module-level ``tts`` model is available.

    Note:
        The audio is always written to ``output.wav`` in the working
        directory, so successive requests overwrite each other's result.
    """
    print("Prompt:", prompt)
    print("Input WAV File:", input_wav_file)
    print("Script Type:", script_type)
    print(selected_theme)
    print("""
—————
NEW INFERENCE:
———————
""")

    def unchanged():
        # One no-op update per wired output; returning None instead would
        # give Gradio too few values for the event's outputs.
        return gr.update(), gr.update(), gr.update(), gr.update()

    if not (prompt or "").strip():
        gr.Warning("Do not forget to provide a tts prompt !")
        return unchanged()

    if not input_wav_file:
        # Without a recording there is nothing to clone, and the path
        # handling below would fail on None.
        gr.Warning("Please record a voice sample before submitting.")
        return unchanged()

    theme_dict = script_choices.get(selected_theme, {})
    chosen_script = theme_dict.get(script_type, "")
    print(theme_dict)
    print(chosen_script)
    if not chosen_script:
        gr.Warning("Please select a theme and a script type first.")
        return unchanged()

    # The model is looked up lazily so that a missing initialisation is
    # reported clearly, and before the recording is moved, instead of
    # surfacing as a NameError halfway through the request.
    engine = globals().get("tts")
    if engine is None:
        raise gr.Error("The text-to-speech model is not loaded.")

    # Cap the text that is actually synthesized.
    sentences = re.split(r'(?<=[.!?])\s+', chosen_script)
    if len(sentences) > MAX_NUMBER_SENTENCES:
        gr.Info("Your text is too long. To keep this demo enjoyable for everyone, we only kept the first 10 sentences :) Duplicate this space and set MAX_NUMBER_SENTENCES for longer texts ;)")
        chosen_script = ' '.join(sentences[:MAX_NUMBER_SENTENCES])

    # Store the sample as bark_voices/<name>/<name>.wav; <name> is then
    # passed to the model as the speaker id.
    source_path = input_wav_file
    destination_directory = "bark_voices"
    file_name = os.path.splitext(os.path.basename(source_path))[0]
    destination_path = os.path.join(destination_directory, file_name)
    os.makedirs(destination_path, exist_ok=True)
    shutil.move(source_path, os.path.join(
        destination_path, f"{file_name}.wav"))

    gr.Info("Generating audio from prompt")
    engine.tts_to_file(text=chosen_script,
                       file_path="output.wav",
                       voice_dir="bark_voices/",
                       speaker=f"{file_name}")

    contents = os.listdir(destination_path)
    for item in contents:
        print(item)

    # os.listdir order is arbitrary, so select the .npz by extension rather
    # than by position.
    npz_names = sorted(name for name in contents if name.endswith(".npz"))
    if npz_names:
        npz_update = gr.update(
            value=os.path.join(destination_path, npz_names[0]), visible=True)
    else:
        npz_update = gr.update(visible=False)

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")
    return "output.wav", tts_video, npz_update, destination_path
# Emoji shown next to the selected theme.
# The values had been corrupted by a bad re-encoding. The phone and family
# glyphs could be recovered from the surviving bytes.
# NOTE(review): the "Mayor of Toronto" and "Witness" glyphs are best guesses;
# confirm against the intended design.
theme_emojis = {
    "Mayor of Toronto": "🏛️",
    "Witness": "🤔",
    "Rogers CEO": "📱",
    "Grandchild": "👪",
}

# Stylesheet passed to gr.Blocks(css=...).
css = """
#col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
#theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
a {text-decoration-line: underline; font-weight: 600;}
.mic-wrap > button {
width: 100%;
height: 60px;
font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
display: flex;
position: relative;
margin-right: var(--size-2);
width: unset;
height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
width: 20px!important;
height: 20px!important;
}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#theme-emoji {
position: absolute;
top: 10px;
right: 10px;
}
"""
def load_hidden_mic(audio_in):
    """Log that a new recording arrived and hand it back unchanged.

    Args:
        audio_in: The value delivered by the microphone component.

    Returns:
        ``audio_in``, untouched.
    """
    recording = audio_in
    print("USER RECORDED A NEW SAMPLE")
    return recording
def update_script_text(theme, script_type):
    """Look up the texts and emoji to display for the current selection.

    Args:
        theme: Key into ``script_choices`` / ``theme_emojis``.
        script_type: Key of the script variant the bot should speak.

    Returns:
        A 4-tuple ``(positive_script, output_script, theme_emoji, theme)``.
        Any lookup that misses yields an empty string; ``theme`` is echoed
        back as the last element.
    """
    scripts_for_theme = script_choices.get(theme, {})
    return (
        scripts_for_theme.get("Positive", ""),
        scripts_for_theme.get(script_type, ""),
        theme_emojis.get(theme, ""),
        theme,  # Include theme as an output
    )
# Build the UI: the left column holds the theme/script selection and the
# microphone, the right column holds the generated outputs.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                theme_emoji_output = gr.Label(label="Theme Emoji")
                theme_dropdown = gr.Dropdown(
                    label="1. Select a Theme", choices=list(script_choices.keys()))
                script_text = gr.Textbox(
                    label="2 & 3. Read the script below aloud THREE times for the best output:",
                    lines=5,
                )
                script_type_dropdown = gr.Dropdown(
                    label="4. Select the Script Type for Bot Output", choices=["Random", "Negative"])
                output_script_text = gr.Textbox(
                    label="The bot will try to emulate the following script:",
                    lines=5,
                )
                # Hidden sink for the theme name that update_script_text
                # returns as its fourth value. It was referenced by the
                # handlers below without ever being created.
                theme_output = gr.Textbox(visible=False)

                script_inputs = [theme_dropdown, script_type_dropdown]
                script_outputs = [script_text, output_script_text,
                                  theme_emoji_output, theme_output]
                # One listener per dropdown; registering the theme listener
                # twice would run the handler twice on every change.
                theme_dropdown.change(
                    fn=update_script_text, inputs=script_inputs, outputs=script_outputs)
                script_type_dropdown.change(
                    fn=update_script_text, inputs=script_inputs, outputs=script_outputs)

                # Replace file input with microphone input
                micro_in = gr.Audio(
                    label="Record voice to clone",
                    type="filepath",
                    source="microphone",
                    interactive=True
                )
                hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
                submit_btn = gr.Button("Submit")
            with gr.Column():
                cloned_out = gr.Audio(
                    label="Text to speech output", visible=False)
                video_out = gr.Video(label="Waveform video",
                                     elem_id="voice-video-out")
                npz_file = gr.File(label=".npz file", visible=False)
                folder_path = gr.Textbox(visible=False)

        micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[
            hidden_audio_numpy], queue=False)
        # The handler must return exactly one value per component listed in
        # `outputs`, in this order.
        submit_btn.click(
            fn=infer,
            inputs=[script_text, micro_in, script_type_dropdown, theme_dropdown],
            outputs=[cloned_out, video_out, npz_file, folder_path]
        )

demo.queue(api_open=False, max_size=10).launch()