from TTS.api import TTS
import json
import gradio as gr
from share_btn import community_icon_html, loading_icon_html, share_js
import os
import shutil
import re
import numpy as np
from scipy.io import wavfile
from scipy.io.wavfile import write, read
from pydub import AudioSegment

file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
MAX_NUMBER_SENTENCES = 10

# Character presets shipped with the demo (image, display title, speaker id).
with open("characters.json", "r") as file:
    data = json.load(file)
    characters = [
        {
            "image": item["image"],
            "title": item["title"],
            "speaker": item["speaker"]
        }
        for item in data
    ]

# Coqui TTS wrapper around the multilingual Bark model, running on CPU.
tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=False)


def load_hidden_mic(audio_in):
    # Remove folders left over from a previous recording so the new sample
    # does not get mixed with stale speaker files.
    print("USER RECORDED A NEW SAMPLE")

    library_path = 'bark_voices'
    folder_name = 'audio-0-100'
    second_folder_name = 'audio-0-100_cleaned'

    folder_path = os.path.join(library_path, folder_name)
    second_folder_path = os.path.join(library_path, second_folder_name)

    print("We need to clean previous util files, if needed:")
    if os.path.exists(folder_path):
        try:
            shutil.rmtree(folder_path)
            print(
                f"Successfully deleted the folder previously created from the last raw recorded sample: {folder_path}")
        except OSError as e:
            print(f"Error: {folder_path} - {e.strerror}")
    else:
        print(
            f"OK, the folder for a raw recorded sample does not exist: {folder_path}")

    if os.path.exists(second_folder_path):
        try:
            shutil.rmtree(second_folder_path)
            print(
                f"Successfully deleted the folder previously created from the last cleaned recorded sample: {second_folder_path}")
        except OSError as e:
            print(f"Error: {second_folder_path} - {e.strerror}")
    else:
        print(
            f"OK, the folder for a cleaned recorded sample does not exist: {second_folder_path}")

    return audio_in


def infer(hidden_numpy_audio):
    print(""" ————— NEW INFERENCE: ——————— """)

    # Write the recorded sample where Coqui TTS looks for the speaker:
    # bark_voices/<speaker>/<sample>.wav. The fixed folder name matches the
    # one cleaned up in load_hidden_mic(); Bark computes (and caches) its
    # speaker prompts from this folder on first use.
    file_name = "audio-0-100"
    speaker_dir = os.path.join("bark_voices", file_name)
    os.makedirs(speaker_dir, exist_ok=True)

    # Gradio's numpy audio is a (sample_rate, ndarray) tuple.
    sample_rate, audio_data = hidden_numpy_audio
    write(os.path.join(speaker_dir, f"{file_name}.wav"), sample_rate, audio_data)

    prompt = "Hi mom, I have a broken tire and need an e-transfer. Can you send me some money please?"
    gr.Info("Generating audio from prompt")

    tts.tts_to_file(text=prompt,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=f"{file_name}")

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")

    # Must match the click() wiring below: reveal the cloned audio player
    # and return the waveform video.
    return gr.update(value="output.wav", visible=True), tts_video


css = """
.mic-wrap > button {
    width: 100%;
    height: 60px;
    font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
    display: flex;
    position: relative;
    margin-right: var(--size-2);
    width: unset;
    height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
    width: 20px!important;
    height: 20px!important;
}
"""

html_header = """

# Coqui + Bark Voice Cloning

Mimic any voice in less than 2 minutes with this Coqui TTS + Bark demo!

Record a clean 20-second voice sample using the microphone provided.

The hard-coded TTS prompt is: “Hi mom, I have a broken tire and need an e-transfer. Can you send me some money please?”
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(html_header)

    micro_in = gr.Audio(
        label="Record voice to clone",
        type="filepath",
        source="microphone",
        interactive=True
    )
    # Hidden numpy copy of the recording, filled by stop_recording() so that
    # infer() can write the sample into the bark_voices/ speaker folder.
    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
    micro_submit_btn = gr.Button("Submit")

    micro_in.stop_recording(fn=load_hidden_mic,
                            inputs=[micro_in],
                            outputs=[hidden_audio_numpy],
                            queue=False)

    cloned_out = gr.Audio(
        label="Text to speech output",
        visible=False
    )
    video_out = gr.Video(
        label="Waveform video",
        elem_id="voice-video-out"
    )

    micro_submit_btn.click(
        fn=infer,
        inputs=[hidden_audio_numpy],
        outputs=[cloned_out, video_out]
    )

demo.queue(api_open=False, max_size=10).launch()