File size: 4,422 Bytes
eb21a2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from TTS.api import TTS
import json
import gradio as gr
from share_btn import community_icon_html, loading_icon_html, share_js
import os
import shutil
import re

import numpy as np
from scipy.io import wavfile
from scipy.io.wavfile import write, read
from pydub import AudioSegment

# Optional feature flag read from the environment; None when the variable
# ALLOW_FILE_UPLOAD is not set.
file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
MAX_NUMBER_SENTENCES = 10

# Read the character presets. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale encoding.
with open("characters.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only the image/title/speaker fields of every entry. Built after the
# file is closed: the parsed data no longer needs the handle.
characters = [
    {
        "image": item["image"],
        "title": item["title"],
        "speaker": item["speaker"],
    }
    for item in data
]

# Coqui TTS wrapper around the multilingual Bark model, created once at import
# time with gpu=False.
tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=False)


def _remove_stale_dir(dir_path, label):
    """Delete *dir_path* if it exists and log the outcome (best effort).

    Args:
        dir_path: Directory to remove recursively.
        label: Word describing the kind of sample ("raw", "cleaned"); only
            used in the log messages.

    An ``OSError`` raised by the removal is logged and swallowed so that a
    failed cleanup never blocks the recording callback.
    """
    if not os.path.exists(dir_path):
        print(
            f"OK, the folder for a {label} recorded sample does not exist: {dir_path}")
        return

    try:
        shutil.rmtree(dir_path)
    except OSError as e:
        print(f"Error: {dir_path} - {e.strerror}")
    else:
        print(
            f"Successfully deleted the folder previously created from last {label} recorded sample: {dir_path}")


def load_hidden_mic(audio_in):
    """Clear voice folders left by a previous recording and pass the audio on.

    Removes ``bark_voices/audio-0-100`` and ``bark_voices/audio-0-100_cleaned``
    when they exist, then returns the input untouched.

    Args:
        audio_in: The freshly recorded sample as delivered by the caller.

    Returns:
        ``audio_in``, unchanged.
    """
    print("USER RECORDED A NEW SAMPLE")

    library_path = 'bark_voices'
    stale_folders = (
        ('audio-0-100', 'raw'),
        ('audio-0-100_cleaned', 'cleaned'),
    )

    print("We need to clean previous util files, if needed:")
    for folder_name, label in stale_folders:
        _remove_stale_dir(os.path.join(library_path, folder_name), label)

    return audio_in


def infer(hidden_numpy_audio):
    """Clone the recorded voice with Bark and speak the fixed demo prompt.

    The recording is saved as ``bark_voices/audio-0-100/audio-0-100.wav`` and
    that folder name is passed to the TTS model as the speaker to clone.

    Args:
        hidden_numpy_audio: Value of the hidden ``gr.Audio(type="numpy")``
            component. Per the Gradio docs this is a ``(sample_rate, samples)``
            tuple, or ``None`` when nothing has been recorded.

    Returns:
        A 2-tuple ``("output.wav", tts_video)`` matching the two output
        components wired to the Submit button; ``tts_video`` is whatever
        ``gr.make_waveform`` returns for the generated audio.

    Raises:
        gr.Error: If no voice sample has been recorded yet.
    """
    print("""
—————
NEW INFERENCE:
———————
    """)

    if hidden_numpy_audio is None:
        # Submit was clicked before a recording exists; show a UI error
        # instead of failing on tuple unpacking below.
        raise gr.Error("Please record a voice sample before submitting.")

    prompt = "Hi mom, I have a broken tire and need a transfer. Can you send me some money please?"

    # Persist the sample where Bark looks for a voice to clone:
    # <voice_dir>/<speaker>/<speaker>.wav. The folder name is the one
    # load_hidden_mic() wipes on every new recording, so a stale voice is
    # never reused for a fresh sample.
    sample_rate, samples = hidden_numpy_audio
    voice_dir = "bark_voices"
    file_name = "audio-0-100"
    speaker_dir = os.path.join(voice_dir, file_name)
    os.makedirs(speaker_dir, exist_ok=True)
    wavfile.write(
        os.path.join(speaker_dir, f"{file_name}.wav"),
        sample_rate,
        np.asarray(samples),
    )

    gr.Info("Generating audio from prompt")
    tts.tts_to_file(text=prompt,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=file_name)

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")
    # One value per output component: [cloned_out, video_out].
    return "output.wav", tts_video


# Custom stylesheet handed to gr.Blocks(css=css).
# - `.mic-wrap > button`: stretches the button to full width, 60px tall, with
#   a larger font.
# - `.record-icon` / `.dot`: resets the record icon's sizing and forces a
#   20x20px dot.
# NOTE(review): the `.svelte-1thnwz` suffix looks like a build-generated class
# hash, so these two selectors are presumably tied to one specific Gradio
# version — confirm after any Gradio upgrade.
css = """
.mic-wrap > button {
    width: 100%;
    height: 60px;
    font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
    display: flex;
    position: relative;
    margin-right: var(--size-2);
    width: unset;
    height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
    width: 20px!important;
    height: 20px!important;
}
"""
# Intro banner shown at the top of the page via gr.Markdown(html_header).
# The quoted sentence mirrors the prompt hard-coded in infer(); keep the two
# in sync when either one changes.
html_header = """
        <h1 style="text-align: center;">Coqui + Bark Voice Cloning</h1>
        <p style="text-align: center;">
        Mimic any voice character in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TTS + Bark</a> demo ! <br />
        Record a clean 20 seconds voice using the microphone provided.<br />
        The hard-coded TTS prompt is: “Hi mom, I have a broken tire and need a transfer. Can you send me some money please?”<br />
        </p>
"""

# Build the page: header, microphone recorder, Submit button and the outputs.
# NOTE(review): Gradio presumably lays components out in creation order, so
# the statement order below doubles as the page layout — confirm before
# reordering anything.
with gr.Blocks(css=css) as demo:
    gr.Markdown(html_header)

    # Microphone recorder; type="filepath" means callbacks receive a path.
    micro_in = gr.Audio(
        label="Record voice to clone",
        type="filepath",
        source="microphone",
        interactive=True
    )
    # Invisible audio component that relays the recording to infer() in
    # "numpy" form.
    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
    micro_submit_btn = gr.Button("Submit")

    # When a recording stops, load_hidden_mic() clears leftover voice folders
    # and its return value (the recording) is stored in the hidden component.
    # queue=False is set so this handler does not go through the request queue.
    micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[
                            hidden_audio_numpy], queue=False)

    # Audio output of the synthesis; created hidden (visible=False).
    cloned_out = gr.Audio(
        label="Text to speech output",
        visible=False
    )

    # Video output that receives the waveform rendering of the synthesis.
    video_out = gr.Video(
        label="Waveform video",
        elem_id="voice-video-out"
    )

    # Submit runs infer() on the hidden recording. infer() is expected to
    # return one value per component listed in `outputs`, in this order.
    micro_submit_btn.click(
        fn=infer,
        inputs=[hidden_audio_numpy],
        outputs=[cloned_out, video_out]
    )

# Enable the request queue (at most 10 pending, API access closed) and start
# the app. This runs at import time: there is no __main__ guard.
demo.queue(api_open=False, max_size=10).launch()