File size: 3,809 Bytes
802e11f
 
 
7ad00dc
802e11f
 
 
 
 
8749790
 
802e11f
 
8ca90cb
802e11f
0303990
802e11f
 
8749790
802e11f
 
 
 
b4145ab
 
 
 
 
 
 
 
 
 
802e11f
 
 
7ad00dc
802e11f
b4145ab
7ad00dc
 
 
 
802e11f
 
 
 
7ad00dc
 
8ca90cb
7ad00dc
802e11f
3ebe5da
 
802e11f
 
b4145ab
8749790
7ad00dc
8749790
7ad00dc
802e11f
 
 
 
 
 
7ad00dc
802e11f
 
 
 
 
 
 
 
 
 
 
 
 
 
7ad00dc
802e11f
 
 
7ad00dc
802e11f
 
 
 
 
 
 
 
 
e941a40
802e11f
 
0303990
802e11f
 
7ad00dc
 
802e11f
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
Copyright 2023 Balacoon

Revoice Service interactive demo
"""

import glob
import logging
import os
import socket
from typing import Dict

import gradio as gr
import soundfile as sf

from service_request import service_request

# Absolute directory that contains this script; used below to locate the
# bundled "references" directory regardless of the current working directory.
_this_file = os.path.abspath(__file__)
script_dir = os.path.dirname(_this_file)


def main():
    """
    Build the Gradio Blocks UI of the Revoice demo and launch it.

    The page collects API credentials, a source (microphone recording,
    uploaded audio or text) and an audio sample with the target voice.
    Pressing "Generate" forwards them to `service_request` and routes the
    returned value into the result audio component.
    """
    logging.basicConfig(level=logging.INFO)

    badges = """
    <div style="display: flex">
    <span style="margin-right: 5px">

    [<img src="https://play.google.com/intl/en_us/badges/static/images/badges/en_badge_web_generic.png" width="200" height="77">](https://play.google.com/store/apps/details?id=com.app.vc)

    </span>
    </div>
    """

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            <h1 align="center">Balacoon🦝 Revoice</h1>


            Welcome to the live demo of Balacoon's Revoice service.
            Check out our [website](https://balacoon.com/products/) to learn more.
            Zero-shot speech generation allows to generate speech with any voice
            given just a single sample as a reference.
            For optimal results, we recommend using clean audio files in English.

            Here's how it works:

            1. Provide your credentials (API key and secret).
            2. Recording or upload your voice for conversion, or provide text for synthesis. 
            3. Select an audio sample that represents the target voice you want to convert to.
            4. Click the "Generate" button and listen to the result!

            If providing your own audio files, please use WAVE PCM.
            Service works with 16kHz, 16 bit, mono audio.
            """
        )
        gr.Markdown(badges)
        with gr.Row():
            apikey = gr.Textbox(label="API key", placeholder="Enter API key")
        with gr.Row():
            # The secret is a credential: mask it so it is not shown in clear
            # text on screen (shoulder surfing, screenshots, screen sharing).
            apisecret = gr.Textbox(
                label="API secret",
                placeholder="Enter API secret",
                type="password",
            )
        with gr.Row():
            # Left panel: the three alternative ways to provide the source.
            with gr.Column(variant="panel"):
                src_audio_mic = gr.Audio(source="microphone", label="Record your voice")
                src_audio_file = gr.Audio(
                    source="upload", label="Or upload audio to convert"
                )
                src_text = gr.Textbox(label="Text", placeholder="Or provide text to synthesize")

            # Right panel: the target voice, with bundled reference samples.
            with gr.Column(variant="panel"):
                tgt_audio_file = gr.Audio(
                    source="upload", label="Select audio with target voice"
                )
                # glob returns paths in arbitrary order; sort them so the
                # examples are listed in the same order on every start.
                tgt_examples_paths = sorted(
                    glob.glob(os.path.join(script_dir, "references", "*.wav"))
                )
                gr.Examples(
                    tgt_examples_paths,
                    inputs=[tgt_audio_file],
                )

        with gr.Row():
            convert_btn = gr.Button("Generate")
        with gr.Row():
            result_audio = gr.Audio()

        def speech_generation(src_from_mic_, src_from_file_, src_text_, tgt_from_file_, api_key_, api_secret_, request_: gr.Request):
            """
            Click handler: pick the source, validate inputs, call the service.

            A microphone recording takes precedence over an uploaded file.
            Either a source audio or a source text must be given, together
            with a target audio; otherwise a `gr.Error` is raised so the
            problem is shown to the user instead of failing silently.
            `request_` is injected by Gradio and currently unused.
            """
            src_ = src_from_mic_ or src_from_file_ or None
            tgt_ = tgt_from_file_
            if (not src_ and not src_text_) or not tgt_:
                logging.warning("source or target are not provided")
                raise gr.Error(
                    "Please provide a source (recording, audio file or text) "
                    "and an audio sample with the target voice."
                )
            return service_request(src_text_, src_, tgt_, api_key_, api_secret_)

        convert_btn.click(
            speech_generation,
            inputs=[src_audio_mic, src_audio_file, src_text, tgt_audio_file, apikey, apisecret],
            outputs=result_audio,
        )

    demo.queue(concurrency_count=1).launch()


# Launch the demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()