File size: 3,690 Bytes
026eb56
 
e97cf3e
 
d5eb7c3
 
026eb56
d5eb7c3
5343b08
026eb56
ef8da59
bf1d7dc
026eb56
ef8da59
 
 
 
 
 
026eb56
ef8da59
 
 
 
 
026eb56
164b221
 
 
 
 
e97cf3e
164b221
ef8da59
e97cf3e
 
f085ca0
026eb56
 
 
946b1cd
164b221
 
 
e97cf3e
164b221
 
e97cf3e
164b221
 
e97cf3e
164b221
 
 
e97cf3e
164b221
 
 
e97cf3e
 
 
 
 
 
 
164b221
 
 
 
 
 
026eb56
 
 
ef8da59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf1d7dc
 
 
 
 
 
ef8da59
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
import json
from TTS.api import TTS

#from bark import SAMPLE_RATE, generate_audio, preload_models
#from bark.generation import SUPPORTED_LANGS
#from share_btn import community_icon_html, loading_icon_html, share_js
#from flask import Flask, jsonify
 
# Debug switch. Nothing in the visible part of this file reads it yet.
DEBUG_MODE = False
# Page title handed to gr.Interface.
TITLE = "《★~ N.N.F. Text To Speech V2 ~★ 》"
#FAVICON = "https://aeiljuispo.cloudimg.io/v7/https://cdn-uploads.huggingface.co/production/uploads/65b453089151075ad87382ea/9NpWGb4xBrZovX1o7DF-5.jpeg"

# Default content of the "Text Prompt" box.
INPUT_TEXT_PROMPT_EXAMPLE="""
Não sou nada,
não serei nada,
não posso querer ser nada.
À parte isso, tenho em mim todos os sonhos do mundo.
"""

# Description text shown by the interface.
DESCRIPTION = """
This is a simple Text To Speech (TTS) model that uses the XTTS model from Hugging Face.
You can use this model to generate audio from text in multiple languages.
It can be used as an API or as a standalone application.
"""
# Backward-compatible alias: the original, misspelled name is still the one
# referenced where the interface is built.
DECRIPTION = DESCRIPTION

# Inclusive bounds on the prompt length, in characters, enforced by predict().
MIN_TEXT_LENGTH = 5
MAX_TEXT_LENGTH = 10000
# predict() only accepts a language code that appears in both lists.
SUPPORTED_LANGS_BY_APP = ["en", "es", "fr", "it", "pt", "nl"]
SUPPORTED_LANGS_BY_MODEL = ["en", "es", "fr", "it", "pt", "nl"]

def predict(input_text, input_lang, audio, request: gr.Request):
    """Validate a text-to-speech request and echo it back as a JSON string.

    No audio is synthesised here yet; the function only checks its inputs
    and, when they pass, reports them back together with the headers of the
    incoming request.

    Args:
        input_text: Text the caller wants spoken. Its length must lie between
            ``MIN_TEXT_LENGTH`` and ``MAX_TEXT_LENGTH`` characters, inclusive.
            ``None`` is treated as an empty text.
        input_lang: Language code. It must appear in both
            ``SUPPORTED_LANGS_BY_MODEL`` and ``SUPPORTED_LANGS_BY_APP``.
        audio: Value of the audio input component. Currently unused.
        request: The ``gr.Request`` object for the current call.

    Returns:
        A JSON-encoded string with the keys ``"verdict "``, ``"Text"``,
        ``"Language"`` and ``"headers"`` when validation succeeds; ``None``
        (after emitting a ``gr.Warning``) when any check fails.
    """
    if not request:
        gr.Warning("No request")
        return None

    # Guard against None so the length checks emit a warning instead of
    # letting len() raise TypeError.
    text_length = len(input_text) if input_text is not None else 0
    if text_length < MIN_TEXT_LENGTH:
        gr.Warning(
            f"Text too short. Please provide a longer text, min {MIN_TEXT_LENGTH} characters"
        )
        return None
    if text_length > MAX_TEXT_LENGTH:
        gr.Warning(
            f"Text too long. Please provide a shorter text, max {MAX_TEXT_LENGTH} characters"
        )
        return None

    if input_lang not in SUPPORTED_LANGS_BY_MODEL:
        gr.Warning("Language not supported by the model. Please select a supported language")
        return None
    if input_lang not in SUPPORTED_LANGS_BY_APP:
        gr.Warning("Language not supported for now. Please select a supported language")
        return None

    # TODO: speech synthesis is not wired in yet. The disabled draft called
    # tts.tts_to_file(text=input_text, file_path="output.wav",
    # speaker_wav=speaker_wav, language=input_lang) and rejected a missing
    # `audio` with the warning "Please provide an audio file".

    output_text = {
        # NOTE(review): the trailing space in "verdict " looks accidental; it
        # is kept because existing clients may already read this exact key.
        "verdict ": "SUCCESS",
        "Text": input_text,
        "Language": input_lang,
        # Copy the request headers into a plain dict so they can be serialised
        "headers": dict(request.headers.items()),
    }
    return json.dumps(output_text)

# Build the web UI around predict() and start serving it right away.
# NOTE: the whole chain is one expression, so `io` is bound to whatever
# launch() returns rather than to the gr.Interface object itself.
io = gr.Interface(
    fn=predict,
    inputs=[
        # Text to be read aloud
        gr.Textbox(
            label="Text Prompt",
            info="One or two sentences at a time is better",
            value=INPUT_TEXT_PROMPT_EXAMPLE,
        ),
        # Choices come from the app-level language list that predict()
        # validates against, so the dropdown and the check cannot drift apart.
        gr.Dropdown(
            label="Language",
            info="Select an output language for the reader",
            choices=SUPPORTED_LANGS_BY_APP,
            #max_choices=1,
            value="pt",
        ),
        # Audio input with default settings; predict() receives it as `audio`
        # but does not use it yet.
        gr.Audio(
            #label="Reference Audio",
            #info="Click on the ✎ button to upload your own target speaker audio",
            #type="filepath",
            #value="examples/female.wav",
        ),
    ],
    outputs=[
        #gr.Video(label="Waveform Visual"),
        "json",
    ],
    title=TITLE,
    description=DECRIPTION,
    #favicon_path=FAVICON,
    #article=article,
    #cache_examples=False,
    #examples=examples,
).queue().launch(share=True)