XuminYu committed
Commit e23742d (0 parents)
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,15 @@
+ ---
+ title: OpenVoiceV2
+ emoji: 🤗
+ colorFrom: blue
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.48.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ models:
+ - myshell-ai/OpenVoice-v2
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,289 @@
+ import os
+ import gradio as gr
+ import requests
+ import langid
+ import base64
+ import json
+ import time
+ import re
+
+
+
+ API_URL = os.environ.get("API_URL")
+ supported_languages = ['zh', 'en', 'ja', 'ko', 'es', 'fr']
+ supported_styles = {  # each language maps to the list of styles it supports
+     'zh': ["zh_default"],
+     'en': [
+         "en_default",
+         "en_us",
+         "en_br",
+         "en_au",
+         "en_in"
+     ],
+     "es": ["es_default"],
+     "fr": ["fr_default"],
+     "ja": ["jp_default"],
+     "ko": ["kr_default"]
+ }
+
+ output_dir = 'outputs'
+ os.makedirs(output_dir, exist_ok=True)
+
+ def audio_to_base64(audio_file):
+     with open(audio_file, "rb") as f:
+         audio_data = f.read()
+     base64_data = base64.b64encode(audio_data).decode("utf-8")
+     return base64_data
+
+ def count_chars_words(sentence):
+     segments = re.findall(r'[\u4e00-\u9fa5]+|\w+', sentence)  # Chinese counted per character, other scripts per word
+
+     char_count = 0
+     word_count = 0
+     for segment in segments:
+         if re.match(r'[\u4e00-\u9fa5]+', segment):
+             char_count += len(segment)
+         else:
+             word_count += len(segment.split())
+     return char_count + word_count
+
+ def predict(prompt, style, audio_file_pth, agree, speed=1.0):  # speed defaults to 1.0; the UI wires up four inputs
+     # initialize an empty info message
+     text_hint = ''
+     # agree with the terms
+     if not agree:
+         text_hint += '[ERROR] Please accept the Terms & Conditions!\n'
+         gr.Warning("Please accept the Terms & Conditions!")
+         return (
+             text_hint,
+             None,
+             None,
+         )
+
+     # first detect the input language
+     language_predicted = langid.classify(prompt)[0].strip()
+     print(f"Detected language: {language_predicted}")
+
+
+     if language_predicted not in supported_languages:
+         text_hint += f"[ERROR] The detected language {language_predicted} for your input text is not in our supported languages: {supported_languages}\n"
+         gr.Warning(
+             f"The detected language {language_predicted} for your input text is not in our supported languages: {supported_languages}"
+         )
+
+         return (
+             text_hint,
+             None,
+             None,
+         )
+
+     # check the style
+     if style not in supported_styles[language_predicted]:
+         text_hint += f"[WARNING] The style {style} is not supported for detected language {language_predicted}. For language {language_predicted}, we support styles: {supported_styles[language_predicted]}. Using the wrong style may result in unexpected behavior.\n"
+         gr.Warning(f"[WARNING] The style {style} is not supported for detected language {language_predicted}. For language {language_predicted}, we support styles: {supported_styles[language_predicted]}. Using the wrong style may result in unexpected behavior.")
+
+     prompt_length = count_chars_words(prompt)
+
+     speaker_wav = audio_file_pth
+
+     if prompt_length < 2:
+         text_hint += "[ERROR] Please give a longer prompt text \n"
+         gr.Warning("Please give a longer prompt text")
+         return (
+             text_hint,
+             None,
+             None,
+         )
+     if prompt_length > 50:
+         text_hint += "[ERROR] Text length is limited to 50 words for this demo; please try a shorter text. You can clone our open-source repo or try it on our website https://app.myshell.ai/robot-workshop/widget/174760057433406749 \n"
+         gr.Warning(
+             "Text length is limited to 50 words for this demo; please try a shorter text. You can clone our open-source repo or try it on our website https://app.myshell.ai/robot-workshop/widget/174760057433406749"
+         )
+         return (
+             text_hint,
+             None,
+             None,
+         )
+
+     save_path = f'{output_dir}/output.wav'
+     speaker_audio_base64 = audio_to_base64(speaker_wav)
+     if style == 'en_us':  # the 'en_us' style maps to the updated 'en_newest' accent
+         style = 'en_newest'
+     data = {
+         "text": prompt,
+         "reference_speaker": speaker_audio_base64,
+         "language": style,
+         "speed": speed
+     }
+
+     start = time.time()
+     # Send the data as a POST request
+     response = requests.post(API_URL, json=data, timeout=60)
+     print(f'Got the response in {time.time() - start:.2f}s')
+
+     # Check the response: a JSON body signals an error, anything else is the synthesized audio
+     if response.status_code == 200:
+         try:
+             json_data = json.loads(response.content)
+             text_hint += f"[ERROR] {json_data['error']} \n"
+             gr.Warning(
+                 f"[ERROR] {json_data['error']} \n"
+             )
+             return (
+                 text_hint,
+                 None,
+                 None,
+             )
+         except (ValueError, KeyError, TypeError):  # not a JSON error payload; treat the body as audio bytes
+             with open(save_path, 'wb') as f:
+                 f.write(response.content)
+     else:
+         text_hint += f"[HTTP ERROR] {response.status_code} - {response.text} \n"
+         gr.Warning(
+             f"[HTTP ERROR] {response.status_code} - {response.text} \n"
+         )
+         return (
+             text_hint,
+             None,
+             None,
+         )
+     text_hint += 'Got the response successfully \n'
+     return (
+         text_hint,
+         save_path,
+         speaker_wav,
+     )
+
+
+ title = "MyShell OpenVoice V2"
+
+ description = """
+ In December 2023, we released [OpenVoice V1](https://huggingface.co/spaces/myshell-ai/OpenVoice), an instant voice cloning approach that replicates a speaker's voice and generates speech in multiple languages using only a short audio clip. OpenVoice V1 enables granular control over voice styles, replicates the tone color of the reference speaker and achieves zero-shot cross-lingual voice cloning.
+
+ In April 2024, we released **OpenVoice V2**, which includes all features of V1 and adds:
+
+ - **Better Audio Quality**. OpenVoice V2 adopts a different training strategy that delivers better audio quality.
+
+ - **Native Multi-lingual Support**. English, Spanish, French, Chinese, Japanese and Korean are natively supported in OpenVoice V2.
+
+ - **Free Commercial Use**. Starting from April 2024, both V2 and V1 are released under the MIT License. Free for commercial use.
+
+ """
+
+ markdown_table = """
+ <div align="center" style="margin-bottom: 10px;">
+
+ | | | |
+ | :-----------: | :-----------: | :-----------: |
+ | **OpenSource Repo** | **Project Page** | **Join the Community** |
+ | <div style='text-align: center;'><a style="display:inline-block,align:center" href='https://github.com/myshell-ai/OpenVoice'><img src='https://img.shields.io/github/stars/myshell-ai/OpenVoice?style=social' /></a></div> | [OpenVoice](https://research.myshell.ai/open-voice) | [![Discord](https://img.shields.io/discord/1122227993805336617?color=%239B59B6&label=%20Discord%20)](https://discord.gg/myshell) |
+
+ </div>
+ """
+
+ markdown_table_v2 = """
+ <div align="center" style="margin-bottom: 2px;">
+
+ | | | | |
+ | :-----------: | :-----------: | :-----------: | :-----------: |
+ | **Github Repo** | <div style='text-align: center;'><a style="display:inline-block,align:center" href='https://github.com/myshell-ai/OpenVoice'><img src='https://img.shields.io/github/stars/myshell-ai/OpenVoice?style=social' /></a></div> | **Project Page** | [OpenVoice](https://research.myshell.ai/open-voice) |
+
+ | | |
+ | :-----------: | :-----------: |
+ | **Join the Community** | [![Discord](https://img.shields.io/discord/1122227993805336617?color=%239B59B6&label=%20Discord%20)](https://discord.gg/myshell) |
+
+ </div>
+ """
+ content = """
+ <div>
+ <strong>If the generated voice does not sound like the reference voice, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/docs/QA.md'>this QnA</a>.</strong> <strong>If you want to deploy the model by yourself and perform inference, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part3.ipynb'>this Jupyter notebook</a>.</strong>
+ </div>
+ """
+ wrapped_markdown_content = f"<div style='border: 1px solid #000; padding: 10px;'>{content}</div>"
+
+
+ examples = [
+     [
+         "Did you ever hear a folk tale about a giant turtle?",
+         'en_us',
+         "examples/speaker0.mp3",
+         True,
+     ], [
+         "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante.",
+         'es_default',
+         "examples/speaker1.mp3",
+         True,
+     ], [
+         "我最近在学习machine learning,希望能够在未来的artificial intelligence领域有所建树。",
+         'zh_default',
+         "examples/speaker2.mp3",
+         True,
+     ], [
+         "彼は毎朝ジョギングをして体を健康に保っています。",
+         'jp_default',
+         "examples/speaker3.mp3",
+         True,
+     ],
+
+ ]
+
+ with gr.Blocks(analytics_enabled=False) as demo:
+
+     with gr.Row():
+         gr.Markdown(
+             """
+             ## <img src="https://huggingface.co/spaces/myshell-ai/OpenVoice/raw/main/logo.jpg" height="20"/>
+             """
+         )
+     with gr.Row():
+         gr.Markdown(markdown_table)
+     with gr.Row():
+         gr.Markdown(description)
+
+     with gr.Row():
+         gr.HTML(wrapped_markdown_content)
+
+     with gr.Row():
+         with gr.Column():
+             input_text_gr = gr.Textbox(
+                 label="Text Prompt",
+                 info="One or two sentences at a time works best. Up to 50 words/characters for this demo.",
+                 value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
+             )
+             style_gr = gr.Dropdown(
+                 label="Style",
+                 info="Select a style of output audio for the synthesised speech. (Chinese currently supports only 'default')",
+                 choices=["en_default", "en_us", "en_br", "en_au", "en_in", "es_default", "fr_default", "jp_default", "zh_default", "kr_default"],
+                 max_choices=1,
+                 value="en_us",
+             )
+             ref_gr = gr.Audio(
+                 label="Reference Audio",
+                 info="Click on the ✎ button to upload your own target speaker audio",
+                 type="filepath",
+                 value="examples/speaker0.mp3",
+             )
+             tos_gr = gr.Checkbox(
+                 label="Agree",
+                 value=False,
+                 info="I agree to the terms of the MIT license: https://github.com/myshell-ai/OpenVoice/blob/main/LICENSE",
+             )
+
+             tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
+
+
+         with gr.Column():
+             out_text_gr = gr.Text(label="Info")
+             audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
+             ref_audio_gr = gr.Audio(label="Reference Audio Used")
+
+     gr.Examples(examples,
+                 label="Examples",
+                 inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
+                 outputs=[out_text_gr, audio_gr, ref_audio_gr],
+                 fn=predict,
+                 cache_examples=False)
+     tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
+
+ demo.queue(concurrency_count=6)
+ demo.launch(debug=True, show_api=True)
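
For reference, the request/response contract that `predict` relies on can also be exercised outside Gradio. The sketch below is a minimal, hypothetical client: it reuses the same `API_URL` environment variable and the payload fields seen in `app.py` (`text`, `reference_speaker` as base64-encoded audio, `language`, `speed`), and it assumes, as `predict` does, that the endpoint returns raw audio bytes on success and a JSON body with an `error` field on failure.

```python
# Minimal sketch of a direct call to the synthesis endpoint used by app.py.
# Assumptions: the same API_URL environment variable, the same payload fields,
# and the same "JSON body means error" convention as predict() above.
import base64
import json
import os

import requests


def synthesize(text, reference_audio_path, style="en_newest", speed=1.0,
               out_path="outputs/direct_output.wav"):
    with open(reference_audio_path, "rb") as f:
        reference_b64 = base64.b64encode(f.read()).decode("utf-8")

    payload = {
        "text": text,
        "reference_speaker": reference_b64,
        "language": style,  # app.py sends the style string in the "language" field
        "speed": speed,
    }
    response = requests.post(os.environ["API_URL"], json=payload, timeout=60)
    response.raise_for_status()

    # A body that parses as JSON carries an "error" field; anything else is audio bytes.
    try:
        error = json.loads(response.content).get("error")
    except ValueError:
        error = None
    if error:
        raise RuntimeError(error)

    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "wb") as f:
        f.write(response.content)
    return out_path


if __name__ == "__main__":
    print(synthesize("Did you ever hear a folk tale about a giant turtle?",
                     "examples/speaker0.mp3"))
```

The field names and the JSON-means-error convention are taken from `app.py` itself; the service behind `API_URL` is not publicly documented here, so treat this as a sketch rather than a reference client.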
count.py ADDED
@@ -0,0 +1,35 @@
+ import re
+
+ def count_chars_words(sentence):
+     # Split the sentence with a regex: Chinese is split into characters, English into words
+     segments = re.findall(r'[\u4e00-\u9fa5]+|\w+', sentence)
+
+     # Count characters and words
+     char_count = 0
+     word_count = 0
+     for segment in segments:
+         # print(segment)
+         if re.match(r'[\u4e00-\u9fa5]+', segment):  # Chinese segment: each character counts as one
+             char_count += len(segment)
+         else:  # English segment: each word counts as one
+             word_count += len(segment.split())
+
+     return char_count + word_count
+
+ sentence = "如果您 want to deploy the 模型并进行推理"
+ count = count_chars_words(sentence)
+ print(f"Count: {count}")
+
+
+ sentence = "今天天气真好,我们一起出去吃饭吧。"
+ count = count_chars_words(sentence)
+ print(f"Count: {count}")
+
+
+ sentence = "我最近在学习machine learning,希望能够在未来的artificial intelligence领域有所建树。"
+ count = count_chars_words(sentence)
+ print(f"Count: {count}")
+
+ sentence = "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante。"
+ count = count_chars_words(sentence)
+ print(f"Count: {count}")
examples/speaker0.mp3 ADDED
Binary file (961 kB).
 
examples/speaker1.mp3 ADDED
Binary file (309 kB).
 
examples/speaker2.mp3 ADDED
Binary file (117 kB).
 
examples/speaker3.mp3 ADDED
Binary file (472 kB).
 
requirements.txt ADDED
@@ -0,0 +1 @@
+ langid