Spaces:
Runtime error
Runtime error
sander-wood
committed on
Commit
•
bb453fd
1
Parent(s):
9153b88
Upload app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,73 @@
|
|
1 |
import subprocess
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
-
import json
|
5 |
from utils import *
|
6 |
from unidecode import unidecode
|
7 |
from transformers import AutoTokenizer
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
CLAMP_MODEL_NAME = 'clamp-small-512'
|
10 |
QUERY_MODAL = 'music'
|
11 |
KEY_MODAL = 'text'
|
@@ -199,4 +261,19 @@ input_class10 = gr.inputs.Textbox(label="Class 10", placeholder="Description of
|
|
199 |
# output labels with their probabilities
|
200 |
output_class = gr.outputs.Label(num_top_classes=10, label="Predicted Results")
|
201 |
|
202 |
-
gr.Interface(zero_shot_music_classification,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import subprocess
|
2 |
import os
|
3 |
import gradio as gr
|
|
|
4 |
from utils import *
|
5 |
from unidecode import unidecode
|
6 |
from transformers import AutoTokenizer
|
7 |
|
8 |
+
description = """
|
9 |
+
<div>
|
10 |
+
<a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
|
11 |
+
<a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
|
12 |
+
<a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
|
13 |
+
</div>
|
14 |
+
Bark is a universal text-to-audio model created by [Suno](https://www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
|
15 |
+
Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
|
16 |
+
This demo should be used for research purposes only. Commercial use is strictly prohibited. \
|
17 |
+
The model output is not censored and the authors do not endorse the opinions in the generated content. \
|
18 |
+
Use at your own risk.
|
19 |
+
"""
|
20 |
+
|
21 |
+
article = """
|
22 |
+
## 🌎 Foreign Language
|
23 |
+
Bark supports various languages out-of-the-box and automatically determines language from input text. \
|
24 |
+
When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
|
25 |
+
Try the prompt:
|
26 |
+
```
|
27 |
+
Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
|
28 |
+
```
|
29 |
+
## 🤭 Non-Speech Sounds
|
30 |
+
Below is a list of some known non-speech sounds, but we are finding more every day. \
|
31 |
+
Please let us know if you find patterns that work particularly well on Discord!
|
32 |
+
* [laughter]
|
33 |
+
* [laughs]
|
34 |
+
* [sighs]
|
35 |
+
* [music]
|
36 |
+
* [gasps]
|
37 |
+
* [clears throat]
|
38 |
+
* — or ... for hesitations
|
39 |
+
* ♪ for song lyrics
|
40 |
+
* capitalization for emphasis of a word
|
41 |
+
* MAN/WOMAN: for bias towards speaker
|
42 |
+
Try the prompt:
|
43 |
+
```
|
44 |
+
" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
|
45 |
+
```
|
46 |
+
## 🎶 Music
|
47 |
+
Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
|
48 |
+
Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
|
49 |
+
Try the prompt:
|
50 |
+
```
|
51 |
+
♪ In the jungle, the mighty jungle, the lion barks tonight ♪
|
52 |
+
```
|
53 |
+
## 🧬 Voice Cloning
|
54 |
+
Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
|
55 |
+
The model also attempts to preserve music, ambient noise, etc. from input audio. \
|
56 |
+
However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
|
57 |
+
## 👥 Speaker Prompts
|
58 |
+
You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
|
59 |
+
Please note that these are not always respected, especially if a conflicting audio history prompt is given.
|
60 |
+
Try the prompt:
|
61 |
+
```
|
62 |
+
WOMAN: I would like an oatmilk latte please.
|
63 |
+
MAN: Wow, that's expensive!
|
64 |
+
```
|
65 |
+
## Details
|
66 |
+
Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
|
67 |
+
Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
|
68 |
+
"""
|
69 |
+
|
70 |
+
|
71 |
CLAMP_MODEL_NAME = 'clamp-small-512'
|
72 |
QUERY_MODAL = 'music'
|
73 |
KEY_MODAL = 'text'
|
|
|
261 |
# output labels with their probabilities
|
262 |
output_class = gr.outputs.Label(num_top_classes=10, label="Predicted Results")
|
263 |
|
264 |
+
gr.Interface(zero_shot_music_classification,
|
265 |
+
inputs=[input_file,
|
266 |
+
input_class1,
|
267 |
+
input_class2,
|
268 |
+
input_class3,
|
269 |
+
input_class4,
|
270 |
+
input_class5,
|
271 |
+
input_class6,
|
272 |
+
input_class7,
|
273 |
+
input_class8,
|
274 |
+
input_class9,
|
275 |
+
input_class10],
|
276 |
+
outputs=output_class,
|
277 |
+
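title="🗜️ CLaMP: Zero-Shot Music Classification",  # inline description "Upload a MusicXML file and get the class of the music" dropped: `description` is passed on the next argument line, and repeating a keyword argument is a SyntaxError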
|
278 |
+
description=description,
|
279 |
+
article=article).launch()
|