sander-wood committed on
Commit
bb453fd
1 Parent(s): 9153b88

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -2
app.py CHANGED
@@ -1,11 +1,73 @@
1
  import subprocess
2
  import os
3
  import gradio as gr
4
- import json
5
  from utils import *
6
  from unidecode import unidecode
7
  from transformers import AutoTokenizer
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  CLAMP_MODEL_NAME = 'clamp-small-512'
10
  QUERY_MODAL = 'music'
11
  KEY_MODAL = 'text'
@@ -199,4 +261,19 @@ input_class10 = gr.inputs.Textbox(label="Class 10", placeholder="Description of
199
  # output labels with their probabilities
200
  output_class = gr.outputs.Label(num_top_classes=10, label="Predicted Results")
201
 
202
- gr.Interface(zero_shot_music_classification, inputs=[input_file, input_class1, input_class2, input_class3, input_class4, input_class5, input_class6, input_class7, input_class8, input_class9, input_class10], outputs=output_class, title="Zero-Shot Music Classification", description="Upload a MusicXML file and get the class of the music").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
  import os
3
  import gradio as gr
 
4
  from utils import *
5
  from unidecode import unidecode
6
  from transformers import AutoTokenizer
7
 
8
+ description = """
9
+ <div>
10
+ <a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
11
+ <a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
12
+ <a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
13
+ </div>
14
+ Bark is a universal text-to-audio model created by [Suno](https://www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
15
+ Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
16
+ This demo should be used for research purposes only. Commercial use is strictly prohibited. \
17
+ The model output is not censored and the authors do not endorse the opinions in the generated content. \
18
+ Use at your own risk.
19
+ """
20
+
21
+ article = """
22
+ ## 🌎 Foreign Language
23
+ Bark supports various languages out-of-the-box and automatically determines language from input text. \
24
+ When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
25
+ Try the prompt:
26
+ ```
27
+ Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
28
+ ```
29
+ ## 🤭 Non-Speech Sounds
30
+ Below is a list of some known non-speech sounds, but we are finding more every day. \
31
+ Please let us know if you find patterns that work particularly well on Discord!
32
+ * [laughter]
33
+ * [laughs]
34
+ * [sighs]
35
+ * [music]
36
+ * [gasps]
37
+ * [clears throat]
38
+ * — or ... for hesitations
39
+ * ♪ for song lyrics
40
+ * capitalization for emphasis of a word
41
+ * MAN/WOMAN: for bias towards speaker
42
+ Try the prompt:
43
+ ```
44
+ " [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
45
+ ```
46
+ ## 🎶 Music
47
+ Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
48
+ Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
49
+ Try the prompt:
50
+ ```
51
+ ♪ In the jungle, the mighty jungle, the lion barks tonight ♪
52
+ ```
53
+ ## 🧬 Voice Cloning
54
+ Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
55
+ The model also attempts to preserve music, ambient noise, etc. from input audio. \
56
+ However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
57
+ ## 👥 Speaker Prompts
58
+ You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
59
+ Please note that these are not always respected, especially if a conflicting audio history prompt is given.
60
+ Try the prompt:
61
+ ```
62
+ WOMAN: I would like an oatmilk latte please.
63
+ MAN: Wow, that's expensive!
64
+ ```
65
+ ## Details
66
+ Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
67
+ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
68
+ """
69
+
70
+
71
  CLAMP_MODEL_NAME = 'clamp-small-512'
72
  QUERY_MODAL = 'music'
73
  KEY_MODAL = 'text'
 
261
  # output labels with their probabilities
262
  output_class = gr.outputs.Label(num_top_classes=10, label="Predicted Results")
263
 
264
+ gr.Interface(zero_shot_music_classification,
265
+ inputs=[input_file,
266
+ input_class1,
267
+ input_class2,
268
+ input_class3,
269
+ input_class4,
270
+ input_class5,
271
+ input_class6,
272
+ input_class7,
273
+ input_class8,
274
+ input_class9,
275
+ input_class10],
276
+ outputs=output_class,
277
+ title="🗜️ CLaMP: Zero-Shot Music Classification",
278
+ description=description,
279
+ article=article).launch()