sanchit-gandhi committed on
Commit
2b0ab45
·
1 Parent(s): c8ca6e9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from bark import SAMPLE_RATE, generate_audio
4
+
5
def predict(text_prompt):
    """Generate audio for a text prompt with Bark.

    Args:
        text_prompt: Text to synthesise. ``None``, empty, or whitespace-only
            input yields an empty clip without calling the model.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy
        ``int16`` array, as consumed by the ``type="numpy"`` audio output.
    """
    if not text_prompt or not text_prompt.strip():
        # Nothing to synthesise: return a zero-length clip at the same sample
        # rate as real output so both paths report a consistent rate.
        return (SAMPLE_RATE, np.zeros(0, dtype=np.int16))

    audio_array = generate_audio(text_prompt)
    # The 32767 scaling assumes float samples in [-1, 1] (TODO confirm against
    # Bark). Clip first so any out-of-range sample saturates instead of
    # wrapping around when cast to int16.
    audio_array = np.clip(audio_array, -1.0, 1.0)
    audio_array = (audio_array * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_array)
12
+
13
+
14
+ title = "🐶 Bark"
15
+
16
+ description = """
17
+ Bark is a transformer-based text-to-audio model created by [Suno](https://suno.ai/). Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying.
18
+ """
19
+
20
# Markdown passed to gr.Interface as the demo's `article`. It is a usage guide
# with sample prompts for multilingual text, non-speech sound tags, music
# notes and speaker labels, followed by model credits and the licence note.
# The text is user-facing and rendered as-is, so it is kept verbatim.
article = """

## 🌎 Foreign Language

Bark supports various languages out-of-the-box and automatically determines language from input text. When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.

Try the prompt:

```
Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
```

## 🤭 Non-Speech Sounds

Below is a list of some known non-speech sounds, but we are finding more every day. Please let us know if you find patterns that work particularly well on Discord!

* [laughter]
* [laughs]
* [sighs]
* [music]
* [gasps]
* [clears throat]
* — or ... for hesitations
* ♪ for song lyrics
* capitalization for emphasis of a word
* MAN/WOMAN: for bias towards speaker

Try the prompt:

```
" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
```

## 🎶 Music
Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.

Try the prompt:

```
♪ In the jungle, the mighty jungle, the lion barks tonight ♪
```

## 👥 Speaker Prompts

You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. Please note that these are not always respected, especially if a conflicting audio history prompt is given.

Try the prompt:
```
WOMAN: I would like an oatmilk latte please.
MAN: Wow, that's expensive!
```

## Details

Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark/tree/main) and model weights. Gradio demo by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC.

"""
77
+
78
# Example prompts passed to gr.Interface. Each row is a one-element list
# because the interface has a single text input.
examples = [
    [prompt]
    for prompt in (
        "Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
        "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
        "♪ In the jungle, the mighty jungle, the lion barks tonight ♪",
        "WOMAN: I would like an oatmilk latte please. MAN: Wow, that's expensive!",
    )
]
84
+
85
# Build the demo: one text box in, one audio player out, wired to predict().
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input Text")],
    outputs=[gr.Audio(label="Generated Speech", type="numpy")],
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the app, requesting a share link as the original call did.
demo.launch(share=True)