haoheliu committed on
Commit
c2362ff
1 Parent(s): ed14ce1
Files changed (2) hide show
  1. app.py +138 -14
  2. share_btn.py +1 -1
app.py CHANGED
@@ -29,11 +29,11 @@ audioldm = build_model()
29
  def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
30
  # print(text, length, guidance_scale)
31
  waveform = text_to_audio(audioldm, text, random_seed, duration=duration, guidance_scale=guidance_scale, n_candidate_gen_per_text=int(n_candidates)) # [bs, 1, samples]
32
- waveform = [(16000, wave[0]) for wave in waveform]
33
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
34
  if(len(waveform) == 1):
35
  waveform = waveform[0]
36
- return waveform # ,gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
37
 
38
  # iface = gr.Interface(fn=text2audio, inputs=[
39
  # gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
@@ -45,7 +45,130 @@ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
45
  # )
46
  # iface.launch(share=True)
47
 
48
- iface = gr.Blocks()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  with iface:
51
  gr.HTML(
@@ -60,7 +183,7 @@ with iface:
60
  "
61
  >
62
  <h1 style="font-weight: 900; margin-bottom: 7px;">
63
- Text-to-Audio Generation with AudioLDM
64
  </h1>
65
  </div>
66
  <p style="margin-bottom: 10px; font-size: 94%">
@@ -72,7 +195,7 @@ with iface:
72
  with gr.Group():
73
  with gr.Box():
74
  ############# Input
75
- textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1)
76
 
77
  with gr.Accordion("Click to modify detailed configurations", open=False):
78
  seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
@@ -80,18 +203,19 @@ with iface:
80
  guidance_scale = gr.Slider(0, 5, value=2.5, step=0.5, label="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)")
81
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
82
  ############# Output
83
- outputs=gr.Audio(label="Output", type="numpy")
84
- # with gr.Group(elem_id="container-advanced-btns"):
85
- # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
86
- # with gr.Group(elem_id="share-btn-container"):
87
- # community_icon = gr.HTML(community_icon_html, visible=False)
88
- # loading_icon = gr.HTML(loading_icon_html, visible=False)
89
- # share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
 
90
  # outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
91
 
92
  btn = gr.Button("Submit").style(full_width=True)
93
- btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs]) # , share_button, community_icon, loading_icon
94
- # advanced_button.click(None, [], [], _js=share_js)
95
  gr.HTML('''
96
  <hr>
97
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
 
29
  def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
30
  # print(text, length, guidance_scale)
31
  waveform = text_to_audio(audioldm, text, random_seed, duration=duration, guidance_scale=guidance_scale, n_candidate_gen_per_text=int(n_candidates)) # [bs, 1, samples]
32
+ waveform = [gr.make_waveform((16000, wave[0])) for wave in waveform]
33
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
34
  if(len(waveform) == 1):
35
  waveform = waveform[0]
36
+ return waveform,gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
37
 
38
  # iface = gr.Interface(fn=text2audio, inputs=[
39
  # gr.Textbox(value="A man is speaking in a huge room", max_lines=1),
 
45
  # )
46
  # iface.launch(share=True)
47
 
48
+ css = """
49
+ .gradio-container {
50
+ font-family: 'IBM Plex Sans', sans-serif;
51
+ }
52
+ .gr-button {
53
+ color: white;
54
+ border-color: black;
55
+ background: black;
56
+ }
57
+ input[type='range'] {
58
+ accent-color: black;
59
+ }
60
+ .dark input[type='range'] {
61
+ accent-color: #dfdfdf;
62
+ }
63
+ .container {
64
+ max-width: 730px;
65
+ margin: auto;
66
+ padding-top: 1.5rem;
67
+ }
68
+ #gallery {
69
+ min-height: 22rem;
70
+ margin-bottom: 15px;
71
+ margin-left: auto;
72
+ margin-right: auto;
73
+ border-bottom-right-radius: .5rem !important;
74
+ border-bottom-left-radius: .5rem !important;
75
+ }
76
+ #gallery>div>.h-full {
77
+ min-height: 20rem;
78
+ }
79
+ .details:hover {
80
+ text-decoration: underline;
81
+ }
82
+ .gr-button {
83
+ white-space: nowrap;
84
+ }
85
+ .gr-button:focus {
86
+ border-color: rgb(147 197 253 / var(--tw-border-opacity));
87
+ outline: none;
88
+ box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
89
+ --tw-border-opacity: 1;
90
+ --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
91
+ --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);
92
+ --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
93
+ --tw-ring-opacity: .5;
94
+ }
95
+ #advanced-btn {
96
+ font-size: .7rem !important;
97
+ line-height: 19px;
98
+ margin-top: 12px;
99
+ margin-bottom: 12px;
100
+ padding: 2px 8px;
101
+ border-radius: 14px !important;
102
+ }
103
+ #advanced-options {
104
+ display: none;
105
+ margin-bottom: 20px;
106
+ }
107
+ .footer {
108
+ margin-bottom: 45px;
109
+ margin-top: 35px;
110
+ text-align: center;
111
+ border-bottom: 1px solid #e5e5e5;
112
+ }
113
+ .footer>p {
114
+ font-size: .8rem;
115
+ display: inline-block;
116
+ padding: 0 10px;
117
+ transform: translateY(10px);
118
+ background: white;
119
+ }
120
+ .dark .footer {
121
+ border-color: #303030;
122
+ }
123
+ .dark .footer>p {
124
+ background: #0b0f19;
125
+ }
126
+ .acknowledgments h4{
127
+ margin: 1.25em 0 .25em 0;
128
+ font-weight: bold;
129
+ font-size: 115%;
130
+ }
131
+ .animate-spin {
132
+ animation: spin 1s linear infinite;
133
+ }
134
+ @keyframes spin {
135
+ from {
136
+ transform: rotate(0deg);
137
+ }
138
+ to {
139
+ transform: rotate(360deg);
140
+ }
141
+ }
142
+ #share-btn-container {
143
+ display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
144
+ margin-top: 10px;
145
+ margin-left: auto;
146
+ }
147
+ #share-btn {
148
+ all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;right:0;
149
+ }
150
+ #share-btn * {
151
+ all: unset;
152
+ }
153
+ #share-btn-container div:nth-child(-n+2){
154
+ width: auto !important;
155
+ min-height: 0px !important;
156
+ }
157
+ #share-btn-container .wrap {
158
+ display: none !important;
159
+ }
160
+
161
+ .gr-form{
162
+ flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
163
+ }
164
+ #prompt-container{
165
+ gap: 0;
166
+ }
167
+ #prompt-text-input, #negative-prompt-text-input{padding: .45rem 0.625rem}
168
+ #component-16{border-top-width: 1px!important;margin-top: 1em}
169
+ .image_duplication{position: absolute; width: 100px; left: 50px}
170
+ """
171
+ iface = gr.Blocks(css=css)
172
 
173
  with iface:
174
  gr.HTML(
 
183
  "
184
  >
185
  <h1 style="font-weight: 900; margin-bottom: 7px;">
186
+ AudioLDM: Text-to-Audio Generation with Latent Diffusion Models
187
  </h1>
188
  </div>
189
  <p style="margin-bottom: 10px; font-size: 94%">
 
195
  with gr.Group():
196
  with gr.Box():
197
  ############# Input
198
+ textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1, label="Input your text here. Please ensure it is descriptive and of moderate length.")
199
 
200
  with gr.Accordion("Click to modify detailed configurations", open=False):
201
  seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
 
203
  guidance_scale = gr.Slider(0, 5, value=2.5, step=0.5, label="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)")
204
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
205
  ############# Output
206
+ # outputs=gr.Audio(label="Output", type="numpy")
207
+ outputs=gr.Video(label="Output")
208
+ with gr.Group(elem_id="container-advanced-btns"):
209
+ # advanced_button = gr.Button("Advanced options", elem_id="advanced-btn")
210
+ with gr.Group(elem_id="share-btn-container"):
211
+ community_icon = gr.HTML(community_icon_html, visible=False)
212
+ loading_icon = gr.HTML(loading_icon_html, visible=False)
213
+ share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
214
  # outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
215
 
216
  btn = gr.Button("Submit").style(full_width=True)
217
+ btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs, community_icon, loading_icon, share_button]) # , share_button, community_icon, loading_icon
218
+ share_button.click(None, [], [], _js=share_js)
219
  gr.HTML('''
220
  <hr>
221
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
share_btn.py CHANGED
@@ -53,7 +53,7 @@ ${htmlImgs.join(`\n`)}
53
  description: descriptionMd,
54
  });
55
  const paramsStr = params.toString();
56
- window.open(`https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/new?${paramsStr}`, '_blank');
57
  shareBtnEl.style.removeProperty('pointer-events');
58
  shareIconEl.style.removeProperty('display');
59
  loadingIconEl.style.display = 'none';
 
53
  description: descriptionMd,
54
  });
55
  const paramsStr = params.toString();
56
+ window.open(`https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation/discussions/new?${paramsStr}`, '_blank');
57
  shareBtnEl.style.removeProperty('pointer-events');
58
  shareIconEl.style.removeProperty('display');
59
  loadingIconEl.style.display = 'none';