mrfakename committed on
Commit
df8f6a6
1 Parent(s): f1cfc49
Files changed (3)
  1. app.py +314 -0
  2. packages.txt +1 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,314 @@
+ # Thank you to the authors of seewav for dedicating it into the public domain.
+ # This program is also dedicated into the public domain.
+ # You may use it, at your choice, under the Unlicense, CC0, or WTFPL license.
+ # Enjoy!
+
+ # Mostly from: https://github.com/adefossez/seewav
+ # Original author: adefossez
+
+
+ import math
+ import tempfile
+ from pathlib import Path
+ import subprocess
+ import cairo
+ import numpy as np
+ import gradio as gr
+ from pydub import AudioSegment
+
+
+ def read_audio(audio, seek=None, duration=None):
+     """
+     Read the `audio` file, starting at `seek` (or 0) seconds for `duration` (or all) seconds.
+     Returns `float[channels, samples]`.
+     """
+
+     audio_segment = AudioSegment.from_file(audio)
+     channels = audio_segment.channels
+     samplerate = audio_segment.frame_rate
+
+     if seek is not None:
+         seek_ms = int(seek * 1000)
+         audio_segment = audio_segment[seek_ms:]
+
+     if duration is not None:
+         duration_ms = int(duration * 1000)
+         audio_segment = audio_segment[:duration_ms]
+
+     samples = audio_segment.get_array_of_samples()
+     wav = np.array(samples, dtype=np.float32)
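+     # get_array_of_samples() returns the channels interleaved, so de-interleave into [channels, samples].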
+     return wav.reshape(-1, channels).T, samplerate
+
+
+ def sigmoid(x):
+     return 1 / (1 + np.exp(-x))
+
+
+ def envelope(wav, window, stride):
+     """
+     Extract the envelope of the waveform `wav` (float[samples]), using average pooling
+     with `window` samples and the given `stride`.
+     """
+     # pos = np.pad(np.maximum(wav, 0), window // 2)
+     wav = np.pad(wav, window // 2)
+     out = []
+     for off in range(0, len(wav) - window, stride):
+         frame = wav[off : off + window]
+         out.append(np.maximum(frame, 0).mean())
+     out = np.array(out)
+     # Some form of audio compressor based on the sigmoid.
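+     # This squashes the rectified mean into roughly [0, 0.95), so the tallest bars stay inside their band.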
+     out = 1.9 * (sigmoid(2.5 * out) - 0.5)
+     return out
+
+
+ def draw_env(envs, out, fg_colors, bg_color, size):
+     """
+     Internal function, draw a single frame (two frames for stereo) using cairo and save
+     it to the `out` file as png. envs is a list of envelopes over channels, each env
+     is a float[bars] representing the height of the envelope to draw. Each entry will
+     be represented by a bar.
+     """
+     surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, *size)
+     ctx = cairo.Context(surface)
+     ctx.scale(*size)
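+     # After scaling by the surface size, all drawing coordinates below live in a normalized [0, 1] square.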
+
+     ctx.set_source_rgb(*bg_color)
+     ctx.rectangle(0, 0, 1, 1)
+     ctx.fill()
+
+     K = len(envs)  # Number of waves to draw (waves are stacked vertically)
+     T = len(envs[0])  # Number of time steps
+     pad_ratio = 0.1  # spacing ratio between 2 bars
+     width = 1.0 / (T * (1 + 2 * pad_ratio))
+     pad = pad_ratio * width
+     delta = 2 * pad + width
+
+     ctx.set_line_width(width)
+     for step in range(T):
+         for i in range(K):
+             half = 0.5 * envs[i][step]  # (semi-)height of the bar
+             half /= K  # as we stack K waves vertically
+             midrule = (1 + 2 * i) / (2 * K)  # midrule of i-th wave
+             ctx.set_source_rgb(*fg_colors[i])
+             ctx.move_to(pad + step * delta, midrule - half)
+             ctx.line_to(pad + step * delta, midrule)
+             ctx.stroke()
+             ctx.set_source_rgba(*fg_colors[i], 0.8)
+             ctx.move_to(pad + step * delta, midrule)
+             ctx.line_to(pad + step * delta, midrule + 0.9 * half)
+             ctx.stroke()
+
+     surface.write_to_png(out)
+
+
+ def interpole(x1, y1, x2, y2, x):
+     return y1 + (y2 - y1) * (x - x1) / (x2 - x1)
+
+
+ def visualize(
+     progress,
+     audio,
+     tmp,
+     out,
+     seek=None,
+     duration=None,
+     rate=60,
+     bars=50,
+     speed=4,
+     time=0.4,
+     oversample=3,
+     fg_color=(0.2, 0.2, 0.2),
+     fg_color2=(0.5, 0.3, 0.6),
+     bg_color=(1, 1, 1),
+     size=(400, 400),
+     stereo=False,
+ ):
+     """
+     Generate the visualisation for the `audio` file, using a `tmp` folder and saving the final
+     video in `out`.
+     `seek` and `duration` give the extract location, if any.
+     `rate` is the framerate of the output video.
+
+     `bars` is the number of bars in the animation.
+     `speed` is the base speed of transition. Depending on volume, actual speed will vary
+     between 0.5 and 2 times it.
+     `time` is the amount of audio shown at once on a frame.
+     `oversample`: higher values will lead to more frequent changes.
+     `fg_color` is the rgb color to use for the foreground.
+     `fg_color2` is the rgb color to use for the second wav if stereo is set.
+     `bg_color` is the rgb color to use for the background.
+     `size` is the `(width, height)` in pixels to generate.
+     `stereo` is whether to create 2 waves.
+     """
+     try:
+         wav, sr = read_audio(audio, seek=seek, duration=duration)
+     except (IOError, ValueError) as err:
+         raise gr.Error(err)
+     # wavs is a list of wav over channels
+     wavs = []
+     if stereo:
+         assert wav.shape[0] == 2, "stereo requires stereo audio file"
+         wavs.append(wav[0])
+         wavs.append(wav[1])
+     else:
+         wav = wav.mean(0)
+         wavs.append(wav)
+
+     for i, wav in enumerate(wavs):
+         wavs[i] = wav / wav.std()
+
+     window = int(sr * time / bars)
+     stride = int(window / oversample)
+     # envs is a list of env over channels
+     envs = []
+     for wav in wavs:
+         env = envelope(wav, window, stride)
+         env = np.pad(env, (bars // 2, 2 * bars))
+         envs.append(env)
+
+     duration = len(wavs[0]) / sr
+     frames = int(rate * duration)
+     smooth = np.hanning(bars)
+
+     gr.Info("Generating the frames...")
+     for idx in progress(range(frames)):
+         pos = (((idx / rate)) * sr) / stride / bars
+         off = int(pos)
+         loc = pos - off
+         denvs = []
+         for env in envs:
+             env1 = env[off * bars : (off + 1) * bars]
+             env2 = env[(off + 1) * bars : (off + 2) * bars]
+
+             # we want loud parts to be updated faster
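+             # The peak of the upcoming window (in dB) sets a speedup factor clipped to [0.5, 2],
+             # so the cross-fade below reacts faster on loud passages.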
+             maxvol = math.log10(1e-4 + env2.max()) * 10
+             speedup = np.clip(interpole(-6, 0.5, 0, 2, maxvol), 0.5, 2)
+             w = sigmoid(speed * speedup * (loc - 0.5))
+             denv = (1 - w) * env1 + w * env2
+             denv *= smooth
+             denvs.append(denv)
+         draw_env(denvs, tmp / f"{idx:06d}.png", (fg_color, fg_color2), bg_color, size)
+     gr.Info("Encoding the animation video...")
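+     # ffmpeg stitches the numbered PNG frames into an H.264 video at `rate` fps and muxes the audio back in as AAC.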
+     subprocess.run([
+         "ffmpeg", "-y", "-loglevel", "panic", "-r",
+         str(rate), "-f", "image2", "-s", f"{size[0]}x{size[1]}", "-i", "%06d.png", "-i", audio, "-c:a", "aac", "-vcodec", "libx264", "-crf", "10", "-pix_fmt", "yuv420p",
+         out.resolve()
+     ], check=True, cwd=tmp)
+     return out
+
+
+
+ def parse_color(colorstr):
+     """
+     Given a comma-separated rgb color string, returns a 3-tuple of floats.
+     """
+     try:
+         r, g, b = [float(i) for i in colorstr.split(",")]
+         return r, g, b
+     except ValueError:
+         raise gr.Error(
+             "Format for color is 3 floats separated by commas 0.xx,0.xx,0.xx, rgb order"
+         )
+
+
+ def hex_to_rgb(hex_color):
+     hex_color = hex_color.lstrip('#')
+     if len(hex_color) == 3:
+         hex_color = ''.join([c*2 for c in hex_color])
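+     # Cairo expects each color channel as a float in [0, 1], e.g. "#00237E" -> (0.0, ~0.137, ~0.494).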
+     return tuple(int(hex_color[i:i+2], 16) / 255 for i in (0, 2, 4))
+
+
+ def do_viz(
+     inp_aud,
+     inp_bgcolor,
+     inp_color1,
+     inp_nbars,
+     inp_vidw,
+     inp_vidh,
+     progress=gr.Progress(),
+ ):
+     with tempfile.TemporaryDirectory() as tmp, tempfile.NamedTemporaryFile(
+         suffix=".mp4",
+         delete=False
+     ) as out:
+         return visualize(
+             progress.tqdm,
+             inp_aud,
+             Path(tmp),
+             Path(out.name),
+             bars=inp_nbars,
+             fg_color=hex_to_rgb(inp_color1),
+             bg_color=hex_to_rgb(inp_bgcolor),
+             size=(inp_vidw, inp_vidh),
+         )
+
+
+ import gradio as gr
+
+ ABOUT = """
+ # seewav GUI
+
+ > Have an audio clip but need a video (e.g. for X/Twitter)?
+
+ **Convert audio into a nice video!**
+
+ An online graphical user interface for [seewav](https://github.com/adefossez/seewav).
+
+ Enjoy!
+ """
+ with gr.Blocks() as demo:
+     gr.Markdown(ABOUT)
+     with gr.Row():
+         with gr.Column():
+             inp_aud = gr.Audio(type='filepath')
+             with gr.Group():
+                 inp_color1 = gr.ColorPicker(
+                     label="Color",
+                     info="Color of the top waveform",
+                     value="#00237E",
+                     interactive=True,
+                 )
+                 inp_bgcolor = gr.ColorPicker(
+                     label="Background Color",
+                     info="Color of the background",
+                     value="#000000",
+                     interactive=True,
+                 )
+             with gr.Accordion("Advanced Configuration", open=False):
+                 inp_nbars = gr.Slider(
+                     label="Num. Bars",
+                     value=50,
+                     interactive=True,
+                     minimum=2,
+                     maximum=500,
+                 )
+                 inp_vidw = gr.Slider(
+                     label="Video Width",
+                     value=400,
+                     interactive=True,
+                     minimum=100,
+                     maximum=3000,
+                 )
+                 inp_vidh = gr.Slider(
+                     label="Video Height",
+                     value=400,
+                     interactive=True,
+                     minimum=100,
+                     maximum=3000,
+                 )
+             inp_go = gr.Button("Visualize", variant="primary")
+         with gr.Column():
+             out_vid = gr.Video(interactive=False)
+     inp_go.click(
+         do_viz,
+         inputs=[
+             inp_aud,
+             inp_bgcolor,
+             inp_color1,
+             inp_nbars,
+             inp_vidw,
+             inp_vidh,
+         ],
+         outputs=[out_vid],
+     )
+ demo.queue(api_open=False).launch(show_api=False)
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy
+ pycairo
+ tqdm
+ pydub
+ ffmpeg-python
+ opencv-python