MiguelZoo committed on
Commit
44b03a2
1 Parent(s): 420ac86

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +242 -9
README.md CHANGED
@@ -1,9 +1,242 @@
1
- title: Lp Music Caps
2
- emoji: 🎵🎵🎵
3
- colorFrom: purple
4
- colorTo: indigo
5
- sdk: gradio
6
- sdk_version: 3.33.1
7
- app_file: app.py
8
- pinned: false
9
- license: mit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# String identifiers.
# NOTE(review): these three are not referenced in the visible part of this
# file; presumably they are keys for per-clip records -- confirm with callers.
STR_CLIP_ID = "clip_id"
STR_AUDIO_SIGNAL = "audio_signal"
STR_TARGET_VECTOR = "target_vector"


# The two values that `load_audio` accepts for its `ch_format` argument.
STR_CH_FIRST = "channels_first"
STR_CH_LAST = "channels_last"
8
+
9
+ import io
10
+ import os
11
+ import tqdm
12
+ import logging
13
+ import subprocess
14
+ from typing import Tuple
15
+ from pathlib import Path
16
+
17
+ # import librosa
18
+ import numpy as np
19
+ import soundfile as sf
20
+
21
+ import itertools
22
+ from numpy.fft import irfft
23
+
24
def _resample_load_ffmpeg(path: str, sample_rate: int, downmix_to_mono: bool) -> Tuple[np.ndarray, int]:
    """
    Decoding, downmixing, and downsampling by ffmpeg.
    Returns a channel-first audio signal.

    ffmpeg is asked to write a WAV stream to stdout, which is then parsed
    in memory by `soundfile`.

    Args:
        path: audio file to decode.
        sample_rate: target sampling rate. If falsy (``None`` or ``0``) no
            ``-ar`` option is passed to ffmpeg.
        downmix_to_mono: if True, ``-ac 1`` is passed to ffmpeg.
    Returns:
        (audio signal, sample rate). The signal is the transpose of what
        `soundfile.read` returns.
    Raises:
        RuntimeError: if ffmpeg cannot be started or produces no output.
    """

    def _decode_resample_by_ffmpeg(filename, sr):
        """decode, downmix, and resample audio file; return raw WAV bytes"""
        # Build an argument list and run without a shell, so that quotes,
        # spaces, `$`, `;` etc. in the filename can never be interpreted as
        # shell syntax.
        cmd = ['ffmpeg', '-i', str(filename)]
        if downmix_to_mono:
            cmd += ['-ac', '1']  # downmixing option
        if sr:
            cmd += ['-ar', str(sr)]  # downsampling option
        cmd += ['-f', 'wav', '-']  # WAV container written to stdout

        try:
            proc = subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,  # ffmpeg must never wait on our stdin
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except OSError as err:
            # e.g. the ffmpeg executable is missing from PATH
            raise RuntimeError(f'ffmpeg could not be executed: {err}') from err

        if not proc.stdout:
            # Without any bytes the WAV parser below would fail with an
            # unrelated-looking error; surface ffmpeg's own diagnostics.
            detail = proc.stderr.decode(errors='replace').strip()
            raise RuntimeError(
                f'ffmpeg produced no audio for "{filename}" '
                f'(exit code {proc.returncode}): {detail[-500:]}'
            )
        return proc.stdout

    wav_bytes = _decode_resample_by_ffmpeg(path, sr=sample_rate)
    src, sr = sf.read(io.BytesIO(wav_bytes))
    return src.T, sr
47
+
48
+
49
def _resample_load_librosa(path: str, sample_rate: int, downmix_to_mono: bool, **kwargs) -> Tuple[np.ndarray, int]:
    """
    Decoding, downmixing, and downsampling by librosa.

    Args:
        path: audio file to decode.
        sample_rate: forwarded to `librosa.load` as ``sr``.
        downmix_to_mono: forwarded to `librosa.load` as ``mono``.
        **kwargs: any further keyword arguments for `librosa.load`.
    Returns:
        (audio signal, sample rate), exactly as returned by `librosa.load`.
    Raises:
        ImportError: if librosa is not installed.
    """
    # The module-level `import librosa` is commented out in this file, so the
    # name has to be brought into scope here; otherwise every call would fail
    # with a NameError. Importing lazily keeps librosa an optional dependency.
    try:
        import librosa
    except ImportError as err:
        raise ImportError(
            "resample_by='librosa' requires the `librosa` package to be installed"
        ) from err

    src, sr = librosa.load(path, sr=sample_rate, mono=downmix_to_mono, **kwargs)
    return src, sr
56
+
57
+
58
def load_audio(
    path: "str | Path",  # quoted: `str or Path` evaluated to plain `str`
    ch_format: str,
    sample_rate: int = None,
    downmix_to_mono: bool = False,
    resample_by: str = 'ffmpeg',
    **kwargs,
) -> Tuple[np.ndarray, int]:
    """Load an audio file, optionally resampling and downmixing it.

    Defaults to sample_rate=None and downmix_to_mono=False.

    NOTE: `ch_format` is validated but currently NOT applied. The array is
    returned exactly as the selected backend produced it; it is not forced to
    be 2-dim and is not transposed to the requested layout.

    Args:
        path: audio file path
        ch_format: one of 'channels_first' or 'channels_last'
        sample_rate: target sampling rate. if None, use the rate of the audio file
        downmix_to_mono: if True, ask the backend to downmix to a single channel
        resample_by (str): 'librosa' or 'ffmpeg'. it decides backend for audio decoding and resampling.
        **kwargs: keyword args for librosa.load - offset, duration, dtype, res_type.
            Only forwarded when resample_by == 'librosa'.
    Returns:
        (audio, sr) tuple
    Raises:
        ValueError: if `ch_format` is not a known layout, or if the file is
            8000 bytes or smaller.
        NotImplementedError: if `resample_by` is not 'librosa' or 'ffmpeg'.
    """
    if ch_format not in (STR_CH_FIRST, STR_CH_LAST):
        raise ValueError(f'ch_format is wrong here -> {ch_format}')

    # The "too short" check is on the file size in bytes, not on the decoded
    # duration.
    if os.stat(path).st_size <= 8000:
        raise ValueError('Given audio is too short!')

    if resample_by == 'librosa':
        return _resample_load_librosa(path, sample_rate, downmix_to_mono, **kwargs)
    if resample_by == 'ffmpeg':
        return _resample_load_ffmpeg(path, sample_rate, downmix_to_mono)
    raise NotImplementedError(f'resample_by: "{resample_by}" is not supported yet')
95
+
96
+ # if src.ndim == 1:
97
+ # src = np.expand_dims(src, axis=0)
98
+ # # now always 2d and channels_first
99
+
100
+ # if ch_format == STR_CH_FIRST:
101
+ # return src, sr
102
+ # else:
103
+ # return src.T, sr
104
+
105
def ms(x):
    """Mean square of signal `x`.

    :param x: Dynamic quantity.
    :returns: Mean of the squared magnitude of `x`.
    """
    squared_magnitude = np.abs(x) ** 2.0
    return squared_magnitude.mean()
111
+
112
def normalize(y, x=None):
    """Scale `y` so that its mean-square power matches a reference.

    Without `x`, the reference power is 1.0 (the mean power of a Gaussian
    with :math:`\\mu=0` and :math:`\\sigma=1`). With `x`, the reference is
    the mean-square power of `x`.

    :param y: Signal to rescale.
    :param x: Optional reference signal.
    :returns: `y` multiplied by a single gain factor.
    """
    target_power = 1.0 if x is None else ms(x)
    gain = np.sqrt(target_power / ms(y))
    return y * gain
122
+
123
def noise(N, color='white', state=None):
    """Noise generator.

    Looks up the generator registered for `color` in `_noise_generators` and
    calls it with `N` and `state`.

    :param N: Amount of samples.
    :param color: Color of noise.
    :param state: State of PRNG.
    :type state: :class:`np.random.RandomState`
    :returns: Whatever the selected generator returns for `N` and `state`.
    :raises ValueError: If `color` is not a registered noise color.
    """
    # Only the lookup is guarded: a KeyError raised *inside* a generator is a
    # different problem and must not be reported as an unknown color.
    try:
        generator = _noise_generators[color]
    except KeyError:
        raise ValueError("Incorrect color.") from None
    return generator(N, state)
134
+
135
def white(N, state=None):
    """
    White noise.

    :param N: Amount of samples.
    :param state: State of PRNG. A fresh one is created when omitted.
    :type state: :class:`np.random.RandomState`

    White noise has a constant power density. Its narrowband spectrum is therefore flat.
    The power in white noise will increase by a factor of two for each octave band,
    and therefore increases with 3 dB per octave.
    """
    if state is None:
        state = np.random.RandomState()
    samples = state.randn(N)
    return samples
147
+
148
def pink(N, state=None):
    """
    Pink noise.

    :param N: Amount of samples.
    :param state: State of PRNG. A fresh one is created when omitted.
    :type state: :class:`np.random.RandomState`

    Pink noise has equal power in bands that are proportionally wide.
    Power density decreases with 3 dB per octave.

    A random complex spectrum is divided by ``sqrt(k + 1)`` (k = bin index),
    transformed back with `irfft`, and normalized.
    """
    rng = np.random.RandomState() if state is None else state
    is_odd = N % 2
    n_bins = N // 2 + 1 + is_odd
    # Real part is drawn first, imaginary part second.
    real_part = rng.randn(n_bins)
    imag_part = rng.randn(n_bins)
    spectrum = real_part + 1j * imag_part
    shaping = np.sqrt(np.arange(n_bins) + 1.)  # +1 to avoid divide by zero
    samples = irfft(spectrum / shaping).real
    if is_odd:
        # One extra bin was requested for odd N; drop the surplus sample.
        samples = samples[:-1]
    return normalize(samples)
165
+
166
def blue(N, state=None):
    """
    Blue noise.

    :param N: Amount of samples.
    :param state: State of PRNG. A fresh one is created when omitted.
    :type state: :class:`np.random.RandomState`

    Power increases with 6 dB per octave.
    Power density increases with 3 dB per octave.

    A random complex spectrum is multiplied by ``sqrt(k)`` (k = bin index),
    transformed back with `irfft`, and normalized.
    """
    rng = np.random.RandomState() if state is None else state
    is_odd = N % 2
    n_bins = N // 2 + 1 + is_odd
    # Real part is drawn first, imaginary part second.
    real_part = rng.randn(n_bins)
    imag_part = rng.randn(n_bins)
    spectrum = real_part + 1j * imag_part
    shaping = np.sqrt(np.arange(n_bins))  # Filter
    samples = irfft(spectrum * shaping).real
    if is_odd:
        # One extra bin was requested for odd N; drop the surplus sample.
        samples = samples[:-1]
    return normalize(samples)
183
+
184
def brown(N, state=None):
    """
    Brown noise.

    :param N: Amount of samples.
    :param state: State of PRNG. A fresh one is created when omitted.
    :type state: :class:`np.random.RandomState`

    Power decreases with 3 dB per octave.
    Power density decreases with 6 dB per octave.

    A random complex spectrum is divided by ``k + 1`` (k = bin index),
    transformed back with `irfft`, and normalized.
    """
    rng = np.random.RandomState() if state is None else state
    is_odd = N % 2
    n_bins = N // 2 + 1 + is_odd
    # Real part is drawn first, imaginary part second.
    real_part = rng.randn(n_bins)
    imag_part = rng.randn(n_bins)
    spectrum = real_part + 1j * imag_part
    shaping = np.arange(n_bins) + 1  # Filter
    samples = irfft(spectrum / shaping).real
    if is_odd:
        # One extra bin was requested for odd N; drop the surplus sample.
        samples = samples[:-1]
    return normalize(samples)
201
+
202
def violet(N, state=None):
    """
    Violet noise.

    :param N: Amount of samples.
    :param state: State of PRNG. A fresh one is created when omitted.
    :type state: :class:`np.random.RandomState`

    Power increases with +9 dB per octave.
    Power density increases with +6 dB per octave.

    A random complex spectrum is multiplied by ``k`` (k = bin index),
    transformed back with `irfft`, and normalized.
    """
    rng = np.random.RandomState() if state is None else state
    is_odd = N % 2
    n_bins = N // 2 + 1 + is_odd
    # Real part is drawn first, imaginary part second.
    real_part = rng.randn(n_bins)
    imag_part = rng.randn(n_bins)
    spectrum = real_part + 1j * imag_part
    shaping = np.arange(n_bins)  # Filter
    samples = irfft(spectrum * shaping).real
    if is_odd:
        # One extra bin was requested for odd N; drop the surplus sample.
        samples = samples[:-1]
    return normalize(samples)
219
+
220
# Registry mapping a color name to the function that generates that noise;
# `noise` looks the requested color up here.
_noise_generators = dict(
    white=white,
    pink=pink,
    blue=blue,
    brown=brown,
    violet=violet,
)
227
+
228
def noise_generator(N=44100, color='white', state=None):
    """Endless per-sample noise generator.

    :param N: Amount of unique samples to generate.
    :param color: Color of noise.
    :param state: State of PRNG, passed through to `noise`.

    Generate `N` amount of unique samples once, then cycle over these
    samples, yielding them one at a time.
    """
    unique_samples = noise(N, color, state)
    endless = itertools.cycle(unique_samples)
    for value in endless:
        yield value
237
+
238
def heaviside(N):
    """Heaviside step function, computed from the sign of `N`.

    Returns the value 0 for `N < 0`, 1 for `N > 0`, and 1/2 for `N = 0`.
    """
    shifted_sign = np.sign(N) + 1  # 0, 1 or 2
    return shifted_sign * 0.5