renator committed on
Commit
d257d3d
·
1 Parent(s): 6857109

update the environment

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -0
  2. audio.py +1799 -0
Dockerfile CHANGED
@@ -25,6 +25,7 @@ COPY . /app/
25
 
26
  # Replace the librosa notation.py with notation.py from your project
27
  COPY notation.py /usr/local/lib/python3.10/site-packages/librosa/core/notation.py
 
28
 
29
  # RUN cd /tmp && mkdir cache1
30
 
 
25
 
26
  # Replace the librosa notation.py with notation.py from your project
27
  COPY notation.py /usr/local/lib/python3.10/site-packages/librosa/core/notation.py
28
+ COPY audio.py /usr/local/lib/python3.10/site-packages/librosa/core/audio.py
29
 
30
  # RUN cd /tmp && mkdir cache1
31
 
audio.py ADDED
@@ -0,0 +1,1799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """Core IO, DSP and utility functions."""
4
+ from __future__ import annotations
5
+
6
+ import os
7
+ import pathlib
8
+ import warnings
9
+
10
+ import soundfile as sf
11
+ import audioread
12
+ import numpy as np
13
+ import scipy.signal
14
+ import soxr
15
+ import lazy_loader as lazy
16
+
17
+ from numba import jit, stencil, guvectorize
18
+ from .fft import get_fftlib
19
+ from .convert import frames_to_samples, time_to_samples
20
+ from .._cache import cache
21
+ from .. import util
22
+ from ..util.exceptions import ParameterError
23
+ from ..util.decorators import deprecated
24
+ from ..util.deprecation import Deprecated, rename_kw
25
+ from .._typing import _FloatLike_co, _IntLike_co, _SequenceLike
26
+
27
+ from typing import Any, BinaryIO, Callable, Generator, Optional, Tuple, Union, List
28
+ from numpy.typing import DTypeLike, ArrayLike
29
+
30
# Lazy-load optional dependencies.
# `samplerate` and `resampy` are bound through `lazy_loader` rather than a
# plain `import`; they are only referenced by specific `res_type` branches
# of `resample` below.
samplerate = lazy.load("samplerate")
resampy = lazy.load("resampy")

# Names exported by this module (the public API).
__all__ = [
    "load",
    "stream",
    "to_mono",
    "resample",
    "get_duration",
    "get_samplerate",
    "autocorrelate",
    "lpc",
    "zero_crossings",
    "clicks",
    "tone",
    "chirp",
    "mu_compress",
    "mu_expand",
]
50
+
51
+
52
+ # -- CORE ROUTINES --#
53
+ # Load should never be cached, since we cannot verify that the contents of
54
+ # 'path' are unchanged across calls.
55
def load(
    path: Union[
        str, int, os.PathLike[Any], sf.SoundFile, audioread.AudioFile, BinaryIO
    ],
    *,
    sr: Optional[float] = 22050,
    mono: bool = True,
    offset: float = 0.0,
    duration: Optional[float] = None,
    dtype: DTypeLike = np.float32,
    res_type: str = "soxr_hq",
) -> Tuple[np.ndarray, float]:
    """Read an audio file into a floating point time series.

    Unless ``sr=None`` is given, the decoded signal is resampled to ``sr``
    (default ``sr=22050``).  Use ``sr=None`` to keep the file's native
    sampling rate.

    Parameters
    ----------
    path : string, int, pathlib.Path, soundfile.SoundFile, audioread object, or file-like object
        The input to decode.

        Any codec supported by `soundfile` or `audioread` will work.

        String paths and path-like objects (e.g. `pathlib.Path`) are
        supported.  If the codec is supported by `soundfile`, ``path`` may
        also be an open file descriptor (int) or an existing
        `soundfile.SoundFile` object.

        A pre-constructed audioread decoder is also accepted (see the last
        example).  This lets you force a specific decoder instead of
        relying on audioread to choose one.

        .. warning:: audioread support is deprecated as of version 0.10.0.
            audioread support will be removed in version 1.0.

    sr : number > 0 [scalar]
        Target sampling rate.

        ``None`` uses the native sampling rate.

    mono : bool
        Convert the signal to mono.

    offset : float
        Start reading after this time (in seconds).

    duration : float
        Only load up to this much audio (in seconds).

    dtype : numeric type
        Data type of ``y``.

    res_type : str
        Resample type (see note).

        .. note::
            By default, this uses `soxr`'s high-quality mode ('HQ').

            For alternative resampling modes, see `resample`.

        .. note::
           `audioread` may truncate the precision of the audio data to 16 bits.

           See :ref:`ioformats` for alternate loading methods.

    Returns
    -------
    y : np.ndarray [shape=(n,) or (..., n)]
        Audio time series.  Multi-channel is supported.
    sr : number > 0 [scalar]
        Sampling rate of ``y``.

    Examples
    --------
    >>> # Load an ogg vorbis file
    >>> filename = librosa.ex('trumpet')
    >>> y, sr = librosa.load(filename)
    >>> y
    array([-1.407e-03, -4.461e-04, ..., -3.042e-05,  1.277e-05],
          dtype=float32)
    >>> sr
    22050

    >>> # Load a file and resample to 11 KHz
    >>> filename = librosa.ex('trumpet')
    >>> y, sr = librosa.load(filename, sr=11025)
    >>> y
    array([-8.746e-04, -3.363e-04, ..., -1.301e-05,  0.000e+00],
          dtype=float32)
    >>> sr
    11025

    >>> # Load 5 seconds of a file, starting 15 seconds in
    >>> filename = librosa.ex('brahms')
    >>> y, sr = librosa.load(filename, offset=15.0, duration=5.0)
    >>> y
    array([0.146, 0.144, ..., 0.128, 0.015], dtype=float32)
    >>> sr
    22050

    >>> # Load using an already open SoundFile object
    >>> import soundfile
    >>> sfo = soundfile.SoundFile(librosa.ex('brahms'))
    >>> y, sr = librosa.load(sfo)

    >>> # Load using an already open audioread object
    >>> import audioread.ffdec  # Use ffmpeg decoder
    >>> aro = audioread.ffdec.FFmpegAudioFile(librosa.ex('brahms'))
    >>> y, sr = librosa.load(aro)
    """
    reader_types = tuple(audioread.available_backends())

    if isinstance(path, reader_types):
        # An audioread decoder object was supplied: it can only be read
        # through the audioread loader.
        y, sr_native = __audioread_load(path, offset, duration, dtype)
    else:
        # Prefer soundfile; audioread is only a fallback.
        try:
            y, sr_native = __soundfile_load(path, offset, duration, dtype)

        except sf.SoundFileRuntimeError as exc:
            # The fallback is only attempted for string / path inputs;
            # anything else propagates the soundfile failure.
            if not isinstance(path, (str, pathlib.PurePath)):
                raise exc

            warnings.warn(
                "PySoundFile failed. Trying audioread instead.", stacklevel=2
            )
            y, sr_native = __audioread_load(path, offset, duration, dtype)

    # Down-mix before resampling
    if mono:
        y = to_mono(y)

    # No target rate requested: report the native rate untouched
    if sr is None:
        return y, sr_native

    y = resample(y, orig_sr=sr_native, target_sr=sr, res_type=res_type)
    return y, sr
199
+
200
+
201
def __soundfile_load(path, offset, duration, dtype):
    """Decode an audio buffer through soundfile.

    ``path`` is either an existing `soundfile.SoundFile` (used as-is) or
    anything accepted by the `soundfile.SoundFile` constructor.  In both
    cases the object is used as a context manager for the read.

    Returns the decoded samples (transposed into librosa's layout) and the
    native sampling rate of the file.
    """
    # Reuse a caller-supplied SoundFile, otherwise open one ourselves
    handle = path if isinstance(path, sf.SoundFile) else sf.SoundFile(path)

    with handle as snd:
        sr_native = snd.samplerate

        # A falsy offset (0 / 0.0) means "start at the current position"
        if offset:
            snd.seek(int(offset * sr_native))

        # frames=-1 asks soundfile to read through to the end of the file
        n_frames = -1 if duration is None else int(duration * sr_native)

        # Transpose the samples to match librosa's layout
        y = snd.read(frames=n_frames, dtype=dtype, always_2d=False).T

    return y, sr_native
225
+
226
+
227
@deprecated(version="0.10.0", version_removed="1.0")
def __audioread_load(path, offset, duration, dtype: DTypeLike):
    """Decode an audio buffer through audioread.

    The decoder yields one buffer at a time; buffers overlapping the
    requested ``[offset, offset + duration)`` window are trimmed, collected,
    and finally concatenated.

    Returns the decoded samples and the native sampling rate.
    """
    if isinstance(path, tuple(audioread.available_backends())):
        # Already an audioread decoder object: use it directly
        decoder = path
    else:
        # Otherwise let audioread open the input
        decoder = audioread.audio_open(path)

    pieces = []

    with decoder as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        # Window boundaries counted in interleaved samples
        # (sample frames multiplied by the channel count)
        s_start = int(np.round(sr_native * offset)) * n_channels

        if duration is not None:
            s_end = s_start + (int(np.round(sr_native * duration)) * n_channels)
        else:
            s_end = np.inf

        consumed = 0

        for raw in input_file:
            chunk = util.buf_to_float(raw, dtype=dtype)
            chunk_begin = consumed
            consumed = consumed + len(chunk)

            if consumed < s_start:
                # The window starts after this chunk: keep reading
                continue

            if s_end < chunk_begin:
                # The window ended before this chunk: stop reading
                break

            if s_end < consumed:
                # The window ends inside this chunk: drop the tail
                chunk = chunk[: int(s_end - chunk_begin)]  # pragma: no cover

            if chunk_begin <= s_start <= consumed:
                # The window starts inside this chunk: drop the head
                chunk = chunk[(s_start - chunk_begin) :]

            pieces.append(chunk)

    if not pieces:
        # Nothing fell inside the requested window
        return np.empty(0, dtype=dtype), sr_native

    y = np.concatenate(pieces)
    if n_channels > 1:
        # De-interleave into (channels, samples)
        y = y.reshape((-1, n_channels)).T

    return y, sr_native
289
+
290
+
291
def stream(
    path: Union[str, int, sf.SoundFile, BinaryIO],
    *,
    block_length: int,
    frame_length: int,
    hop_length: int,
    mono: bool = True,
    offset: float = 0.0,
    duration: Optional[float] = None,
    fill_value: Optional[float] = None,
    dtype: DTypeLike = np.float32,
) -> Generator[np.ndarray, None, None]:
    """Stream audio in fixed-length buffers.

    This is primarily useful for processing large files that won't
    fit entirely in memory at once.

    Instead of loading the entire audio signal into memory (as
    in `load`), this function produces *blocks* of audio spanning
    a fixed number of frames at a specified frame length and hop
    length.

    While this function strives for similar behavior to `load`,
    there are a few caveats that users should be aware of:

        1. This function does not return audio buffers directly.
           It returns a generator, which you can iterate over
           to produce blocks of audio.  A *block*, in this context,
           refers to a buffer of audio which spans a given number of
           (potentially overlapping) frames.
        2. Automatic sample-rate conversion is not supported.
           Audio will be streamed in its native sample rate,
           so no default values are provided for ``frame_length``
           and ``hop_length``.  It is recommended that you first
           get the sampling rate for the file in question, using
           `get_samplerate`, and set these parameters accordingly.
        3. Many analyses require access to the entire signal
           to behave correctly, such as `resample`, `cqt`, or
           `beat_track`, so these methods will not be appropriate
           for streamed data.
        4. The ``block_length`` parameter specifies how many frames
           of audio will be produced per block.  Larger values will
           consume more memory, but will be more efficient to process
           down-stream.  The best value will ultimately depend on your
           application and other system constraints.
        5. By default, most librosa analyses (e.g., short-time Fourier
           transform) assume centered frames, which requires padding the
           signal at the beginning and end.  This will not work correctly
           when the signal is carved into blocks, because it would introduce
           padding in the middle of the signal.  To disable this feature,
           use ``center=False`` in all frame-based analyses.

    See the examples below for proper usage of this function.

    Parameters
    ----------
    path : string, int, sf.SoundFile, or file-like object
        path to the input file to stream.

        Any codec supported by `soundfile` is permitted here.

        An existing `soundfile.SoundFile` object may also be provided.
        Such an object is left open when streaming finishes; a file that
        this function opens itself is closed once the generator is
        exhausted or closed.

    block_length : int > 0
        The number of frames to include in each block.

        Note that at the end of the file, there may not be enough
        data to fill an entire block, resulting in a shorter block
        by default.  To pad the signal out so that blocks are always
        full length, set ``fill_value`` (see below).

    frame_length : int > 0
        The number of samples per frame.

    hop_length : int > 0
        The number of samples to advance between frames.

        Note that when ``hop_length < frame_length``, neighboring frames
        will overlap.  Similarly, the last frame of one *block* will overlap
        with the first frame of the next *block*.

    mono : bool
        Convert the signal to mono during streaming

    offset : float
        Start reading after this time (in seconds)

    duration : float
        Only load up to this much audio (in seconds).

        A falsy value (``None`` or ``0``) streams through to the end of
        the file.

    fill_value : float [optional]
        If padding the signal to produce constant-length blocks,
        this value will be used at the end of the signal.

        In most cases, ``fill_value=0`` (silence) is expected, but
        you may specify any value here.

    dtype : numeric type
        data type of audio buffers to be produced

    Yields
    ------
    y : np.ndarray
        An audio buffer of (at most)
        ``(block_length-1) * hop_length + frame_length`` samples.

    Raises
    ------
    ParameterError
        If ``block_length``, ``frame_length``, or ``hop_length`` is not a
        positive integer.  Because this is a generator, the error is raised
        when iteration starts.

    See Also
    --------
    load
    get_samplerate
    soundfile.blocks

    Examples
    --------
    Apply a short-term Fourier transform to blocks of 256 frames
    at a time.  Note that streaming operation requires left-aligned
    frames, so we must set ``center=False`` to avoid padding artifacts.

    >>> filename = librosa.ex('brahms')
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=4096,
    ...                         hop_length=1024)
    >>> for y_block in stream:
    ...     D_block = librosa.stft(y_block, center=False)

    Or compute a mel spectrogram over a stream, using a shorter frame
    and non-overlapping windows

    >>> filename = librosa.ex('brahms')
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=2048,
    ...                         hop_length=2048)
    >>> for y_block in stream:
    ...     m_block = librosa.feature.melspectrogram(y=y_block, sr=sr,
    ...                                              n_fft=2048,
    ...                                              hop_length=2048,
    ...                                              center=False)

    """

    if not util.is_positive_int(block_length):
        raise ParameterError(f"block_length={block_length} must be a positive integer")
    if not util.is_positive_int(frame_length):
        raise ParameterError(f"frame_length={frame_length} must be a positive integer")
    if not util.is_positive_int(hop_length):
        raise ParameterError(f"hop_length={hop_length} must be a positive integer")

    # Track ownership: only a file opened here may be closed here.
    if isinstance(path, sf.SoundFile):
        sfo = path
        owns_file = False
    else:
        sfo = sf.SoundFile(path)
        owns_file = True

    try:
        # Get the sample rate from the file info
        sr = sfo.samplerate

        # Construct the stream
        if offset:
            start = int(offset * sr)
        else:
            start = 0

        # NOTE: truthiness is intentional for backward compatibility;
        # duration=0 behaves like duration=None (frames=-1).
        if duration:
            frames = int(duration * sr)
        else:
            frames = -1

        # Seek the soundfile object to the starting frame
        sfo.seek(start)

        blocks = sfo.blocks(
            blocksize=frame_length + (block_length - 1) * hop_length,
            overlap=frame_length - hop_length,
            frames=frames,
            dtype=dtype,
            always_2d=False,
            fill_value=fill_value,
        )

        for block in blocks:
            if mono:
                yield to_mono(block.T)
            else:
                yield block.T
    finally:
        # Runs on exhaustion, on an error, and when the generator is
        # closed or garbage-collected early, so the handle is not leaked.
        if owns_file:
            sfo.close()
478
+
479
+
480
@cache(level=20)
def to_mono(y: np.ndarray) -> np.ndarray:
    """Down-mix an audio signal to mono by averaging across channels.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        Audio time series.  Multi-channel is supported.

    Returns
    -------
    y_mono : np.ndarray [shape=(n,)]
        ``y`` as a monophonic time-series.  A one-dimensional input is
        returned unchanged.

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    >>> y, sr = librosa.load(librosa.ex('trumpet', hq=True), mono=False)
    >>> y.shape
    (2, 117601)
    >>> y_mono = librosa.to_mono(y)
    >>> y_mono.shape
    (117601,)

    """
    # Multi-channel buffers are acceptable input here
    util.valid_audio(y, mono=False)

    if y.ndim <= 1:
        return y

    # Average over every axis except the trailing (sample) axis
    channel_axes = tuple(range(y.ndim - 1))
    return np.mean(y, axis=channel_axes)
516
+
517
+
518
@cache(level=20)
def resample(
    y: np.ndarray,
    *,
    orig_sr: float,
    target_sr: float,
    res_type: str = "soxr_hq",
    fix: bool = True,
    scale: bool = False,
    axis: int = -1,
    **kwargs: Any,
) -> np.ndarray:
    """Convert a time series from sampling rate ``orig_sr`` to ``target_sr``.

    The default method (`soxr_hq`) is a high-quality band-limited sinc
    interpolator.  The other ``res_type`` values listed below trade speed
    against quality in different ways.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n, ...)]
        Audio time series, with `n` samples along the specified axis.

    orig_sr : number > 0 [scalar]
        Original sampling rate of ``y``.

    target_sr : number > 0 [scalar]
        Target sampling rate.

    res_type : str (default: `soxr_hq`)
        Resample type.

        'soxr_vhq', 'soxr_hq', 'soxr_mq' or 'soxr_lq'
            `soxr` Very high-, High-, Medium-, Low-quality FFT-based bandlimited interpolation.
            ``'soxr_hq'`` is the default setting of `soxr`.
        'soxr_qq'
            `soxr` Quick cubic interpolation (very fast, but not bandlimited)
        'kaiser_best'
            `resampy` high-quality mode
        'kaiser_fast'
            `resampy` faster method
        'fft' or 'scipy'
            `scipy.signal.resample` Fourier method.
        'polyphase'
            `scipy.signal.resample_poly` polyphase filtering. (fast)
        'linear'
            `samplerate` linear interpolation. (very fast, but not bandlimited)
        'zero_order_hold'
            `samplerate` repeat the last value between samples. (very fast, but not bandlimited)
        'sinc_best', 'sinc_medium' or 'sinc_fastest'
            `samplerate` high-, medium-, and low-quality bandlimited sinc interpolation.

        Any other value is passed to `resampy` as the ``filter`` name.

        .. note::
            Not all options yield a bandlimited interpolator. If you use `soxr_qq`, `polyphase`,
            `linear`, or `zero_order_hold`, you need to be aware of possible aliasing effects.

        .. note::
            `samplerate` and `resampy` are not installed with `librosa`.
            To use `samplerate` or `resampy`, they should be installed manually::

                $ pip install samplerate
                $ pip install resampy

        .. note::
            When using ``res_type='polyphase'``, only integer sampling rates are
            supported.

    fix : bool
        Adjust the length of the resampled signal to be of size exactly
        ``ceil(target_sr * len(y) / orig_sr)``.

    scale : bool
        Scale the resampled signal so that ``y`` and ``y_hat`` have approximately
        equal total energy.

    axis : int
        The target axis along which to resample.  Defaults to the trailing axis.

    **kwargs : additional keyword arguments
        If ``fix==True``, additional keyword arguments to pass to
        `librosa.util.fix_length`.

    Returns
    -------
    y_hat : np.ndarray [shape=(..., n * target_sr / orig_sr, ...)]
        ``y`` resampled from ``orig_sr`` to ``target_sr`` along the target axis.
        When the two rates are equal, ``y`` itself is returned.

    Raises
    ------
    ParameterError
        If ``res_type='polyphase'`` and ``orig_sr`` or ``target_sr`` are not both
        integer-valued.

    See Also
    --------
    librosa.util.fix_length
    scipy.signal.resample
    resampy
    samplerate.converters.resample
    soxr.resample

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Downsample from 22 KHz to 8 KHz

    >>> y, sr = librosa.load(librosa.ex('trumpet'), sr=22050)
    >>> y_8k = librosa.resample(y, orig_sr=sr, target_sr=8000)
    >>> y.shape, y_8k.shape
    ((117601,), (42668,))
    """
    # Reject malformed buffers up front; multi-channel is fine
    util.valid_audio(y, mono=False)

    # Identical rates: nothing to do
    if orig_sr == target_sr:
        return y

    ratio = float(target_sr) / orig_sr

    # Expected output length along the resampling axis
    n_samples = int(np.ceil(y.shape[axis] * ratio))

    # Converter names handled by the `samplerate` package
    samplerate_modes = (
        "linear",
        "zero_order_hold",
        "sinc_best",
        "sinc_fastest",
        "sinc_medium",
    )

    if res_type in ("scipy", "fft"):
        y_hat = scipy.signal.resample(y, n_samples, axis=axis)

    elif res_type == "polyphase":
        both_integral = int(orig_sr) == orig_sr and int(target_sr) == target_sr
        if not both_integral:
            raise ParameterError(
                "polyphase resampling is only supported for integer-valued sampling rates."
            )

        # Reduce the two rates by their greatest common divisor to obtain
        # the up- and down-sampling factors for the polyphase filter
        orig_sr = int(orig_sr)
        target_sr = int(target_sr)
        divisor = np.gcd(orig_sr, target_sr)
        up_factor = target_sr // divisor
        down_factor = orig_sr // divisor
        y_hat = scipy.signal.resample_poly(y, up_factor, down_factor, axis=axis)

    elif res_type in samplerate_modes:
        # samplerate does not generally support ndim>2, so apply it
        # one slice at a time along the target axis
        y_hat = np.apply_along_axis(
            samplerate.resample, axis=axis, arr=y, ratio=ratio, converter_type=res_type
        )

    elif res_type.startswith("soxr"):
        # soxr does not generally support ndim>2, so apply it
        # one slice at a time along the target axis
        y_hat = np.apply_along_axis(
            soxr.resample,
            axis=axis,
            arr=y,
            in_rate=orig_sr,
            out_rate=target_sr,
            quality=res_type,
        )

    else:
        # Everything else is treated as a resampy filter name
        y_hat = resampy.resample(y, orig_sr, target_sr, filter=res_type, axis=axis)

    if fix:
        # Force the output to exactly n_samples along the target axis
        y_hat = util.fix_length(y_hat, size=n_samples, axis=axis, **kwargs)

    if scale:
        y_hat /= np.sqrt(ratio)

    # Hand back the same dtype as the input
    return np.asarray(y_hat, dtype=y.dtype)
694
+
695
+
696
def get_duration(
    *,
    y: Optional[np.ndarray] = None,
    sr: float = 22050,
    S: Optional[np.ndarray] = None,
    n_fft: int = 2048,
    hop_length: int = 512,
    center: bool = True,
    path: Optional[Union[str, os.PathLike[Any]]] = None,
    filename: Optional[Union[str, os.PathLike[Any], Deprecated]] = Deprecated(),
) -> float:
    """Compute the duration (in seconds) of an audio time series,
    feature matrix, or audio file.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)] or None
        Audio time series.  Multi-channel is supported.

    sr : number > 0 [scalar]
        Audio sampling rate of ``y``.

    S : np.ndarray [shape=(..., d, t)] or None
        STFT matrix, or any STFT-derived matrix (e.g., chromagram
        or mel spectrogram).
        Durations calculated from spectrogram inputs are only accurate
        up to the frame resolution.  If high precision is required,
        it is better to use the audio time series directly.

    n_fft : int > 0 [scalar]
        FFT window size for ``S``.

    hop_length : int > 0 [scalar]
        Number of audio samples between columns of ``S``.

    center : boolean
        - If ``True``, ``S[:, t]`` is centered at ``y[t * hop_length]``
        - If ``False``, then ``S[:, t]`` begins at ``y[t * hop_length]``

    path : str, path, or file-like
        If provided, all other parameters are ignored, and the
        duration is calculated directly from the audio file.
        Note that this avoids loading the contents into memory,
        and is therefore useful for querying the duration of
        long files.

        As in ``load``, this can also be an integer or open file-handle
        that can be processed by ``soundfile``.

    filename : Deprecated
        Equivalent to ``path``

        .. warning:: This parameter has been renamed to ``path`` in 0.10.
            Support for ``filename=`` will be removed in 1.0.

    Returns
    -------
    d : float >= 0
        Duration (in seconds) of the input time series or spectrogram.

    Raises
    ------
    ParameterError
        if none of ``y``, ``S``, or ``path`` are provided.

    Notes
    -----
    `get_duration` can be applied to a file (``path``), a spectrogram (``S``),
    or audio buffer (``y, sr``).  Only one of these three options should be
    provided.  When several are given, ``path`` is consulted first, then
    ``y``, and ``S`` is only used when ``y`` is ``None``.

    Examples
    --------
    >>> # Load an example audio file
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.get_duration(y=y, sr=sr)
    5.333378684807256

    >>> # Or directly from an audio file
    >>> librosa.get_duration(path=librosa.ex('trumpet'))
    5.333378684807256

    >>> # Or compute duration from an STFT matrix
    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> S = librosa.stft(y)
    >>> librosa.get_duration(S=S, sr=sr)
    5.317369614512471

    >>> # Or a non-centered STFT matrix
    >>> S_left = librosa.stft(y, center=False)
    >>> librosa.get_duration(S=S_left, sr=sr)
    5.224489795918367
    """
    # Fold the deprecated `filename=` keyword into `path`
    path = rename_kw(
        old_name="filename",
        old_value=filename,
        new_name="path",
        new_value=path,
        version_deprecated="0.10.0",
        version_removed="1.0",
    )

    # Case 1: query the file directly, without decoding its contents
    if path is not None:
        try:
            return sf.info(path).duration  # type: ignore
        except sf.SoundFileRuntimeError:
            fallback_message = (
                "PySoundFile failed. Trying audioread instead."
                "\n\tAudioread support is deprecated in librosa 0.10.0"
                " and will be removed in version 1.0."
            )
            warnings.warn(
                fallback_message,
                stacklevel=2,
                category=FutureWarning,
            )
            with audioread.audio_open(path) as fdesc:
                return fdesc.duration  # type: ignore

    # Case 2: an audio buffer, whose trailing axis holds the samples
    if y is not None:
        return float(y.shape[-1]) / sr

    # Case 3: a spectrogram, measured at frame resolution
    if S is None:
        raise ParameterError("At least one of (y, sr), S, or path must be provided")

    n_frames = S.shape[-1]
    n_samples = n_fft + hop_length * (n_frames - 1)

    if center:
        # Centered frames lose half a window from each end of S
        n_samples = n_samples - 2 * int(n_fft // 2)

    return float(n_samples) / sr
830
+
831
+
832
def get_samplerate(path: Union[str, int, sf.SoundFile, BinaryIO]) -> float:
    """Look up the sampling rate of an audio file.

    Parameters
    ----------
    path : string, int, soundfile.SoundFile, or file-like
        The path to the file to be loaded.
        As in ``load``, this can also be an integer or open file-handle
        that can be processed by `soundfile`.
        An existing `soundfile.SoundFile` object can also be supplied.

    Returns
    -------
    sr : number > 0
        The sampling rate of the given audio file.

    Examples
    --------
    Get the sampling rate for the included audio file

    >>> path = librosa.ex('trumpet')
    >>> librosa.get_samplerate(path)
    22050
    """
    try:
        # A SoundFile already carries its rate; anything else is queried
        # through soundfile's metadata reader.
        source = path if isinstance(path, sf.SoundFile) else sf.info(path)
        return source.samplerate  # type: ignore
    except sf.SoundFileRuntimeError:
        fallback_message = (
            "PySoundFile failed. Trying audioread instead."
            "\n\tAudioread support is deprecated in librosa 0.10.0"
            " and will be removed in version 1.0."
        )
        warnings.warn(
            fallback_message,
            stacklevel=2,
            category=FutureWarning,
        )
        with audioread.audio_open(path) as fdesc:
            return fdesc.samplerate  # type: ignore
871
+
872
+
873
@cache(level=20)
def autocorrelate(
    y: np.ndarray, *, max_size: Optional[int] = None, axis: int = -1
) -> np.ndarray:
    """Bounded-lag auto-correlation

    The auto-correlation is computed in the frequency domain: the input is
    zero-padded, transformed, converted to a power spectrum, and transformed
    back.

    Parameters
    ----------
    y : np.ndarray
        array to autocorrelate
    max_size : int > 0 or None
        maximum correlation lag.
        If unspecified, defaults to ``y.shape[axis]`` (unbounded)
    axis : int
        The axis along which to autocorrelate.
        By default, the last axis (-1) is taken.

    Returns
    -------
    z : np.ndarray
        truncated autocorrelation ``y*y`` along the specified axis.
        If ``max_size`` is specified, then ``z.shape[axis]`` is bounded
        to ``max_size``.

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    Compute full autocorrelation of ``y``

    >>> y, sr = librosa.load(librosa.ex('trumpet'))
    >>> librosa.autocorrelate(y)
    array([ 6.899e+02,  6.236e+02, ...,  3.710e-08, -1.796e-08])

    Compute onset strength auto-correlation up to 4 seconds

    >>> import matplotlib.pyplot as plt
    >>> odf = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
    >>> ac = librosa.autocorrelate(odf, max_size=4 * sr // 512)
    >>> fig, ax = plt.subplots()
    >>> ax.plot(ac)
    >>> ax.set(title='Auto-correlation', xlabel='Lag (frames)')
    """
    n_input = y.shape[axis]

    # The number of lags can never exceed the length of the input
    if max_size is None:
        max_size = n_input
    max_size = int(min(max_size, n_input))

    fft = get_fftlib()

    # Zero-pad the transform so that the full-length (linear, not circular)
    # auto-correlation is supported.
    n_pad = 2 * n_input - 1

    # Complex input needs the full transform pair; real input can use the
    # real-valued pair.
    if np.iscomplexobj(y):
        forward, inverse = fft.fft, fft.ifft
    else:
        forward, inverse = fft.rfft, fft.irfft

    # Power spectrum along the chosen axis, then back to the time domain
    powspec = util.abs2(forward(y, n=n_pad, axis=axis))
    autocorr = inverse(powspec, n=n_pad, axis=axis)

    # Keep only the first max_size lags along the target axis
    index = [slice(None) for _ in range(autocorr.ndim)]
    index[axis] = slice(max_size)

    truncated: np.ndarray = autocorr[tuple(index)]
    return truncated
950
+
951
+
952
def lpc(y: np.ndarray, *, order: int, axis: int = -1) -> np.ndarray:
    """Linear Prediction Coefficients via Burg's method

    This function applies Burg's method to estimate coefficients of a linear
    filter on ``y`` of order ``order``.  Burg's method is an extension to the
    Yule-Walker approach, which are both sometimes referred to as LPC parameter
    estimation by autocorrelation.

    It follows the description and implementation approach described in the
    introduction by Marple. [#]_  N.B. This paper describes a different method, which
    is not implemented here, but has been chosen for its clear explanation of
    Burg's technique in its introduction.

    .. [#] Larry Marple.
           A New Autoregressive Spectrum Analysis Algorithm.
           IEEE Transactions on Acoustics, Speech, and Signal Processing
           vol 28, no. 4, 1980.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        Time series to fit. Multi-channel is supported.
    order : int > 0
        Order of the linear filter
    axis : int
        Axis along which to compute the coefficients

    Returns
    -------
    a : np.ndarray [shape=(..., order + 1)]
        LP prediction error coefficients, i.e. filter denominator polynomial.
        Note that the length along the specified ``axis`` will be ``order+1``.

    Raises
    ------
    ParameterError
        - If ``y`` is not valid audio as per `librosa.util.valid_audio`
        - If ``order < 1`` or not integer
    FloatingPointError
        - If ``y`` is ill-conditioned

    See Also
    --------
    scipy.signal.lfilter

    Examples
    --------
    Compute LP coefficients of y at order 16 on entire series

    >>> y, sr = librosa.load(librosa.ex('libri1'))
    >>> librosa.lpc(y, order=16)

    Compute LP coefficients, and plot LP estimate of original series

    >>> import matplotlib.pyplot as plt
    >>> import scipy
    >>> y, sr = librosa.load(librosa.ex('libri1'), duration=0.020)
    >>> a = librosa.lpc(y, order=2)
    >>> b = np.hstack([[0], -1 * a[1:]])
    >>> y_hat = scipy.signal.lfilter(b, [1], y)
    >>> fig, ax = plt.subplots()
    >>> ax.plot(y)
    >>> ax.plot(y_hat, linestyle='--')
    >>> ax.legend(['y', 'y_hat'])
    >>> ax.set_title('LP Model Forward Prediction')

    """
    if not util.is_positive_int(order):
        raise ParameterError(f"order={order} must be an integer > 0")

    util.valid_audio(y, mono=False)

    # NOTE(review): nothing here verifies that ``order`` is smaller than the
    # number of samples along ``axis``; confirm how the compiled helper
    # behaves when its error buffers shrink to zero length.

    # The compiled helper works along the leading axis, so bring the
    # target axis to the front first.
    y = y.swapaxes(axis, 0)
    dtype = y.dtype
    trailing_dims = y.shape[1:]

    # Coefficient buffers for the current and previous model order;
    # the leading coefficient is always 1.
    ar_coeffs = np.zeros((order + 1,) + trailing_dims, dtype=dtype)
    ar_coeffs[0] = 1
    ar_coeffs_prev = ar_coeffs.copy()

    # Single-row scratch buffers for the reflection coefficient and the
    # recursion denominator.
    reflect_coeff = np.zeros((1,) + trailing_dims, dtype=dtype)
    den = reflect_coeff.copy()

    # Small bias added to the denominator inside the helper
    epsilon = util.tiny(den)

    # Run the recursion, then move the coefficient axis back into place
    coeffs = __lpc(y, order, ar_coeffs, ar_coeffs_prev, reflect_coeff, den, epsilon)
    return np.swapaxes(coeffs, 0, axis)
1047
+
1048
+
1049
@jit(nopython=True, cache=False)  # type: ignore
def __lpc(
    y: np.ndarray,
    order: int,
    ar_coeffs: np.ndarray,
    ar_coeffs_prev: np.ndarray,
    reflect_coeff: np.ndarray,
    den: np.ndarray,
    epsilon: float,
) -> np.ndarray:
    """Compiled Burg recursion used by ``lpc``.

    Parameters
    ----------
    y : np.ndarray
        Input series; the recursion runs along its leading axis.
    order : int
        Number of recursion steps (model order).
    ar_coeffs, ar_coeffs_prev : np.ndarray
        Coefficient buffers for the current and previous model order.
        They are swapped at every step and overwritten in place.
    reflect_coeff, den : np.ndarray
        Scratch buffers whose first row holds the reflection coefficient
        and the recursion denominator; both are overwritten in place.
    epsilon : float
        Bias added to the denominator before dividing.

    Returns
    -------
    np.ndarray
        Whichever of the two coefficient buffers holds the coefficients of
        the final model order.
    """
    # This implementation follows the description of Burg's algorithm given in
    # section III of Marple's paper referenced in the docstring.
    #
    # We use the Levinson-Durbin recursion to compute AR coefficients for each
    # increasing model order by using those from the last. We maintain two
    # arrays and then flip them each time we increase the model order so that
    # we may use all the coefficients from the previous order while we compute
    # those for the new one. These two arrays hold ar_coeffs for order M and
    # order M-1.  (Corresponding to a_{M,k} and a_{M-1,k} in eqn 5)

    # These two arrays hold the forward and backward prediction error. They
    # correspond to f_{M-1,k} and b_{M-1,k} in eqns 10, 11, 13 and 14 of
    # Marple. First they are used to compute the reflection coefficient at
    # order M from M-1 then are re-used as f_{M,k} and b_{M,k} for each
    # iteration of the below loop
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]

    # DEN_{M} from eqn 16 of Marple.
    den[0] = np.sum(fwd_pred_error**2 + bwd_pred_error**2, axis=0)

    for i in range(order):
        # can be removed if we keep the epsilon bias
        # if np.any(den <= 0):
        #    raise FloatingPointError("numerical error, input ill-conditioned?")

        # Eqn 15 of Marple, with fwd_pred_error and bwd_pred_error
        # corresponding to f_{M-1,k+1} and b{M-1,k} and the result as a_{M,M}

        reflect_coeff[0] = np.sum(bwd_pred_error * fwd_pred_error, axis=0)
        reflect_coeff[0] *= -2
        # epsilon keeps the division well-defined when den is zero
        reflect_coeff[0] /= den[0] + epsilon

        # Now we use the reflection coefficient and the AR coefficients from
        # the last model order to compute all of the AR coefficients for the
        # current one.  This is the Levinson-Durbin recursion described in
        # eqn 5.
        # Note 1: We don't have to care about complex conjugates as our signals
        # are all real-valued
        # Note 2: j counts 1..i+1, i-j+1 counts i..0
        # Note 3: The first element of ar_coeffs* is always 1, which copies in
        # the reflection coefficient at the end of the new AR coefficient array
        # after the preceding coefficients

        # Swap roles: last step's output becomes this step's "previous" order
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        for j in range(1, i + 2):
            # reflection multiply should be broadcast
            ar_coeffs[j] = (
                ar_coeffs_prev[j] + reflect_coeff[0] * ar_coeffs_prev[i - j + 1]
            )

        # Update the forward and backward prediction errors corresponding to
        # eqns 13 and 14.  We start with f_{M-1,k+1} and b_{M-1,k} and use them
        # to compute f_{M,k} and b_{M,k}
        # (the un-updated forward error is kept so that the backward update
        # does not see the new forward values)
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # SNIP - we are now done with order M and advance. M-1 <- M

        # Compute DEN_{M} using the recursion from eqn 17.
        #
        # reflect_coeff = a_{M-1,M-1}      (we have advanced M)
        # den =  DEN_{M-1}                 (rhs)
        # bwd_pred_error = b_{M-1,N-M+1}   (we have advanced M)
        # fwd_pred_error = f_{M-1,k}       (we have advanced M)
        # den <- DEN_{M}                   (lhs)
        #

        q = 1.0 - reflect_coeff[0] ** 2
        den[0] = q * den[0] - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        # Shift up forward error.
        #
        # fwd_pred_error <- f_{M-1,k+1}
        # bwd_pred_error <- b_{M-1,k}
        #
        # N.B. We do this after computing the denominator using eqn 17 but
        # before using it in the numerator in eqn 15.
        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]

    return ar_coeffs
1142
+
1143
+
1144
@stencil  # type: ignore
def _zc_stencil(x: np.ndarray, threshold: float, zero_pos: bool) -> np.ndarray:
    """Stencil to compute zero crossings

    Compares each element with the one before it (after clipping values
    within ``[-threshold, threshold]`` to zero) and reports whether their
    signs differ.
    """
    # Inside a numba stencil kernel, indices are relative to the element
    # being computed: x[0] is the current sample ...
    x0 = x[0]
    if -threshold <= x0 <= threshold:
        x0 = 0

    # ... and x[-1] is its predecessor.
    x1 = x[-1]
    if -threshold <= x1 <= threshold:
        x1 = 0

    if zero_pos:
        # Only the sign bit is compared, so 0 is grouped with the positives
        return np.signbit(x0) != np.signbit(x1)  # type: ignore
    else:
        # np.sign distinguishes -1, 0 and +1, so 0 has a sign of its own
        return np.sign(x0) != np.sign(x1)  # type: ignore
1159
+
1160
+
1161
@guvectorize(
    [
        "void(float32[:], float32, bool_, bool_[:])",
        "void(float64[:], float64, bool_, bool_[:])",
    ],
    "(n),(),()->(n)",
    cache=False,
    nopython=True,
)  # type: ignore
def _zc_wrapper(
    x: np.ndarray,
    threshold: float,
    zero_pos: bool,
    y: np.ndarray,
) -> None:  # pragma: no cover
    """Vectorized wrapper for zero crossing stencil

    Per the layout string, ``x`` is a 1-d float vector, ``threshold`` and
    ``zero_pos`` are scalars, and ``y`` is the boolean output vector of the
    same length as ``x``.  Nothing is returned; the result is written
    into ``y``.
    """
    # NOTE(review): presumably the stencil has no valid predecessor for the
    # first element; `zero_crossings` overwrites that entry afterwards.
    y[:] = _zc_stencil(x, threshold, zero_pos)
1178
+
1179
+
1180
@cache(level=20)
def zero_crossings(
    y: np.ndarray,
    *,
    threshold: float = 1e-10,
    ref_magnitude: Optional[Union[float, Callable]] = None,
    pad: bool = True,
    zero_pos: bool = True,
    axis: int = -1,
) -> np.ndarray:
    """Find the zero-crossings of a signal ``y``: indices ``i`` such that
    ``sign(y[i]) != sign(y[i-1])``.

    If ``y`` is multi-dimensional, then zero-crossings are computed along
    the specified ``axis``.

    Parameters
    ----------
    y : np.ndarray
        The input array

    threshold : float >= 0
        If non-zero, values where ``-threshold <= y <= threshold`` are
        clipped to 0.

    ref_magnitude : float > 0 or callable
        If numeric, the threshold is scaled relative to ``ref_magnitude``.

        If callable, the threshold is scaled relative to
        ``ref_magnitude(np.abs(y))``.

    pad : boolean
        If ``True``, then ``y[0]`` is considered a valid zero-crossing.

    zero_pos : boolean
        If ``True`` then the value 0 is interpreted as having positive sign.

        If ``False``, then 0, -1, and +1 all have distinct signs.

    axis : int
        Axis along which to compute zero-crossings.

    Returns
    -------
    zero_crossings : np.ndarray [shape=y.shape, dtype=boolean]
        Indicator array of zero-crossings in ``y`` along the selected axis.

    Notes
    -----
    This function caches at level 20.

    Examples
    --------
    >>> # Generate a time-series
    >>> y = np.sin(np.linspace(0, 4 * 2 * np.pi, 20))
    >>> y
    array([  0.000e+00,   9.694e-01,   4.759e-01,  -7.357e-01,
            -8.372e-01,   3.247e-01,   9.966e-01,   1.646e-01,
            -9.158e-01,  -6.142e-01,   6.142e-01,   9.158e-01,
            -1.646e-01,  -9.966e-01,  -3.247e-01,   8.372e-01,
             7.357e-01,  -4.759e-01,  -9.694e-01,  -9.797e-16])
    >>> # Compute zero-crossings
    >>> z = librosa.zero_crossings(y)
    >>> z
    array([ True, False, False,  True, False,  True, False, False,
            True, False,  True, False,  True, False, False,  True,
           False,  True, False,  True], dtype=bool)

    >>> # Stack y against the zero-crossing indicator
    >>> librosa.util.stack([y, z], axis=-1)
    array([[  0.000e+00,   1.000e+00],
           [  9.694e-01,   0.000e+00],
           [  4.759e-01,   0.000e+00],
           [ -7.357e-01,   1.000e+00],
           [ -8.372e-01,   0.000e+00],
           [  3.247e-01,   1.000e+00],
           [  9.966e-01,   0.000e+00],
           [  1.646e-01,   0.000e+00],
           [ -9.158e-01,   1.000e+00],
           [ -6.142e-01,   0.000e+00],
           [  6.142e-01,   1.000e+00],
           [  9.158e-01,   0.000e+00],
           [ -1.646e-01,   1.000e+00],
           [ -9.966e-01,   0.000e+00],
           [ -3.247e-01,   0.000e+00],
           [  8.372e-01,   1.000e+00],
           [  7.357e-01,   0.000e+00],
           [ -4.759e-01,   1.000e+00],
           [ -9.694e-01,   0.000e+00],
           [ -9.797e-16,   1.000e+00]])

    >>> # Find the indices of zero-crossings
    >>> np.nonzero(z)
    (array([ 0,  3,  5,  8, 10, 12, 15, 17, 19]),)
    """
    # Scale the clipping threshold by the reference magnitude, if any
    if ref_magnitude is not None:
        if callable(ref_magnitude):
            scale = ref_magnitude(np.abs(y))
        else:
            scale = ref_magnitude
        threshold = threshold * scale

    # The output has the same layout as the input.  The compiled kernel
    # works along the trailing axis, so it is given views of both arrays
    # with the target axis swapped to the end; writes through the output
    # view land in `crossings`.
    crossings = np.empty_like(y, dtype=bool)
    signal_view = y.swapaxes(-1, axis)
    crossings_view = crossings.swapaxes(-1, axis)

    _zc_wrapper(signal_view, threshold, zero_pos, crossings_view)

    # The first sample has no predecessor: its value is dictated by `pad`
    crossings_view[..., 0] = pad

    return crossings
1291
+
1292
+
1293
def clicks(
    *,
    times: Optional[_SequenceLike[_FloatLike_co]] = None,
    frames: Optional[_SequenceLike[_IntLike_co]] = None,
    sr: float = 22050,
    hop_length: int = 512,
    click_freq: float = 1000.0,
    click_duration: float = 0.1,
    click: Optional[np.ndarray] = None,
    length: Optional[int] = None,
) -> np.ndarray:
    """Construct a "click track".

    This returns a signal with the signal ``click`` sound placed at
    each specified time.

    Parameters
    ----------
    times : np.ndarray or None
        times to place clicks, in seconds
    frames : np.ndarray or None
        frame indices to place clicks
    sr : number > 0
        desired sampling rate of the output signal
    hop_length : int > 0
        if positions are specified by ``frames``, the number of samples between frames.
    click_freq : float > 0
        frequency (in Hz) of the default click signal.  Default is 1KHz.
    click_duration : float > 0
        duration (in seconds) of the default click signal.  Default is 100ms.
    click : np.ndarray or None
        (optional) click signal sample to use instead of the default click.
        Multi-channel is supported.
    length : int > 0
        desired number of samples in the output signal.
        If unspecified, the output is just long enough to hold the last
        click; if additionally no click positions are given, the output
        is empty (zero samples).

    Returns
    -------
    click_signal : np.ndarray
        Synthesized click signal.
        This will be monophonic by default, or match the number of channels to a provided ``click`` signal.

    Raises
    ------
    ParameterError
        - If neither ``times`` nor ``frames`` are provided.
        - If any of ``click_freq``, ``click_duration``, or ``length`` are out of range.

    Examples
    --------
    >>> # Sonify detected beat events
    >>> y, sr = librosa.load(librosa.ex('choice'), duration=10)
    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    >>> y_beats = librosa.clicks(frames=beats, sr=sr)

    >>> # Or generate a signal of the same length as y
    >>> y_beats = librosa.clicks(frames=beats, sr=sr, length=len(y))

    >>> # Or use timing instead of frame indices
    >>> times = librosa.frames_to_time(beats, sr=sr)
    >>> y_beat_times = librosa.clicks(times=times, sr=sr)

    >>> # Or with a click frequency of 880Hz and a 500ms sample
    >>> y_beat_times880 = librosa.clicks(times=times, sr=sr,
    ...                                  click_freq=880, click_duration=0.5)

    Display click waveform next to the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=2, sharex=True)
    >>> S = librosa.feature.melspectrogram(y=y, sr=sr)
    >>> librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
    ...                          x_axis='time', y_axis='mel', ax=ax[0])
    >>> librosa.display.waveshow(y_beat_times, sr=sr, label='Beat clicks',
    ...                          ax=ax[1])
    >>> ax[1].legend()
    >>> ax[0].label_outer()
    >>> ax[0].set_title(None)
    """

    # Compute sample positions from time or frames
    positions: np.ndarray
    if times is None:
        if frames is None:
            raise ParameterError('either "times" or "frames" must be provided')

        positions = frames_to_samples(frames, hop_length=hop_length)
    else:
        # Convert times to positions
        positions = time_to_samples(times, sr=sr)

    if click is not None:
        # Check that we have a well-formed audio buffer
        util.valid_audio(click, mono=False)

    else:
        # Create default click signal
        if click_duration <= 0:
            raise ParameterError("click_duration must be strictly positive")

        if click_freq <= 0:
            raise ParameterError("click_freq must be strictly positive")

        angular_freq = 2 * np.pi * click_freq / float(sr)

        # Exponentially decaying envelope (1 down to 2**-10) ...
        click = np.logspace(0, -10, num=int(np.round(sr * click_duration)), base=2.0)

        # ... modulating a sinusoid at the click frequency
        click *= np.sin(angular_freq * np.arange(len(click)))

    n_click = click.shape[-1]

    # Set default length
    if length is None:
        if np.size(positions) > 0:
            length = positions.max() + n_click
        else:
            # No events to place: max() of an empty array would raise an
            # opaque ValueError, so produce an empty signal instead.
            length = 0
    else:
        if length < 1:
            raise ParameterError("length must be a positive integer")

        # Filter out any positions past the length boundary
        positions = positions[positions < length]

    # Pre-allocate click signal
    shape = list(click.shape)
    shape[-1] = length
    click_signal = np.zeros(shape, dtype=np.float32)

    # Place clicks
    for start in positions:
        # Compute the end-point of this click
        end = start + n_click

        if end >= length:
            # The click runs off the end of the output: keep only the part
            # that fits
            click_signal[..., start:] += click[..., : length - start]
        else:
            # Normally, just add a click here
            click_signal[..., start:end] += click

    return click_signal
1429
+
1430
+
1431
def tone(
    frequency: _FloatLike_co,
    *,
    sr: float = 22050,
    length: Optional[int] = None,
    duration: Optional[float] = None,
    phi: Optional[float] = None,
) -> np.ndarray:
    """Construct a pure tone (cosine) signal at a given frequency.

    Parameters
    ----------
    frequency : float > 0
        frequency
    sr : number > 0
        desired sampling rate of the output signal
    length : int > 0
        desired number of samples in the output signal.
        When both ``duration`` and ``length`` are defined,
        ``length`` takes priority.
    duration : float > 0
        desired duration in seconds.
        When both ``duration`` and ``length`` are defined,
        ``length`` takes priority.
    phi : float or None
        phase offset, in radians. If unspecified, defaults to ``-np.pi * 0.5``.

    Returns
    -------
    tone_signal : np.ndarray [shape=(length,), dtype=float64]
        Synthesized pure sine tone signal

    Raises
    ------
    ParameterError
        - If ``frequency`` is not provided.
        - If neither ``length`` nor ``duration`` are provided.

    Examples
    --------
    Generate a pure sine tone A4

    >>> tone = librosa.tone(440, duration=1)

    Or generate the same signal using `length`

    >>> tone = librosa.tone(440, sr=22050, length=22050)

    Display spectrogram

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> S = librosa.feature.melspectrogram(y=tone)
    >>> librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
    ...                          x_axis='time', y_axis='mel', ax=ax)
    """
    if frequency is None:
        raise ParameterError('"frequency" must be provided')

    # An explicit sample count wins; otherwise derive it from the duration
    if length is None:
        if duration is None:
            raise ParameterError('either "length" or "duration" must be provided')
        length = int(np.ceil(duration * sr))

    # A cosine shifted by -pi/2 is a sine, hence the default offset
    phase_offset = -np.pi * 0.5 if phi is None else phi

    angular = 2 * np.pi * frequency
    sample_index = np.arange(length)
    signal: np.ndarray = np.cos(angular * sample_index / sr + phase_offset)
    return signal
1502
+
1503
+
1504
def chirp(
    *,
    fmin: _FloatLike_co,
    fmax: _FloatLike_co,
    sr: float = 22050,
    length: Optional[int] = None,
    duration: Optional[float] = None,
    linear: bool = False,
    phi: Optional[float] = None,
) -> np.ndarray:
    """Construct a "chirp" or "sine-sweep" signal.

    The chirp sweeps from frequency ``fmin`` to ``fmax`` (in Hz).

    Parameters
    ----------
    fmin : float > 0
        initial frequency

    fmax : float > 0
        final frequency

    sr : number > 0
        desired sampling rate of the output signal

    length : int > 0
        desired number of samples in the output signal.
        When both ``duration`` and ``length`` are defined,
        ``length`` takes priority.

    duration : float > 0
        desired duration in seconds.
        When both ``duration`` and ``length`` are defined,
        ``length`` takes priority.

    linear : boolean
        - If ``True``, use a linear sweep, i.e., frequency changes linearly with time
        - If ``False``, use a exponential sweep.

        Default is ``False``.

    phi : float or None
        phase offset, in radians.
        If unspecified, defaults to ``-np.pi * 0.5``.

    Returns
    -------
    chirp_signal : np.ndarray [shape=(length,), dtype=float64]
        Synthesized chirp signal

    Raises
    ------
    ParameterError
        - If either ``fmin`` or ``fmax`` are not provided.
        - If neither ``length`` nor ``duration`` are provided.

    See Also
    --------
    scipy.signal.chirp

    Examples
    --------
    Generate a exponential chirp from A2 to A8

    >>> exponential_chirp = librosa.chirp(fmin=110, fmax=110*64, duration=1)

    Or generate the same signal using ``length``

    >>> exponential_chirp = librosa.chirp(fmin=110, fmax=110*64, sr=22050, length=22050)

    Or generate a linear chirp instead

    >>> linear_chirp = librosa.chirp(fmin=110, fmax=110*64, duration=1, linear=True)

    Display spectrogram for both exponential and linear chirps.

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
    >>> S_exponential = np.abs(librosa.stft(y=exponential_chirp))
    >>> librosa.display.specshow(librosa.amplitude_to_db(S_exponential, ref=np.max),
    ...                          x_axis='time', y_axis='linear', ax=ax[0])
    >>> ax[0].set(title='Exponential chirp', xlabel=None)
    >>> ax[0].label_outer()
    >>> S_linear = np.abs(librosa.stft(y=linear_chirp))
    >>> librosa.display.specshow(librosa.amplitude_to_db(S_linear, ref=np.max),
    ...                          x_axis='time', y_axis='linear', ax=ax[1])
    >>> ax[1].set(title='Linear chirp')
    """

    if fmin is None or fmax is None:
        raise ParameterError('both "fmin" and "fmax" must be provided')

    # Compute signal duration and sample count
    period = 1.0 / sr
    if length is None:
        if duration is None:
            raise ParameterError('either "length" or "duration" must be provided')
        n_samples = int(np.ceil(duration * sr))
    else:
        # Use the requested sample count directly.  Round-tripping it through
        # ``ceil(period * length * sr)`` can overshoot by one sample because
        # of floating point rounding (e.g. sr=10, length=3 gave 4 samples).
        n_samples = int(np.ceil(length))
        duration = period * length

    if phi is None:
        phi = -np.pi * 0.5

    method = "linear" if linear else "logarithmic"
    y: np.ndarray = scipy.signal.chirp(
        np.arange(n_samples) / sr,
        fmin,
        duration,
        fmax,
        method=method,
        phi=phi / np.pi * 180,  # scipy.signal.chirp uses degrees for phase offset
    )
    return y
1617
+
1618
+
1619
def mu_compress(
    x: Union[np.ndarray, _FloatLike_co], *, mu: float = 255, quantize: bool = True
) -> np.ndarray:
    """mu-law compression

    Given an input signal ``-1 <= x <= 1``, the mu-law compression
    is calculated by::

        sign(x) * ln(1 + mu * abs(x)) /  ln(1 + mu)

    Parameters
    ----------
    x : np.ndarray with values in [-1, +1]
        The input signal to compress

    mu : positive number
        The compression parameter.  Values of the form ``2**n - 1``
        (e.g., 15, 31, 63, etc.) are most common.

    quantize : bool
        If ``True``, quantize the compressed values into ``1 + mu``
        distinct integer values.

        If ``False``, mu-law compression is applied without quantization.

    Returns
    -------
    x_compressed : np.ndarray
        The compressed signal.

    Raises
    ------
    ParameterError
        If ``x`` has values outside the range [-1, +1]
        If ``mu <= 0``

    See Also
    --------
    mu_expand

    Examples
    --------
    Compression without quantization

    >>> x = np.linspace(-1, 1, num=16)
    >>> x
    array([-1.        , -0.86666667, -0.73333333, -0.6       , -0.46666667,
           -0.33333333, -0.2       , -0.06666667,  0.06666667,  0.2       ,
            0.33333333,  0.46666667,  0.6       ,  0.73333333,  0.86666667,
            1.        ])
    >>> y = librosa.mu_compress(x, quantize=False)
    >>> y
    array([-1.        , -0.97430198, -0.94432361, -0.90834832, -0.86336132,
           -0.80328309, -0.71255496, -0.52124063,  0.52124063,  0.71255496,
            0.80328309,  0.86336132,  0.90834832,  0.94432361,  0.97430198,
            1.        ])

    Compression with quantization

    >>> y = librosa.mu_compress(x, quantize=True)
    >>> y
    array([-128, -124, -120, -116, -110, -102,  -91,  -66,   66,   91,  102,
           110,  116,  120,  124,  127])

    Compression with quantization and a smaller range

    >>> y = librosa.mu_compress(x, mu=15, quantize=True)
    >>> y
    array([-8, -7, -7, -6, -6, -5, -4, -2,  2,  4,  5,  6,  6,  7,  7,  7])

    """
    if mu <= 0:
        raise ParameterError(
            f"mu-law compression parameter mu={mu} must be strictly positive."
        )

    too_small = np.any(x < -1)
    if too_small or np.any(x > 1):
        raise ParameterError(f"mu-law input x={x} must be in the range [-1, +1].")

    # Continuous-valued companding curve
    compressed: np.ndarray = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)

    if not quantize:
        return compressed

    # Bin the compressed values against 1 + mu evenly spaced edges on
    # [-1, 1], then shift the bin indices so the codes are centered on zero.
    n_levels = int(1 + mu)
    bin_edges = np.linspace(-1, 1, num=n_levels, endpoint=True)
    codes: np.ndarray = np.digitize(compressed, bin_edges, right=True) - n_levels // 2
    return codes
1711
+
1712
+
1713
def mu_expand(
    x: Union[np.ndarray, _FloatLike_co], *, mu: float = 255.0, quantize: bool = True
) -> np.ndarray:
    """mu-law expansion

    This function is the inverse of ``mu_compress``. Given a mu-law compressed
    signal ``-1 <= x <= 1``, the mu-law expansion is calculated by::

        sign(x) * (1 / mu) * ((1 + mu)**abs(x) - 1)

    Parameters
    ----------
    x : np.ndarray
        The compressed signal.
        If ``quantize=False``, values must be in the range [-1, +1].
        If ``quantize=True``, values are rescaled by ``2 / (1 + mu)`` first,
        and the rescaled values must lie in that range.
    mu : positive number
        The compression parameter.  Values of the form ``2**n - 1``
        (e.g., 15, 31, 63, etc.) are most common.
    quantize : boolean
        If ``True``, the input is assumed to be quantized to
        ``1 + mu`` distinct integer values.

    Returns
    -------
    x_expanded : np.ndarray with values in the range [-1, +1]
        The mu-law expanded signal.

    Raises
    ------
    ParameterError
        If ``x`` has values outside the range [-1, +1] and ``quantize=False``
        If ``mu <= 0``

    See Also
    --------
    mu_compress

    Examples
    --------
    Compress and expand without quantization

    >>> x = np.linspace(-1, 1, num=16)
    >>> x
    array([-1.        , -0.86666667, -0.73333333, -0.6       , -0.46666667,
           -0.33333333, -0.2       , -0.06666667,  0.06666667,  0.2       ,
            0.33333333,  0.46666667,  0.6       ,  0.73333333,  0.86666667,
            1.        ])
    >>> y = librosa.mu_compress(x, quantize=False)
    >>> y
    array([-1.        , -0.97430198, -0.94432361, -0.90834832, -0.86336132,
           -0.80328309, -0.71255496, -0.52124063,  0.52124063,  0.71255496,
            0.80328309,  0.86336132,  0.90834832,  0.94432361,  0.97430198,
            1.        ])
    >>> z = librosa.mu_expand(y, quantize=False)
    >>> z
    array([-1.        , -0.86666667, -0.73333333, -0.6       , -0.46666667,
           -0.33333333, -0.2       , -0.06666667,  0.06666667,  0.2       ,
            0.33333333,  0.46666667,  0.6       ,  0.73333333,  0.86666667,
            1.        ])

    Compress and expand with quantization.  Note that this necessarily
    incurs quantization error, particularly for values near +-1.

    >>> y = librosa.mu_compress(x, quantize=True)
    >>> y
    array([-128, -124, -120, -116, -110, -102,  -91,  -66,   66,   91,  102,
            110,  116,  120,  124,  127])
    >>> z = librosa.mu_expand(y, quantize=True)
    >>> z
    array([-1.        , -0.84027248, -0.70595818, -0.59301377, -0.4563785 ,
           -0.32155973, -0.19817918, -0.06450245,  0.06450245,  0.19817918,
            0.32155973,  0.4563785 ,  0.59301377,  0.70595818,  0.84027248,
            0.95743702])
    """
    if mu <= 0:
        raise ParameterError(
            f"Inverse mu-law compression parameter mu={mu} must be strictly positive."
        )

    if quantize:
        # Bring the integer codes back onto the unit interval
        x = x * 2.0 / (1 + mu)

    # The range check applies to the (possibly rescaled) values
    too_small = np.any(x < -1)
    if too_small or np.any(x > 1):
        raise ParameterError(
            f"Inverse mu-law input x={x} must be in the range [-1, +1]."
        )

    # (1 + mu)**|x| - 1, to be scaled by sign(x) / mu
    growth = np.power(1 + mu, np.abs(x)) - 1
    return np.sign(x) / mu * growth