Spaces:
Running
Running
File size: 1,862 Bytes
baa9131 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
#!/usr/bin/env python3
import argparse
from pathlib import Path
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
def save_mel_image(
    audio_path: Path,
    out_path: Path,
    sr: int = 22050,
    n_fft: int = 1024,
    hop_length: int = 256,
    n_mels: int = 80,
    fmin: int = 0,
    fmax: int | None = 8000,
) -> None:
    """Render the dB-scaled mel spectrogram of an audio file to an image.

    The audio is loaded through ``librosa.load`` (requesting mono at ``sr``),
    turned into a mel power spectrogram, converted to decibels relative to
    the spectrogram's maximum, and drawn with the "magma" colormap on an
    8x3 inch, 150 dpi figure with axes hidden and no padding.

    Args:
        audio_path: Input audio file; passed to ``librosa.load`` as a string.
        out_path: Destination image file. Missing parent directories are
            created. The image format is whatever matplotlib derives from
            the path.
        sr: Sample rate requested from ``librosa.load`` and used for the
            mel analysis.
        n_fft: FFT window size forwarded to ``melspectrogram``.
        hop_length: Hop between frames forwarded to ``melspectrogram`` and
            ``specshow``.
        n_mels: Number of mel bands.
        fmin: Lower frequency bound of the mel filter bank.
        fmax: Upper frequency bound of the mel filter bank; ``None`` is
            forwarded unchanged so librosa applies its own default.
    """
    y, _ = librosa.load(str(audio_path), sr=sr, mono=True)
    S = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax,
    )
    # ref=np.max puts the loudest bin at 0 dB; everything else is negative.
    S_db = librosa.power_to_db(S, ref=np.max)

    # Work on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", so the cleanup below always targets this figure.
    fig, ax = plt.subplots(figsize=(8, 3), dpi=150)
    try:
        librosa.display.specshow(
            S_db,
            sr=sr,
            hop_length=hop_length,
            x_axis=None,
            y_axis=None,
            cmap="magma",
            ax=ax,
        )
        ax.axis("off")
        fig.tight_layout(pad=0)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, bbox_inches="tight", pad_inches=0)
    finally:
        # pyplot keeps every open figure alive in a global registry; close
        # it even when drawing or saving fails so repeated calls don't leak.
        plt.close(fig)
def main():
p = argparse.ArgumentParser(description="Generate mel-spectrogram image from an audio file.")
p.add_argument("audio", type=Path, help="Path to input audio (wav/flac/mp3)")
p.add_argument("output", type=Path, help="Path to output image (png/jpg)")
p.add_argument("--sr", type=int, default=22050)
p.add_argument("--n_fft", type=int, default=1024)
p.add_argument("--hop", dest="hop_length", type=int, default=256)
p.add_argument("--mels", dest="n_mels", type=int, default=80)
p.add_argument("--fmin", type=int, default=0)
p.add_argument("--fmax", type=int, default=8000)
args = p.parse_args()
save_mel_image(
audio_path=args.audio,
out_path=args.output,
sr=args.sr,
n_fft=args.n_fft,
hop_length=args.hop_length,
n_mels=args.n_mels,
fmin=args.fmin,
fmax=args.fmax,
)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|