Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| import argparse | |
| from pathlib import Path | |
| import numpy as np | |
| import librosa | |
| import librosa.display | |
| import matplotlib.pyplot as plt | |
def save_mel_image(
    audio_path: Path,
    out_path: Path,
    sr: int = 22050,
    n_fft: int = 1024,
    hop_length: int = 256,
    n_mels: int = 80,
    fmin: int = 0,
    fmax: int | None = 8000,
) -> None:
    """Render the mel spectrogram of an audio file to an image file.

    The audio is loaded as mono at sample rate ``sr``, converted to a mel
    spectrogram, expressed in decibels relative to its own maximum, and drawn
    with the "magma" colormap on an 8x3 inch, 150 dpi figure with the axes
    hidden and no padding around the plot.

    Args:
        audio_path: Input audio file.
        out_path: Destination image file; missing parent directories are
            created.
        sr: Sample rate passed to ``librosa.load`` and the mel transform.
        n_fft: FFT window size for the mel spectrogram.
        hop_length: Hop between successive frames, in samples.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank; ``None`` is forwarded
            to librosa unchanged.
    """
    y, _ = librosa.load(str(audio_path), sr=sr, mono=True)
    S = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax
    )
    S_db = librosa.power_to_db(S, ref=np.max)

    # Hold explicit figure/axes handles instead of relying on pyplot's
    # "current figure", so exactly this figure is drawn on and released.
    fig, ax = plt.subplots(figsize=(8, 3), dpi=150)
    try:
        librosa.display.specshow(
            S_db,
            sr=sr,
            hop_length=hop_length,
            x_axis=None,
            y_axis=None,
            cmap="magma",
            ax=ax,
        )
        ax.axis("off")
        fig.tight_layout(pad=0)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, bbox_inches="tight", pad_inches=0)
    finally:
        # Close even when drawing or saving fails; otherwise the figure stays
        # registered with pyplot and leaks across repeated calls.
        plt.close(fig)
def main():
    """Parse the command line and write the requested mel-spectrogram image."""
    parser = argparse.ArgumentParser(
        description="Generate mel-spectrogram image from an audio file."
    )
    parser.add_argument("audio", type=Path, help="Path to input audio (wav/flac/mp3)")
    parser.add_argument("output", type=Path, help="Path to output image (png/jpg)")

    # Optional integer settings as (flag, destination attribute, default).
    # Each destination matches a keyword parameter of save_mel_image.
    integer_options = (
        ("--sr", "sr", 22050),
        ("--n_fft", "n_fft", 1024),
        ("--hop", "hop_length", 256),
        ("--mels", "n_mels", 80),
        ("--fmin", "fmin", 0),
        ("--fmax", "fmax", 8000),
    )
    for flag, dest, default in integer_options:
        parser.add_argument(flag, dest=dest, type=int, default=default)

    settings = vars(parser.parse_args())
    # The two positionals are renamed for the callee; everything left in
    # `settings` is forwarded under its own name.
    save_mel_image(
        audio_path=settings.pop("audio"),
        out_path=settings.pop("output"),
        **settings,
    )


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()