# Spaces:
# Runtime error
# Runtime error
import argparse
from pathlib import Path

import librosa
import numpy as np
import torch
from laion_clap import CLAP_Module
from tqdm import tqdm
def str_to_bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``"False"`` and ``"0"`` -- becomes
    ``True``. This converter interprets the usual textual spellings instead.

    Args:
        value: The raw argument (a string), or an actual ``bool``.

    Returns:
        The boolean the text stands for.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in {"true", "t", "yes", "y", "1", "on"}:
        return True
    # The empty string is kept as False because ``bool("")`` was the only way
    # to disable the option with the previous ``type=bool`` declaration.
    if text in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def build_parser():
    """Create the command-line parser for the embedding extraction script.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--data_path``,
        ``--save_path``, ``--clap_ckpt`` (all required), ``--enable_fusion``
        (default ``True``) and ``--audio_encoder`` (default ``"HTSAT-tiny"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_path",
        "-d",
        required=True,
        type=str,
        help="Path of the original wav files",
    )
    parser.add_argument(
        "--save_path",
        "-s",
        required=True,
        type=str,
        help="Path to save the clap audio embedding '.npy' files",
    )
    parser.add_argument(
        "--clap_ckpt",
        "-c",
        required=True,
        type=str,
        help="Path of the pretrained clap checkpoint",
    )
    parser.add_argument(
        "--enable_fusion",
        "-e",
        default=True,
        # Not ``type=bool``: that would turn "--enable_fusion False" into True.
        type=str_to_bool,
        help="Whether to enable the feature fusion of the clap model. Depends on the clap checkpoint you are using",
    )
    parser.add_argument(
        "--audio_encoder",
        "-a",
        default="HTSAT-tiny",
        type=str,
        help="Audio encoder of the clap model. Depends on the clap checkpoint you are using",
    )
    return parser


def embedding_output_path(wav_path, data_path, save_path):
    """Map an input wav file to the ``.npy`` file that stores its embedding.

    The directory layout below ``data_path`` is mirrored below ``save_path``.

    Args:
        wav_path: Path of a wav file located under ``data_path``.
        data_path: Root directory that was searched for wav files.
        save_path: Root directory that receives the embeddings.

    Returns:
        ``save_path`` joined with the wav file's path relative to
        ``data_path``, with the suffix replaced by ``.npy``.

    Raises:
        ValueError: If ``wav_path`` is not located under ``data_path``.
    """
    return save_path / wav_path.relative_to(data_path).with_suffix(".npy")


def main():
    """Compute a CLAP audio embedding for every wav file under ``--data_path``.

    Each embedding is written as a ``.npy`` file under ``--save_path``, at the
    same relative location as its source wav file.
    """
    args = build_parser().parse_args()

    model = CLAP_Module(enable_fusion=args.enable_fusion, aencoder=args.audio_encoder)
    model.load_ckpt(args.clap_ckpt)

    data_path = Path(args.data_path)
    save_path = Path(args.save_path)

    # Collect the files up front so the progress bar knows its total and the
    # processing order is reproducible between runs.
    wav_paths = sorted(data_path.glob("**/*.wav"))

    with torch.no_grad():
        for wav_path in tqdm(wav_paths, dynamic_ncols=True, colour="yellow"):
            # librosa resamples to the requested rate while loading.
            wav, _ = librosa.load(wav_path, sr=48000)
            # Add a leading batch axis for the model call, then drop it again
            # so a single embedding is stored per file.
            embedding = model.get_audio_embedding_from_data(
                x=wav[np.newaxis], use_tensor=False
            )
            embedding = embedding.squeeze(axis=0)

            out_path = embedding_output_path(wav_path, data_path, save_path)
            # ``parents=True`` is required: the search is recursive, so the
            # target may sit several not-yet-existing directories deep.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            np.save(out_path, embedding)


if __name__ == "__main__":
    main()