File size: 552 Bytes
960cd20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import torch


def get_clap_audio_feature(
    audio_data, clap_model, processor, device, *, sampling_rate=48000
):
    """Embed audio with ``clap_model`` and return the transposed result.

    Args:
        audio_data: Raw audio handed to ``processor`` via its ``audios``
            keyword (presumably a waveform or a batch of waveforms --
            confirm against the caller).
        clap_model: Object exposing ``get_audio_features(**inputs)``.
        processor: Callable that turns the audio into model inputs; its
            result must support ``.to(device)`` and ``**`` unpacking.
        device: Target device forwarded to ``inputs.to``.
        sampling_rate: Sampling rate reported to ``processor``. Defaults to
            48000, the value that used to be hard-coded here.

    Returns:
        The audio features cast with ``.float()`` and transposed with ``.T``
        (presumably ``(batch, dim)`` -> ``(dim, batch)`` -- TODO confirm).
    """
    # Inference only: gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        # NOTE(review): if this is the Hugging Face CLAP processor, newer
        # releases may prefer `audio=` over `audios=` -- confirm against the
        # pinned library version before renaming.
        inputs = processor(
            audios=audio_data,
            return_tensors="pt",
            sampling_rate=sampling_rate,
        ).to(device)
        emb = clap_model.get_audio_features(**inputs).float()
    return emb.T


def get_clap_text_feature(text, clap_model, processor, device):
    """Embed text with ``clap_model`` and return the transposed result.

    Args:
        text: Text handed to ``processor`` via its ``text`` keyword.
        clap_model: Object exposing ``get_text_features(**inputs)``.
        processor: Callable that turns the text into model inputs; its
            result must support ``.to(device)`` and ``**`` unpacking.
        device: Target device forwarded to the encoded inputs' ``.to``.

    Returns:
        The text features cast with ``.float()`` and transposed with ``.T``
        (presumably ``(batch, dim)`` -> ``(dim, batch)`` -- TODO confirm).
    """
    # Inference only: gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        encoded = processor(text=text, return_tensors="pt")
        encoded = encoded.to(device)
        text_features = clap_model.get_text_features(**encoded)
        text_features = text_features.float()
    return text_features.T