# campp-multilingual-mlx / usage_example.py
# BMP's picture
# Convert iic/speech_campplus_sv_zh_en_16k-common_advanced to MLX format
# 2e82ca2 verified
# CAM++ MLX Model Usage Example (ModelScope Architecture)
import mlx.core as mx
import numpy as np
from model import CAMPPModelScopeV2
import json
def load_model(model_path="."):
    """Build a ``CAMPPModelScopeV2`` from a model directory and load its weights.

    The directory must contain ``config.json`` (hyper-parameters) and
    ``weights.npz`` (parameters readable by ``mx.load``).

    Args:
        model_path: Directory holding ``config.json`` and ``weights.npz``.
            Defaults to the current working directory.

    Returns:
        The constructed model after ``load_weights`` has been called on it.

    Raises:
        OSError: If ``config.json`` cannot be opened.
        KeyError: If ``config.json`` lacks ``input_dim`` or ``embedding_dim``.
    """
    import os

    # os.path.join (rather than f"{model_path}/...") keeps an empty
    # model_path relative instead of silently pointing at the filesystem root.
    config_path = os.path.join(model_path, "config.json")
    weights_path = os.path.join(model_path, "weights.npz")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    # "input_dim" and "embedding_dim" are mandatory; the remaining
    # hyper-parameters fall back to the defaults given here.
    model = CAMPPModelScopeV2(
        input_dim=config["input_dim"],
        channels=config.get("channels", 512),
        block_layers=config.get("block_layers", [4, 9, 16]),
        embedding_dim=config["embedding_dim"],
        cam_channels=config.get("cam_channels", 128),
        input_kernel_size=config.get("input_kernel_size", 5),
    )

    # NOTE(review): mx.load on an .npz yields a dict of arrays, whereas the
    # stock mlx.nn.Module.load_weights is documented to take a file path or a
    # list of (name, array) pairs. Confirm that CAMPPModelScopeV2.load_weights
    # accepts a dict; otherwise pass weights_path or list(weights.items()).
    weights = mx.load(weights_path)
    model.load_weights(weights)
    return model
def extract_speaker_embedding(model, audio_features):
    """Run ``model`` on ``audio_features`` and return the resulting embedding.

    Args:
        model: Callable model exposing ``parameters()``, e.g. the object
            returned by ``load_model``.
        audio_features: Model input. The original author describes it as
            (batch, features, time), e.g. a mel-spectrogram -- TODO confirm
            the layout against the model definition.

    Returns:
        The output of ``model(audio_features)`` with gradients stopped.
    """
    # MLX evaluates lazily; force the parameters to be materialized first.
    mx.eval(model.parameters())

    # mlx.core has no ``no_grad`` context manager (the original
    # ``with mx.no_grad():`` raised AttributeError). MLX only computes
    # gradients inside explicit transforms such as mx.grad, so a plain forward
    # pass is already gradient-free; stop_gradient additionally keeps the
    # embedding constant if a caller wraps this function in such a transform.
    embedding = mx.stop_gradient(model(audio_features))
    return embedding
# Example usage:
# model = load_model()
# features = mx.random.normal((1, 320, 200)) # Example input
# embedding = extract_speaker_embedding(model, features)
# print(f"Speaker embedding shape: {embedding.shape}")