campp-multilingual-mlx / usage_example.py

Convert iic/speech_campplus_sv_zh_en_16k-common_advanced to MLX format

2e82ca2 verified 13 days ago

1.32 kB

	# CAM++ MLX Model Usage Example (ModelScope Architecture)

	import mlx.core as mx
	import numpy as np
	from model import CAMPPModelScopeV2
	import json

	def load_model(model_path="."):
	    """Build a ``CAMPPModelScopeV2`` and populate it with saved weights.

	    Reads ``config.json`` and ``weights.npz`` from ``model_path``.

	    Args:
	        model_path: Directory containing ``config.json`` and ``weights.npz``.
	            Defaults to the current directory.

	    Returns:
	        The constructed model with the weights from ``weights.npz`` loaded.

	    Raises:
	        KeyError: If ``config.json`` lacks ``input_dim`` or ``embedding_dim``;
	            every other hyper-parameter falls back to a default.
	    """
	    # Load config
	    with open(f"{model_path}/config.json", "r", encoding="utf-8") as f:
	        config = json.load(f)

	    # Initialize model
	    model = CAMPPModelScopeV2(
	        input_dim=config["input_dim"],
	        channels=config.get("channels", 512),
	        block_layers=config.get("block_layers", [4, 9, 16]),
	        embedding_dim=config["embedding_dim"],
	        cam_channels=config.get("cam_channels", 128),
	        input_kernel_size=config.get("input_kernel_size", 5),
	    )

	    # Load weights. mx.load returns a {name: array} mapping for .npz files,
	    # while load_weights is documented to take a path or a list of
	    # (name, array) pairs, so hand it the pairs rather than the dict itself.
	    weights = mx.load(f"{model_path}/weights.npz")
	    model.load_weights(list(weights.items()))

	    # NOTE(review): the model is returned in whatever mode the constructor
	    # leaves it in; if model.py relies on train/eval mode (e.g. batch norm),
	    # callers may need model.eval() before inference - confirm.
	    return model

	def extract_speaker_embedding(model, audio_features):
	    """Run ``model`` on ``audio_features`` and return its output.

	    Args:
	        model: A callable model exposing ``parameters()``, e.g. the object
	            returned by ``load_model``.
	        audio_features: Input features such as a mel-spectrogram. The
	            original author's note gives the layout as
	            (batch, features, time); the layout actually required is set by
	            the model implementation - TODO confirm against model.py.

	    Returns:
	        Whatever ``model(audio_features)`` returns (the speaker embedding).
	    """
	    mx.eval(model.parameters())  # Ensure weights are loaded

	    # MLX only computes gradients inside explicit transforms such as
	    # mx.grad / mx.value_and_grad, so a plain forward call is already
	    # gradient-free. mlx.core provides no `no_grad` context manager; the
	    # previous `with mx.no_grad():` wrapper failed with AttributeError.
	    embedding = model(audio_features)

	    return embedding

	# Example usage:
	# model = load_model()
	# features = mx.random.normal((1, 320, 200)) # Example input
	# embedding = extract_speaker_embedding(model, features)
	# print(f"Speaker embedding shape: {embedding.shape}")