catalyst-n1/sdk/benchmarks/dvs_loader.py

Initial upload: Catalyst N1 open source neuromorphic processor RTL

e4cdd5f verified 9 days ago

2.96 kB

	"""DVS128 Gesture dataset loader for neuromorphic benchmarks.

	Uses the `tonic` library for event camera data loading and transforms.
	128x128 pixels x 2 polarities, spatially downsampled to 32x32 x 2 polarities = 2048 input channels.
	11 gesture classes.

	Requires: pip install tonic
	"""

	import os
	import numpy as np

	try:
	import torch
	from torch.utils.data import Dataset
	except ImportError:
	raise ImportError("PyTorch required: pip install torch")

	try:
	import tonic
	import tonic.transforms as transforms
	except ImportError:
	raise ImportError("tonic required: pip install tonic")


	# Sensor geometry as (width, height, polarities); everything below derives from it.
	SENSOR_SIZE = (128, 128, 2)
	# Spatial downsampling factor applied to both pixel axes (128 -> 32).
	DS_FACTOR = 4
	# Geometry after downsampling: (32, 32, 2). Polarity count is untouched.
	DS_SIZE = (
	    SENSOR_SIZE[0] // DS_FACTOR,
	    SENSOR_SIZE[1] // DS_FACTOR,
	    SENSOR_SIZE[2],
	)
	# Flattened input width: 32 * 32 * 2 = 2048.
	N_CHANNELS = DS_SIZE[0] * DS_SIZE[1] * DS_SIZE[2]
	# Number of gesture classes.
	N_CLASSES = 11


	def get_dvs_transform(dt=10e-3, duration=1.5):
	    """Build the tonic transform pipeline: spatial downsample, then bin to frames.

	    ``dt`` and ``duration`` are only used through their ratio: the number
	    of time bins handed to ``ToFrame`` is ``floor(duration / dt)``.

	    Args:
	        dt: Width of one time bin (same unit as ``duration``).
	        duration: Total time span the bins should cover.

	    Returns:
	        A ``transforms.Compose`` applying ``Downsample`` with spatial factor
	        ``1 / DS_FACTOR`` followed by ``ToFrame`` with ``DS_SIZE`` as the
	        sensor size and the computed number of time bins.

	    Raises:
	        ValueError: If ``dt`` is not positive, or if ``duration`` is shorter
	            than a single bin.
	    """
	    if dt <= 0:
	        raise ValueError(f"dt must be positive, got {dt!r}")

	    # Plain int(duration / dt) loses a bin whenever float division lands just
	    # below an integer (0.3 / 0.1 == 2.9999999999999996). The small epsilon
	    # absorbs that error while still flooring genuinely fractional ratios.
	    n_bins = int(duration / dt + 1e-9)
	    if n_bins < 1:
	        raise ValueError(
	            f"duration={duration!r} must span at least one bin of dt={dt!r}"
	        )

	    return transforms.Compose([
	        transforms.Downsample(spatial_factor=1.0 / DS_FACTOR),
	        transforms.ToFrame(
	            sensor_size=DS_SIZE,
	            n_time_bins=n_bins,
	        ),
	    ])


	class DVSGestureDataset(Dataset):
	    """PyTorch Dataset wrapper for DVS128 Gesture.

	    Each sample is loaded through the tonic transform pipeline, flattened to
	    ``(T, C*H*W)`` (``(T, 2048)`` for 2x32x32 frames), padded with zeros or
	    clipped to exactly ``n_bins`` time steps, and binarized to 0.0 / 1.0.

	    Attributes set by ``__init__``:
	        n_bins: Number of time steps every returned sample has.
	        dt: Bin width passed by the caller.
	        duration: Time span passed by the caller.
	    """

	    def __init__(self, data_dir="data/dvs_gesture", train=True, dt=10e-3, duration=1.5):
	        """Open the tonic DVSGesture split and remember the framing parameters.

	        Args:
	            data_dir: Directory handed to tonic as ``save_to``.
	            train: Selects the train split when true, otherwise the test split.
	            dt: Width of one time bin (same unit as ``duration``).
	            duration: Total time span covered by the bins.

	        Raises:
	            ValueError: If ``dt`` is not positive or ``duration`` is shorter
	                than a single bin.
	        """
	        # Validate first so bad parameters fail before the dataset is touched.
	        self.n_bins = self._count_bins(dt, duration)
	        self.dt = dt
	        self.duration = duration

	        transform = get_dvs_transform(dt=dt, duration=duration)
	        self._tonic_ds = tonic.datasets.DVSGesture(
	            save_to=data_dir,
	            train=train,
	            transform=transform,
	        )

	    @staticmethod
	    def _count_bins(dt, duration):
	        """Return ``floor(duration / dt)``, tolerant of float division error."""
	        if dt <= 0:
	            raise ValueError(f"dt must be positive, got {dt!r}")
	        # int(0.3 / 0.1) is 2 because the quotient is 2.9999999999999996; the
	        # epsilon keeps exact multiples from losing their last bin.
	        n_bins = int(duration / dt + 1e-9)
	        if n_bins < 1:
	            raise ValueError(
	                f"duration={duration!r} must span at least one bin of dt={dt!r}"
	            )
	        return n_bins

	    def __len__(self):
	        """Return the number of samples in the wrapped tonic dataset."""
	        return len(self._tonic_ds)

	    def __getitem__(self, idx):
	        """Return sample ``idx`` as ``(spikes, label)``.

	        Returns:
	            A float32 tensor of shape ``(n_bins, features)`` holding 0.0 / 1.0
	            and the label as a plain ``int``.
	        """
	        frames, label = self._tonic_ds[idx]

	        # Binarize up front (any event count > 0 is a spike); this avoids an
	        # intermediate float32 copy of the raw counts.
	        spikes = np.asarray(frames) > 0

	        # Frames arrive as (T, C, H, W) or (T, H, W); flatten everything after
	        # the time axis: (T, 2*32*32) = (T, 2048). An explicit feature count
	        # (not -1) keeps the reshape valid when T is 0.
	        if spikes.ndim > 2:
	            n_features = int(np.prod(spikes.shape[1:]))
	            spikes = spikes.reshape(spikes.shape[0], n_features)

	        # Pad with silent frames or clip so every sample has exactly n_bins steps.
	        out = np.zeros((self.n_bins, spikes.shape[1]), dtype=np.float32)
	        n_keep = min(spikes.shape[0], self.n_bins)
	        out[:n_keep] = spikes[:n_keep]

	        return torch.from_numpy(out), int(label)


	def collate_fn(batch):
	    """Merge ``(frames, label)`` pairs into one batch.

	    The frame tensors are stacked along a new leading axis, so they must
	    all share the same shape (i.e. a uniform time length).

	    Returns:
	        A tuple ``(stacked_frames, labels)`` where ``labels`` is a 1-D
	        ``torch.long`` tensor.
	    """
	    frame_tensors, targets = zip(*batch)
	    stacked = torch.stack(frame_tensors)
	    target_tensor = torch.tensor(targets, dtype=torch.long)
	    return stacked, target_tensor