"""Contains the UNet decoder. |
|
|
|
|
|
For licensing see accompanying LICENSE file. |
|
|
Copyright (C) 2025 Apple Inc. All Rights Reserved. |
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
from typing import List |
|
|
|
|
|
import torch |
|
|
from torch import nn |
|
|
|
|
|
from sharp.models.blocks import (
    NormLayerName,
    norm_layer_2d,
    residual_block_2d,
)

from .base_decoder import BaseDecoder


class UNetDecoder(BaseDecoder):
    """Decoder of UNet model."""

    def __init__(
        self,
        dim_out: int,
        width: list[int] | int,
        steps: int = 5,
        norm_type: NormLayerName = "group_norm",
        norm_num_groups: int = 8,
        blocks_per_layer: int = 2,
    ) -> None:
        """Initialize the UNet decoder.

        Args:
            dim_out: The number of output channels.
            width: Width of the last input feature map from the encoder,
                or the list of widths of all input feature maps from the encoder.
            steps: The number of upsampling steps.
            norm_type: Which kind of normalization layer to use.
            norm_num_groups: How many groups to use for group norm (if relevant).
            blocks_per_layer: How many residual blocks to use per layer.
        """
        super().__init__()

        if blocks_per_layer < 1:
            raise ValueError("blocks_per_layer must be greater or equal to one.")

        self.dim_out = dim_out
        self.convs_up = nn.ModuleList()
        self.output_dims: list[int]

        # Encoder widths, ordered from the deepest feature map to the shallowest.
        if isinstance(width, int):
            # A single width is taken to halve at every shallower encoder level.
            self.input_dims = [width >> i for i in range(0, steps + 1)]
        else:
            self.input_dims = width[::-1][: steps + 1]

        for i_step in range(steps):
            input_width = self.input_dims[i_step]
            current_width = self.input_dims[i_step + 1]
            # After the first step, the block input is the upsampled output
            # concatenated with the encoder skip connection, which doubles its width.
            convs_up_i = nn.Sequential(
                nn.Upsample(scale_factor=2),
                residual_block_2d(
                    input_width * (1 if i_step == 0 else 2),
                    current_width,
                    norm_type=norm_type,
                    norm_num_groups=norm_num_groups,
                ),
                *[
                    residual_block_2d(
                        current_width,
                        current_width,
                        norm_type=norm_type,
                        norm_num_groups=norm_num_groups,
                    )
                    for _ in range(blocks_per_layer - 1)
                ],
            )
            self.convs_up.append(convs_up_i)

        last_width = self.input_dims[-1]
        self.conv_out = nn.Sequential(
            norm_layer_2d(last_width * 2, norm_type, num_groups=norm_num_groups),
            nn.ReLU(),
            nn.Conv2d(last_width * 2, dim_out, 1),
            norm_layer_2d(dim_out, norm_type, num_groups=norm_num_groups),
            nn.ReLU(),
        )

    def forward(self, features: list[torch.Tensor]) -> torch.Tensor:
        """Apply the UNet decoder to the encoder features.

        Args:
            features: The multi-level feature maps from the encoder,
                ordered from shallowest to deepest.

        Returns:
            The output feature map.
        """
        # Start from the deepest feature map and walk back towards the shallowest.
        i_feature_layer = len(features) - 1
        out = self.convs_up[0](features[i_feature_layer])
        i_feature_layer -= 1
        for conv_up in self.convs_up[1:]:
            # Concatenate the upsampled output with the matching skip connection.
            out = conv_up(torch.cat([out, features[i_feature_layer]], dim=1))
            i_feature_layer -= 1
        out = self.conv_out(torch.cat([out, features[i_feature_layer]], dim=1))

        return out
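

# A minimal usage sketch (not part of the original module): it instantiates the
# decoder from a single encoder width and runs it on dummy feature maps. The
# chosen shapes assume the encoder halves the channel width and doubles the
# spatial size at each shallower level, and that `residual_block_2d` preserves
# spatial resolution; adjust them if the actual encoder differs.
if __name__ == "__main__":
    decoder = UNetDecoder(dim_out=16, width=256, steps=3)
    # Encoder features ordered shallowest to deepest:
    # (1, 32, 64, 64), (1, 64, 32, 32), (1, 128, 16, 16), (1, 256, 8, 8).
    features = [torch.randn(1, 256 >> i, 8 << i, 8 << i) for i in reversed(range(4))]
    print(decoder(features).shape)  # Expected: torch.Size([1, 16, 64, 64])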