Spaces:

EPFL-VILAB
/

ViPer

Running on L40S

App Files Files Community

ViPer / diffusers /pipelines /wuerstchen /modeling_wuerstchen_common.py

miaw1419

Upload 472 files

0aaa1f1 verified 5 months ago

raw

history blame

3.78 kB

	# Copyright (c) 2023 Dominic Rampas MIT License
	# Copyright 2023 The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import torch
	import torch.nn as nn

	from ...models.attention_processor import Attention
	from ...models.lora import LoRACompatibleConv, LoRACompatibleLinear
	from ...utils import USE_PEFT_BACKEND


	class WuerstchenLayerNorm(nn.LayerNorm):
	    """``nn.LayerNorm`` applied over dimension 1 of a 4D tensor.

	    ``nn.LayerNorm`` normalizes over the trailing dimension(s), so ``forward``
	    moves dimension 1 to the end, normalizes, and restores the original layout.
	    Constructor arguments are exactly those of ``nn.LayerNorm``.
	    """

	    def __init__(self, *args, **kwargs):
	        # Pure pass-through: positional AND keyword arguments must reach
	        # ``nn.LayerNorm`` (the blocks in this file construct this layer with
	        # ``elementwise_affine=False, eps=1e-6`` as keywords).
	        super().__init__(*args, **kwargs)

	    def forward(self, x):
	        """Normalize a 4D tensor over its dimension 1.

	        Args:
	            x: 4D tensor; presumably ``(batch, channels, height, width)`` —
	                the permutations below require exactly four dimensions.

	        Returns:
	            Tensor with the same dimension order as ``x``.
	        """
	        # Dimension 1 goes last so LayerNorm's trailing-dimension rule applies.
	        x = x.permute(0, 2, 3, 1)
	        x = super().forward(x)
	        # Undo the permutation to hand back the caller's layout.
	        return x.permute(0, 3, 1, 2)


	class TimestepBlock(nn.Module):
	    """Modulate a feature map with a scale and shift derived from ``t``.

	    One linear layer maps the ``c_timestep``-sized input to ``2 * c`` values,
	    which are split into a per-channel scale and a per-channel shift.
	    """

	    def __init__(self, c, c_timestep):
	        super().__init__()
	        # A plain linear layer is used when the PEFT backend is active,
	        # otherwise the LoRA-compatible variant.
	        if USE_PEFT_BACKEND:
	            linear_cls = nn.Linear
	        else:
	            linear_cls = LoRACompatibleLinear
	        self.mapper = linear_cls(c_timestep, c * 2)

	    def forward(self, x, t):
	        """Return ``x * (1 + scale) + shift``.

	        Args:
	            x: feature map; presumably ``(batch, c, height, width)`` — confirm
	                against callers.
	            t: 2D conditioning tensor fed to the mapper (the indexing below
	                adds two trailing singleton dimensions to a 2D result).
	        """
	        # Two trailing singleton axes let the modulation broadcast over the
	        # last two dimensions of ``x``.
	        modulation = self.mapper(t)[:, :, None, None]
	        scale, shift = modulation.chunk(2, dim=1)
	        return x * (1 + scale) + shift


	class ResBlock(nn.Module):
	    """Residual block: grouped conv, layer norm, then a per-position MLP.

	    Args:
	        c: number of channels of the main input and of the output.
	        c_skip: number of extra channels concatenated from ``x_skip``.
	        kernel_size: kernel size of the grouped convolution; padding is
	            ``kernel_size // 2``.
	        dropout: dropout probability inside the MLP.
	    """

	    def __init__(self, c, c_skip=0, kernel_size=3, dropout=0.0):
	        super().__init__()

	        # Plain torch layers with the PEFT backend, LoRA-compatible otherwise.
	        if USE_PEFT_BACKEND:
	            conv_cls, linear_cls = nn.Conv2d, nn.Linear
	        else:
	            conv_cls, linear_cls = LoRACompatibleConv, LoRACompatibleLinear

	        self.depthwise = conv_cls(
	            c + c_skip,
	            c,
	            kernel_size=kernel_size,
	            padding=kernel_size // 2,
	            groups=c,
	        )
	        self.norm = WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6)
	        # MLP with a 4x expansion; it runs on tensors whose last dimension
	        # holds the ``c`` features (see the permute in ``forward``).
	        mlp_layers = [
	            linear_cls(c, c * 4),
	            nn.GELU(),
	            GlobalResponseNorm(c * 4),
	            nn.Dropout(dropout),
	            linear_cls(c * 4, c),
	        ]
	        self.channelwise = nn.Sequential(*mlp_layers)

	    def forward(self, x, x_skip=None):
	        """Apply the block and add the untouched input back.

	        Args:
	            x: 4D input tensor with ``c`` entries along dimension 1.
	            x_skip: optional tensor concatenated to ``x`` along dimension 1
	                before the convolution.

	        Returns:
	            Tensor with the same dimension order as ``x``.
	        """
	        # The residual is the input as it was *before* any skip concat.
	        residual = x
	        if x_skip is not None:
	            x = torch.cat([x, x_skip], dim=1)
	        hidden = self.norm(self.depthwise(x))
	        # Dimension 1 moves last for the linear layers, then back again.
	        hidden = hidden.permute(0, 2, 3, 1)
	        hidden = self.channelwise(hidden)
	        return hidden.permute(0, 3, 1, 2) + residual


	# from https://github.com/facebookresearch/ConvNeXt-V2/blob/3608f67cc1dae164790c5d0aead7bf2d73d9719b/models/utils.py#L105
	class GlobalResponseNorm(nn.Module):
	    """Global response normalization with learnable scale and bias.

	    ``gamma`` and ``beta`` have shape ``(1, 1, 1, dim)`` and start at zero, so
	    a freshly constructed layer returns its input unchanged.
	    """

	    def __init__(self, dim):
	        super().__init__()
	        param_shape = (1, 1, 1, dim)
	        self.gamma = nn.Parameter(torch.zeros(param_shape))
	        self.beta = nn.Parameter(torch.zeros(param_shape))

	    def forward(self, x):
	        """Return ``gamma * (x * n) + beta + x`` for the normalized response ``n``.

	        Args:
	            x: 4D tensor whose last dimension has size ``dim``; presumably
	                ``(batch, height, width, dim)`` — confirm against callers.
	        """
	        # L2 norm taken jointly over dimensions 1 and 2, one value per feature.
	        response = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
	        # Divide by the mean response across the last dimension; the small
	        # constant guards against division by zero.
	        mean_response = response.mean(dim=-1, keepdim=True)
	        relative = response / (mean_response + 1e-6)
	        scaled = self.gamma * (x * relative)
	        return scaled + self.beta + x


	class AttnBlock(nn.Module):
	    """Residual attention block over a 4D feature map.

	    Args:
	        c: number of channels of the feature map (attention query dimension).
	        c_cond: feature size of the conditioning input ``kv``.
	        nhead: number of attention heads; each head has ``c // nhead`` dims.
	        self_attn: when true, the normalized feature map is prepended to the
	            conditioning sequence so the block also attends to itself.
	        dropout: dropout probability passed to the attention layer.
	    """

	    def __init__(self, c, c_cond, nhead, self_attn=True, dropout=0.0):
	        super().__init__()

	        # Plain linear layer with the PEFT backend, LoRA-compatible otherwise.
	        if USE_PEFT_BACKEND:
	            linear_cls = nn.Linear
	        else:
	            linear_cls = LoRACompatibleLinear

	        self.self_attn = self_attn
	        self.norm = WuerstchenLayerNorm(c, elementwise_affine=False, eps=1e-6)
	        self.attention = Attention(
	            query_dim=c,
	            heads=nhead,
	            dim_head=c // nhead,
	            dropout=dropout,
	            bias=True,
	        )
	        # Projects the conditioning from ``c_cond`` to ``c`` features.
	        self.kv_mapper = nn.Sequential(nn.SiLU(), linear_cls(c_cond, c))

	    def forward(self, x, kv):
	        """Attend from ``x`` to the mapped conditioning and add the result to ``x``.

	        Args:
	            x: 4D feature map with ``c`` entries along dimension 1.
	            kv: conditioning tensor with ``c_cond`` features in its last
	                dimension; presumably ``(batch, seq, c_cond)`` — confirm
	                against callers.
	        """
	        context = self.kv_mapper(kv)
	        normed = self.norm(x)
	        if self.self_attn:
	            batch_size, channel = x.shape[0], x.shape[1]
	            # Flatten the last two dimensions into a sequence with features
	            # last, and place it in front of the conditioning tokens.
	            tokens = normed.view(batch_size, channel, -1).transpose(1, 2)
	            context = torch.cat([tokens, context], dim=1)
	        attended = self.attention(normed, encoder_hidden_states=context)
	        return x + attended