Spaces:

mathiaszinnen
/

odor-detection

Runtime error

App Files Files Community

odor-detection / tests /utils /transformer.py

mathiaszinnen

Initialize app

3e99b05 over 1 year ago

raw

history blame contribute delete

4.71 kB

	# coding=utf-8
	# Copyright 2022 The IDEA Authors. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	import torch
	import torch.nn as nn

	from .attention import MultiheadAttention


	class OriginalConditionalAttentionEncoder(nn.Module):
	    """Reference conditional self-attention block kept for testing.

	    Content and positional inputs go through separate linear projections,
	    are summed to form the query and the key, and are then passed to
	    multi-head attention. Norm and dropout layers are left out on purpose
	    to keep the test simple.

	    Args:
	        d_model: Width of every linear projection and of the attention module.
	        nhead: Number of attention heads.
	    """

	    def __init__(self, d_model, nhead):
	        super().__init__()
	        # The projections are created one by one in a fixed order, so that
	        # parameter registration (and seeded initialization) stays stable.
	        for proj_name in (
	            "sa_qcontent_proj",
	            "sa_qpos_proj",
	            "sa_kcontent_proj",
	            "sa_kpos_proj",
	            "sa_v_proj",
	        ):
	            setattr(self, proj_name, nn.Linear(d_model, d_model))
	        self.self_attn = MultiheadAttention(d_model, nhead, dropout=0.0, vdim=d_model)

	    def forward(self, tgt, query_pos):
	        """Run conditional self-attention over ``tgt``.

	        Args:
	            tgt: Content input; projected into query, key and value.
	            query_pos: Positional input; projected and added to query and key.

	        Returns:
	            Whatever ``self.self_attn`` returns, passed through without
	            indexing. NOTE(review): presumably an ``(output, weights)``
	            pair -- confirm against ``MultiheadAttention``.
	        """
	        query = self.sa_qcontent_proj(tgt) + self.sa_qpos_proj(query_pos)
	        key = self.sa_kcontent_proj(tgt) + self.sa_kpos_proj(query_pos)
	        # The value carries content only; no positional term is added.
	        value = self.sa_v_proj(tgt)
	        return self.self_attn(query, key, value)


	class OriginalConditionalAttentionDecoder(nn.Module):
	    """Reference conditional-attention decoder block kept for testing.

	    Runs a conditional self-attention step with a residual connection,
	    followed by a conditional cross-attention step whose query and key are
	    built by concatenating content and positional parts per head. Norm and
	    dropout layers are left out on purpose to keep the test simple.

	    Args:
	        d_model: Width of every linear projection. The cross-attention
	            module is built with ``d_model * 2`` as its embedding width and
	            ``d_model`` as its value width.
	        nhead: Number of attention heads.
	    """

	    def __init__(self, d_model, nhead):
	        super().__init__()
	        # Modules are created in a fixed order (self-attn projections,
	        # self-attn, cross-attn projections, cross-attn) so parameter
	        # registration and seeded initialization stay stable.
	        for proj_name in (
	            "sa_qcontent_proj",
	            "sa_qpos_proj",
	            "sa_kcontent_proj",
	            "sa_kpos_proj",
	            "sa_v_proj",
	        ):
	            setattr(self, proj_name, nn.Linear(d_model, d_model))
	        self.self_attn = MultiheadAttention(d_model, nhead, dropout=0.0, vdim=d_model)

	        for proj_name in (
	            "ca_qcontent_proj",
	            "ca_qpos_proj",
	            "ca_kcontent_proj",
	            "ca_kpos_proj",
	            "ca_v_proj",
	            "ca_qpos_sine_proj",
	        ):
	            setattr(self, proj_name, nn.Linear(d_model, d_model))
	        self.cross_attn = MultiheadAttention(d_model * 2, nhead, dropout=0.0, vdim=d_model)

	        self.nhead = nhead

	    def forward(self, tgt, memory, query_pos, pos, query_sine_embed, is_first=True):
	        """Apply self-attention, then cross-attention against ``memory``.

	        Args:
	            tgt: Decoder input (the original note says it is the input of
	                the first decoder layer and zero by default). Its projection
	                is unpacked as ``(num_queries, bs, n_model)``.
	            memory: Source of the cross-attention key and value; its key
	                projection is unpacked as ``(hw, bs, n_model)``.
	            query_pos: Positional input for the queries.
	            pos: Positional input for ``memory``.
	            query_sine_embed: Query-side embedding that is projected and
	                concatenated onto the cross-attention query.
	            is_first: When truthy, projected positional terms are also added
	                to the cross-attention content query and key.

	        Returns:
	            The cross-attention output (element ``[0]`` of the attention
	            call) added to the post-self-attention ``tgt``.
	        """
	        # ---- self-attention with residual ----
	        sa_query = self.sa_qcontent_proj(tgt) + self.sa_qpos_proj(query_pos)
	        sa_key = self.sa_kcontent_proj(tgt) + self.sa_kpos_proj(query_pos)
	        sa_value = self.sa_v_proj(tgt)
	        tgt = tgt + self.self_attn(sa_query, sa_key, sa_value)[0]

	        # ---- cross-attention ----
	        query = self.ca_qcontent_proj(tgt)
	        key = self.ca_kcontent_proj(memory)
	        value = self.ca_v_proj(memory)
	        key_pos = self.ca_kpos_proj(pos)

	        num_queries, bs, n_model = query.shape
	        hw, _, _ = key.shape
	        head_dim = n_model // self.nhead

	        # Only when is_first is set are the positional projections added to
	        # the content query/key; otherwise the content terms are used alone.
	        if is_first:
	            query = query + self.ca_qpos_proj(query_pos)
	            key = key + key_pos

	        # Split into heads, append the positional part to every head's slice,
	        # then flatten back so each head sees [content | position].
	        query_heads = query.view(num_queries, bs, self.nhead, head_dim)
	        sine_heads = self.ca_qpos_sine_proj(query_sine_embed).view(
	            num_queries, bs, self.nhead, head_dim
	        )
	        query = torch.cat([query_heads, sine_heads], dim=3).view(
	            num_queries, bs, n_model * 2
	        )

	        key_heads = key.view(hw, bs, self.nhead, head_dim)
	        key_pos_heads = key_pos.view(hw, bs, self.nhead, head_dim)
	        key = torch.cat([key_heads, key_pos_heads], dim=3).view(hw, bs, n_model * 2)

	        attended = self.cross_attn(query=query, key=key, value=value)[0]

	        return attended + tgt