import torch
from allennlp.modules.conditional_random_field import ConditionalRandomField
from allennlp.nn.util import logsumexp
from overrides import overrides


class SmoothCRF(ConditionalRandomField):
    @overrides
    def forward(self, inputs: torch.Tensor, tags: torch.Tensor, mask: torch.Tensor = None):
""" | |
:param inputs: Shape [batch, token, tag] | |
:param tags: Shape [batch, token] or [batch, token, tag] | |
:param mask: Shape [batch, token] | |
:return: | |
""" | |
        if mask is None:
            mask = tags.new_ones(tags.shape[:2], dtype=torch.bool)
        mask = mask.to(dtype=torch.bool)
        if tags.dim() == 2:
            # Hard tags: defer to the standard CRF log-likelihood.
            return super().forward(inputs, tags, mask)

        # smooth mode
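        # `tags` holds a distribution over labels at every token. The numerator below
        # marginalises over all tag sequences, weighting each path's score by the product
        # of its per-token soft tag probabilities, rather than scoring a single gold path.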
        log_denominator = self._input_likelihood(inputs, mask)
        log_numerator = self._smooth_joint_likelihood(inputs, tags, mask)
        return torch.sum(log_numerator - log_denominator)

    def _smooth_joint_likelihood(
        self, logits: torch.Tensor, soft_tags: torch.Tensor, mask: torch.Tensor
    ) -> torch.Tensor:
        batch_size, sequence_length, num_tags = logits.size()
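
        # Clamp the soft tag probabilities away from zero so the logs below stay finite.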
        epsilon = 1e-30
        soft_tags = soft_tags.clamp(min=epsilon)

        # Transpose batch size and sequence dimensions
        mask = mask.transpose(0, 1).contiguous()
        logits = logits.transpose(0, 1).contiguous()
        soft_tags = soft_tags.transpose(0, 1).contiguous()

        # Initial alpha is the (batch_size, num_tags) tensor of likelihoods combining the
        # transitions to the initial states and the logits for the first timestep, weighted
        # in log space by the soft tag distribution of the first token.
        if self.include_start_end_transitions:
            alpha = self.start_transitions.view(1, num_tags) + logits[0] + soft_tags[0].log()
        else:
            alpha = logits[0] + soft_tags[0].log()

        # For each i we compute logits for the transitions from timestep i-1 to timestep i.
        # We do so in a (batch_size, num_tags, num_tags) tensor where the axes are
        # (instance, current_tag, next_tag).
        for i in range(1, sequence_length):
            # The emit scores are for time i ("next_tag") so we broadcast along the current_tag axis.
            emit_scores = logits[i].view(batch_size, 1, num_tags)
            # Transition scores are (current_tag, next_tag) so we broadcast along the instance axis.
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            # Alpha is for the current_tag, so we broadcast along the next_tag axis.
            broadcast_alpha = alpha.view(batch_size, num_tags, 1)
            # Add all the scores together, including the log soft-tag weight for timestep i,
            # and logsumexp over the current_tag axis.
            inner = broadcast_alpha + emit_scores + transition_scores + soft_tags[i].log().unsqueeze(1)
            # In valid positions (mask == True) we take the logsumexp over the current_tag
            # dimension of `inner`; otherwise (mask == False) we retain the previous alpha.
            alpha = logsumexp(inner, 1) * mask[i].view(batch_size, 1) + alpha * (
                ~mask[i]
            ).view(batch_size, 1)

        # Every sequence needs to end with a transition to the stop_tag.
        if self.include_start_end_transitions:
            stops = alpha + self.end_transitions.view(1, num_tags)
        else:
            stops = alpha

        # Finally we logsumexp along the num_tags dim; the result is (batch_size,).
        return logsumexp(stops)
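

# A minimal usage sketch (not part of the original module): the shapes and random values
# below are illustrative assumptions, chosen only to exercise both code paths — hard tags
# of shape [batch, token] and soft tag distributions of shape [batch, token, tag].
if __name__ == "__main__":
    torch.manual_seed(0)
    batch, seq_len, num_tags = 2, 4, 3
    crf = SmoothCRF(num_tags)

    logits = torch.randn(batch, seq_len, num_tags)
    mask = torch.ones(batch, seq_len, dtype=torch.bool)

    # Smooth mode: a full distribution over tags at every token position.
    soft = torch.softmax(torch.randn(batch, seq_len, num_tags), dim=-1)
    print("smooth log-likelihood:", crf(logits, soft, mask).item())

    # Sanity check: one-hot "soft" tags should approximately reproduce the standard
    # hard-tag log-likelihood (up to the epsilon clamping above).
    hard = soft.argmax(-1)
    one_hot = torch.nn.functional.one_hot(hard, num_tags).float()
    print("one-hot smooth:", crf(logits, one_hot, mask).item())
    print("hard:", crf(logits, hard, mask).item())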