import tensorflow as tf
import numpy as np
def get_angles(pos, i, d_model):
    """Compute the angle values used by the sinusoidal positional encodings."""
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates
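# Worked example (illustrative values): for pos = 1 and d_model = 512,
# get_angles(1, i, 512) = 1 / 10000 ** (2 * (i // 2) / 512),
# i.e. 1.0 at i = 0 and roughly 1e-4 at i = 511, which is the frequency
# schedule from "Attention Is All You Need".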
def positional_encoding_1d(position, d_model):
    """Return the 1-D sinusoidal positional encoding, shape (1, position, d_model)."""
    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :],
                            d_model)
    # apply sin to even indices in the array; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    # apply cos to odd indices in the array; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)
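# Usage sketch (illustrative values, e.g. a 50-token sequence with d_model = 512):
#   pe = positional_encoding_1d(50, 512)
#   pe.shape  # TensorShape([1, 50, 512]); typically added to the token embeddings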
def positional_encoding_2d(row, col, d_model):
    """Return a 2-D positional encoding for a row x col grid, shape (1, row * col, d_model)."""
    assert d_model % 2 == 0
    # the first d_model/2 dimensions encode the row position, the second d_model/2 encode the column position
    row_pos = np.repeat(np.arange(row), col)[:, np.newaxis]
    col_pos = np.repeat(np.expand_dims(np.arange(col), 0), row, axis=0).reshape(-1, 1)
    angle_rads_row = get_angles(row_pos, np.arange(d_model // 2)[np.newaxis, :], d_model // 2)
    angle_rads_col = get_angles(col_pos, np.arange(d_model // 2)[np.newaxis, :], d_model // 2)
    # apply sin to even indices and cos to odd indices, respectively
    angle_rads_row[:, 0::2] = np.sin(angle_rads_row[:, 0::2])
    angle_rads_row[:, 1::2] = np.cos(angle_rads_row[:, 1::2])
    angle_rads_col[:, 0::2] = np.sin(angle_rads_col[:, 0::2])
    angle_rads_col[:, 1::2] = np.cos(angle_rads_col[:, 1::2])
    pos_encoding = np.concatenate([angle_rads_row, angle_rads_col], axis=1)[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)
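# Usage sketch (illustrative values, e.g. an 8 x 8 feature grid with d_model = 512):
#   pe = positional_encoding_2d(8, 8, 512)
#   pe.shape  # TensorShape([1, 64, 512]); one encoding per flattened grid position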
def create_padding_mask(seq):
    """Mark padding tokens (id 0) with 1.0 so they can be masked out in attention."""
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    # add extra dimensions so the mask broadcasts against the attention logits
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)
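# Example: padding positions (token id 0) become 1.0 in the mask.
#   create_padding_mask(tf.constant([[7, 6, 0, 0]]))
#   # -> shape (1, 1, 1, 4) with values [[[[0., 0., 1., 1.]]]]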
def create_look_ahead_mask(size):
    """Upper-triangular mask that hides future positions from the decoder."""
    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask  # (seq_len, seq_len)
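# Example: create_look_ahead_mask(3) returns
#   [[0., 1., 1.],
#    [0., 0., 1.],
#    [0., 0., 0.]]
# so position i can only attend to positions <= i.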
def create_masks_decoder(tar):
    """Combine the look-ahead and padding masks for the decoder target sequence."""
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return combined_mask  # (batch_size, 1, seq_len, seq_len)
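# Usage sketch (illustrative target batch): the combined mask hides both padding
# and future tokens.
#   tar = tf.constant([[5, 3, 0]])    # one sequence, last token is padding
#   create_masks_decoder(tar)         # shape (1, 1, 3, 3)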
def scaled_dot_product_attention(q, k, v, mask):
    """Calculate the attention weights.

    q, k, v must have matching leading dimensions.
    k, v must have matching penultimate dimensions, i.e. seq_len_k = seq_len_v.
    The mask has different shapes depending on its type (padding or look-ahead),
    but it must be broadcastable for addition.

    Args:
        q: query, shape == (..., seq_len_q, depth)
        k: key, shape == (..., seq_len_k, depth)
        v: value, shape == (..., seq_len_v, depth_v)
        mask: float tensor with shape broadcastable to
            (..., seq_len_q, seq_len_k). Defaults to None.

    Returns:
        output, attention_weights
    """
    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
    # scale matmul_qk
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
    # add the mask to the scaled tensor: a large negative value where mask == 1,
    # so those positions are effectively ignored by the softmax
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)
    # softmax is normalized on the last axis (seq_len_k) so that the scores add up to 1
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)
    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
    return output, attention_weights
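# Usage sketch (illustrative shapes): with q of shape (batch, heads, seq_len_q, depth)
# and k, v of shape (batch, heads, seq_len_k, depth), the output has shape
# (batch, heads, seq_len_q, depth) and attention_weights has shape
# (batch, heads, seq_len_q, seq_len_k). For example:
#   q = tf.random.uniform((1, 8, 10, 64))
#   k = tf.random.uniform((1, 8, 12, 64))
#   v = tf.random.uniform((1, 8, 12, 64))
#   out, attn = scaled_dot_product_attention(q, k, v, mask=None)
#   out.shape   # TensorShape([1, 8, 10, 64])
#   attn.shape  # TensorShape([1, 8, 10, 12])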