import pickle

import tensorflow as tf

from library.self_attention import create_padding_mask, create_masks_decoder, scaled_dot_product_attention
from library.Multihead_attention import MultiHeadAttention, point_wise_feed_forward_network
from library.customSchedule import learning_rate
from library.encoder_decoder import Encoder, Decoder, EncoderLayer, DecoderLayer


def load_image(image_path):
    # Read a JPEG from disk, resize it to InceptionV3's expected 299x299 input,
    # and apply InceptionV3 preprocessing (scales pixel values to [-1, 1]).
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (299, 299))
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img, image_path
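
# A minimal usage sketch (not part of the original file): load_image is meant to be
# mapped over image paths with tf.data before feature extraction. "image_paths" is a
# hypothetical list of JPEG file paths.
#
# image_dataset = tf.data.Dataset.from_tensor_slices(sorted(set(image_paths)))
# image_dataset = image_dataset.map(
#     load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(16)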

# InceptionV3 pretrained on ImageNet, with the classification head removed, serves
# as the image feature extractor. For a 299x299 input, the output of its last
# convolutional layer has shape (8, 8, 2048).
image_model = tf.keras.applications.InceptionV3(include_top=False,
                                                weights='imagenet')
new_input = image_model.input
hidden_layer = image_model.layers[-1].output

image_features_extract_model = tf.keras.Model(new_input, hidden_layer)
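
# A hedged sketch of how the extractor is typically used: for a batch of preprocessed
# 299x299 images it returns a (batch, 8, 8, 2048) feature map, which is flattened to a
# sequence of 64 positions for the Transformer encoder. "img_batch" is a hypothetical
# batch produced by the load_image pipeline above.
#
# batch_features = image_features_extract_model(img_batch)  # (batch, 8, 8, 2048)
# batch_features = tf.reshape(
#     batch_features,
#     (batch_features.shape[0], -1, batch_features.shape[3]))  # (batch, 64, 2048)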


class Transformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, row_size, col_size,
                 target_vocab_size, max_pos_encoding, rate=0.1):
        super(Transformer, self).__init__()

        # Encoder over the flattened image feature grid (row_size x col_size positions),
        # decoder over the caption token sequence.
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, row_size, col_size, rate)

        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
                               target_vocab_size, max_pos_encoding, rate)

        # Projects decoder outputs onto the target vocabulary (one logit per token).
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inp, tar, training, look_ahead_mask=None, dec_padding_mask=None, enc_padding_mask=None):
        # inp: image features, (batch, row_size * col_size, feature_dim)
        # tar: caption token ids, (batch, target_seq_len)
        enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch, input_seq_len, d_model)

        # dec_output: (batch, target_seq_len, d_model)
        dec_output, attention_weights = self.decoder(
            tar, enc_output, training, look_ahead_mask, dec_padding_mask)

        final_output = self.final_layer(dec_output)  # (batch, target_seq_len, target_vocab_size)

        return final_output, attention_weights
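
# Illustrative instantiation (the hyperparameter values here are assumptions, not
# necessarily the ones used for training): row_size and col_size match the 8x8
# InceptionV3 feature grid, and "top_k" stands for the caption tokenizer's vocabulary
# size. create_masks_decoder (imported above) is assumed to build the look-ahead mask
# for the target captions; "img_tensor" and "tar_inp" are hypothetical batches of
# image features and caption token ids.
#
# top_k = 5000
# transformer = Transformer(num_layers=4, d_model=512, num_heads=8, dff=2048,
#                           row_size=8, col_size=8,
#                           target_vocab_size=top_k + 1,
#                           max_pos_encoding=top_k + 1)
# dec_mask = create_masks_decoder(tar_inp)
# predictions, attention_weights = transformer(img_tensor, tar_inp, True, dec_mask)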