import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
import warnings

warnings.filterwarnings('ignore')
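
# Image-captioning inference pipeline: a pre-trained InceptionV3 network extracts
# image features, the Encoder projects them into an embedding space, and an
# attention-based GRU Decoder generates the caption one token at a time.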
class Encoder(Model):
    # Projects the pre-extracted InceptionV3 features into the embedding space.
    def __init__(self, embed_dim):
        super(Encoder, self).__init__()
        self.dense = tf.keras.layers.Dense(embed_dim)

    def call(self, features):
        # features: (batch_size, 64, 2048) -> (batch_size, 64, embed_dim)
        features = self.dense(features)
        features = tf.keras.activations.relu(features)
        return features
class Attention_model(Model):
    # Additive (Bahdanau-style) attention over the 64 spatial image features.
    def __init__(self, units):
        super(Attention_model, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.units = units

    def call(self, features, hidden):
        # hidden: (batch_size, units) -> (batch_size, 1, units) so it broadcasts
        # against features: (batch_size, 64, embed_dim)
        hidden_with_time_axis = hidden[:, tf.newaxis]
        score = tf.keras.activations.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        # attention_weights: (batch_size, 64, 1), normalised over the spatial axis
        attention_weights = tf.keras.activations.softmax(self.V(score), axis=1)
        # context_vector: attention-weighted sum of the features -> (batch_size, embed_dim)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
class Decoder(Model):
    # GRU decoder that attends over the encoded image features at every step.
    def __init__(self, embed_dim, units, vocab_size):
        super(Decoder, self).__init__()
        self.units = units
        self.attention = Attention_model(self.units)
        self.embed = tf.keras.layers.Embedding(vocab_size, embed_dim)
        self.gru = tf.keras.layers.GRU(self.units, return_sequences=True, return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.d1 = tf.keras.layers.Dense(self.units)
        self.d2 = tf.keras.layers.Dense(vocab_size)

    def call(self, x, features, hidden):
        context_vector, attention_weights = self.attention(features, hidden)
        embed = self.embed(x)
        # Prepend the context vector to the embedded input token
        embed = tf.concat([tf.expand_dims(context_vector, 1), embed], axis=-1)
        output, state = self.gru(embed)
        output = self.d1(output)
        output = tf.reshape(output, (-1, output.shape[2]))
        # Logits over the vocabulary: (batch_size, vocab_size)
        output = self.d2(output)
        return output, state, attention_weights

    def init_state(self, batch_size):
        return tf.zeros((batch_size, self.units))

    # Same as init_state; kept for compatibility with the training code.
    def reset_state(self, batch_size):
        return tf.zeros((batch_size, self.units))
# Loading the tokenizer
with open("efb-requirements/tokenizer.json", 'r', encoding='utf-8') as f:
    loaded_tokenizer_json = f.read()
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)
def load_and_process_image(image, target_size=(299, 299)):
    # Resize the raw RGB image and apply InceptionV3 preprocessing (scales pixels to [-1, 1])
    img = tf.convert_to_tensor(image)
    img = tf.cast(img, tf.uint8)
    img = tf.image.resize(img, target_size)
    img = tf.keras.applications.inception_v3.preprocess_input(img)
    return img
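
# Note: evaluate() below passes the image straight into this function, so the image
# is assumed to be an RGB array of shape (height, width, 3) with 0-255 pixel values.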
# InceptionV3 feature extractor (outputs 8x8x2048 feature maps, flattened to 64x2048 below)
image_features_extract_model = keras.models.load_model("efb-requirements/inception_v3.h5")

# Model hyperparameters (must match the values used during training)
embedding_dim = 256
units = 512
vocab_size = 5001

encoder = Encoder(embedding_dim)
decoder = Decoder(embedding_dim, units, vocab_size)
# Run a dummy forward pass so the subclassed models build their weights
# before the saved weights are loaded
dummy_img_input = tf.ones((32, 64, 2048))
features = encoder(dummy_img_input)
hidden = decoder.init_state(32)
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * 32, 1)
dec = decoder(dec_input, features, hidden)

# Loading the saved weights
encoder.load_weights("efb-requirements/encoder_50epoch_weights.h5")
decoder.load_weights("efb-requirements/decoder_50epoch_weights.h5")
def evaluate(image):
    # Greedily decode a caption for a single image, one token per step
    max_length = 39
    attention_plot = np.zeros((max_length, 64))
    hidden = decoder.reset_state(batch_size=1)

    # Processing the input image to the desired format before extracting features
    temp_input = tf.expand_dims(load_and_process_image(image), 0)
    img_tensor_val = image_features_extract_model(temp_input)
    # Flatten the 8x8 spatial grid into 64 feature vectors of size 2048
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    cnn_features = encoder(img_tensor_val)

    decoder_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []

    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(decoder_input, cnn_features, hidden)
        attention_plot[i] = tf.reshape(attention_weights, (-1,)).numpy()
        # Greedy decoding: pick the most likely next token
        predicted_id = tf.argmax(predictions[0]).numpy()
        result.append(tokenizer.index_word[predicted_id])
        if tokenizer.index_word[predicted_id] == '<end>':
            # return result, attention_plot, predictions
            return result
        # Feed the predicted token back in as the next decoder input
        decoder_input = tf.expand_dims([predicted_id], 0)

    attention_plot = attention_plot[:len(result), :]
    # return result, attention_plot, predictions
    return result
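
# Example usage (a minimal sketch; the Space's actual UI wiring is not shown here).
# Any RGB image array with 0-255 pixel values should work, e.g. one loaded with
# Pillow (assumed to be available in the environment):
#
#     from PIL import Image
#     img = np.array(Image.open("example.jpg").convert("RGB"))
#     tokens = evaluate(img)
#     caption = ' '.join(t for t in tokens if t != '<end>')
#     print(caption)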