# eye_for_blind / inference_script.py
# krishnapal2308
# pipeline to manual
# 7301eb7
# raw
# history blame
# 4.68 kB
import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
import warnings
# Install a global "ignore" filter so that no warnings are shown by this process
# (presumably to keep TensorFlow/Keras deprecation noise out of the output).
warnings.filterwarnings('ignore')
class Encoder(Model):
    """Project image feature vectors to ``embed_dim`` channels.

    The model is a single fully connected layer followed by a ReLU
    activation.
    """

    def __init__(self, embed_dim):
        """Create the projection layer.

        Args:
            embed_dim: Number of output units of the dense projection.
        """
        super().__init__()
        self.dense = tf.keras.layers.Dense(embed_dim)

    def call(self, features):
        """Return ``relu(dense(features))``.

        Args:
            features: Tensor of image features; the dense layer acts on its
                last axis.
        """
        projected = self.dense(features)
        return tf.keras.activations.relu(projected)
class Attention_model(Model):
    """Additive attention: scores each feature position against a hidden state.

    The score is ``V(tanh(W1(features) + W2(hidden)))``, normalised with a
    softmax over axis 1 and used to form a weighted sum of ``features``.
    """

    def __init__(self, units):
        """Create the three dense layers used for scoring.

        Args:
            units: Width of the ``W1`` / ``W2`` projections.
        """
        super().__init__()
        # NOTE(review): the layers are created in the order W1, W2, V; saved
        # weights are presumably matched by layer order, so keep it stable.
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.units = units

    def call(self, features, hidden):
        """Compute the context vector and the attention weights.

        Args:
            features: Encoded image features; assumed to be
                (batch, positions, dim) - TODO confirm.
            hidden: Decoder hidden state; assumed to be (batch, units) -
                TODO confirm.

        Returns:
            Tuple ``(context_vector, attention_weights)`` where
            ``context_vector`` is ``features`` weighted by the attention
            weights and summed over axis 1.
        """
        # Insert a length-1 axis after the batch axis so the projected hidden
        # state broadcasts against every position of the projected features.
        hidden_with_time_axis = hidden[:, tf.newaxis]
        combined = self.W1(features) + self.W2(hidden_with_time_axis)
        score = tf.keras.activations.tanh(combined)

        # One scalar per position, normalised across positions (axis 1).
        attention_weights = tf.keras.activations.softmax(self.V(score), axis=1)

        context_vector = tf.reduce_sum(attention_weights * features, axis=1)
        return context_vector, attention_weights
class Decoder(Model):
    """Caption decoder: attention, token embedding, GRU and two dense layers."""

    def __init__(self, embed_dim, units, vocab_size):
        """Create the decoder layers.

        Args:
            embed_dim: Size of the token embedding vectors.
            units: Width of the attention projections, the GRU and ``d1``.
            vocab_size: Number of embedding rows and of output logits.
        """
        super().__init__()
        self.units = units
        # NOTE(review): saved weights are presumably matched by layer order,
        # so the creation order below is kept exactly as it was.
        self.attention = Attention_model(self.units)
        self.embed = tf.keras.layers.Embedding(vocab_size, embed_dim)
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.d1 = tf.keras.layers.Dense(self.units)
        self.d2 = tf.keras.layers.Dense(vocab_size)

    def call(self, x, features, hidden):
        """Run one decoding step.

        Args:
            x: Token ids fed to the embedding layer.
            features: Encoded image features passed to the attention model.
            hidden: Hidden state passed to the attention model.

        Returns:
            Tuple ``(output, state, attention_weights)``: the ``d2`` outputs
            flattened to two dimensions, the state returned by the GRU, and
            the attention weights.
        """
        context_vector, attention_weights = self.attention(features, hidden)

        token_embedding = self.embed(x)
        # Give the context vector a length-1 axis at position 1 and join it
        # with the token embedding along the last axis.
        gru_input = tf.concat(
            [tf.expand_dims(context_vector, 1), token_embedding], axis=-1
        )

        # `hidden` is only consumed by the attention model; the GRU is called
        # without an explicit initial state.
        sequence_output, state = self.gru(gru_input)

        dense_output = self.d1(sequence_output)
        # Collapse the leading axes so that each row is one set of logits.
        flattened = tf.reshape(dense_output, (-1, dense_output.shape[2]))
        return self.d2(flattened), state, attention_weights

    def init_state(self, batch_size):
        """Return an all-zero hidden state of shape (batch_size, units)."""
        state_shape = (batch_size, self.units)
        return tf.zeros(state_shape)

    def reset_state(self, batch_size):
        """Return an all-zero hidden state of shape (batch_size, units)."""
        state_shape = (batch_size, self.units)
        return tf.zeros(state_shape)
# Rebuild the tokenizer from its JSON dump on disk.
with open("efb-requirements/tokenizer.json", 'r', encoding='utf-8') as f:
    loaded_tokenizer_json = f.read()

# Parsing happens after the file has been closed; only the JSON text is needed.
tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)
def load_and_process_image(image, target_size=(299, 299)):
    """Turn a raw image into an input tensor for the InceptionV3 extractor.

    Args:
        image: Anything ``tf.convert_to_tensor`` accepts (presumably an
            image array of shape (height, width, channels) - confirm with
            the caller).
        target_size: ``(height, width)`` the image is resized to.

    Returns:
        The image cast to ``uint8``, resized to ``target_size`` and passed
        through ``inception_v3.preprocess_input``.
    """
    pixels = tf.cast(tf.convert_to_tensor(image), tf.uint8)
    resized = tf.image.resize(pixels, target_size)
    return tf.keras.applications.inception_v3.preprocess_input(resized)
# Image feature extractor, loaded as a complete saved Keras model from disk.
image_features_extract_model = keras.models.load_model("efb-requirements/inception_v3.h5")
# Hyper-parameters used to construct the encoder and decoder.
# NOTE(review): presumably these must equal the values used when the weight
# files loaded below were trained - confirm before changing them.
embedding_dim = 256
units = 512
vocab_size = 5001
encoder = Encoder(embedding_dim)
decoder = Decoder(embedding_dim, units, vocab_size)
# Call both models once on dummy tensors (batch of 32) before loading weights.
# NOTE(review): presumably required because subclassed Keras models only
# create their variables on the first call, and loading HDF5 weights needs
# those variables to exist - confirm against the Keras version in use.
dummy_img_input = tf.ones((32, 64, 2048))
features = encoder(dummy_img_input)
hidden = decoder.init_state(32)
# One '<start>' token id per batch element, shaped (32, 1).
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * 32, 1)
dec = decoder(dec_input, features, hidden)
# Load the trained encoder/decoder weights into the models built above.
encoder.load_weights("efb-requirements/encoder_50epoch_weights.h5")
decoder.load_weights("efb-requirements/decoder_50epoch_weights.h5")
def evaluate(image, max_length=39):
    """Generate a caption for ``image`` by greedy decoding.

    The image is preprocessed, run through the feature extractor and the
    encoder, and the decoder is then stepped one token at a time, always
    feeding back the highest-scoring token of the previous step.

    Args:
        image: Raw image accepted by ``load_and_process_image`` (presumably
            an image array of shape (height, width, channels) - confirm
            with the caller).
        max_length: Maximum number of tokens to generate. Defaults to 39,
            the limit that used to be hard-coded here.

    Returns:
        List of predicted word strings in generation order. If the decoder
        emits ``'<end>'`` that token is the last element of the list;
        otherwise the list holds ``max_length`` words.
    """
    hidden = decoder.reset_state(batch_size=1)

    # Preprocess the image and add a leading batch axis of size 1.
    image_batch = tf.expand_dims(load_and_process_image(image), 0)
    feature_map = image_features_extract_model(image_batch)
    # Keep the batch axis and the channel axis (index 3); merge everything in
    # between into a single axis of positions for the attention model.
    feature_sequence = tf.reshape(
        feature_map, (feature_map.shape[0], -1, feature_map.shape[3])
    )
    cnn_features = encoder(feature_sequence)

    decoder_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []

    for _ in range(max_length):
        # The attention weights are not part of the return value, so they are
        # dropped here instead of being copied to host memory on every step.
        predictions, hidden, _ = decoder(decoder_input, cnn_features, hidden)

        predicted_id = tf.argmax(predictions[0]).numpy()
        word = tokenizer.index_word[predicted_id]
        result.append(word)
        if word == '<end>':
            break

        # Greedy decoding: the chosen token becomes the next decoder input.
        decoder_input = tf.expand_dims([predicted_id], 0)

    return result