import os
import re
import time
import random
import pickle
import datetime
import warnings

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from nltk.translate.bleu_score import sentence_bleu
from PIL import Image
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from tensorflow.keras.applications import densenet
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import (Add, Concatenate, Conv2D, Dense, Dropout,
                                     Embedding, Flatten, GRU, Input, LSTM)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

warnings.filterwarnings('ignore')


def getModel():
    """Rebuild the trained encoder-decoder network, load its weights and split
    it into separate encoder and decoder models for inference."""
    # Pre-trained word-embedding matrix (vocab_size x 300) saved during training.
    embedding_matrix_vocab = np.load('my_embedding_matrix.npy')

    # Image branch: 2048-d CNN feature vector -> 256-d encoding.
    input1 = Input(shape=(2048,), name='Image_input')
    dense1 = Dense(256, kernel_initializer=tf.keras.initializers.glorot_uniform(seed=56),
                   name='dense_encoder')(input1)

    # Text branch: padded report of up to 153 tokens over a 1427-word vocabulary.
    input2 = Input(shape=(153,), name='Text_Input')
    embedding_layer = Embedding(input_dim=1427, output_dim=300, input_length=153,
                                mask_zero=True, trainable=False,
                                weights=[embedding_matrix_vocab], name="Embedding_layer")
    emb = embedding_layer(input2)

    LSTM1 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
                 kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
                 recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
                 bias_initializer=tf.keras.initializers.zeros(),
                 return_sequences=True, name="LSTM1")(emb)

    LSTM2 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
                 kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
                 recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
                 bias_initializer=tf.keras.initializers.zeros(), name="LSTM2")
    LSTM2_output = LSTM2(LSTM1)

    dropout1 = Dropout(0.5, name='dropout1')(LSTM2_output)

    # Fuse the image encoding with the text encoding.
    dec = Add()([dense1, dropout1])

    fc1 = Dense(256, activation='relu',
                kernel_initializer=tf.keras.initializers.he_normal(seed=63), name='fc1')
    fc1_output = fc1(dec)

    dropout2 = Dropout(0.4, name='dropout2')(fc1_output)

    output_layer = Dense(1427, activation='softmax', name='Output_layer')
    output = output_layer(dropout2)

    encoder_decoder = Model(inputs=[input1, input2], outputs=output)
    encoder_decoder.load_weights("encoder_decoder_epoch_5.h5")

    # Inference-time encoder: image features -> 256-d image encoding.
    encoder_input = encoder_decoder.input[0]
    encoder_output = encoder_decoder.get_layer('dense_encoder').output
    encoder_model = Model(encoder_input, encoder_output)

    # Inference-time decoder: (padded token sequence, image encoding) -> next-word distribution.
    text_input = encoder_decoder.input[1]
    enc_output = Input(shape=(256,), name='Enc_Output')
    text_output = encoder_decoder.get_layer('LSTM2').output
    add1 = Add()([text_output, enc_output])
    fc_1 = fc1(add1)
    decoder_output = output_layer(fc_1)

    decoder_model = Model(inputs=[text_input, enc_output], outputs=decoder_output)

    return encoder_model, decoder_model

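
# Usage sketch (not part of the original pipeline): the encoder maps a
# (batch, 2048) image-feature vector to a 256-d encoding, and the decoder maps
# a padded 153-token sequence plus that encoding to a probability distribution
# over the 1427-word vocabulary. Assumes 'encoder_decoder_epoch_5.h5' and
# 'my_embedding_matrix.npy' are present in the working directory.
#
#   encoder_model, decoder_model = getModel()
#   img_encoding = encoder_model.predict(np.zeros((1, 2048)))                    # -> (1, 256)
#   next_word_probs = decoder_model.predict([np.zeros((1, 153)), img_encoding])  # -> (1, 1427)
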
def greedysearch(image):
    """Greedy decoding: starting from 'startseq', repeatedly predict the most
    likely next word until the end-of-sequence token or the maximum report
    length (153 tokens) is reached. Note that the models and the tokenizer are
    rebuilt on every call."""
    # Refit the tokenizer on the training reports so word indices match training.
    train_data = pd.read_csv('Final_Train_Data.csv')
    y_train = train_data['Report']
    encoder_model, decoder_model = getModel()
    input_ = 'startseq'
    image_features = encoder_model.predict(image)
    result = []
    tokenizer = Tokenizer(filters='!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~\t\n')
    tokenizer.fit_on_texts(y_train.values)
    for i in range(153):
        input_tok = [tokenizer.word_index[w] for w in input_.split()]
        input_padded = pad_sequences([input_tok], maxlen=153, padding='post')
        predictions = decoder_model.predict([input_padded, image_features])
        arg = np.argmax(predictions)
        if arg != 7:  # index 7 is the end-of-sequence token; stop once it is predicted
            result.append(tokenizer.index_word[arg])
            input_ = input_ + ' ' + tokenizer.index_word[arg]
        else:
            break
    rep = ' '.join(result)
    return rep

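
# The sentence_bleu import above is otherwise unused here; the helper below is a
# minimal sketch (an addition, not part of the original code) showing how a
# generated report could be scored against its ground-truth report with BLEU.
def bleu_score(reference_report, predicted_report):
    # sentence_bleu expects a list of tokenized reference sentences and a tokenized hypothesis.
    reference = [reference_report.split()]
    candidate = predicted_report.split()
    return sentence_bleu(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25))
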
def get_result(img):
    pre_Report = greedysearch(img)
    print('------------------------------------------------------------------------------------------------------')
    print("Predicted Report : ", pre_Report)
    print('------------------------------------------------------------------------------------------------------')
    return pre_Report
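

# Minimal driver sketch (an addition): the real pipeline is expected to extract
# a 2048-d feature vector from the X-ray image(s) with a pretrained CNN (the
# DenseNet imports above suggest DenseNet-based features) before calling
# get_result. A random vector is used here purely as a stand-in so the module
# can be exercised end to end once the weight/CSV/embedding files are in place.
if __name__ == '__main__':
    dummy_image_features = np.random.rand(1, 2048)  # placeholder for real CNN features
    get_result(dummy_image_features)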