ImageCaptioner / app.py
Mr-Vicky-01's picture
Update app.py
e55aa90 verified
raw
history blame contribute delete
No virus
3.73 kB
import gradio as gr
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.models import model_from_json
from keras.optimizers import Adam
from PIL import Image
# Image encoder: EfficientNetB7 with ImageNet weights and no classification
# head, built for 224x224 RGB inputs.
pre_trained = EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
# The backbone is used as-is, so mark its weights as non-trainable.
pre_trained.trainable = False
# Global average pooling collapses the spatial dimensions of the backbone output.
x = tf.keras.layers.GlobalAveragePooling2D()(pre_trained.output)
# Feature extractor: backbone input -> pooled backbone output.
pre_trained_model = Model(outputs=x, inputs=pre_trained.input)
###########################################################################################################
# Caption model: rebuilt from a JSON architecture file plus a separate weights file.
# (Single-file alternative, kept for reference:
#  model = tf.keras.models.load_model("image_captioning_30k_model.h5"))
with open("30k_model_architecture.json", "r") as json_file:
    loaded_model_json = json_file.read()

# Recreate the network from its JSON description, then load the weights into it.
model = model_from_json(loaded_model_json)
model.load_weights("30k_model_weights.h5")

# Create Adam with its default arguments and set the learning rate afterwards.
optimizer = Adam()
optimizer.learning_rate.assign(0.001)

# Compile with this fresh optimizer; no saved optimizer state is restored here.
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
###########################################################################################################
# Restore the pickled tokenizer; its vocabulary is used both to encode the
# partial caption and to map predicted indices back to words.
# NOTE: pickle.load can execute arbitrary code during loading, so this file
# must only ever come from a trusted source.
with open("Image_Captioner_tokenizer_30k.pkl", "rb") as f:
    tokenizer = pickle.load(f)
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word mapped to index ``integer``, or ``None``.

    Args:
        integer: Word index to look up (a Python or NumPy integer).
        tokenizer: Object exposing a ``word_index`` mapping of word -> index
            and, optionally, the reverse ``index_word`` mapping.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Prefer the reverse mapping when the tokenizer provides one: a single
    # dict lookup instead of scanning the vocabulary for every generated word.
    index_word = getattr(tokenizer, "index_word", None)
    if index_word:
        word = index_word.get(integer)
        if word is not None:
            return word
    # Fallback (no reverse mapping, or a miss in it): scan word_index.
    return next(
        (word for word, index in tokenizer.word_index.items() if index == integer),
        None,
    )
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption for ``image``, one word per step.

    Starting from the ``startseq`` marker, the text generated so far is
    encoded with ``tokenizer``, padded to ``max_length`` and passed to
    ``model`` together with ``image``. The arg-max of the prediction is mapped
    back to a word and appended. Decoding stops at ``endseq``, at an index
    that has no vocabulary word, or after ``max_length`` steps.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            an array; its arg-max is treated as a word index.
        image: Image input handed to ``model`` unchanged on every step.
        tokenizer: Tokenizer providing ``texts_to_sequences`` and the
            vocabulary consulted by ``idx_to_word``.
        max_length: Padded sequence length and maximum number of steps.

    Returns:
        The generated words joined by single spaces, without the ``startseq``
        and ``endseq`` markers.
    """
    # Text fed back into the model; always begins with the start marker.
    in_text = 'startseq'
    # Caption words only. The markers are never stored here, so nothing has
    # to be sliced off afterwards. Slicing the last token unconditionally
    # would drop a real word whenever decoding ends without 'endseq'.
    words = []
    for _ in range(max_length):
        # Encode the partial caption and pad it to the model's input length.
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], max_length)
        # Predict the next word and take the highest-scoring index.
        yhat = model.predict([image, sequence], verbose=0)
        word = idx_to_word(np.argmax(yhat), tokenizer)
        # Stop on an unknown index or on the end marker.
        if word is None or word == 'endseq':
            break
        words.append(word)
        in_text += " " + word
    return ' '.join(words)
def google_image_testing(inp):
    """Generate a caption for an image submitted through the Gradio UI.

    Args:
        inp: Image as a NumPy array accepted by ``PIL.Image.fromarray``
            (presumably what the Gradio ``'image'`` input delivers; confirm
            for the installed Gradio version).

    Returns:
        The caption string produced by ``predict_caption``.

    Raises:
        ValueError: If ``inp`` is ``None`` (no image was supplied).
    """
    if inp is None:
        raise ValueError("No image provided.")
    # Process the image in memory instead of writing and re-reading a fixed
    # 'input_image.jpg'. The shared file raced between concurrent requests,
    # could not be saved for RGBA input, and added a lossy JPEG re-encode.
    # RGB conversion and nearest-neighbour resize match what load_img did
    # with target_size=(224, 224) and its defaults.
    input_image = Image.fromarray(inp).convert("RGB")
    input_image = input_image.resize((224, 224), Image.NEAREST)
    # Convert image pixels to a float array and add the batch dimension.
    image = img_to_array(input_image)
    image = np.expand_dims(image, axis=0)
    # Apply the EfficientNet input preprocessing.
    image = preprocess_input(image)
    # Extract the pooled image features with the frozen encoder.
    img_feature = pre_trained_model.predict(image, verbose=0)
    # Decode the caption. 74 is the sequence length handed to the caption
    # model (presumably the maximum caption length from training; confirm).
    return predict_caption(model, img_feature, tokenizer, max_length=74)
# Web UI: one image input, one text output, served by google_image_testing.
demo = gr.Interface(
    fn=google_image_testing,
    inputs='image',
    outputs='text',
    title='Image Captioner',
)
# Start the app with debug mode on and a public share link requested.
demo.launch(share=True, debug=True)