Spaces:
Sleeping
Sleeping
nishantguvvada
committed on
Commit
·
f2edbd4
1
Parent(s):
8a0a303
Update app.py
Browse files
app.py
CHANGED
@@ -37,74 +37,15 @@ def load_image_model():
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
# @st.cache_resource()
|
46 |
-
# def load_encoder_model():
|
47 |
-
# encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
-
# return encoder
|
49 |
-
|
50 |
-
# InceptionResNetV2 takes (299, 299, 3) image as inputs
|
51 |
-
# and return features in (8, 8, 1536) shape
|
52 |
-
FEATURE_EXTRACTOR = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(
|
53 |
-
include_top=False, weights="imagenet"
|
54 |
-
)
|
55 |
-
|
56 |
-
# **** ENCODER ****
|
57 |
-
image_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
|
58 |
-
image_features = FEATURE_EXTRACTOR(image_input)
|
59 |
-
x = Reshape((FEATURES_SHAPE[0] * FEATURES_SHAPE[1], FEATURES_SHAPE[2]))(
|
60 |
-
image_features
|
61 |
-
)
|
62 |
-
encoder_output = Dense(ATTENTION_DIM, activation="relu")(x)
|
63 |
-
encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
|
64 |
-
# **** ENCODER ****
|
65 |
-
|
66 |
-
|
67 |
-
# **** DECODER ****
|
68 |
-
|
69 |
-
word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
|
70 |
-
embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
|
71 |
-
|
72 |
-
decoder_gru = GRU(
|
73 |
-
ATTENTION_DIM,
|
74 |
-
return_sequences=True,
|
75 |
-
return_state=True,
|
76 |
-
)
|
77 |
-
|
78 |
-
gru_output, gru_state = decoder_gru(embed_x)
|
79 |
-
|
80 |
-
decoder_attention = Attention()
|
81 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
82 |
-
addition = Add()([gru_output, context_vector])
|
83 |
-
|
84 |
-
layer_norm = LayerNormalization(axis=-1)
|
85 |
-
layer_norm_out = layer_norm(addition)
|
86 |
-
|
87 |
-
decoder_output_dense = Dense(VOCAB_SIZE)
|
88 |
-
|
89 |
-
# -----------
|
90 |
-
gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
|
91 |
-
|
92 |
-
# Reuse trained GRU, but update it so that it can receive states.
|
93 |
-
gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
|
94 |
-
|
95 |
-
# Reuse other layers as well
|
96 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
97 |
-
addition_output = Add()([gru_output, context_vector])
|
98 |
-
layer_norm_output = layer_norm(addition_output)
|
99 |
-
|
100 |
-
decoder_output = decoder_output_dense(layer_norm_output)
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
)
|
107 |
-
# **** DECODER ****
|
108 |
|
109 |
|
110 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
@@ -148,11 +89,11 @@ def predict_caption(file):
|
|
148 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
149 |
img = resize/255
|
150 |
|
151 |
-
|
152 |
features = encoder(tf.expand_dims(img, axis=0))
|
153 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
154 |
result = []
|
155 |
-
|
156 |
for i in range(MAX_CAPTION_LEN):
|
157 |
predictions, gru_state = decoder_pred_model(
|
158 |
[dec_input, gru_state, features]
|
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
+
@st.cache_resource()
|
41 |
+
def load_decoder_model():
|
42 |
+
decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
|
43 |
+
return decoder_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
@st.cache_resource()
|
46 |
+
def load_encoder_model():
|
47 |
+
encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
+
return encoder
|
|
|
|
|
49 |
|
50 |
|
51 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
|
|
89 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
90 |
img = resize/255
|
91 |
|
92 |
+
encoder = load_encoder_model()
|
93 |
features = encoder(tf.expand_dims(img, axis=0))
|
94 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
95 |
result = []
|
96 |
+
decoder_pred_model = load_decoder_model()
|
97 |
for i in range(MAX_CAPTION_LEN):
|
98 |
predictions, gru_state = decoder_pred_model(
|
99 |
[dec_input, gru_state, features]
|