nishantguvvada committed on
Commit
f2edbd4
·
1 Parent(s): 8a0a303

Update app.py: load the encoder and decoder from saved .h5 models instead of building them inline

Browse files
Files changed (1) hide show
  1. app.py +10 -69
app.py CHANGED
@@ -37,74 +37,15 @@ def load_image_model():
37
  image_model=tf.keras.models.load_model('./image_caption_model.h5')
38
  return image_model
39
 
40
- # @st.cache_resource()
41
- # def load_decoder_model():
42
- # decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
43
- # return decoder_model
44
-
45
- # @st.cache_resource()
46
- # def load_encoder_model():
47
- # encoder=tf.keras.models.load_model('./encoder_model.h5')
48
- # return encoder
49
-
50
- # InceptionResNetV2 takes (299, 299, 3) image as inputs
51
- # and return features in (8, 8, 1536) shape
52
- FEATURE_EXTRACTOR = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(
53
- include_top=False, weights="imagenet"
54
- )
55
-
56
- # **** ENCODER ****
57
- image_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
58
- image_features = FEATURE_EXTRACTOR(image_input)
59
- x = Reshape((FEATURES_SHAPE[0] * FEATURES_SHAPE[1], FEATURES_SHAPE[2]))(
60
- image_features
61
- )
62
- encoder_output = Dense(ATTENTION_DIM, activation="relu")(x)
63
- encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
64
- # **** ENCODER ****
65
-
66
-
67
- # **** DECODER ****
68
-
69
- word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
70
- embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
71
-
72
- decoder_gru = GRU(
73
- ATTENTION_DIM,
74
- return_sequences=True,
75
- return_state=True,
76
- )
77
-
78
- gru_output, gru_state = decoder_gru(embed_x)
79
-
80
- decoder_attention = Attention()
81
- context_vector = decoder_attention([gru_output, encoder_output])
82
- addition = Add()([gru_output, context_vector])
83
-
84
- layer_norm = LayerNormalization(axis=-1)
85
- layer_norm_out = layer_norm(addition)
86
-
87
- decoder_output_dense = Dense(VOCAB_SIZE)
88
-
89
- # -----------
90
- gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
91
-
92
- # Reuse trained GRU, but update it so that it can receive states.
93
- gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
94
-
95
- # Reuse other layers as well
96
- context_vector = decoder_attention([gru_output, encoder_output])
97
- addition_output = Add()([gru_output, context_vector])
98
- layer_norm_output = layer_norm(addition_output)
99
-
100
- decoder_output = decoder_output_dense(layer_norm_output)
101
 
102
- # Define prediction Model with state input and output
103
- decoder_pred_model = tf.keras.Model(
104
- inputs=[word_input, gru_state_input, encoder_output],
105
- outputs=[decoder_output, gru_state],
106
- )
107
- # **** DECODER ****
108
 
109
 
110
  st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
@@ -148,11 +89,11 @@ def predict_caption(file):
148
  resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
149
  img = resize/255
150
 
151
- # encoder = load_encoder_model()
152
  features = encoder(tf.expand_dims(img, axis=0))
153
  dec_input = tf.expand_dims([word_to_index("<start>")], 1)
154
  result = []
155
- # decoder_pred_model = load_decoder_model()
156
  for i in range(MAX_CAPTION_LEN):
157
  predictions, gru_state = decoder_pred_model(
158
  [dec_input, gru_state, features]
 
37
  image_model=tf.keras.models.load_model('./image_caption_model.h5')
38
  return image_model
39
 
40
+ @st.cache_resource()
41
+ def load_decoder_model():
42
+ decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
43
+ return decoder_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ @st.cache_resource()
46
+ def load_encoder_model():
47
+ encoder=tf.keras.models.load_model('./encoder_model.h5')
48
+ return encoder
 
 
49
 
50
 
51
  st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
 
89
  resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
90
  img = resize/255
91
 
92
+ encoder = load_encoder_model()
93
  features = encoder(tf.expand_dims(img, axis=0))
94
  dec_input = tf.expand_dims([word_to_index("<start>")], 1)
95
  result = []
96
+ decoder_pred_model = load_decoder_model()
97
  for i in range(MAX_CAPTION_LEN):
98
  predictions, gru_state = decoder_pred_model(
99
  [dec_input, gru_state, features]