Spaces:
Sleeping
Sleeping
nishantguvvada
committed on
Commit
·
f2edbd4
1
Parent(s):
8a0a303
Update app.py
Browse files
app.py
CHANGED
@@ -37,74 +37,15 @@ def load_image_model():
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
# @st.cache_resource()
|
46 |
-
# def load_encoder_model():
|
47 |
-
# encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
-
# return encoder
|
49 |
-
|
50 |
-
# InceptionResNetV2 takes (299, 299, 3) image as inputs
|
51 |
-
# and return features in (8, 8, 1536) shape
|
52 |
-
FEATURE_EXTRACTOR = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(
|
53 |
-
include_top=False, weights="imagenet"
|
54 |
-
)
|
55 |
-
|
56 |
-
# **** ENCODER ****
|
57 |
-
image_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
|
58 |
-
image_features = FEATURE_EXTRACTOR(image_input)
|
59 |
-
x = Reshape((FEATURES_SHAPE[0] * FEATURES_SHAPE[1], FEATURES_SHAPE[2]))(
|
60 |
-
image_features
|
61 |
-
)
|
62 |
-
encoder_output = Dense(ATTENTION_DIM, activation="relu")(x)
|
63 |
-
encoder = tf.keras.Model(inputs=image_input, outputs=encoder_output)
|
64 |
-
# **** ENCODER ****
|
65 |
-
|
66 |
-
|
67 |
-
# **** DECODER ****
|
68 |
-
|
69 |
-
word_input = Input(shape=(MAX_CAPTION_LEN), name="words")
|
70 |
-
embed_x = Embedding(VOCAB_SIZE, ATTENTION_DIM)(word_input)
|
71 |
-
|
72 |
-
decoder_gru = GRU(
|
73 |
-
ATTENTION_DIM,
|
74 |
-
return_sequences=True,
|
75 |
-
return_state=True,
|
76 |
-
)
|
77 |
-
|
78 |
-
gru_output, gru_state = decoder_gru(embed_x)
|
79 |
-
|
80 |
-
decoder_attention = Attention()
|
81 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
82 |
-
addition = Add()([gru_output, context_vector])
|
83 |
-
|
84 |
-
layer_norm = LayerNormalization(axis=-1)
|
85 |
-
layer_norm_out = layer_norm(addition)
|
86 |
-
|
87 |
-
decoder_output_dense = Dense(VOCAB_SIZE)
|
88 |
-
|
89 |
-
# -----------
|
90 |
-
gru_state_input = Input(shape=(ATTENTION_DIM), name="gru_state_input")
|
91 |
-
|
92 |
-
# Reuse trained GRU, but update it so that it can receive states.
|
93 |
-
gru_output, gru_state = decoder_gru(embed_x, initial_state=gru_state_input)
|
94 |
-
|
95 |
-
# Reuse other layers as well
|
96 |
-
context_vector = decoder_attention([gru_output, encoder_output])
|
97 |
-
addition_output = Add()([gru_output, context_vector])
|
98 |
-
layer_norm_output = layer_norm(addition_output)
|
99 |
-
|
100 |
-
decoder_output = decoder_output_dense(layer_norm_output)
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
)
|
107 |
-
# **** DECODER ****
|
108 |
|
109 |
|
110 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
@@ -148,11 +89,11 @@ def predict_caption(file):
|
|
148 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
149 |
img = resize/255
|
150 |
|
151 |
-
|
152 |
features = encoder(tf.expand_dims(img, axis=0))
|
153 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
154 |
result = []
|
155 |
-
|
156 |
for i in range(MAX_CAPTION_LEN):
|
157 |
predictions, gru_state = decoder_pred_model(
|
158 |
[dec_input, gru_state, features]
|
|
|
37 |
image_model=tf.keras.models.load_model('./image_caption_model.h5')
|
38 |
return image_model
|
39 |
|
40 |
+
@st.cache_resource()
|
41 |
+
def load_decoder_model():
|
42 |
+
decoder_model=tf.keras.models.load_model('./decoder_pred_model.h5')
|
43 |
+
return decoder_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
@st.cache_resource()
|
46 |
+
def load_encoder_model():
|
47 |
+
encoder=tf.keras.models.load_model('./encoder_model.h5')
|
48 |
+
return encoder
|
|
|
|
|
49 |
|
50 |
|
51 |
st.title(":blue[Nishant Guvvada's] :red[AI Journey] Image Caption Generation")
|
|
|
89 |
resize = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
|
90 |
img = resize/255
|
91 |
|
92 |
+
encoder = load_encoder_model()
|
93 |
features = encoder(tf.expand_dims(img, axis=0))
|
94 |
dec_input = tf.expand_dims([word_to_index("<start>")], 1)
|
95 |
result = []
|
96 |
+
decoder_pred_model = load_decoder_model()
|
97 |
for i in range(MAX_CAPTION_LEN):
|
98 |
predictions, gru_state = decoder_pred_model(
|
99 |
[dec_input, gru_state, features]
|