File size: 3,269 Bytes
5ee6650
0d7f4b7
4d0572e
0d7f4b7
 
 
 
 
 
 
cd6f682
0d7f4b7
4d0572e
a94700e
4d0572e
 
a94700e
4d0572e
 
 
 
 
 
56e3f4f
cd6f682
4d0572e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a94700e
4d0572e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a94700e
4d0572e
 
 
5ee6650
4d0572e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
__author__ = 'Taneem Jan, improved the old model through pretrained Auto-encoders'

import keras
from keras.layers import Input, Dense, Dropout, RepeatVector, LSTM, concatenate, Flatten
from keras.models import Sequential, Model
from tensorflow.keras.optimizers import RMSprop
from keras import *
from .Config import *
from .AModel import *
from .autoencoder_image import *
import os


class Main_Model(AModel):
    """Image-plus-text sequence model built on a pre-trained autoencoder.

    The visual branch reuses the autoencoder up to its ``'encoded_layer'``
    (with its layers frozen), followed by two dense layers; the textual branch
    is a two-layer LSTM. Both are concatenated and decoded by two further
    LSTMs into a softmax over ``output_size`` classes.
    """

    def __init__(self, input_shape, output_size, output_path):
        """Build and compile the network.

        Args:
            input_shape: Shape of the visual input, passed to ``Input`` and to
                the autoencoder constructor.
            output_size: Size of the softmax output; also the feature size of
                each step of the textual input.
            output_path: Forwarded to ``AModel.__init__`` and to the
                autoencoder constructor.
        """
        super().__init__(input_shape, output_size, output_path)
        self.name = "Main_Model"

        visual_input = Input(shape=input_shape)

        # Load the pre-trained autoencoder model.
        autoencoder_model = autoencoder_image(input_shape, input_shape, output_path)
        autoencoder_model.load('autoencoder')
        # NOTE(review): the weights path is hard-coded relative to the working
        # directory rather than derived from output_path -- confirm intended.
        path = "classes/model/bin/"
        path_to_autoencoder = "{}autoencoder.h5".format(path)
        autoencoder_model.model.load_weights(path_to_autoencoder)

        # Keep only the part of the autoencoder up to the encoded layer.
        hidden_layer_model_freeze = Model(
            inputs=autoencoder_model.model.input,
            outputs=autoencoder_model.model.get_layer('encoded_layer').output
        )
        hidden_layer_input = hidden_layer_model_freeze(visual_input)

        # Additional trainable layers applied before concatenation.
        hidden_layer_model = Flatten()(hidden_layer_input)
        hidden_layer_model = Dense(1024, activation='relu')(hidden_layer_model)
        hidden_layer_model = Dropout(0.3)(hidden_layer_model)
        hidden_layer_model = Dense(1024, activation='relu')(hidden_layer_model)
        hidden_layer_model = Dropout(0.3)(hidden_layer_model)
        # Repeat the image feature vector once per context step so it can be
        # concatenated with the per-step text encoding.
        # CONTEXT_LENGTH is presumably provided by the Config star import.
        hidden_layer_result = RepeatVector(CONTEXT_LENGTH)(hidden_layer_model)

        # Freeze the loaded encoder layers; this happens before compile() below.
        for layer in hidden_layer_model_freeze.layers:
            layer.trainable = False

        # The same language model as that of pix2code by Tony Beltramelli.
        language_model = Sequential()
        language_model.add(LSTM(128, return_sequences=True, input_shape=(CONTEXT_LENGTH, output_size)))
        language_model.add(LSTM(128, return_sequences=True))

        textual_input = Input(shape=(CONTEXT_LENGTH, output_size))
        encoded_text = language_model(textual_input)

        decoder = concatenate([hidden_layer_result, encoded_text])

        decoder = LSTM(512, return_sequences=True)(decoder)
        decoder = LSTM(512, return_sequences=False)(decoder)
        decoder = Dense(output_size, activation='softmax')(decoder)

        self.model = Model(inputs=[visual_input, textual_input], outputs=decoder)

        optimizer = RMSprop(learning_rate=0.0001, clipvalue=1.0)
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    def fit_generator(self, generator, steps_per_epoch):
        """Train the model from ``generator`` for ``EPOCHS`` epochs, then save.

        Args:
            generator: Batch generator passed to the Keras training loop.
            steps_per_epoch: Number of generator batches that make one epoch.
        """
        # Model.fit accepts generators directly; Model.fit_generator is
        # deprecated and no longer exists in recent Keras releases.
        # EPOCHS is presumably provided by the Config star import.
        self.model.fit(generator, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
        self.save()

    def predict(self, image, partial_caption):
        """Return the first row of the model's prediction for the given inputs."""
        return self.model.predict([image, partial_caption], verbose=0)[0]

    def predict_batch(self, images, partial_captions):
        """Return the model's predictions for a batch of inputs."""
        return self.model.predict([images, partial_captions], verbose=1)