Manish Chhetri committed on
Commit
c3b8e88
1 Parent(s): 5812c3e
LSTM/cultural_nepali_50.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf7ded4445f4e9d0eaeff6ae61d2f52a30270925eac69794a974772e5404de95
3
+ size 43815912
LSTM/lstm_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea3983c78d36d3aed41962954ac55d2ff4c508f1fd2b1ed8c03af3144d241558
3
+ size 42890524
app.py CHANGED
@@ -1,7 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from flask import Flask, request, jsonify
2
+ import cv2
3
+ import numpy as np
4
+ from keras.applications import ResNet152
5
+ from keras.optimizers import Adam
6
+ from keras.models import Sequential, Model,load_model
7
+ from keras.layers import Input
8
+ from keras.layers import Dense
9
+ from keras.layers import LSTM
10
+ from keras.layers import Embedding
11
+ from keras.layers import Dropout
12
+ from keras.layers import add
13
+ from keras.utils import to_categorical
14
  import gradio as gr
15
 
 
 
16
 
17
+ from keras.preprocessing import image, sequence
18
+ import cv2
19
+ from keras_preprocessing.sequence import pad_sequences
20
+ from tqdm import tqdm
21
+ import pickle
22
+ import tensorflow as tf
23
+ # from keras.applications.Resnet50 import preprocess_input
24
+ from flask_cors import CORS
25
+
26
+ from keras.applications import ResNet50
27
+ #
28
+ # # Transformer
29
+ # from library.prediction import evaluate_single_image
30
+ # from library.transformer import Transformer
31
+ # from library.customSchedule import learning_rate
32
+
33
+ # top_k = 25000
34
+ # num_layer = 4
35
+ # d_model = 512
36
+ # dff = 2048
37
+ # num_heads = 8
38
+ # row_size = 8
39
+ # col_size = 8
40
+ # target_vocab_size = top_k + 1
41
+ # dropout_rate = 0.1
42
+
43
+
44
+ # loaded_transformer = Transformer(num_layer, d_model, num_heads, dff, row_size, col_size,
45
+ # target_vocab_size, max_pos_encoding=target_vocab_size,
46
+ # rate=dropout_rate)
47
+
48
+ # # Load the weights into the model
49
+ # loaded_transformer.load_weights('models/Transformer/model')
50
+ # # Use the loaded custom objects
51
+ # loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
52
+ # print("Trasformer model loaded successfully")
53
+ # # loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=train_loss.result(), metrics=[train_accuracy])
54
+ # global tokenizer
55
+ # with open('pickle_files/transformer/tokenizer.pickle', 'rb') as handle:
56
+ # tokenizer = pickle.load(handle)
57
+ # tokenizer.word_index['<pad>'] = 0
58
+ # tokenizer.index_word[0] = '<pad>'
59
+
60
+
61
+ # print("Tokenizer loaded successfully")
62
+
63
+ #
64
+
65
+
66
# Image encoder: ResNet152 with ImageNet weights and no classifier head,
# cut at its second-to-last layer.  predict() output is later flattened to
# a 2048-d feature vector per image (see the reshape(2048,) in after()).
incept_model = ResNet152(weights='imagenet', include_top=False,input_shape=(224, 224, 3))
last = incept_model.layers[-2].output
ResNet152Model= Model(inputs = incept_model.input,outputs = last)

# Vocabulary mapping word -> integer index (inverted below into inv_dict,
# which maps the model's argmax index back to a word).
with open("pickle_files/lstm/words_dict.pkl","rb") as f:
    words_dict=pickle.load(f)


# +1 on top of the vocabulary size — presumably reserves a slot for the
# padding index; TODO confirm against the training code.
vocab_size = len(words_dict)+1
# Maximum decoded caption length; also the padded sequence length fed to
# the model (see pad_sequences(..., maxlen=MAX_LEN) in after()).
MAX_LEN = 192
inv_dict = {v:k for k, v in words_dict.items()}


# Trained LSTM captioning model: (image features, partial word sequence)
# -> next-word distribution.
model = tf.keras.models.load_model('LSTM/lstm_model.h5')
80
+
81
+ # inputs1 = Input(shape=(2048,))
82
+ # fe1 = Dropout(0.5)(inputs1)
83
+ # fe2 = Dense(256, activation='relu')(fe1)
84
+
85
+ # # language sequence model
86
+ # inputs2 = Input(shape=(MAX_LEN,))
87
+ # se1 = Embedding(vocab_size, MAX_LEN, mask_zero=True)(inputs2)
88
+ # se2 = Dropout(0.4)(se1)
89
+ # se3 = LSTM(256)(se2)
90
+
91
+ # # decoder model
92
+ # decoder1 = add([fe2, se3])
93
+ # decoder2 = Dense(256, activation='relu')(decoder1)
94
+ # outputs = Dense(vocab_size, activation='softmax')(decoder2)
95
+
96
+ # # tie it together [image, seq] [word]
97
+ # model = Model(inputs=[inputs1, inputs2], outputs=outputs)
98
+ # # compile model
99
+ # model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
100
+ # model.load_model("models/LSTM/cultural_nepali_50.h5")
101
+ # print("LSTM model loaded successfully")
102
+
103
+
104
+ # app = Flask(__name__)
105
+ # app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 1
106
+ # cors = CORS(app, resources={r"/*": {"origins": "*"}})
107
+ # @app.route('/')
108
+ # def index():
109
+ # return render_template('index.html')
110
+
111
+
112
+ # @app.route('/tranformer',methods=['POST'])
113
+ # def tranformer():
114
+ # if 'file' not in request.files:
115
+ # return 'No file part'
116
+
117
+ # file = request.files['file']
118
+
119
+ # if file.filename == '':
120
+ # return 'No selected file'
121
+
122
+ # # Save the file
123
+
124
+ # file.save('static/file.jpg')
125
+ # caption=evaluate_single_image("static/file.jpg",tokenizer,loaded_transformer)
126
+ # print(caption)
127
+ # return jsonify({'caption': caption})
128
+
129
+
130
# @app.route('/lstm', methods=['POST'])
def after(image):
    """Generate a caption for *image* with the ResNet152 encoder + LSTM decoder.

    Parameters
    ----------
    image : str | numpy.ndarray
        Either a path to an image file on disk (the original contract), or an
        already-decoded RGB image array — which is what gradio's ``"image"``
        input actually passes, and on which ``cv2.imread`` would fail.

    Returns
    -------
    str
        The greedily decoded caption, with the ``endofseq`` token removed and
        the space before the final period collapsed.
    """
    if isinstance(image, str):
        # File path: OpenCV loads BGR, convert to RGB to match training input.
        img = cv2.imread(image)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    else:
        # In-memory array from gradio — assumed already RGB; TODO confirm.
        img = np.asarray(image)
    img = cv2.resize(img, (224,224))
    img = img.reshape(1,224,224,3)
    # Encode the image once into a flat 2048-d feature vector.
    test_img_resized = ResNet152Model.predict(img).reshape(2048,)

    # Greedy decoding: start from the start token, append the argmax word
    # each step, stop at the end token or after MAX_LEN words.
    text_inp = ['startofseq']
    count = 0
    caption = ''
    while count < MAX_LEN:
        count += 1
        # Words -> indices; unknown words fall back to the last vocab index.
        encoded = [words_dict.get(word, len(words_dict) - 1) for word in text_inp]
        encoded = pad_sequences([encoded], padding='post', truncating='post', maxlen=MAX_LEN)[0]

        data_list = [test_img_resized.reshape(1, -1), encoded.reshape(1, -1)]
        # BUG FIX: model.predict() was previously called twice per step with
        # identical inputs — the second call was pure wasted compute.
        prediction = np.argmax(model.predict(data_list))
        sampled_word = inv_dict[prediction]
        caption = caption + ' ' + sampled_word

        if sampled_word == 'endofseq':
            break
        text_inp.append(sampled_word)

    caption = caption.replace('endofseq','')
    print(caption.replace(' .','.'))

    return caption.replace(' .','.')
177
+
178
+
179
+
180
# Gradio front-end: one image in, one caption string out.
# NOTE(review): the "image" input component hands after() a numpy array,
# but after() calls cv2.imread(), which expects a file path — confirm the
# input type (e.g. gr.Image(type="filepath")) matches before deploying.
iface = gr.Interface(fn=after, inputs="image", outputs="text")
iface.launch()
182
+ # if __name__ == "__main__":
183
+ # app.run(debug=True)
pickle_files/lstm/words_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64240c4fbb831107a070f4e101539e78ade68806582e287497c7c623640111e2
3
+ size 140742
pickle_files/transformer/tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56ce18abc08dcf20877edbb48701616635801a1c69b8673db5605101f04e623a
3
+ size 1368089