Spaces:
Sleeping
Sleeping
Manish Chhetri
committed on
Commit
•
c3b8e88
1
Parent(s):
5812c3e
test
Browse files- LSTM/cultural_nepali_50.h5 +3 -0
- LSTM/lstm_model.h5 +3 -0
- app.py +180 -4
- pickle_files/lstm/words_dict.pkl +3 -0
- pickle_files/transformer/tokenizer.pickle +3 -0
LSTM/cultural_nepali_50.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf7ded4445f4e9d0eaeff6ae61d2f52a30270925eac69794a974772e5404de95
|
3 |
+
size 43815912
|
LSTM/lstm_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea3983c78d36d3aed41962954ac55d2ff4c508f1fd2b1ed8c03af3144d241558
|
3 |
+
size 42890524
|
app.py
CHANGED
@@ -1,7 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
def greet(name):
|
4 |
-
return "Hello " + name + "!!"
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from flask import Flask, request, jsonify
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
from keras.applications import ResNet152
|
5 |
+
from keras.optimizers import Adam
|
6 |
+
from keras.models import Sequential, Model,load_model
|
7 |
+
from keras.layers import Input
|
8 |
+
from keras.layers import Dense
|
9 |
+
from keras.layers import LSTM
|
10 |
+
from keras.layers import Embedding
|
11 |
+
from keras.layers import Dropout
|
12 |
+
from keras.layers import add
|
13 |
+
from keras.utils import to_categorical
|
14 |
import gradio as gr
|
15 |
|
|
|
|
|
16 |
|
17 |
+
from keras.preprocessing import image, sequence
|
18 |
+
import cv2
|
19 |
+
from keras_preprocessing.sequence import pad_sequences
|
20 |
+
from tqdm import tqdm
|
21 |
+
import pickle
|
22 |
+
import tensorflow as tf
|
23 |
+
# from keras.applications.Resnet50 import preprocess_input
|
24 |
+
from flask_cors import CORS
|
25 |
+
|
26 |
+
from keras.applications import ResNet50
|
27 |
+
#
|
28 |
+
# # Transformer
|
29 |
+
# from library.prediction import evaluate_single_image
|
30 |
+
# from library.transformer import Transformer
|
31 |
+
# from library.customSchedule import learning_rate
|
32 |
+
|
33 |
+
# top_k = 25000
|
34 |
+
# num_layer = 4
|
35 |
+
# d_model = 512
|
36 |
+
# dff = 2048
|
37 |
+
# num_heads = 8
|
38 |
+
# row_size = 8
|
39 |
+
# col_size = 8
|
40 |
+
# target_vocab_size = top_k + 1
|
41 |
+
# dropout_rate = 0.1
|
42 |
+
|
43 |
+
|
44 |
+
# loaded_transformer = Transformer(num_layer, d_model, num_heads, dff, row_size, col_size,
|
45 |
+
# target_vocab_size, max_pos_encoding=target_vocab_size,
|
46 |
+
# rate=dropout_rate)
|
47 |
+
|
48 |
+
# # Load the weights into the model
|
49 |
+
# loaded_transformer.load_weights('models/Transformer/model')
|
50 |
+
# # Use the loaded custom objects
|
51 |
+
# loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
|
52 |
+
# print("Trasformer model loaded successfully")
|
53 |
+
# # loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=train_loss.result(), metrics=[train_accuracy])
|
54 |
+
# global tokenizer
|
55 |
+
# with open('pickle_files/transformer/tokenizer.pickle', 'rb') as handle:
|
56 |
+
# tokenizer = pickle.load(handle)
|
57 |
+
# tokenizer.word_index['<pad>'] = 0
|
58 |
+
# tokenizer.index_word[0] = '<pad>'
|
59 |
+
|
60 |
+
|
61 |
+
# print("Tokenizer loaded successfully")
|
62 |
+
|
63 |
+
#
|
64 |
+
|
65 |
+
|
66 |
+
# Image encoder: ResNet152 pretrained on ImageNet, with the classification
# head removed; features are taken from the second-to-last layer.
incept_model = ResNet152(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
ResNet152Model = Model(inputs=incept_model.input, outputs=incept_model.layers[-2].output)

# word -> index vocabulary produced at training time.
with open("pickle_files/lstm/words_dict.pkl", "rb") as f:
    words_dict = pickle.load(f)

vocab_size = len(words_dict) + 1                   # +1 for the padding index
MAX_LEN = 192                                      # maximum caption length in tokens
inv_dict = {v: k for k, v in words_dict.items()}   # index -> word, for decoding

# Trained LSTM caption decoder.
model = tf.keras.models.load_model('LSTM/lstm_model.h5')
|
80 |
+
|
81 |
+
# inputs1 = Input(shape=(2048,))
|
82 |
+
# fe1 = Dropout(0.5)(inputs1)
|
83 |
+
# fe2 = Dense(256, activation='relu')(fe1)
|
84 |
+
|
85 |
+
# # language sequence model
|
86 |
+
# inputs2 = Input(shape=(MAX_LEN,))
|
87 |
+
# se1 = Embedding(vocab_size, MAX_LEN, mask_zero=True)(inputs2)
|
88 |
+
# se2 = Dropout(0.4)(se1)
|
89 |
+
# se3 = LSTM(256)(se2)
|
90 |
+
|
91 |
+
# # decoder model
|
92 |
+
# decoder1 = add([fe2, se3])
|
93 |
+
# decoder2 = Dense(256, activation='relu')(decoder1)
|
94 |
+
# outputs = Dense(vocab_size, activation='softmax')(decoder2)
|
95 |
+
|
96 |
+
# # tie it together [image, seq] [word]
|
97 |
+
# model = Model(inputs=[inputs1, inputs2], outputs=outputs)
|
98 |
+
# # compile model
|
99 |
+
# model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
|
100 |
+
# model.load_model("models/LSTM/cultural_nepali_50.h5")
|
101 |
+
# print("LSTM model loaded successfully")
|
102 |
+
|
103 |
+
|
104 |
+
# app = Flask(__name__)
|
105 |
+
# app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 1
|
106 |
+
# cors = CORS(app, resources={r"/*": {"origins": "*"}})
|
107 |
+
# @app.route('/')
|
108 |
+
# def index():
|
109 |
+
# return render_template('index.html')
|
110 |
+
|
111 |
+
|
112 |
+
# @app.route('/tranformer',methods=['POST'])
|
113 |
+
# def tranformer():
|
114 |
+
# if 'file' not in request.files:
|
115 |
+
# return 'No file part'
|
116 |
+
|
117 |
+
# file = request.files['file']
|
118 |
+
|
119 |
+
# if file.filename == '':
|
120 |
+
# return 'No selected file'
|
121 |
+
|
122 |
+
# # Save the file
|
123 |
+
|
124 |
+
# file.save('static/file.jpg')
|
125 |
+
# caption=evaluate_single_image("static/file.jpg",tokenizer,loaded_transformer)
|
126 |
+
# print(caption)
|
127 |
+
# return jsonify({'caption': caption})
|
128 |
+
|
129 |
+
|
130 |
+
# @app.route('/lstm', methods=['POST'])
|
131 |
+
def after(image):
    """Generate a caption for *image* with the ResNet152 + LSTM pipeline.

    The image is resized to 224x224, encoded into a 2048-d feature vector by
    ``ResNet152Model``, and the LSTM decoder is sampled greedily one word at
    a time until it emits ``'endofseq'`` or ``MAX_LEN`` words are produced.

    Parameters
    ----------
    image : str or numpy.ndarray
        Path to an image file, or an already-decoded RGB image array.
        NOTE(review): Gradio's ``"image"`` input normally passes a numpy
        array, not a path — confirm for the installed gradio version.

    Returns
    -------
    str
        The generated caption with the end-of-sequence token stripped.
    """
    # Accept both a file path and an in-memory array so the function works
    # from Gradio (array) as well as direct calls with a path.
    if isinstance(image, str):
        img = cv2.imread(image)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    else:
        img = np.asarray(image)
    img = cv2.resize(img, (224, 224))
    img = img.reshape(1, 224, 224, 3)
    test_img_resized = ResNet152Model.predict(img).reshape(2048,)

    text_inp = ['startofseq']
    count = 0
    caption = ''
    while count < MAX_LEN:
        count += 1
        # Words -> indices; unknown words fall back to the last vocab index.
        encoded = [words_dict.get(word, len(words_dict) - 1) for word in text_inp]
        encoded = pad_sequences([encoded], padding='post', truncating='post', maxlen=MAX_LEN)[0]

        data_list = [test_img_resized.reshape(1, -1), encoded.reshape(1, -1)]
        # Single predict per step (the original called model.predict twice,
        # doubling inference cost for the same result).
        prediction = np.argmax(model.predict(data_list))
        sampled_word = inv_dict[prediction]
        caption = caption + ' ' + sampled_word

        if sampled_word == 'endofseq':
            break
        text_inp.append(sampled_word)

    caption = caption.replace('endofseq', '')
    print(caption.replace(' .', '.'))

    return caption.replace(' .', '.')
177 |
+
|
178 |
+
|
179 |
+
|
180 |
+
# Gradio front-end: upload an image, receive the generated caption as text.
# Launching at module top level is intentional for Hugging Face Spaces.
iface = gr.Interface(fn=after, inputs="image", outputs="text")
iface.launch()
|
pickle_files/lstm/words_dict.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64240c4fbb831107a070f4e101539e78ade68806582e287497c7c623640111e2
|
3 |
+
size 140742
|
pickle_files/transformer/tokenizer.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56ce18abc08dcf20877edbb48701616635801a1c69b8673db5605101f04e623a
|
3 |
+
size 1368089
|