Spaces:
Sleeping
Sleeping
File size: 5,463 Bytes
c3b8e88 5812c3e c3b8e88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
# from flask import Flask, request, jsonify
import cv2
import numpy as np
from keras.applications import ResNet152
from keras.optimizers import Adam
from keras.models import Sequential, Model,load_model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers import add
from keras.utils import to_categorical
import gradio as gr
from keras.preprocessing import image, sequence
from keras_preprocessing.sequence import pad_sequences
from tqdm import tqdm
import pickle
import tensorflow as tf
# from keras.applications.Resnet50 import preprocess_input
from flask_cors import CORS
from keras.applications import ResNet50
#
# # Transformer
# from library.prediction import evaluate_single_image
# from library.transformer import Transformer
# from library.customSchedule import learning_rate
# top_k = 25000
# num_layer = 4
# d_model = 512
# dff = 2048
# num_heads = 8
# row_size = 8
# col_size = 8
# target_vocab_size = top_k + 1
# dropout_rate = 0.1
# loaded_transformer = Transformer(num_layer, d_model, num_heads, dff, row_size, col_size,
# target_vocab_size, max_pos_encoding=target_vocab_size,
# rate=dropout_rate)
# # Load the weights into the model
# loaded_transformer.load_weights('models/Transformer/model')
# # Use the loaded custom objects
# loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
# print("Trasformer model loaded successfully")
# # loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=train_loss.result(), metrics=[train_accuracy])
# global tokenizer
# with open('pickle_files/transformer/tokenizer.pickle', 'rb') as handle:
# tokenizer = pickle.load(handle)
# tokenizer.word_index['<pad>'] = 0
# tokenizer.index_word[0] = '<pad>'
# print("Tokenizer loaded successfully")
#
# Image encoder: ImageNet-pretrained ResNet152 without its classification head.
# pooling='avg' appends global average pooling, so each image is encoded as a
# single flat 2048-value vector -- the size `after` reshapes the prediction to.
# Without pooling the headless network returns a spatial feature map and that
# reshape cannot succeed.
# NOTE(review): confirm the caption model was trained on these pooled features.
incept_model = ResNet152(weights='imagenet', include_top=False, pooling='avg',
                         input_shape=(224, 224, 3))
last = incept_model.output
ResNet152Model = Model(inputs=incept_model.input, outputs=last)

# Vocabulary mapping word -> integer index, plus the inverse used for decoding.
with open("pickle_files/lstm/words_dict.pkl", "rb") as f:
    words_dict = pickle.load(f)
# Presumably the +1 reserves index 0 for padding -- TODO confirm.
vocab_size = len(words_dict) + 1
# Sequence length the token input is padded/truncated to, and the maximum
# number of decoding steps performed by `after`.
MAX_LEN = 192
inv_dict = {v: k for k, v in words_dict.items()}

# Trained caption model; `after` calls it with [image features, token sequence].
model = tf.keras.models.load_model('LSTM/lstm_model.h5')
# inputs1 = Input(shape=(2048,))
# fe1 = Dropout(0.5)(inputs1)
# fe2 = Dense(256, activation='relu')(fe1)
# # language sequence model
# inputs2 = Input(shape=(MAX_LEN,))
# se1 = Embedding(vocab_size, MAX_LEN, mask_zero=True)(inputs2)
# se2 = Dropout(0.4)(se1)
# se3 = LSTM(256)(se2)
# # decoder model
# decoder1 = add([fe2, se3])
# decoder2 = Dense(256, activation='relu')(decoder1)
# outputs = Dense(vocab_size, activation='softmax')(decoder2)
# # tie it together [image, seq] [word]
# model = Model(inputs=[inputs1, inputs2], outputs=outputs)
# # compile model
# model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
# model.load_model("models/LSTM/cultural_nepali_50.h5")
# print("LSTM model loaded successfully")
# app = Flask(__name__)
# app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 1
# cors = CORS(app, resources={r"/*": {"origins": "*"}})
# @app.route('/')
# def index():
# return render_template('index.html')
# @app.route('/tranformer',methods=['POST'])
# def tranformer():
# if 'file' not in request.files:
# return 'No file part'
# file = request.files['file']
# if file.filename == '':
# return 'No selected file'
# # Save the file
# file.save('static/file.jpg')
# caption=evaluate_single_image("static/file.jpg",tokenizer,loaded_transformer)
# print(caption)
# return jsonify({'caption': caption})
# @app.route('/lstm', methods=['POST'])
def after(image):
    """Generate a caption for an image using the ResNet152 encoder and the LSTM model.

    Args:
        image: Either a filesystem path (str) to an image, which is read with
            OpenCV, or an already-decoded pixel array. Arrays are assumed to
            be height x width x 3 in RGB order -- TODO confirm against the
            Gradio image component in use.

    Returns:
        The generated caption with the ``endofseq`` marker removed and the
        space before each full stop collapsed.

    Raises:
        ValueError: If ``image`` is a path that OpenCV cannot read.
    """
    if isinstance(image, str):
        img = cv2.imread(image)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise ValueError(f"Could not read image file: {image!r}")
        # OpenCV decodes to BGR; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    else:
        # Already-decoded pixels: use them directly, no file read or BGR swap.
        img = np.asarray(image)

    img = cv2.resize(img, (224, 224))
    batch = img.reshape(1, 224, 224, 3)
    # The image features do not change during decoding, so compute and shape
    # them once instead of on every step.
    features = ResNet152Model.predict(batch).reshape(1, 2048)

    text_inp = ['startofseq']
    # Words missing from the vocabulary fall back to the last index.
    unknown_index = len(words_dict) - 1
    generated = []
    for _ in range(MAX_LEN):
        encoded = [words_dict.get(word, unknown_index) for word in text_inp]
        # pad_sequences returns a (1, MAX_LEN) batch, ready for the model.
        encoded = pad_sequences([encoded], padding='post', truncating='post',
                                maxlen=MAX_LEN)
        # A single forward pass per step is enough to pick the next word.
        prediction = int(np.argmax(model.predict([features, encoded])))
        sampled_word = inv_dict.get(prediction)
        if sampled_word is None:
            # The predicted index has no vocabulary entry (presumably the
            # padding index); stop decoding rather than fail with KeyError.
            break
        generated.append(sampled_word)
        if sampled_word == 'endofseq':
            break
        text_inp.append(sampled_word)

    # Every word is preceded by a single space, as in the original output.
    caption = ''.join(' ' + word for word in generated)
    caption = caption.replace('endofseq', '').replace(' .', '.')
    print(caption)
    return caption
# Wrap the captioning function in a Gradio interface (image in, text out)
# and start the app.
iface = gr.Interface(after, "image", "text")
iface.launch()
# if __name__ == "__main__":
# app.run(debug=True) |