chhetri123's picture
Update app.py
53207fe verified
raw
history blame
5.71 kB
# from flask import Flask, request, jsonify
import cv2
import numpy as np
from keras.applications import ResNet152
from keras.optimizers import Adam
from keras.models import Sequential, Model,load_model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers import add
from keras.utils import to_categorical
import gradio as gr
from keras.preprocessing import image, sequence
from keras_preprocessing.sequence import pad_sequences
from tqdm import tqdm
import pickle
import tensorflow as tf
# from keras.applications.Resnet50 import preprocess_input
# from flask_cors import CORS/
from keras.applications import ResNet50
#
# # Transformer
# from library.prediction import evaluate_single_image
# from library.transformer import Transformer
# from library.customSchedule import learning_rate
# top_k = 25000
# num_layer = 4
# d_model = 512
# dff = 2048
# num_heads = 8
# row_size = 8
# col_size = 8
# target_vocab_size = top_k + 1
# dropout_rate = 0.1
# loaded_transformer = Transformer(num_layer, d_model, num_heads, dff, row_size, col_size,
# target_vocab_size, max_pos_encoding=target_vocab_size,
# rate=dropout_rate)
# # Load the weights into the model
# loaded_transformer.load_weights('models/Transformer/model')
# # Use the loaded custom objects
# loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
# print("Trasformer model loaded successfully")
# # loaded_transformer.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=train_loss.result(), metrics=[train_accuracy])
# global tokenizer
# with open('pickle_files/transformer/tokenizer.pickle', 'rb') as handle:
# tokenizer = pickle.load(handle)
# tokenizer.word_index['<pad>'] = 0
# tokenizer.index_word[0] = '<pad>'
# print("Tokenizer loaded successfully")
#
incept_model = ResNet152(weights='imagenet', include_top=False,input_shape=(224, 224, 3))
last = incept_model.layers[-2].output
ResNet152Model= Model(inputs = incept_model.input,outputs = last)
with open("pickle_files/lstm/words_dict.pkl","rb") as f:
words_dict=pickle.load(f)
vocab_size = len(words_dict)+1
MAX_LEN = 192
inv_dict = {v:k for k, v in words_dict.items()}
model = tf.keras.models.load_model('LSTM/lstm_model.h5')
# inputs1 = Input(shape=(2048,))
# fe1 = Dropout(0.5)(inputs1)
# fe2 = Dense(256, activation='relu')(fe1)
# # language sequence model
# inputs2 = Input(shape=(MAX_LEN,))
# se1 = Embedding(vocab_size, MAX_LEN, mask_zero=True)(inputs2)
# se2 = Dropout(0.4)(se1)
# se3 = LSTM(256)(se2)
# # decoder model
# decoder1 = add([fe2, se3])
# decoder2 = Dense(256, activation='relu')(decoder1)
# outputs = Dense(vocab_size, activation='softmax')(decoder2)
# # tie it together [image, seq] [word]
# model = Model(inputs=[inputs1, inputs2], outputs=outputs)
# # compile model
# model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
# model.load_model("models/LSTM/cultural_nepali_50.h5")
# print("LSTM model loaded successfully")
# app = Flask(__name__)
# app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 1
# cors = CORS(app, resources={r"/*": {"origins": "*"}})
# @app.route('/')
# def index():
# return render_template('index.html')
# @app.route('/tranformer',methods=['POST'])
# def tranformer():
# if 'file' not in request.files:
# return 'No file part'
# file = request.files['file']
# if file.filename == '':
# return 'No selected file'
# # Save the file
# file.save('static/file.jpg')
# caption=evaluate_single_image("static/file.jpg",tokenizer,loaded_transformer)
# print(caption)
# return jsonify({'caption': caption})
# @app.route('/lstm', methods=['POST'])
def after(image):
# if 'file' not in request.files:
# return 'No file part'
# file = request.files['file']
# if file.filename == '':
# return 'No selected file'
# # Save the file
# file.save('static/file.jpg')
# Read the saved file
print("Received image:", image)
# Convert Gradio Image object to numpy array
img_array = np.array(image)
# Ensure the image is not empty
if img_array is None:
return "Error: Empty image received."
# Perform image processing
img = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224,224))
img = img.reshape(1,224,224,3)
test_img_resized=ResNet152Model.predict(img).reshape(,2048)
text_inp = ['startofseq']
count = 0
caption = ''
while count < MAX_LEN:
count += 1
encoded = []
encoded = [words_dict.get(word, len(words_dict) - 1) for word in text_inp] # Convert words to indices, using index for '<end>' for unknown words
encoded = pad_sequences([encoded], padding='post', truncating='post', maxlen=MAX_LEN)[0] # Pad sequences
data_list = [test_img_resized.reshape(1, -1), encoded.reshape(1, -1)] # Reshape encoded
prediction = np.argmax(model.predict(data_list))
prediction = np.argmax(model.predict(data_list))
sampled_word = inv_dict[prediction]
caption = caption + ' ' + sampled_word
if sampled_word == 'endofseq':
break
text_inp.append(sampled_word)
caption= caption.replace('endofseq','')
print(caption.replace(' .','.'))
# return jsonify({'caption': caption.replace(' .','.')})
return caption.replace(' .','.')
iface = gr.Interface(fn=after, inputs="image", outputs="text")
iface.launch()
# if __name__ == "__main__":
# app.run(debug=True)