Spaces:

UtkarshShivhare
/

image_cation

Runtime error

App Files Files Community

image_cation / app.py

UtkarshShivhare

Upload app.py

c3900c4 about 2 years ago

raw

history blame contribute delete

2.75 kB

	import streamlit as st
	import tensorflow as tf
	from PIL import Image
	import numpy as np
	import json
	from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
	from tensorflow.keras.preprocessing.image import img_to_array
	from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow.keras.models import Model

	from keras.models import load_model

	# Module-level setup: everything below runs each time the script is executed.
	# NOTE(review): Streamlit re-executes the script on user interaction, so these
	# loads are presumably repeated on every rerun; consider Streamlit's resource
	# caching if the installed version provides it -- confirm before changing.

	# Load the trained captioning model from its .h5 file.
	model = load_model('image_caption.h5')

	# Restore the tokenizer from its saved configuration. The file is decoded as
	# UTF-8 explicitly so the result does not depend on the platform's default
	# encoding.
	# NOTE(review): tokenizer_from_json is given whatever json.load returns, so the
	# file presumably holds the tokenizer's JSON string, itself JSON-encoded --
	# confirm against the script that wrote it.
	with open('tokenizer_config.json', 'r', encoding='utf-8') as f:
	    tokenizer_config = json.load(f)
	tokenizer = tokenizer_from_json(tokenizer_config)

	# Passed to predict_caption as max_len: it bounds the number of generated
	# words and is the length each input sequence is padded to.
	max_length = 35

	# VGG16 cut off at its second-to-last layer; predict() uses the output of that
	# layer as the image feature handed to the captioning model.
	vgg_model = VGG16()
	vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)

	# Set Streamlit configurations
	st.set_page_config(page_title="Image Captioning App", layout="wide")


	def preprocess_image(image):
	    """Turn a PIL image into the batched array passed to ``vgg_model``.

	    The image is converted to RGB, resized to 224x224, turned into an array
	    with ``img_to_array``, given a leading batch axis of size 1 and finally
	    run through VGG16's ``preprocess_input``.

	    Args:
	        image: The PIL image to prepare.

	    Returns:
	        The value returned by ``preprocess_input`` for the batched array.
	    """
	    rgb = image.convert("RGB").resize((224, 224))
	    pixels = img_to_array(rgb)
	    # Prepend a batch axis of size 1 in front of the existing three axes.
	    batch = pixels.reshape((1, *pixels.shape))
	    return preprocess_input(batch)

	def _strip_sequence_markers(caption):
	    """Drop a leading ``startseq`` and a trailing `` endseq`` from *caption* if present."""
	    if caption.startswith("startseq"):
	        caption = caption[len("startseq"):]
	    if caption.endswith(" endseq"):
	        caption = caption[:-len(" endseq")]
	    return caption


	def predict(image):
	    """Generate a caption for a PIL image.

	    The image is preprocessed, run through ``vgg_model`` to obtain its
	    feature, and the feature is handed to ``predict_caption`` together with
	    the module-level ``model``, ``tokenizer`` and ``max_length``.

	    Args:
	        image: The PIL image to caption.

	    Returns:
	        The generated caption with the ``startseq`` / ``endseq`` markers
	        removed. The space that followed ``startseq`` is kept, so a
	        non-empty caption starts with a single space.
	    """
	    batch = preprocess_image(image)
	    feature = vgg_model.predict(batch, verbose=0)
	    caption = predict_caption(model, feature, tokenizer, max_length)
	    # predict_caption can stop without appending "endseq" (length limit
	    # reached, or an index with no word). A fixed-width slice would then cut
	    # real caption text, so each marker is removed only when it is there.
	    return _strip_sequence_markers(caption)

	def idx_word(integer, tok):
	    """Look up the word that ``tok.word_index`` maps to *integer*.

	    Args:
	        integer: The index to search for.
	        tok: An object exposing a ``word_index`` mapping of word to index.

	    Returns:
	        The first word, in the mapping's iteration order, whose index equals
	        *integer*, or ``None`` when no word has that index.
	    """
	    matches = (word for word, index in tok.word_index.items() if index == integer)
	    return next(matches, None)

	def predict_caption(model, image, tok, max_len):
	    """Generate a caption one word at a time, always taking the top-scoring word.

	    Starting from ``startseq``, the text so far is encoded with *tok*, padded
	    to *max_len* and passed to *model* together with *image*. The argmax of
	    the prediction is mapped back to a word with ``idx_word`` and appended.

	    Args:
	        model: The captioning model; called as ``model.predict([image, seq])``.
	        image: The image feature passed to the model as its first input.
	        tok: Tokenizer used to encode the text and to look words up.
	        max_len: Maximum number of words to generate and the padding length.

	    Returns:
	        The space-separated text, beginning with ``startseq``. It ends with
	        ``endseq`` only if the model produced that word before generation
	        stopped for another reason.
	    """
	    words = ["startseq"]
	    for _ in range(max_len):
	        encoded = tok.texts_to_sequences([" ".join(words)])[0]
	        padded = pad_sequences([encoded], max_len)
	        scores = model.predict([image, padded], verbose=0)
	        next_word = idx_word(np.argmax(scores), tok)
	        # No word carries the predicted index: stop generating.
	        if next_word is None:
	            break
	        words.append(next_word)
	        if next_word == 'endseq':
	            break
	    return " ".join(words)

	# Streamlit app
	def main():
	    """Render the page: upload an image, preview it and caption it on request.

	    Nothing beyond the title, description and uploader is drawn until a file
	    has been uploaded. A file that cannot be opened as an image produces an
	    error message instead of an unhandled exception. The caption is generated
	    only when the "Generate Caption" button is pressed.
	    """
	    st.title("Image Captioning App")
	    st.write("Upload an image and the app will generate a caption for it.")

	    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
	    if uploaded_image is None:
	        return

	    try:
	        image = Image.open(uploaded_image)
	    except OSError:
	        # The uploader filters by file extension only, so the content may
	        # still not be a readable image.
	        st.error("The uploaded file could not be read as an image.")
	        return

	    st.image(image, caption='Uploaded Image', use_column_width=True)
	    st.write("")

	    if st.button("Generate Caption"):
	        with st.spinner("Generating..."):
	            predictions = predict(image)
	        # Written inside the button branch so `predictions` is always bound
	        # here, and after the spinner block has finished.
	        st.write(f"Top Caption:{predictions}")


	# Run the app
	if __name__ == "__main__":
	    main()