# NOTE: page-capture residue, not part of the program -- "Spaces: Runtime error".
import streamlit as st | |
import tensorflow as tf | |
from PIL import Image | |
import numpy as np | |
import json | |
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input | |
from tensorflow.keras.preprocessing.image import img_to_array | |
from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.models import Model | |
from keras.models import load_model | |
# ---------------------------------------------------------------------------
# Inference artifacts. Everything below runs at import time, i.e. each time
# the script is executed.
# ---------------------------------------------------------------------------

# Trained captioning network, read from an HDF5 file in the working directory.
model = load_model('image_caption.h5')

# Rebuild the tokenizer from its saved configuration.
# NOTE(review): tokenizer_from_json is documented to take a JSON *string*, so
# the file is presumably a JSON-encoded string (the output of
# Tokenizer.to_json() written with json.dump) -- confirm against the export
# script.
with open('tokenizer_config.json') as config_file:
    tokenizer_config = json.load(config_file)
tokenizer = tokenizer_from_json(tokenizer_config)

# Maximum number of decoding steps; also the width sequences are padded to.
max_length = 35

# Image encoder: VGG16 with default arguments, truncated so that it emits the
# output of its second-to-last layer instead of the final layer's output.
_vgg_base = VGG16()
vgg_model = Model(inputs=_vgg_base.inputs, outputs=_vgg_base.layers[-2].output)

# Page-level Streamlit settings; this is the first Streamlit call in the file.
st.set_page_config(page_title="Image Captioning App", layout="wide")
def preprocess_image(image):
    """Turn an image into a single-item batch for the VGG16 encoder.

    The image is converted to RGB, resized to 224x224, converted to an
    array, given a leading batch axis of size one, and finally passed
    through VGG16's ``preprocess_input``.

    Args:
        image: Object exposing ``convert`` and ``resize`` (the visible
            caller passes the result of ``PIL.Image.open``).

    Returns:
        The preprocessed array with shape ``(1, height, width, channels)``.
    """
    resized = image.convert("RGB").resize((224, 224))
    pixels = img_to_array(resized)
    # Prepend the batch dimension expected by ``Model.predict``.
    batch = pixels.reshape((1,) + pixels.shape)
    return preprocess_input(batch)
def predict(image):
    """Generate a caption for an image and strip the sequence markers.

    The image is preprocessed, encoded with ``vgg_model``, and decoded with
    ``predict_caption``. The decoder's output starts with ``"startseq"`` and,
    when decoding terminated normally, ends with ``" endseq"``; both markers
    are removed from the returned text.

    Args:
        image: Image object accepted by ``preprocess_image``.

    Returns:
        The caption text without the start/end markers. The separator space
        that follows the start marker is kept, so for a normally terminated
        caption the result is the same string a fixed ``[8:-7]`` slice of the
        decoder output would give.
    """
    start_marker = "startseq"
    end_suffix = " endseq"

    batch = preprocess_image(image)
    feature = vgg_model.predict(batch, verbose=0)
    caption = predict_caption(model, feature, tokenizer, max_length)

    # Strip each marker only when it is really there. Decoding can stop
    # without emitting "endseq" (step limit reached, or an id with no word),
    # and a fixed-width slice would then cut off the last seven characters
    # of genuine caption text.
    if caption.startswith(start_marker):
        caption = caption[len(start_marker):]
    if caption.endswith(end_suffix):
        caption = caption[:-len(end_suffix)]
    return caption
def idx_word(integer, tok):
    """Look up the word whose index in ``tok.word_index`` equals ``integer``.

    Args:
        integer: Token id to resolve.
        tok: Object with a ``word_index`` mapping of word -> index.

    Returns:
        The first word (in mapping iteration order) whose index compares
        equal to ``integer``, or ``None`` when no entry matches.
    """
    matching_words = (
        word for word, index in tok.word_index.items() if index == integer
    )
    return next(matching_words, None)
def predict_caption(model, image, tok, max_len):
    """Greedily decode a caption, one word per model call.

    Starting from ``"startseq"``, the text generated so far is tokenized and
    padded to ``max_len``, fed to ``model`` together with ``image``, and the
    position of the largest value in the prediction is mapped back to a word
    with ``idx_word``. Decoding stops after ``max_len`` steps, when the
    predicted id has no word, or right after ``"endseq"`` is appended.

    Args:
        model: Object whose ``predict`` accepts ``[image, sequence]``.
        image: First model input. Despite the name, the visible caller
            passes the encoder's feature output rather than raw pixels.
        tok: Tokenizer providing ``texts_to_sequences`` and ``word_index``.
        max_len: Maximum number of decoding steps and padded sequence length.

    Returns:
        The space-joined caption, always beginning with ``"startseq"`` and
        ending with ``"endseq"`` only if that word was predicted.
    """
    words = ["startseq"]
    for _ in range(max_len):
        encoded = tok.texts_to_sequences([" ".join(words)])[0]
        padded = pad_sequences([encoded], max_len)
        scores = model.predict([image, padded], verbose=0)
        # argmax over the flattened prediction gives the chosen token id.
        next_word = idx_word(np.argmax(scores), tok)
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return " ".join(words)
def main():
    """Render the Streamlit page: upload an image, then caption it on demand.

    Shows a title and instructions, accepts a JPG/JPEG/PNG upload, displays
    the uploaded image, and, when the "Generate Caption" button is pressed,
    runs ``predict`` on it and writes the resulting caption to the page.
    """
    st.title("Image Captioning App")
    # The app produces captions, not class labels, so the instructions say so.
    st.write("Upload an image and the app will generate a caption for it.")

    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    image = Image.open(uploaded_image)
    # NOTE(review): newer Streamlit releases deprecate ``use_column_width`` in
    # favour of ``use_container_width`` -- confirm against the pinned version
    # before switching.
    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Generate Caption"):
        with st.spinner("Generating..."):
            predictions = predict(image)
        st.write(f"Top Caption:{predictions}")


# Run the app when the file is executed as a script.
if __name__ == "__main__":
    main()