Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import tensorflow as tf | |
| from PIL import Image | |
| import numpy as np | |
| import json | |
| from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input | |
| from tensorflow.keras.preprocessing.image import img_to_array | |
| from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.models import Model | |
| from keras.models import load_model | |
# Load the trained captioning model from its .h5 file (path is relative to the
# working directory the app is started from).
model = load_model('image_caption.h5')
# Read the saved tokenizer configuration and rebuild the tokenizer from it.
# NOTE(review): tokenizer_from_json takes a JSON *string*, so this presumably
# relies on tokenizer_config.json holding a JSON-encoded string (so json.load
# returns a str, not a dict) — confirm against how the file was written.
with open('tokenizer_config.json', 'r') as f:
    tokenizer_config = json.load(f)
tokenizer = tokenizer_from_json(tokenizer_config)
# Forwarded by predict() to predict_caption(), where it is used both as the
# padding length for the token sequence and as the cap on decoding steps.
# NOTE(review): presumably must match the sequence length used in training.
max_length=35
# Build the image feature extractor: instantiate VGG16 with its defaults, then
# re-wrap it so the output is taken from the second-to-last layer instead of
# the final layer.
vgg_model = VGG16()
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
# Set the page title and a wide layout for the Streamlit page
st.set_page_config(page_title="Image Captioning App", layout="wide")
| # Function to preprocess the input image | |
def preprocess_image(image):
    """Turn a PIL image into a one-item batch for the VGG16 feature extractor.

    The image is converted to RGB, resized to 224x224, converted to an array
    with ``img_to_array``, given a leading batch axis of length 1, and finally
    run through the VGG16 ``preprocess_input`` function.

    Args:
        image: A PIL image (anything providing ``convert`` and ``resize``).

    Returns:
        Whatever ``preprocess_input`` returns for the batched pixel array.
    """
    rgb_resized = image.convert("RGB").resize((224, 224))
    pixels = img_to_array(rgb_resized)
    # Prepend a batch dimension of size 1 in front of the image's own axes.
    batch = pixels.reshape((1, *pixels.shape))
    return preprocess_input(batch)
| # Function to make predictions on the input image | |
def predict(image):
    """Generate a caption for a PIL image.

    The image is preprocessed, encoded with the module-level ``vgg_model``,
    and decoded into text by ``predict_caption`` using the module-level
    ``model``, ``tokenizer`` and ``max_length``. The ``startseq`` marker that
    ``predict_caption`` always puts first is removed, and a trailing
    `` endseq`` marker is removed when present.

    Args:
        image: A PIL image, as accepted by ``preprocess_image``.

    Returns:
        The caption text without the sequence markers. As before, the text
        keeps the single space that followed ``startseq``, so a non-empty
        caption starts with a space.
    """
    batch = preprocess_image(image)
    feature = vgg_model.predict(batch, verbose=0)
    raw_caption = predict_caption(model, feature, tokenizer, max_length)

    start_marker = "startseq"
    end_marker = " endseq"
    caption = raw_caption[len(start_marker):]
    # Decoding can stop without emitting 'endseq' (step limit reached or an
    # index with no vocabulary entry). Slicing a fixed seven characters off
    # the end would then cut real caption text, so only trim the end marker
    # when it is actually there.
    if caption.endswith(end_marker):
        caption = caption[:-len(end_marker)]
    return caption
def idx_word(integer, tok):
    """Find the word that a tokenizer maps to a given index.

    Args:
        integer: Index to look for among the values of ``tok.word_index``.
        tok: Object exposing a ``word_index`` mapping of word -> index.

    Returns:
        The first word (in mapping order) whose index equals ``integer``,
        or ``None`` when no entry matches.
    """
    matching_words = (
        candidate
        for candidate, position in tok.word_index.items()
        if position == integer
    )
    return next(matching_words, None)
def predict_caption(model, image, tok, max_len):
    """Greedily decode a caption one word at a time.

    Starting from ``"startseq"``, each step tokenizes the text produced so
    far, pads it to ``max_len``, asks ``model`` for a prediction from the
    image features plus the padded sequence, and appends the word whose index
    is the argmax of that prediction. Decoding stops after ``max_len`` steps,
    when the predicted index has no word in the vocabulary, or right after
    ``"endseq"`` has been appended.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            an array of scores.
            NOTE(review): presumably one score per vocabulary index for a
            batch of one — confirm against the trained model.
        image: Image features passed to ``model`` unchanged.
        tok: Tokenizer providing ``texts_to_sequences`` and ``word_index``.
        max_len: Padding length for the sequence and cap on decoding steps.

    Returns:
        The space-joined text, always beginning with ``"startseq"`` and
        ending with ``"endseq"`` only if the model produced it.
    """
    words = ["startseq"]
    for _ in range(max_len):
        encoded = tok.texts_to_sequences([" ".join(words)])[0]
        padded = pad_sequences([encoded], max_len)
        scores = model.predict([image, padded], verbose=0)
        # argmax without an axis indexes into the flattened score array.
        next_word = idx_word(np.argmax(scores), tok)
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return " ".join(words)
| # Streamlit app | |
def main():
    """Render the captioning page: upload an image, preview it, caption it.

    Shows a title and short instructions, then a file uploader limited to
    jpg/jpeg/png. Once a file is uploaded it is opened with PIL and shown;
    pressing "Generate Caption" runs ``predict`` on it and writes the result.
    """
    st.title("Image Captioning App")
    # The app generates captions, not class labels; the intro text says so.
    st.write("Upload an image and the app will generate a caption for it.")
    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    try:
        image = Image.open(uploaded_image)
    except OSError:
        # PIL raises an OSError subclass when the file is not a readable
        # image; report it on the page instead of crashing with a traceback.
        st.error("The uploaded file could not be read as an image.")
        return

    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")
    if st.button("Generate Caption"):
        with st.spinner("Generating..."):
            predictions = predict(image)
        st.write(f"Top Caption:{predictions}")
# Run the app only when this file is executed as the main module
if __name__ == "__main__":
    main()