# Scraped page-status residue, not part of the program: "Spaces: Sleeping"
| import streamlit as st | |
| from tensorflow.keras.models import load_model | |
| from tensorflow.keras.preprocessing.text import Tokenizer | |
| from tensorflow.keras.preprocessing.sequence import pad_sequences | |
| from tensorflow.keras.applications.vgg16 import preprocess_input | |
| from tensorflow.keras.applications.vgg16 import VGG16 | |
| from tensorflow.keras.models import Model | |
| from tensorflow.keras.preprocessing.image import load_img, img_to_array | |
| import numpy as np | |
| from PIL import Image | |
| from pickle import load | |
# Load the pickled tokenizer. The context manager closes the file handle as
# soon as the object is read instead of leaving it open until garbage
# collection.
# NOTE: unpickling can execute arbitrary code; only ship a tokenizer file that
# comes from a trusted source.
with open('tokenizer1.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

# Passed to generate_caption() as max_length: both the padding length of the
# input sequence and the upper bound on the number of decoding steps.
# NOTE(review): presumably has to match the length used at training time - confirm.
max_len = 34

# Load image captioning model
model = load_model('model_18.h5')

# Load VGG16 model for feature extraction
vgg_model = VGG16()
# NOTE(review): `layers` looks like a property that returns a fresh list in
# tf.keras, in which case this pop() has no effect. It is kept so behaviour is
# unchanged - confirm before removing.
vgg_model.layers.pop()
# Re-wire the network so it outputs the activations of its second-to-last layer.
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
def word_for_id(integer, tokenizer):
    """Map an integer index back to its word.

    Scans ``tokenizer.word_index`` in iteration order and returns the first
    word whose index equals ``integer``, or ``None`` when no word has it.
    """
    matching_words = (
        candidate
        for candidate, candidate_id in tokenizer.word_index.items()
        if candidate_id == integer
    )
    return next(matching_words, None)
def generate_caption(model, tokenizer, photo, max_length):
    """Generate a caption for ``photo`` one word at a time.

    Starting from the seed token ``startseq``, each step integer-encodes the
    text produced so far, pads it to ``max_length``, asks ``model`` for a
    prediction and appends the word whose index has the highest score.

    Args:
        model: Object whose ``predict([photo, sequence], verbose=0)`` returns
            the scores for the next word.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
        photo: Image features, passed to ``model.predict`` unchanged.
        max_length: Padding length and maximum number of decoding steps.

    Returns:
        The generated text, always beginning with ``startseq`` and ending
        with ``endseq`` if that token was predicted before the step limit.
    """
    # Build the index -> word table once rather than rescanning the whole
    # vocabulary on every decoding step. setdefault keeps the first word seen
    # for an index, matching a front-to-back scan of word_index.
    index_to_word = {}
    for vocab_word, vocab_index in tokenizer.word_index.items():
        index_to_word.setdefault(vocab_index, vocab_word)

    # Seed the generation process
    in_text = 'startseq'
    # Iterate over the whole length of the sequence
    for _ in range(max_length):
        # Integer encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # Pad input
        sequence = pad_sequences([sequence], maxlen=max_length)
        # Predict next word
        yhat = model.predict([photo, sequence], verbose=0)
        # Index of the highest score, as a plain int for the dict lookup
        yhat = int(np.argmax(yhat))
        # Map integer to word
        word = index_to_word.get(yhat)
        # Stop if we cannot map the word
        if word is None:
            break
        # Append as input for generating the next word
        in_text += ' ' + word
        # Stop if we predict the end of the sequence
        if word == 'endseq':
            break
    return in_text
def extract_features(filename):
    """Run one image through ``vgg_model`` and return its prediction.

    ``filename`` is handed to ``load_img`` as-is. The image is loaded at
    224x224, converted to an array, given a leading batch axis of size 1 and
    passed through the VGG16 ``preprocess_input`` before prediction.
    """
    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    # Add a leading axis so the array is a batch containing a single image.
    batch_shape = (1, pixels.shape[0], pixels.shape[1], pixels.shape[2])
    batch = preprocess_input(pixels.reshape(batch_shape))
    return vgg_model.predict(batch, verbose=0)
def remove_start_end_tokens(caption):
    """Strip the ``startseq`` / ``endseq`` markers from a generated caption.

    The caption is split on whitespace; tokens equal to either marker
    (compared case-insensitively) are dropped and the remaining tokens are
    re-joined with single spaces.
    """
    markers = ['startseq', 'endseq']
    kept_tokens = []
    for token in caption.split():
        if token.lower() in markers:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)
def main():
    """Render the page: upload an image, preview it, caption it on demand."""
    # U+1F4F7 is the camera emoji; the previous literal was a mis-decoded
    # (mojibake) rendering of it.
    st.set_page_config(page_title="Image Captioning", page_icon="\U0001F4F7")
    st.title("Image Captioning")
    st.markdown("Upload an image and get a caption for it.")

    # File uploader
    uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    # Display uploaded image
    image = Image.open(uploaded_file)
    resized_image = image.resize((400, 400))
    st.image(resized_image, caption='Uploaded Image')

    # Generate image caption
    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            # Feature extraction is deferred until a caption is requested, so
            # the VGG16 forward pass does not run on script reruns where the
            # button was not clicked.
            photo = extract_features(uploaded_file)
            description = generate_caption(model, tokenizer, photo, max_len)
        # Remove start and end sequence tokens from the caption
        caption = remove_start_end_tokens(description)
        # Display caption
        st.subheader(" Generated Caption")
        st.markdown("---")
        st.markdown(f"<p style='font-size: 18px; text-align: center;'>{caption}</p>", unsafe_allow_html=True)
        st.markdown("---")


if __name__ == '__main__':
    main()