File size: 2,745 Bytes
9b5c5c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import streamlit as st
import tensorflow as tf
from PIL import Image
import numpy as np
import json
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model

from keras.models import load_model

# Load the trained captioning model from its .h5 file.
# NOTE(review): everything in this section runs at module level, and Streamlit
# re-executes the script on each user interaction, so both models are reloaded
# every time -- consider wrapping the loads in a cached loader.
model = load_model('image_caption.h5')

with open('tokenizer_config.json', 'r') as f:
    tokenizer_config = json.load(f)
# tokenizer_from_json() parses a JSON *string*. Depending on how the file was
# written, json.load() yields either that string (file produced with
# json.dump(tokenizer.to_json(), f)) or an already-decoded dict (file produced
# with f.write(tokenizer.to_json())). Re-serialise the dict form so both
# layouts load.
if not isinstance(tokenizer_config, str):
    tokenizer_config = json.dumps(tokenizer_config)
tokenizer = tokenizer_from_json(tokenizer_config)

# Padding length for the text input and upper bound on generation steps.
max_length=35
# Load pre-trained VGG16 and expose its second-to-last layer as the output,
# so predict() returns that layer's activations for an image.
vgg_model = VGG16()
vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)

# Set Streamlit configurations (no other Streamlit call precedes this one)
st.set_page_config(page_title="Image Captioning App", layout="wide")


# Function to preprocess the input image
def preprocess_image(image):
    """Convert a PIL image into a preprocessed single-image batch.

    The image is forced to RGB, resized to 224x224, turned into an array with
    ``img_to_array``, given a leading axis of length 1, and passed through the
    VGG16 ``preprocess_input`` function imported by this file.

    Args:
        image: A PIL image (anything exposing ``convert`` and ``resize``).

    Returns:
        The array returned by ``preprocess_input`` for the one-image batch.
    """
    rgb_resized = image.convert("RGB").resize((224, 224))
    pixels = img_to_array(rgb_resized)
    # Prepend an axis of length 1 so the single image forms a batch.
    batch = pixels.reshape((1,) + tuple(pixels.shape[:3]))
    return preprocess_input(batch)

# Function to make predictions on the input image
def predict(image):
    """Generate a caption for a PIL image.

    The image is preprocessed, run through ``vgg_model`` to obtain a feature
    vector, and handed to ``predict_caption`` together with the module-level
    captioning ``model``, ``tokenizer`` and ``max_length``.

    Args:
        image: A PIL image as accepted by ``preprocess_image``.

    Returns:
        The generated text with the leading ``startseq`` marker and, if
        present, the trailing `` endseq`` marker removed. The space that
        followed ``startseq`` is kept, matching the previous output.
    """
    image = preprocess_image(image)
    feature = vgg_model.predict(image, verbose=0)
    preds = predict_caption(model, feature, tokenizer, max_length)

    # Strip the markers only when they are really there. A fixed [8:-7] slice
    # cut off the last 7 characters of the caption whenever generation stopped
    # without emitting "endseq" (step limit reached or unknown word index).
    start_marker = "startseq"
    end_marker = " endseq"
    if preds.startswith(start_marker):
        preds = preds[len(start_marker):]
    if preds.endswith(end_marker):
        preds = preds[:-len(end_marker)]
    return preds

def idx_word(integer,tok):
    """Return the word whose tokenizer index equals ``integer``.

    Args:
        integer: Vocabulary index to look up.
        tok: Tokenizer-like object exposing a ``word_index`` mapping
            (word -> index) and, optionally, an ``index_word`` mapping
            (index -> word).

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Prefer the reverse mapping when the tokenizer provides one: this is
    # called once per generated word, and scanning the whole vocabulary each
    # time is needlessly slow.
    reverse = getattr(tok, "index_word", None)
    if reverse:
        found = reverse.get(integer)
        if found is not None:
            return found
    # Fallback for tokenizer-like objects that only expose ``word_index``
    # (or whose reverse mapping lacks the entry).
    for word,index in tok.word_index.items():
        if index == integer:
            return word
    return None

def predict_caption(model,image,tok,max_len):
    """Greedily generate a caption, one word per step.

    Starting from ``startseq``, each step encodes the text so far with
    ``tok``, pads it to ``max_len``, asks ``model`` for a prediction, and
    appends the word whose index is the argmax of that prediction.

    Args:
        model: Captioning model; called as ``model.predict([image, seq])``.
        image: Image features passed to the model unchanged.
        tok: Tokenizer used for encoding text and decoding indices.
        max_len: Padding length and maximum number of generation steps.

    Returns:
        The generated text, beginning with ``startseq`` and ending with
        ``endseq`` if that word was produced before the loop stopped.
    """
    words = ["startseq"]
    for _ in range(max_len):
        encoded = tok.texts_to_sequences([" ".join(words)])[0]
        padded = pad_sequences([encoded], max_len)
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_word(np.argmax(scores), tok)
        # No word for the predicted index: stop with what we have.
        if next_word is None:
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return " ".join(words)

# Streamlit app
def main():
    """Render the page: upload an image, display it, and caption it on demand."""
    st.title("Image Captioning App")
    # The app produces captions, not class labels.
    st.write("Upload an image and the app will generate a caption for it.")

    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])

    if uploaded_image is not None:
        try:
            image = Image.open(uploaded_image)
        except OSError:
            # Pillow raises an OSError subclass when the file cannot be
            # identified as an image; report it instead of crashing the page.
            st.error("The uploaded file could not be read as an image.")
            return
        st.image(image, caption='Uploaded Image', use_column_width=True)
        st.write("")

        if st.button("Generate Caption"):
            with st.spinner("Generating..."):
                predictions = predict(image)

            st.write(f"Top Caption:{predictions}")

# Run the app
if __name__ == "__main__":
    main()