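# Gradio app: generate a caption for an uploaded photo using a saved image
# feature extractor and a trained InceptionV3-based captioning model
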
import gradio as gr
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
from PIL import Image

# Load the fitted tokenizer and the two trained models: the image feature
# extractor and the caption model, which predicts the next word from the
# image features plus the partial caption
with open('tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)
feature_model = tf.keras.models.load_model('feature_model.keras')
model = tf.keras.models.load_model('best_model_inceptionv3.keras')
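
# Map a predicted token index back to its word in the tokenizer vocabulary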
def idx_to_word(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

def predict_caption(feature):
    # Greedy decoding: start from 'startseq' and repeatedly predict the next
    # word from the image feature until 'endseq' or the max length is reached
    in_text = 'startseq'
    # iterate over the max length of sequence
    for i in range(35):
        # encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad the sequence
        sequence = tf.keras.preprocessing.sequence.pad_sequences([sequence], 35, padding='post')
        # predict next word
        yhat = model.predict([feature, sequence], verbose=0)
        # get the index with the highest probability
        yhat = np.argmax(yhat)
        # convert index to word
        word = idx_to_word(yhat, tokenizer)
        # stop if word not found
        if word is None:
            break
        # append word as input for generating next word
        in_text += " " + word
        # stop if we reach end tag
        if word == 'endseq':
            break

    return in_text

def generate_caption(image):
    # Resize the uploaded PIL image to InceptionV3's 299x299 input size and
    # ensure it has 3 colour channels (uploads may be grayscale or RGBA)
    image = image.convert('RGB').resize((299, 299))
    image_array = tf.keras.preprocessing.image.img_to_array(image)
    image_array = image_array.reshape((1, 299, 299, 3))
    # apply InceptionV3 preprocessing, extract features, then decode a caption
    image_array = tf.keras.applications.inception_v3.preprocess_input(image_array)
    feature = feature_model.predict(image_array, verbose=0)
    caption = predict_caption(feature)
    return caption

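# Build and launch the Gradio interface: photo upload in, generated caption
# out, with a few bundled example images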
gr.Interface(fn=generate_caption,
             inputs=gr.Image(label='Upload a photo', type="pil"),
             outputs=gr.Label(label='Generated Caption'),
             examples=['1015584366.jpg', '1028205764_7e8df9a2ea.jpg', '1024138940_f1fefbdce1.jpg', '108899015_bf36131a57.jpg'],
             title='Image Caption Generator',
             theme='dark'
             ).launch(share=True)