SameerR007 committed on
Commit
5e2bbb8
1 Parent(s): 1ce9e8d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pickle

# Render the page header first: the TensorFlow imports and model loading
# below are slow, and Streamlit shows elements as the script reaches them.
st.header("Image Captioner")
st.markdown("Building the framework may take upto a minute. Please be patient. Thank you!")

# Load the pickled artifacts shipped next to the app. Context managers make
# sure the file handles are closed (the bare open() calls leaked them).
# NOTE: pickle can execute arbitrary code on load -- only ship trusted files.
with open("features.pkl", "rb") as features_file:
    # Not referenced again in the visible script; kept because it was loaded.
    features = pickle.load(features_file)
with open("all_captions.pkl", "rb") as captions_file:
    all_captions = pickle.load(captions_file)

from tensorflow.keras.preprocessing.text import Tokenizer

# Fit the tokenizer on the stored captions to obtain its vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
vocab_size = len(tokenizer.word_index) + 1
# Longest caption, in whitespace-separated words; used as the padding length
# and as the upper bound on decoding steps.
max_length = max(len(caption.split()) for caption in all_captions)

from tensorflow import keras

# Captioning model saved in HDF5 format.
model = keras.models.load_model("best_model.h5")
14
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
15
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word whose index equals ``integer``.

    Args:
        integer: Token index to look up (a plain ``int`` or any value that
            compares and hashes like one, e.g. a NumPy integer).
        tokenizer: Object exposing a ``word_index`` mapping of word -> index.
            If it also exposes the inverse ``index_word`` mapping, that is
            consulted first so a hit does not scan the whole vocabulary.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: direct lookup in the reverse map when the tokenizer has one.
    reverse_map = getattr(tokenizer, "index_word", None)
    if reverse_map is not None and integer in reverse_map:
        return reverse_map[integer]

    # Fallback: linear scan of word_index (the original behaviour).
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
20
+ # generate caption for an image
21
+ import numpy as np
22
def predict_caption(model, image, tokenizer, max_length):
    """Generate a caption one word at a time, always taking the top word.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            scores over the vocabulary for the next word.
        image: First model input, passed through unchanged on every step
            (the caller in this script passes VGG16-derived features).
        tokenizer: Tokenizer used to encode the text so far and to map the
            predicted index back to a word.
        max_length: Padding length for the encoded text and the maximum
            number of decoding steps.

    Returns:
        The generated text, beginning with ``'startseq'`` and ending with
        ``'endseq'`` if that word was produced before decoding stopped.
    """
    caption = 'startseq'
    for _ in range(max_length):
        # Encode everything generated so far and pad it to a fixed length.
        token_ids = tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([token_ids], max_length)

        # Take the highest-scoring index and map it back to a word.
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_to_word(np.argmax(scores), tokenizer)

        # An index with no vocabulary entry ends the caption early.
        if next_word is None:
            break

        caption = caption + " " + next_word

        # The end marker is kept in the returned text; callers strip it.
        if next_word == 'endseq':
            break

    return caption
47
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import pyttsx3

# Text-to-speech engine used later to read the caption aloud.
engine = pyttsx3.init()

# Feature extractor: VGG16 truncated at its second-to-last layer, so the
# output is that layer's activations instead of the final layer's output.
_full_vgg = VGG16()
vgg_model = Model(
    inputs=_full_vgg.inputs,
    outputs=_full_vgg.layers[-2].output,
)

from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image

# Upload widget; returns None until the user provides a file.
_accepted_types = ["jpg", "png", "jpeg"]
uploaded_image = st.file_uploader("Upload image to be captioned", type=_accepted_types)

# Not referenced by the visible script; kept as in the original.
image_path = "bushman.jpeg"
57
# Main interaction: show the uploaded picture and, on request, caption it.
if uploaded_image is not None:
    display_image = Image.open(uploaded_image)
    st.image(display_image)
    if st.button("Caption"):
        # PNG uploads may be RGBA, palette or grayscale; VGG16 needs exactly
        # three channels, so force RGB before resizing to its 224x224 input.
        display_image = display_image.convert("RGB").resize((224, 224))
        image = img_to_array(display_image)
        # Add a leading batch dimension of 1.
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        image = preprocess_input(image)
        feature = vgg_model.predict(image, verbose=0)
        final = predict_caption(model, feature, tokenizer, max_length)

        # Strip the start/end markers only when they are actually present.
        # Decoding can stop without emitting 'endseq' (step limit reached or
        # unknown index); blindly dropping the last token then lost a word.
        caption_words = final.split()
        if caption_words and caption_words[0] == "startseq":
            caption_words = caption_words[1:]
        if caption_words and caption_words[-1] == "endseq":
            caption_words = caption_words[:-1]
        final_output = " ".join(caption_words)

        # Show the caption on the page as well: the speech engine plays on
        # the machine running the script, which may not be the viewer's.
        st.write(final_output)
        engine.say(final_output)
        engine.runAndWait()