prtm committed on
Commit
bc19da9
·
1 Parent(s): f0cbae0

Upload 7 files

Browse files
Caption_it1.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ from keras.layers import *
3
+ from keras.models import *
4
+ from tensorflow.keras.models import Model
5
+ from tensorflow.keras.applications import ResNet50
6
+ from tensorflow.keras.preprocessing import image
7
+ import numpy as np
8
+ from tensorflow.keras.applications.resnet50 import preprocess_input
9
+ import pickle
10
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
11
+ from tensorflow.keras.utils import to_categorical
12
+
13
+ # %%
14
# Captioning network; predict_caption feeds it [image features, padded token ids].
model = load_model('model_weights/model_19.h5')

# %%
# Full ImageNet-pretrained ResNet50, used only as the source of layers below.
model_temp = ResNet50(weights='imagenet', input_shape=(224, 224, 3))

# %%
# Feature extractor: same input as ResNet50, output taken from its
# second-to-last layer instead of the final layer.
model_resnet = Model(
    inputs=model_temp.input,
    outputs=model_temp.layers[-2].output,
)
21
+
22
+ # %%
23
def preprocess_img(img):
    """Load an image and prepare it as a one-element batch for ResNet50.

    Args:
        img: Value handed to ``image.load_img`` (the Flask app passes a
            file path).

    Returns:
        The array returned by ``preprocess_input`` for the 224x224 image
        with a leading axis added.
    """
    loaded = image.load_img(img, target_size=(224, 224))
    pixels = image.img_to_array(loaded)
    # The network predicts on batches, so add a leading axis.
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)
31
+
32
+ # %%
33
def encode_image(img):
    """Encode an image into a feature vector with the truncated ResNet50.

    Args:
        img: Value forwarded to ``preprocess_img``.

    Returns:
        The ``model_resnet`` prediction reshaped to a single row whose
        width is the prediction's second dimension.
    """
    batch = preprocess_img(img)
    features = model_resnet.predict(batch)
    return features.reshape(1, features.shape[1])
39
+
40
+ # %%
41
# Vocabulary lookup tables used by predict_caption:
# word -> index and index -> word.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# these artifacts must come from a trusted source.
with open('storage/word_to_idx.pkl', 'rb') as w2i:
    word_to_idx = pickle.load(w2i)

with open('storage/idx_to_word.pkl', 'rb') as i2w:
    idx_to_word = pickle.load(i2w)
45
+
46
+ # %%
47
def predict_caption(photo):
    """Generate a caption for an encoded image using greedy decoding.

    Starting from the ``startseq`` token, the caption model is repeatedly
    asked for the next word given the image features and the words produced
    so far; the highest-scoring word is appended each time. Decoding stops
    when ``endseq`` is produced or after ``max_len`` steps.

    Args:
        photo: Image feature array passed as the first input to
            ``model.predict`` (``encode_image`` produces it).

    Returns:
        The caption as one space-separated string with the ``startseq``
        and ``endseq`` markers removed.
    """
    # NOTE(review): presumably the sequence length the model was trained
    # with - confirm before changing.
    max_len = 35
    in_text = "startseq"
    for _ in range(max_len):
        # Words missing from the vocabulary are skipped rather than raising.
        sequence = [word_to_idx[w] for w in in_text.split() if w in word_to_idx]
        sequence = pad_sequences([sequence], maxlen=max_len, padding='post')

        ypred = model.predict([photo, sequence])
        # Greedy sampling: always take the word with the highest probability.
        word = idx_to_word[ypred.argmax()]
        in_text += ' ' + word

        if word == "endseq":
            break

    # Drop the leading "startseq". Remove the trailing token only when it
    # really is "endseq": if decoding ran out of steps first, the last token
    # is a genuine caption word and must be kept.
    caption_words = in_text.split()[1:]
    if caption_words and caption_words[-1] == "endseq":
        caption_words.pop()
    return ' '.join(caption_words)
65
+
66
+ # %%
67
def caption_this_image(image):
    """Return a generated caption for ``image``.

    Args:
        image: Value forwarded to ``encode_image`` (the Flask app passes
            the saved upload's path).

    Returns:
        The caption string produced by ``predict_caption`` from the
        encoded image features.
    """
    return predict_caption(encode_image(image))
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, redirect, request
2
+
3
+ import Caption_it1
4
+
5
+ app=Flask(__name__)
6
+
7
@app.route('/')
def hello():
    """Render ``index.html`` without any captioning result."""
    template_name = 'index.html'
    return render_template(template_name)
10
+
11
@app.route('/',methods=['POST'])
def marks():
    """Handle an image upload: save it, caption it and render the result.

    Reads the uploaded file from the ``userfile`` form field, stores it
    under ``static/``, runs ``Caption_it1.caption_this_image`` on the saved
    path and renders ``index.html`` with ``your_result`` set to a dict
    holding the ``image`` path and its ``caption``.

    Returns:
        The rendered ``index.html`` page, or a redirect to ``/`` when the
        request carries no usable filename.
    """
    # Local import: only this handler needs it, for sanitising the name.
    import os

    f = request.files['userfile']

    # The client controls f.filename. Keep only its final path component
    # (treating backslashes as separators too) so a name such as
    # "../../app.py" cannot be written outside static/.
    raw_name = (f.filename or '').replace('\\', '/')
    filename = os.path.basename(raw_name)
    if filename in ('', '.', '..'):
        # Nothing usable was submitted; send the user back to the form
        # instead of failing while saving to a directory path.
        return redirect('/')

    path = 'static/{}'.format(filename)
    f.save(path)

    caption = Caption_it1.caption_this_image(path)

    result_dic = {
        'image': path,
        'caption': caption,
    }

    return render_template('index.html', your_result=result_dic)
26
+
27
+ if __name__=='__main__':
28
+ app.run(debug=True)
captions.txt ADDED
The diff for this file is too large to render. See raw diff
 
descriptions.txt ADDED
The diff for this file is too large to render. See raw diff
 
encoded_test_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac8012adb9e24405560b28392d70ea4b6dd34222a937b060352a7e838163c80
3
+ size 8251187
encoded_train_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb805645a60e973b859a64cbda1f634b9d797352dca6169305b2b75477f0d40
3
+ size 49506244
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Flask==2.3.3
2
+ keras==2.10.0
3
+ numpy==1.25.2
4
+ tensorflow==2.10.0