RupamG committed on
Commit
0f0bf0d
·
verified ·
1 Parent(s): b7cf625

Initial upload

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. app.py +67 -0
  3. requirements.txt +3 -0
  4. trained_model.keras +3 -0
  5. vocab.pkl +3 -0
  6. wordtoix.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trained_model.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pickle
4
+ import tensorflow as tf
5
+ from keras.preprocessing.image import load_img, img_to_array
6
+ from keras.applications.inception_v3 import InceptionV3, preprocess_input
7
+ from keras.models import Model
8
+ from keras.preprocessing.sequence import pad_sequences
9
+
10
+ # 1. Load Resources
11
+ model = tf.keras.models.load_model('trained_model.keras')
12
+ with open('vocab.pkl', 'rb') as f:
13
+ ixtoword = pickle.load(f)
14
+ with open('wordtoix.pkl', 'rb') as f:
15
+ wordtoix = pickle.load(f)
16
+
17
+ # 2. Setup Feature Extractor (InceptionV3)
18
+ base_model = InceptionV3(weights='imagenet')
19
+ feature_extractor = Model(base_model.input, base_model.layers[-2].output)
20
+
21
+ def extract_features(image):
22
+ # Resize and preprocess image for InceptionV3
23
+ image = image.resize((299, 299))
24
+ image = img_to_array(image)
25
+ image = np.expand_dims(image, axis=0)
26
+ image = preprocess_input(image)
27
+ feature = feature_extractor.predict(image)
28
+ return np.reshape(feature, (feature.shape[1]))
29
+
30
+ def generate_caption(image):
31
+ # Get image features
32
+ photo = extract_features(image)
33
+
34
+ # Generate caption
35
+ in_text = 'startseq'
36
+ max_length = 34 # Match your training max_length
37
+
38
+ for i in range(max_length):
39
+ sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
40
+ sequence = pad_sequences([sequence], maxlen=max_length)[0]
41
+ sequence = np.expand_dims(sequence, axis=0)
42
+
43
+ # Predict next word
44
+ yhat = model.predict([np.array([photo]), sequence], verbose=0)
45
+ yhat = np.argmax(yhat)
46
+ word = ixtoword.get(yhat)
47
+
48
+ if word is None:
49
+ break
50
+
51
+ in_text += ' ' + word
52
+ if word == 'endseq':
53
+ break
54
+
55
+ final_caption = in_text.replace('startseq', '').replace('endseq', '')
56
+ return final_caption.strip()
57
+
58
+ # 3. Launch Interface
59
+ interface = gr.Interface(
60
+ fn=generate_caption,
61
+ inputs=gr.Image(type="pil"),
62
+ outputs="text",
63
+ title="Image Caption Generator",
64
+ description="Upload an image and the AI will describe it."
65
+ )
66
+
67
+ interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tensorflow
2
+ numpy
3
+ pillow
trained_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcccbc5be2d19a33b800769870301f4dbceb0cfda7e03f238b53d639a4bed38c
3
+ size 19159581
vocab.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55bbf09864a2edd17f0f0a7af31cd138e61c42f51a40470f410dd7300c94a185
3
+ size 19400
wordtoix.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6611b081e72b2d55491d8768ea35d59ce925e35687ba1f382042c88e3ed096
3
+ size 19400