RupamG committed on
Commit
0f0bf0d
·
verified ·
1 Parent(s): b7cf625

Initial upload

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. app.py +67 -0
  3. requirements.txt +3 -0
  4. trained_model.keras +3 -0
  5. vocab.pkl +3 -0
  6. wordtoix.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trained_model.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pickle
4
+ import tensorflow as tf
5
+ from keras.preprocessing.image import load_img, img_to_array
6
+ from keras.applications.inception_v3 import InceptionV3, preprocess_input
7
+ from keras.models import Model
8
+ from keras.preprocessing.sequence import pad_sequences
9
+
10
+ # 1. Load Resources
11
+ model = tf.keras.models.load_model('trained_model.keras')
12
+ with open('vocab.pkl', 'rb') as f:
13
+ ixtoword = pickle.load(f)
14
+ with open('wordtoix.pkl', 'rb') as f:
15
+ wordtoix = pickle.load(f)
16
+
17
+ # 2. Setup Feature Extractor (InceptionV3)
18
+ base_model = InceptionV3(weights='imagenet')
19
+ feature_extractor = Model(base_model.input, base_model.layers[-2].output)
20
+
21
+ def extract_features(image):
22
+ # Resize and preprocess image for InceptionV3
23
+ image = image.resize((299, 299))
24
+ image = img_to_array(image)
25
+ image = np.expand_dims(image, axis=0)
26
+ image = preprocess_input(image)
27
+ feature = feature_extractor.predict(image)
28
+ return np.reshape(feature, (feature.shape[1]))
29
+
30
+ def generate_caption(image):
31
+ # Get image features
32
+ photo = extract_features(image)
33
+
34
+ # Generate caption
35
+ in_text = 'startseq'
36
+ max_length = 34 # Match your training max_length
37
+
38
+ for i in range(max_length):
39
+ sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
40
+ sequence = pad_sequences([sequence], maxlen=max_length)[0]
41
+ sequence = np.expand_dims(sequence, axis=0)
42
+
43
+ # Predict next word
44
+ yhat = model.predict([np.array([photo]), sequence], verbose=0)
45
+ yhat = np.argmax(yhat)
46
+ word = ixtoword.get(yhat)
47
+
48
+ if word is None:
49
+ break
50
+
51
+ in_text += ' ' + word
52
+ if word == 'endseq':
53
+ break
54
+
55
+ final_caption = in_text.replace('startseq', '').replace('endseq', '')
56
+ return final_caption.strip()
57
+
58
+ # 3. Launch Interface
59
+ interface = gr.Interface(
60
+ fn=generate_caption,
61
+ inputs=gr.Image(type="pil"),
62
+ outputs="text",
63
+ title="Image Caption Generator",
64
+ description="Upload an image and the AI will describe it."
65
+ )
66
+
67
+ interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ tensorflow
2
+ numpy
3
+ pillow
trained_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcccbc5be2d19a33b800769870301f4dbceb0cfda7e03f238b53d639a4bed38c
3
+ size 19159581
vocab.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55bbf09864a2edd17f0f0a7af31cd138e61c42f51a40470f410dd7300c94a185
3
+ size 19400
wordtoix.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6611b081e72b2d55491d8768ea35d59ce925e35687ba1f382042c88e3ed096
3
+ size 19400