Spaces:

johntony366
/

image-caption-generator

Sleeping

App Files Files Community

johntony366 committed on Jul 15

Commit

4f2700e

1 Parent(s): 42246b0

add application files

Browse files

Files changed (8) hide show

.gitattributes +2 -0
.gitignore +1 -0
app.py +95 -0
example1.jpg +0 -0
example2.jpg +0 -0
models/image-caption-model.keras +3 -0
requirements.txt +4 -0
tokenizer.p +3 -0

.gitattributes CHANGED Viewed

@@ -5,7 +5,9 @@
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text

 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
+*.p filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
+*.keras filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import gradio as gr
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from keras.applications.xception import Xception
+from keras.models import load_model
+from pickle import load
+import numpy as np
+from PIL import Image
+import os
+import zipfile
+# --- Feature Extraction and Caption Generation Helpers ---
+# extract_features accepts a PIL Image object directly (no file path needed)
+def extract_features(image_obj, model):
+    """Run an image through the feature-extraction model.
+
+    Args:
+        image_obj: A PIL Image in any mode (RGB, RGBA, grayscale, palette, ...).
+        model: An object whose ``predict(batch, verbose=0)`` returns the
+            features for a batch of shape (1, 299, 299, 3).
+
+    Returns:
+        Whatever ``model.predict`` returns for the single-image batch.
+    """
+    # Resize the image to the required dimensions
+    image = image_obj.resize((299, 299))
+    # Normalise every mode to 3-channel RGB. Checking ``shape[2] == 4`` only
+    # covered RGBA and raised IndexError on grayscale input, whose array is 2-D.
+    image = image.convert("RGB")
+    image = np.array(image)
+    # Add the batch axis, then scale pixel values from [0, 255] to [-1, 1]
+    image = np.expand_dims(image, axis=0)
+    image = image / 127.5
+    image = image - 1.0
+    # Extract features
+    return model.predict(image, verbose=0)
+def word_for_id(integer, tokenizer):
+    """Return the vocabulary word mapped to ``integer``, or None if absent.
+
+    Args:
+        integer: Token id to look up.
+        tokenizer: Object exposing a ``word_index`` dict (word -> id) and,
+            optionally, an ``index_word`` dict (id -> word).
+
+    Returns:
+        The matching word, or None when no word has that id.
+    """
+    # Fast path: use the reverse map when the tokenizer provides one, instead
+    # of scanning the whole vocabulary for every generated token.
+    index_word = getattr(tokenizer, 'index_word', None)
+    if index_word:
+        word = index_word.get(integer)
+        if word is not None:
+            return word
+    # Fallback: linear scan of word_index (covers tokenizers without a usable
+    # reverse map).
+    return next(
+        (word for word, index in tokenizer.word_index.items() if index == integer),
+        None,
+    )
+def generate_desc(model, tokenizer, photo, max_length):
+    """Greedily decode a caption for ``photo`` one word at a time.
+
+    Args:
+        model: Captioning model; ``predict([photo, sequence], verbose=0)`` is
+            called once per generated word.
+        tokenizer: Tokenizer providing ``texts_to_sequences`` and the
+            vocabulary used by ``word_for_id``.
+        photo: Image features passed to the model unchanged.
+        max_length: Maximum number of decoding steps; also the length the
+            input sequence is padded to.
+
+    Returns:
+        The generated words joined by spaces, without the 'start' and 'end'
+        marker tokens.
+    """
+    words = []
+    for _ in range(max_length):
+        # The model is always fed the 'start' marker plus everything so far.
+        in_text = ' '.join(['start', *words])
+        sequence = tokenizer.texts_to_sequences([in_text])[0]
+        sequence = pad_sequences([sequence], maxlen=max_length, padding='post')
+        pred = model.predict([photo, sequence], verbose=0)
+        word = word_for_id(np.argmax(pred), tokenizer)
+        # Stop on an unknown id or on the 'end' marker. The marker is never
+        # stored, so no trailing word needs stripping afterwards. The previous
+        # unconditional ``[1:-1]`` slice dropped a real word whenever decoding
+        # stopped without producing 'end'.
+        if word is None or word == 'end':
+            break
+        words.append(word)
+    return ' '.join(words)
+# --- Load Model and Tokenizer ---
+# Everything below runs once at import time; the loaded objects are shared by
+# all calls to predict().
+if not os.path.exists('models'):
+    # Fallback for checkouts that carry the weights as an archive.
+    print("Models directory not found. Extracting models.zip...")
+    with zipfile.ZipFile("models.zip", 'r') as zip_ref:
+        zip_ref.extractall(".")
+    print("Extraction complete.")
+# Padding length / decoding-step cap used by generate_desc.
+# NOTE(review): presumably the maximum caption length from training - confirm.
+max_length = 34
+# NOTE(security): unpickling can execute arbitrary code. Only load a
+# tokenizer.p that ships with this repository.
+# The context manager closes the file handle, which the previous
+# ``load(open(...))`` form never did.
+with open("tokenizer.p", "rb") as tokenizer_file:
+    tokenizer = load(tokenizer_file)
+xception_model = Xception(include_top=False, pooling="avg")
+# Use a loaded model from the notebook
+# This assumes you have a model file named 'image-caption-model.keras' in the 'models' directory
+model = load_model('models/image-caption-model.keras')
+# --- Gradio Interface ---
+# MODIFIED predict function: It now passes the PIL image object directly
+def predict(image):
+    """Gradio callback: generate a caption for the uploaded image.
+
+    Args:
+        image: PIL Image supplied by the ``gr.Image(type="pil")`` input, or
+            None when the form is submitted without an image.
+
+    Returns:
+        The generated caption string.
+
+    Raises:
+        gr.Error: If no image was provided.
+    """
+    # Gradio passes None for an empty image input. Report that to the user
+    # instead of failing with an AttributeError inside extract_features.
+    if image is None:
+        raise gr.Error("Please upload an image first.")
+    photo = extract_features(image, xception_model)
+    return generate_desc(model, tokenizer, photo, max_length)
+# Single-image-in, text-out web UI wired to predict(); the example files are
+# offered as ready-made inputs.
+iface = gr.Interface(
+    title="Image Caption Generator",
+    description="Upload an image and see the generated caption.",
+    fn=predict,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.Textbox(label="Generated Caption"),
+    examples=[[example] for example in ("example1.jpg", "example2.jpg")],
+)
+# Start the server only when executed as a script, not when imported.
+if __name__ == "__main__":
+    iface.launch()

example1.jpg ADDED Viewed

example2.jpg ADDED Viewed

models/image-caption-model.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8d7f2da7253524fa437bec55fc3a7579fe93a4a244b19af8550ea843f4aa2d1
+size 20053970

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+tensorflow
+pillow
+numpy

tokenizer.p ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b4cdabcc91588e1aa60e9fff30e6e4b785f4d661f2d17e7952fcdece1d5610b
+size 299505