Spaces:

ProfRom
/

TestSpace3

Sleeping

App Files Files Community

ProfRom committed 27 days ago

Commit

4d6c27c

verified ·

1 Parent(s): 235a4e1

Harper - Final Assignment submission

Browse files

Files changed (18) hide show

.gitattributes +15 -0
Photos/20240325_133025.jpg +3 -0
Photos/20250106_075208.jpg +3 -0
Photos/20250805_161445.jpg +3 -0
Photos/20250808_131957.jpg +3 -0
Photos/20250808_164442.jpg +3 -0
Photos/20250808_183114.jpg +3 -0
Photos/20250813_113228.jpg +3 -0
Photos/20250918_170635.jpg +3 -0
Photos/20250920_114728.jpg +3 -0
Photos/20251101_155708.jpg +3 -0
Photos/20251106_170359.jpg +3 -0
Photos/20251106_192036.jpg +3 -0
Photos/20251107_100830.jpg +3 -0
Photos/20251107_101822.jpg +3 -0
Photos/20251107_150015.jpg +3 -0
app.py +123 -33
requirements.txt +7 -3

.gitattributes CHANGED Viewed

@@ -56,3 +56,18 @@ images/6.png filter=lfs diff=lfs merge=lfs -text
 images/7.png filter=lfs diff=lfs merge=lfs -text
 images/8.png filter=lfs diff=lfs merge=lfs -text
 images/9.png filter=lfs diff=lfs merge=lfs -text

 images/7.png filter=lfs diff=lfs merge=lfs -text
 images/8.png filter=lfs diff=lfs merge=lfs -text
 images/9.png filter=lfs diff=lfs merge=lfs -text
+Photos/20240325_133025.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250106_075208.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250805_161445.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250808_131957.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250808_164442.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250808_183114.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250813_113228.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250918_170635.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20250920_114728.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251101_155708.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251106_170359.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251106_192036.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251107_100830.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251107_101822.jpg filter=lfs diff=lfs merge=lfs -text
+Photos/20251107_150015.jpg filter=lfs diff=lfs merge=lfs -text

Photos/20240325_133025.jpg ADDED Viewed

Git LFS Details

SHA256: 43473e22b5101c66dc443854db3d918236eecb5dbab43408fc3293f1bb1d2a35
Pointer size: 132 Bytes
Size of remote file: 3.04 MB

Photos/20250106_075208.jpg ADDED Viewed

Git LFS Details

SHA256: aed82d8ec88d514b97b54c984ffaabbb731644e419986c618f9de5ac94b099c2
Pointer size: 132 Bytes
Size of remote file: 2.15 MB

Photos/20250805_161445.jpg ADDED Viewed

Git LFS Details

SHA256: 2824930ff93957a55f173e9cdf3a49029fbb03f54c5469fc602ee257be3b31e8
Pointer size: 132 Bytes
Size of remote file: 7.87 MB

Photos/20250808_131957.jpg ADDED Viewed

Git LFS Details

SHA256: 6472060645368c66a91d391ee9fe11f2a8b2fc2c80894d096c18d050e0cf9775
Pointer size: 132 Bytes
Size of remote file: 8.04 MB

Photos/20250808_164442.jpg ADDED Viewed

Git LFS Details

SHA256: e59baaf4fbcfbe4b54c8b419fcfe0c09ebf079dfe02093b03efa380846a3967a
Pointer size: 133 Bytes
Size of remote file: 10.1 MB

Photos/20250808_183114.jpg ADDED Viewed

Git LFS Details

SHA256: 8889daa1da199cfd0381551f7297f82e787141c527a0d8f8a5beaadb712cd945
Pointer size: 132 Bytes
Size of remote file: 6.06 MB

Photos/20250813_113228.jpg ADDED Viewed

Git LFS Details

SHA256: aa96808ab506c3b165c433ac96ecd4adbdfa32f9457ac7a3d6ecc522cd6b3c48
Pointer size: 132 Bytes
Size of remote file: 4.94 MB

Photos/20250918_170635.jpg ADDED Viewed

Git LFS Details

SHA256: 32da19879f8ddaa46bb993f1dfa610aa87712876d43fd5ab1863ade567a35440
Pointer size: 132 Bytes
Size of remote file: 3.59 MB

Photos/20250920_114728.jpg ADDED Viewed

Git LFS Details

SHA256: c3922a218aa9d53a2d2bf9e16b1d84e7c2843a3e1226427078f43c5736c0678b
Pointer size: 132 Bytes
Size of remote file: 8.2 MB

Photos/20251101_155708.jpg ADDED Viewed

Git LFS Details

SHA256: 301a997e6b34879e7e94dfb460c17afb62b57b1280837cd588e0f7c76017600f
Pointer size: 133 Bytes
Size of remote file: 11.9 MB

Photos/20251106_170359.jpg ADDED Viewed

Git LFS Details

SHA256: fd7638dfd63984249b985960917a2c99a4d00fe5ba37b2dd61c04376e962d5ec
Pointer size: 132 Bytes
Size of remote file: 6.29 MB

Photos/20251106_192036.jpg ADDED Viewed

Git LFS Details

SHA256: e70e7ee6d3cc39f04220acd49af2e4918476759e198432aa4fa6ebc060d3bb67
Pointer size: 132 Bytes
Size of remote file: 5.14 MB

Photos/20251107_100830.jpg ADDED Viewed

Git LFS Details

SHA256: 5baaeedc84ab3d301755467771bd0992bcb4fa9228f3af40b1d48ab462924dc4
Pointer size: 133 Bytes
Size of remote file: 10 MB

Photos/20251107_101822.jpg ADDED Viewed

Git LFS Details

SHA256: 6c0a44626bca6e76c56d4ebe0737fefc6f22b3f1f5d6d99dc289e66b2803367c
Pointer size: 132 Bytes
Size of remote file: 7.39 MB

Photos/20251107_150015.jpg ADDED Viewed

Git LFS Details

SHA256: 68e661c821415197be6a4685d00f4841ea69f6abc155d5cfbc77e5427dd870d4
Pointer size: 132 Bytes
Size of remote file: 5.17 MB

app.py CHANGED Viewed

@@ -1,41 +1,131 @@
 import gradio as gr
-from transformers import Blip2Processor, Blip2ForConditionalGeneration
-import torch
-# Load pre-trained BLIP-2 model and processor
-processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-model = Blip2ForConditionalGeneration.from_pretrained(
-    "Salesforce/blip2-opt-2.7b",
-    torch_dtype=torch.float16
 )
-def predict(image, question=None):
-    # If no question provided, generate a caption
-    if question is None or question.strip() == "":
-        inputs = processor(image, return_tensors="pt")
-    else:
-        inputs = processor(image, question, return_tensors="pt")
-    # Move to GPU if available
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    inputs = inputs.to(device)
-    model.to(device)
-    # Generate output
-    out = model.generate(**inputs, max_new_tokens=50)
-    result = processor.decode(out[0], skip_special_tokens=True)
-    return result
-# Gradio interface
-iface = gr.Interface(
-    fn=predict,
-    inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Optional Question", placeholder="Ask something about the image...")
     ],
-    outputs=gr.Textbox(label="Result"),
-    title="BLIP-2 Multimodal Assistant",
-    description="Upload an image and get a caption. Optionally, ask a question about the image."
 )
-iface.launch()

+# Caption Generator w/English-to-Spanish Translation
+# A. Harper | ARIN 460 | December 2025
+# Load into Hugging Face Space (using the Gradio Framework)
+# Include requirements.txt file (list: gradio, pandas, torch, sentencepiece, tensorflow, Image, transformers)
+# To run, navigate to the App tab. Click the red Generate button.
+# The app will randomly select an image, generate an (English) caption,
+    # then generate a Spanish translation.
+# Import gradio - app framework
 import gradio as gr
+# Two image datasources are available.
+# Switch between datasources by adding/removing # to deactivate/activate the corresponding lines.
+# AA comments refer to images in the DataFrame / from Coco database
+# BB comments refer to images stored in local Gradio app folder
+# Import os and random to support random selection of image (from folder)
+import os
+import random
+# Import pandas, datasets, transformers, PIL, and torch
+import pandas as pd
+from datasets import load_dataset
+from transformers import (
+    BlipProcessor,
+    BlipForConditionalGeneration,
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
+    MarianMTModel,
+    MarianTokenizer
 )
+from PIL import Image
+import torch
+# AA: Load dataset. Initial image source.
+#Load dataset (henryscheible/coco_val2014_tiny)
+dataset = load_dataset("henryscheible/coco_val2014_tiny", split="validation")
+# Reduce dataset to 20 rows, i.e., get sample
+samples = dataset.select(range(20))
+#Convert to dataframe
+df = pd.DataFrame(samples)
+# BB: Direct to Photos folder
+IMAGE_FOLDER = "Photos"
+image_paths = [
+    os.path.join(IMAGE_FOLDER, f)
+    for f in os.listdir(IMAGE_FOLDER)
+    if f.lower().endswith((".jpg", ".jpeg", ".png"))
+]
+#Load the image captioning model (Salesforce/blip-image-captioning-large)
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+#Load transformer for translating captions from English to Spanish
+model_name = "Helsinki-NLP/opus-mt-en-es"
+trans_tokenizer = MarianTokenizer.from_pretrained(model_name)
+trans_model = MarianMTModel.from_pretrained(model_name)
+#Configure captioning function
+def caption_random_image():
+    # AA: pick random row - from DF
+    ##sample = df.sample(1).iloc[0]
+    # BB: Pick a random image path - image from folder
+    img_path = random.choice(image_paths)
+    # BB: Load into PIL and convert to RGB - image from folder
+    image = Image.open(img_path).convert("RGB")
+    # AA: Image - for DF
+    ##image = sample["image"]
+    # Unconditional image captioning
+    inputs = processor(image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption_eng = processor.decode(out[0], skip_special_tokens=True)
+    # Translate caption from English to Spanish
+    trans_inputs = trans_tokenizer.encode(caption_eng, return_tensors="pt")
+    trans_out = trans_model.generate(trans_inputs)
+    caption_es = trans_tokenizer.decode(trans_out[0], skip_special_tokens=True)
+    return image, caption_eng, caption_es
+demo = gr.Interface(
+    fn=caption_random_image,
+    inputs=None,
+    outputs=[
+        gr.Image(type="pil", label="Random Image"),
+        gr.Textbox(label="Caption (English)"),
+        gr.Textbox(label="Caption (Spanish)")
     ],
+    title="Image Captioning (with English to Spanish translation)",
+    description="Selects a random image (from either the local folder or henryscheible/coco data subset); generates a BLIP caption; then translates the (English) caption to Spanish."
 )
+demo.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,8 @@
-gradio>=4.0
-transformers>=4.30
 torch
-pillow

+gradio
+pandas
 torch
+sentencepiece
+tensorflow
+Image
+transformers