Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- Dockerfile +23 -0
- app.py +53 -0
- main.py +63 -0
- requirements.txt +7 -0
- tagger.py +91 -0
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/root/.cache/huggingface \
    TRANSFORMERS_CACHE=/root/.cache/huggingface/transformers

WORKDIR /app

# system basics (tiny)
RUN apt-get update && apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# pre-download tiny NLTK bits so first request is warm.
# NLTK >= 3.9 renamed the resources (punkt -> punkt_tab,
# averaged_perceptron_tagger -> averaged_perceptron_tagger_eng); fetch both
# spellings so the warm-up works across the `nltk>=3.8` range in requirements.
# nltk.download() only warns on unknown ids, so the build never fails here.
RUN python -c "import nltk; [nltk.download(r, quiet=True) for r in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng')]"

COPY . .

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""FastAPI service: caption an uploaded image with BLIP and return POS-filtered tags."""

from fastapi import FastAPI, File, HTTPException, Query, UploadFile
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import List
from pathlib import Path
from PIL import Image
import io, json

from tagger import tag_pil_image, CAP_TAG_DIR

app = FastAPI(title="Image Tagger API", version="0.3.0")


class TagOut(BaseModel):
    # Original upload filename (or "upload" when the client sent none).
    filename: str
    # BLIP caption read back from the side-car JSON; "" if unavailable.
    caption: str
    # Tags extracted from the caption, first-seen order.
    tags: List[str]


@app.get("/healthz")
def healthz():
    """Liveness probe."""
    return {"ok": True}


@app.post("/upload", response_model=TagOut)
async def upload(
    file: UploadFile = File(..., description="PNG or JPEG image"),
    top_k: int = Query(5, ge=1, le=20, description="Maximum number of tags"),
    nouns: bool = Query(True, description="Include noun tags"),
    adjs: bool = Query(True, description="Include adjective tags"),
    verbs: bool = Query(True, description="Include verb tags"),
):
    """Caption and tag one uploaded image.

    Raises:
        HTTPException 415: content type is not image/png or image/jpeg.
        HTTPException 400: the bytes could not be decoded as an image.
    """
    if file.content_type not in {"image/png", "image/jpeg"}:
        raise HTTPException(415, "Only PNG or JPEG supported")

    try:
        data = await file.read()
        img = Image.open(io.BytesIO(data)).convert("RGB")
    except Exception:
        raise HTTPException(400, "Could not decode image")

    # file.filename is Optional — guard against None before Path().
    filename = file.filename or "upload"
    stem = Path(filename).stem or "upload"
    tags = tag_pil_image(
        img, stem,
        top_k=top_k, keep_nouns=nouns, keep_adjs=adjs, keep_verbs=verbs
    )

    # Pull the caption back out of the side-car JSON the tagger just wrote.
    caption = ""
    meta = CAP_TAG_DIR / f"{stem}.json"
    if meta.exists():
        try:
            caption = json.loads(meta.read_text())["caption"]
        except (OSError, ValueError, KeyError):
            pass  # best-effort: caption stays ""

    # Return a plain dict so FastAPI actually validates it against TagOut
    # (a JSONResponse would bypass the declared response_model).
    return {"filename": filename, "caption": caption, "tags": tags}
main.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""FastAPI service (duplicate entry point of app.py): caption + tag uploads."""

from fastapi import FastAPI, File, HTTPException, Query, UploadFile
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pathlib import Path
from typing import List
import io, json
from PIL import Image

# Absolute import: this module is copied to the image root (Dockerfile
# `COPY . .`) and run as a top-level module, where a relative
# `from .tagger import ...` would raise ImportError. Matches app.py.
from tagger import tag_pil_image, CAP_TAG_DIR

app = FastAPI(title="Image Tagger API", version="0.3.0")


class TagOut(BaseModel):
    # Original upload filename as sent by the client.
    filename: str
    # BLIP caption read back from the side-car JSON; "" if unavailable.
    caption: str
    # Tags extracted from the caption, first-seen order.
    tags: List[str]


@app.get("/healthz")
def healthz():
    """Liveness probe."""
    return {"ok": True}


@app.post("/upload", response_model=TagOut)
async def upload(
    file: UploadFile = File(..., description="PNG or JPEG image"),
    top_k: int = Query(5, ge=1, le=20, description="Maximum number of tags"),
    nouns: bool = Query(True, description="Include noun tags"),
    adjs: bool = Query(True, description="Include adjective tags"),
    verbs: bool = Query(True, description="Include verb tags"),
):
    """Caption and tag one uploaded PNG/JPEG image.

    Raises:
        HTTPException 415: unsupported content type.
        HTTPException 400: undecodable image bytes.
    """
    if file.content_type not in {"image/png", "image/jpeg"}:
        raise HTTPException(415, "Only PNG or JPEG supported")

    try:
        data = await file.read()
        img = Image.open(io.BytesIO(data)).convert("RGB")
    except Exception:
        # never bare `except:` — it would also swallow KeyboardInterrupt
        raise HTTPException(400, "Could not decode image")

    stem = Path(file.filename).stem or "upload"
    tags = tag_pil_image(
        img,
        stem,
        top_k=top_k,
        keep_nouns=nouns,
        keep_adjs=adjs,
        keep_verbs=verbs,
    )

    # pull the caption back out of the side-car JSON; use the single source
    # of truth for the directory instead of re-deriving ~/Desktop/image_tags
    caption = ""
    meta = CAP_TAG_DIR / f"{stem}.json"
    if meta.exists():
        try:
            caption = json.loads(meta.read_text())["caption"]
        except (OSError, ValueError, KeyError):
            pass  # best-effort: caption stays ""

    return JSONResponse(
        {"filename": file.filename, "caption": caption, "tags": tags}
    )
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn[standard]==0.30.0
|
| 3 |
+
pillow>=9.5
|
| 4 |
+
transformers>=4.41
|
| 5 |
+
torch>=2.2
|
| 6 |
+
nltk>=3.8
|
| 7 |
+
pydantic>=2.7
|
tagger.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import datetime as _dt
|
| 4 |
+
import json as _json
|
| 5 |
+
import pathlib as _pl
|
| 6 |
+
import re as _re
|
| 7 |
+
import sys as _sys
|
| 8 |
+
from typing import List
|
| 9 |
+
|
| 10 |
+
import nltk
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from transformers import BlipForConditionalGeneration, BlipProcessor
|
| 13 |
+
|
| 14 |
+
# βββ ensure punkt + perceptron tagger are downloaded ββββββββββββββββββββββββββ
|
| 15 |
+
for res, subdir in [
|
| 16 |
+
("punkt", "tokenizers"),
|
| 17 |
+
("averaged_perceptron_tagger", "taggers"),
|
| 18 |
+
]:
|
| 19 |
+
try:
|
| 20 |
+
nltk.data.find(f"{subdir}/{res}")
|
| 21 |
+
except LookupError:
|
| 22 |
+
nltk.download(res, quiet=True)
|
| 23 |
+
|
| 24 |
+
# βββ where we dump the caption+tags JSON sidecars ββββββββββββββββββββββββββββββ
|
| 25 |
+
CAP_TAG_DIR = _pl.Path.home() / "Desktop" / "image_tags"
|
| 26 |
+
CAP_TAG_DIR.mkdir(exist_ok=True, parents=True)
|
| 27 |
+
|
| 28 |
+
# βββ load the BLIP model once ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 29 |
+
_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 30 |
+
_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 31 |
+
|
| 32 |
+
# βββ allowed POS prefixes ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
+
_POS = {"nouns": ("NN",), "adjs": ("JJ",), "verbs": ("VB",)}
|
| 34 |
+
|
def _caption_to_tags(
    caption: str,
    k: int,
    keep_nouns: bool,
    keep_adjs: bool,
    keep_verbs: bool,
) -> List[str]:
    """Extract up to *k* unique lowercase tags from *caption*.

    Keeps tokens whose Penn-Treebank POS tag starts with an enabled prefix
    (NN / JJ / VB), strips characters outside [a-z0-9-], and preserves
    first-seen order.
    """
    from nltk.tokenize import wordpunct_tokenize

    # str.startswith accepts a tuple of prefixes — build it once instead of
    # evaluating an any() generator per token.
    allowed: tuple = ()
    if keep_nouns:
        allowed += _POS["nouns"]
    if keep_adjs:
        allowed += _POS["adjs"]
    if keep_verbs:
        allowed += _POS["verbs"]
    if not allowed:
        return []  # every POS class disabled -> no tags possible

    seen, out = set(), []
    for word, pos in nltk.pos_tag(wordpunct_tokenize(caption.lower())):
        if pos.startswith(allowed):
            clean = _re.sub(r"[^a-z0-9-]", "", word)
            if clean and clean not in seen:
                out.append(clean)
                seen.add(clean)
                if len(out) >= k:
                    break
    return out
def tag_pil_image(
    img: Image.Image,
    stem: str,
    *,
    top_k: int = 5,
    keep_nouns: bool = True,
    keep_adjs: bool = True,
    keep_verbs: bool = True,
) -> List[str]:
    """Caption *img* with BLIP, derive tags, and persist a JSON side-car.

    Writes ``CAP_TAG_DIR/<stem>.json`` containing the caption, the tags and
    a UTC timestamp so the API layer can read the caption back. Returns the
    tag list.
    """
    import torch  # transformers already requires torch; local import keeps module load lean

    # 1) generate caption — inference_mode skips autograd bookkeeping,
    #    saving memory/time on a pure-inference path
    with torch.inference_mode():
        ids = _model.generate(**_processor(images=img, return_tensors="pt"), max_length=30)
    caption = _processor.decode(ids[0], skip_special_tokens=True)
    # 2) extract tags
    tags = _caption_to_tags(caption, top_k, keep_nouns, keep_adjs, keep_verbs)
    # 3) persist side-car JSON for the API layer to read back
    payload = {
        "caption": caption,
        "tags": tags,
        "timestamp": _dt.datetime.now(_dt.timezone.utc).isoformat(),
    }
    (CAP_TAG_DIR / f"{stem}.json").write_text(_json.dumps(payload, indent=2))
    return tags
if __name__ == "__main__":
    # Minimal CLI: python tagger.py <image_path> [top_k]
    if len(_sys.argv) < 2:
        _sys.exit("Usage: python tagger.py <image_path> [top_k]")
    path = _pl.Path(_sys.argv[1])
    if not path.exists():
        _sys.exit(f"File not found: {path}")
    k = int(_sys.argv[2]) if len(_sys.argv) > 2 else 5
    # Keep the *opened* image in the with-block so its file handle is closed;
    # `with Image.open(p).convert("RGB")` would close only the converted copy.
    with Image.open(path) as opened:
        im = opened.convert("RGB")
    print("tags:", ", ".join(tag_pil_image(im, path.stem, top_k=k)))