Metal3d committed
Commit 0b97d29 · 1 Parent(s): c4c02ca

Make the project adapted to Huggingface

Files changed (6):
  1. README.md +64 -0
  2. app.py +3 -1
  3. labelizer/__init__.py +17 -21
  4. pdm.lock +37 -1
  5. pyproject.toml +1 -1
  6. requirements.txt +84 -0
README.md ADDED
@@ -0,0 +1,64 @@
+ ---
+ title: Labelizer - AI Image Labeling Tool
+ emoji: 🖼️
+ colorFrom: blue
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 6.0.2
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: metal3d/labelm2
+ ---
+
+ # 🖼️ Labelizer - AI Image Labeling Tool
+
+ An intelligent image labeling tool that uses the Florence-2 vision-language model to automatically generate detailed descriptions for your images. Perfect for creating labeled datasets for machine learning projects.
+
+ ## ✨ Features
+
+ - 🤖 **AI-Powered Labeling**: Uses the advanced Florence-2 model for accurate image descriptions
+ - 📊 **Batch Processing**: Label multiple images at once with progress tracking
+ - ✏️ **Manual Editing**: Edit generated labels to fit your specific needs
+ - 📦 **Flexible Export**: Download datasets with an organized folder structure or a flat format
+ - 🎨 **User-Friendly Interface**: Clean, intuitive Gradio interface with emoji-enhanced navigation
+
+ ## 🚀 How to Use
+
+ 1. **Upload Images**: Click "📁 Upload images" to select multiple image files
+ 2. **Generate Labels**:
+    - Click "✨ Generate label" below individual images
+    - Or click "🏷️ Labelize all images" for batch processing
+ 3. **Review & Edit**: Modify any generated labels as needed
+ 4. **Download**: Create and download your labeled dataset as a ZIP file
+
+ ## 🛠️ Technical Details
+
+ - **Model**: Florence-2-large-hf for vision-language understanding
+ - **Framework**: Gradio with ZeroGPU support
+ - **Supported Formats**: JPG, PNG, GIF, BMP, TIFF, WebP
+ - **Export Options**: Organized folders (images/ + labels/) or flat structure
+
+ ## 📋 Supported Tasks
+
+ The tool supports various captioning tasks:
+ - `<MORE_DETAILED_CAPTION>`: Comprehensive image descriptions
+ - `<DETAILED_CAPTION>`: Detailed but concise descriptions
+ - `<CAPTION>`: Basic image captions
+
+ ## 🎯 Use Cases
+
+ - **Machine Learning**: Create labeled datasets for computer vision tasks
+ - **Content Management**: Organize image collections with descriptions
+ - **Accessibility**: Generate alt-text for images
+ - **Research**: Prepare datasets for academic projects
+
+ ## ⚡ Performance
+
+ - Optimized for GPU acceleration with ZeroGPU
+ - Efficient batch processing for large datasets
+ - Lazy loading to minimize resource usage
+
+ ---
+
+ Built with ❤️ using [Gradio](https://gradio.app/) and [Florence-2](https://huggingface.co/microsoft/Florence-2-large)
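The task prompts listed in the README's "Supported Tasks" section are consumed by `get_task_response` in `labelizer/__init__.py` (see the diff further down in this commit). As a hedged illustration, here is a minimal local usage sketch; it assumes the repository root is on the import path and that an `example.jpg` file exists, and it makes no assumption about the shape of the returned value beyond printing it:

```python
# Minimal sketch: feeding the README's task prompts to the labelizer helper.
# Assumptions: run from the repository root; "example.jpg" is any local image.
from PIL import Image

from labelizer import get_task_response

image = Image.open("example.jpg").convert("RGB")

# Any prompt from the "Supported Tasks" section can be passed as the task.
for task in ("<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>"):
    result = get_task_response(task, image)
    print(task, "->", result)
```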
app.py CHANGED
@@ -1,5 +1,5 @@
- from click import progressbar
  import gradio as gr
+ import spaces
  from PIL import Image
  
  from dataset import ImageDataset
@@ -8,6 +8,7 @@ from packager import create_dataset_zip
  from documentation import DOC_CONTENT
  
  
+ @spaces.GPU
  def auto_label(
      image: Image.Image, imid: int, dataset: ImageDataset
  ) -> tuple[str, ImageDataset]:
@@ -34,6 +35,7 @@ def uploaded(files: list, current_dataset: ImageDataset | None) -> ImageDataset:
      return current_dataset.add_images(files)
  
  
+ @spaces.GPU
  def labelize_all_images(
      dataset: ImageDataset, label: str, progress=gr.Progress(True)
  ) -> tuple[ImageDataset, str]:
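The only functional change in app.py is the ZeroGPU integration: `spaces` is imported and the two inference entry points are decorated with `@spaces.GPU`, so a GPU is attached only while those functions run. A minimal standalone sketch of the same pattern follows; the function and variable names below are illustrative, not taken from app.py:

```python
# Sketch of the ZeroGPU pattern: only the decorated function gets GPU time.
import gradio as gr
import spaces


@spaces.GPU
def describe(image):
    # GPU-backed model inference would happen inside the decorated function.
    return "a generated description"


demo = gr.Interface(fn=describe, inputs=gr.Image(type="pil"), outputs="text")

if __name__ == "__main__":
    demo.launch()
```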
labelizer/__init__.py CHANGED
@@ -1,34 +1,28 @@
  import torch
  from PIL import Image
- from transformers import Florence2ForConditionalGeneration, Florence2Processor
+ from transformers import AutoProcessor, AutoModelForCausalLM
  
  MODEL_ID = "ducviet00/Florence-2-large-hf"
  
  # Global variables for lazy loading
  _model = None
  _processor = None
- _device = None
- _torch_dtype = None
  
  
  def _load_model():
      """Load model and processor lazily"""
-     global _model, _processor, _device, _torch_dtype
+     global _model, _processor
  
      if _model is None:
-         _device = "cuda:0" if torch.cuda.is_available() else "cpu"
-         _torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-         print(f"Loading model {MODEL_ID} on {_device} with dtype {_torch_dtype}...")
-         _model = Florence2ForConditionalGeneration.from_pretrained(
-             MODEL_ID, torch_dtype=_torch_dtype, trust_remote_code=True
-         ).to(_device)  # type: ignore
-         _processor = Florence2Processor.from_pretrained(
-             MODEL_ID, trust_remote_code=True
-         )
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+         print(f"Loading model {MODEL_ID} on {device}...")
+         _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
+         _processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
          print("Model loaded successfully!")
  
-     return _model, _processor, _device, _torch_dtype
+     return _model, _processor
  
  
  def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
@@ -41,7 +35,7 @@ def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
  
      """
      # Lazy load model only when needed
-     model, processor, device, torch_dtype = _load_model()
+     model, processor = _load_model()
      if text_input is None:
          prompt = task_prompt
      else:
@@ -53,11 +47,13 @@ def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
  
      if processor is None:
          raise ValueError("processor is None")
-     inputs = processor(
-         text=prompt,
-         images=image,
-         return_tensors="pt",  # type: ignore
-     ).to(device, torch_dtype)
+
+     # Process inputs using the correct API
+     inputs = processor(text=prompt, images=image, return_tensors="pt")
+
+     # Move inputs to device if model is on CUDA
+     device = next(model.parameters()).device
+     inputs = {k: v.to(device) for k, v in inputs.items()}
  
      generated_ids = model.generate(
          input_ids=inputs["input_ids"],
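The loader now goes through `AutoModelForCausalLM`/`AutoProcessor` with `trust_remote_code=True` and, instead of tracking a global device, moves the processor outputs to wherever the model's parameters happen to live. Below is a hedged, self-contained sketch of that inference path; the `pixel_values` argument to `generate`, the `max_new_tokens` value, and the `batch_decode` step are assumptions for illustration (the hunk above cuts off after `input_ids`), and the model is left on its default device exactly as the new code does:

```python
# Standalone sketch of the loading/inference path introduced by this commit.
# Assumptions: generate() kwargs beyond input_ids and the decoding step are
# illustrative; "example.jpg" is any local image file.
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

MODEL_ID = "ducviet00/Florence-2-large-hf"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

image = Image.open("example.jpg").convert("RGB")
inputs = processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt")

# Follow the model wherever from_pretrained placed it, as the new code does.
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=256,
    )

print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```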
pdm.lock CHANGED
@@ -5,7 +5,7 @@
  groups = ["default"]
  strategy = ["inherit_metadata"]
  lock_version = "4.5.0"
- content_hash = "sha256:ca1f4cf6df24c991a826baf1788242e48e5c757fc53700048e3bea57b9608ca0"
+ content_hash = "sha256:e71cb6d555d7581ecbf7bd5fc66a31343a04b3c46215175380aae49b346ae57d"
  
  [[metadata.targets]]
  requires_python = "==3.12.*"
@@ -747,6 +747,22 @@ files = [
      {file = "pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353"},
  ]
  
+ [[package]]
+ name = "psutil"
+ version = "5.9.8"
+ requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+ summary = "Cross-platform lib for process and system monitoring in Python."
+ groups = ["default"]
+ files = [
+     {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"},
+     {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"},
+     {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"},
+     {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"},
+     {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"},
+     {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"},
+     {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"},
+ ]
+ 
  [[package]]
  name = "pydantic"
  version = "2.12.4"
@@ -1006,6 +1022,26 @@ files = [
      {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
  ]
  
+ [[package]]
+ name = "spaces"
+ version = "0.44.0"
+ requires_python = "<3.14,>=3.10"
+ summary = "Utilities for Hugging Face Spaces"
+ groups = ["default"]
+ dependencies = [
+     "gradio",
+     "httpx>=0.20",
+     "packaging",
+     "psutil<6,>=2",
+     "pydantic<3,>=1",
+     "requests<3.0,>=2.19",
+     "typing-extensions<5,>=4",
+ ]
+ files = [
+     {file = "spaces-0.44.0-py3-none-any.whl", hash = "sha256:7992b57883fff65f2ead2daa52b70b51a786631e5c83cd4586198b9e75cc97b1"},
+     {file = "spaces-0.44.0.tar.gz", hash = "sha256:22603e5eb45c8aa51ef7431530a0721052c80a2331249f84b781a7f1dbb1343b"},
+ ]
+ 
  [[package]]
  name = "starlette"
  version = "0.50.0"
pyproject.toml CHANGED
@@ -5,7 +5,7 @@ description = "Default template for PDM package"
  authors = [
      {name = "Patrice Ferlet", email = "metal3d@gmail.com"},
  ]
- dependencies = ["gradio>=6.0.2", "torch>=2.9.1", "transformers>=4.57.3", "pillow>=12.0.0"]
+ dependencies = ["gradio>=6.0.2", "torch>=2.9.1", "transformers>=4.57.3", "pillow>=12.0.0", "spaces>=0.44.0"]
  requires-python = "==3.12.*"
  readme = "README.md"
  license = {text = "MIT"}
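The new `spaces>=0.44.0` dependency is what pulls the `spaces` and `psutil` entries into pdm.lock and into the pinned list in requirements.txt below (that file is marked "@generated by PDM"). A rough sketch of the PDM workflow that would reproduce these three files; the exact export flags depend on the PDM version:

```bash
# Hedged sketch of the PDM commands behind this commit (flags may vary by version).
pdm add "spaces>=0.44.0"        # updates pyproject.toml and pdm.lock
pdm export -o requirements.txt  # regenerates the pinned requirements.txt shown below
```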
requirements.txt ADDED
@@ -0,0 +1,84 @@
+ # This file is @generated by PDM.
+ # Please do not edit it manually.
+
+ aiofiles==24.1.0
+ annotated-doc==0.0.4
+ annotated-types==0.7.0
+ anyio==4.12.0
+ brotli==1.2.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ click==8.3.1
+ colorama==0.4.6; platform_system == "Windows"
+ fastapi==0.124.0
+ ffmpy==1.0.0
+ filelock==3.20.0
+ fsspec==2025.12.0
+ gradio==6.0.2
+ gradio-client==2.0.1
+ groovy==0.1.2
+ h11==0.16.0
+ hf-xet==1.2.0; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
+ httpcore==1.0.9
+ httpx==0.28.1
+ huggingface-hub==0.36.0
+ idna==3.11
+ jinja2==3.1.6
+ markdown-it-py==4.0.0
+ markupsafe==3.0.3
+ mdurl==0.1.2
+ mpmath==1.3.0
+ networkx==3.6
+ numpy==2.3.5
+ nvidia-cublas-cu12==12.8.4.1; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-cupti-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-runtime-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cudnn-cu12==9.10.2.21; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cufft-cu12==11.3.3.83; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cufile-cu12==1.13.1.3; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-curand-cu12==10.3.9.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusolver-cu12==11.7.3.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparse-cu12==12.5.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparselt-cu12==0.7.1; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nccl-cu12==2.27.5; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvjitlink-cu12==12.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvshmem-cu12==3.3.20; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvtx-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ orjson==3.11.5
+ packaging==25.0
+ pandas==2.3.3
+ pillow==12.0.0
+ psutil==5.9.8
+ pydantic==2.12.4
+ pydantic-core==2.41.5
+ pydub==0.25.1
+ pygments==2.19.2
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ pyyaml==6.0.3
+ regex==2025.11.3
+ requests==2.32.5
+ rich==14.2.0
+ safehttpx==0.1.7
+ safetensors==0.7.0
+ semantic-version==2.10.0
+ setuptools==80.9.0; python_version >= "3.12"
+ shellingham==1.5.4
+ six==1.17.0
+ spaces==0.44.0
+ starlette==0.50.0
+ sympy==1.14.0
+ tokenizers==0.22.1
+ tomlkit==0.13.3
+ torch==2.9.1
+ tqdm==4.67.1
+ transformers==4.57.3
+ triton==3.5.1; platform_system == "Linux" and platform_machine == "x86_64"
+ typer==0.20.0
+ typing-extensions==4.15.0
+ typing-inspection==0.4.2
+ tzdata==2025.2
+ urllib3==2.6.0
+ uvicorn==0.38.0