Metal3d committed
Commit 0b97d29 · 1 Parent(s): c4c02ca

Make the project adapted to Huggingface

Files changed (6):
  1. README.md +64 -0
  2. app.py +3 -1
  3. labelizer/__init__.py +17 -21
  4. pdm.lock +37 -1
  5. pyproject.toml +1 -1
  6. requirements.txt +84 -0
README.md ADDED
@@ -0,0 +1,64 @@
+ ---
+ title: Labelizer - AI Image Labeling Tool
+ emoji: 🖼️
+ colorFrom: blue
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 6.0.2
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: metal3d/labelm2
+ ---
+
+ # 🖼️ Labelizer - AI Image Labeling Tool
+
+ An intelligent image labeling tool that uses the Florence-2 vision-language model to automatically generate detailed descriptions for your images. Perfect for creating labeled datasets for machine learning projects.
+
+ ## ✨ Features
+
+ - 🤖 **AI-Powered Labeling**: Uses the advanced Florence-2 model for accurate image descriptions
+ - 📊 **Batch Processing**: Label multiple images at once with progress tracking
+ - ✏️ **Manual Editing**: Edit generated labels to fit your specific needs
+ - 📦 **Flexible Export**: Download datasets with an organized folder structure or a flat format
+ - 🎨 **User-Friendly Interface**: Clean, intuitive Gradio interface with emoji-enhanced navigation
+
+ ## 🚀 How to Use
+
+ 1. **Upload Images**: Click "📁 Upload images" to select multiple image files
+ 2. **Generate Labels**:
+    - Click "✨ Generate label" below individual images
+    - Or click "🏷️ Labelize all images" for batch processing
+ 3. **Review & Edit**: Modify any generated labels as needed
+ 4. **Download**: Create and download your labeled dataset as a ZIP file
+
+ ## 🛠️ Technical Details
+
+ - **Model**: Florence-2-large-hf for vision-language understanding
+ - **Framework**: Gradio with ZeroGPU support
+ - **Supported Formats**: JPG, PNG, GIF, BMP, TIFF, WebP
+ - **Export Options**: Organized folders (images/ + labels/) or flat structure
+
+ ## 📋 Supported Tasks
+
+ The tool supports various captioning tasks:
+ - `<MORE_DETAILED_CAPTION>`: Comprehensive image descriptions
+ - `<DETAILED_CAPTION>`: Detailed but concise descriptions
+ - `<CAPTION>`: Basic image captions
+
+ ## 🎯 Use Cases
+
+ - **Machine Learning**: Create labeled datasets for computer vision tasks
+ - **Content Management**: Organize image collections with descriptions
+ - **Accessibility**: Generate alt-text for images
+ - **Research**: Prepare datasets for academic projects
+
+ ## ⚡ Performance
+
+ - Optimized for GPU acceleration with ZeroGPU
+ - Efficient batch processing for large datasets
+ - Lazy loading to minimize resource usage
+
+ ---
+
+ Built with ❤️ using [Gradio](https://gradio.app/) and [Florence-2](https://huggingface.co/microsoft/Florence-2-large)
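The task prompts listed in the README's "Supported Tasks" section are consumed by `get_task_response` in `labelizer/__init__.py` (see the diff further down in this commit). As a hedged illustration, here is a minimal local usage sketch; it assumes the repository root is on the import path and that an `example.jpg` file exists, and it makes no assumption about the shape of the returned value beyond printing it:

```python
# Minimal sketch: feeding the README's task prompts to the labelizer helper.
# Assumptions: run from the repository root; "example.jpg" is any local image.
from PIL import Image

from labelizer import get_task_response

image = Image.open("example.jpg").convert("RGB")

# Any prompt from the "Supported Tasks" section can be passed as the task.
for task in ("<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>"):
    result = get_task_response(task, image)
    print(task, "->", result)
```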
app.py CHANGED
@@ -1,5 +1,5 @@
- from click import progressbar
  import gradio as gr
+ import spaces
  from PIL import Image
  
  from dataset import ImageDataset
@@ -8,6 +8,7 @@ from packager import create_dataset_zip
  from documentation import DOC_CONTENT
  
  
+ @spaces.GPU
  def auto_label(
      image: Image.Image, imid: int, dataset: ImageDataset
  ) -> tuple[str, ImageDataset]:
@@ -34,6 +35,7 @@ def uploaded(files: list, current_dataset: ImageDataset | None) -> ImageDataset:
      return current_dataset.add_images(files)
  
  
+ @spaces.GPU
  def labelize_all_images(
      dataset: ImageDataset, label: str, progress=gr.Progress(True)
  ) -> tuple[ImageDataset, str]:
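The only functional change in app.py is the ZeroGPU integration: `spaces` is imported and the two inference entry points are decorated with `@spaces.GPU`, so a GPU is attached only while those functions run. A minimal standalone sketch of the same pattern follows; the function and variable names below are illustrative, not taken from app.py:

```python
# Sketch of the ZeroGPU pattern: only the decorated function gets GPU time.
import gradio as gr
import spaces


@spaces.GPU
def describe(image):
    # GPU-backed model inference would happen inside the decorated function.
    return "a generated description"


demo = gr.Interface(fn=describe, inputs=gr.Image(type="pil"), outputs="text")

if __name__ == "__main__":
    demo.launch()
```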
labelizer/__init__.py CHANGED
@@ -1,34 +1,28 @@
  import torch
  from PIL import Image
- from transformers import Florence2ForConditionalGeneration, Florence2Processor
+ from transformers import AutoProcessor, AutoModelForCausalLM
  
  MODEL_ID = "ducviet00/Florence-2-large-hf"
  
  # Global variables for lazy loading
  _model = None
  _processor = None
- _device = None
- _torch_dtype = None
  
  
  def _load_model():
      """Load model and processor lazily"""
-     global _model, _processor, _device, _torch_dtype
+     global _model, _processor
  
      if _model is None:
-         _device = "cuda:0" if torch.cuda.is_available() else "cpu"
-         _torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
-         print(f"Loading model {MODEL_ID} on {_device} with dtype {_torch_dtype}...")
-         _model = Florence2ForConditionalGeneration.from_pretrained(
-             MODEL_ID, torch_dtype=_torch_dtype, trust_remote_code=True
-         ).to(_device)  # type: ignore
-         _processor = Florence2Processor.from_pretrained(
-             MODEL_ID, trust_remote_code=True
-         )
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+         print(f"Loading model {MODEL_ID} on {device}...")
+         _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
+         _processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
          print("Model loaded successfully!")
  
-     return _model, _processor, _device, _torch_dtype
+     return _model, _processor
  
  
  def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
@@ -41,7 +35,7 @@ def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
  
      """
      # Lazy load model only when needed
-     model, processor, device, torch_dtype = _load_model()
+     model, processor = _load_model()
      if text_input is None:
          prompt = task_prompt
      else:
@@ -53,11 +47,13 @@ def get_task_response(task_prompt: str, image: Image.Image, text_input=None):
  
      if processor is None:
          raise ValueError("processor is None")
-     inputs = processor(
-         text=prompt,
-         images=image,
-         return_tensors="pt",  # type: ignore
-     ).to(device, torch_dtype)
+
+     # Process inputs using the correct API
+     inputs = processor(text=prompt, images=image, return_tensors="pt")
+
+     # Move inputs to device if model is on CUDA
+     device = next(model.parameters()).device
+     inputs = {k: v.to(device) for k, v in inputs.items()}
  
      generated_ids = model.generate(
          input_ids=inputs["input_ids"],
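The loader now goes through `AutoModelForCausalLM`/`AutoProcessor` with `trust_remote_code=True` and, instead of tracking a global device, moves the processor outputs to wherever the model's parameters happen to live. Below is a hedged, self-contained sketch of that inference path; the `pixel_values` argument to `generate`, the `max_new_tokens` value, and the `batch_decode` step are assumptions for illustration (the hunk above cuts off after `input_ids`), and the model is left on its default device exactly as the new code does:

```python
# Standalone sketch of the loading/inference path introduced by this commit.
# Assumptions: generate() kwargs beyond input_ids and the decoding step are
# illustrative; "example.jpg" is any local image file.
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

MODEL_ID = "ducviet00/Florence-2-large-hf"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

image = Image.open("example.jpg").convert("RGB")
inputs = processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt")

# Follow the model wherever from_pretrained placed it, as the new code does.
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=256,
    )

print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```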
pdm.lock CHANGED
@@ -5,7 +5,7 @@
  groups = ["default"]
  strategy = ["inherit_metadata"]
  lock_version = "4.5.0"
- content_hash = "sha256:ca1f4cf6df24c991a826baf1788242e48e5c757fc53700048e3bea57b9608ca0"
+ content_hash = "sha256:e71cb6d555d7581ecbf7bd5fc66a31343a04b3c46215175380aae49b346ae57d"
  
  [[metadata.targets]]
  requires_python = "==3.12.*"
@@ -747,6 +747,22 @@ files = [
      {file = "pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353"},
  ]
  
+ [[package]]
+ name = "psutil"
+ version = "5.9.8"
+ requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+ summary = "Cross-platform lib for process and system monitoring in Python."
+ groups = ["default"]
+ files = [
+     {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"},
+     {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"},
+     {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"},
+     {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"},
+     {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"},
+     {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"},
+     {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"},
+ ]
+ 
  [[package]]
  name = "pydantic"
  version = "2.12.4"
@@ -1006,6 +1022,26 @@ files = [
      {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
  ]
  
+ [[package]]
+ name = "spaces"
+ version = "0.44.0"
+ requires_python = "<3.14,>=3.10"
+ summary = "Utilities for Hugging Face Spaces"
+ groups = ["default"]
+ dependencies = [
+     "gradio",
+     "httpx>=0.20",
+     "packaging",
+     "psutil<6,>=2",
+     "pydantic<3,>=1",
+     "requests<3.0,>=2.19",
+     "typing-extensions<5,>=4",
+ ]
+ files = [
+     {file = "spaces-0.44.0-py3-none-any.whl", hash = "sha256:7992b57883fff65f2ead2daa52b70b51a786631e5c83cd4586198b9e75cc97b1"},
+     {file = "spaces-0.44.0.tar.gz", hash = "sha256:22603e5eb45c8aa51ef7431530a0721052c80a2331249f84b781a7f1dbb1343b"},
+ ]
+ 
  [[package]]
  name = "starlette"
  version = "0.50.0"
pyproject.toml CHANGED
@@ -5,7 +5,7 @@ description = "Default template for PDM package"
  authors = [
      {name = "Patrice Ferlet", email = "metal3d@gmail.com"},
  ]
- dependencies = ["gradio>=6.0.2", "torch>=2.9.1", "transformers>=4.57.3", "pillow>=12.0.0"]
+ dependencies = ["gradio>=6.0.2", "torch>=2.9.1", "transformers>=4.57.3", "pillow>=12.0.0", "spaces>=0.44.0"]
  requires-python = "==3.12.*"
  readme = "README.md"
  license = {text = "MIT"}
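The new `spaces>=0.44.0` dependency is what pulls the `spaces` and `psutil` entries into pdm.lock and into the pinned list in requirements.txt below (that file is marked "@generated by PDM"). A rough sketch of the PDM workflow that would reproduce these three files; the exact export flags depend on the PDM version:

```bash
# Hedged sketch of the PDM commands behind this commit (flags may vary by version).
pdm add "spaces>=0.44.0"        # updates pyproject.toml and pdm.lock
pdm export -o requirements.txt  # regenerates the pinned requirements.txt shown below
```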
requirements.txt ADDED
@@ -0,0 +1,84 @@
+ # This file is @generated by PDM.
+ # Please do not edit it manually.
+
+ aiofiles==24.1.0
+ annotated-doc==0.0.4
+ annotated-types==0.7.0
+ anyio==4.12.0
+ brotli==1.2.0
+ certifi==2025.11.12
+ charset-normalizer==3.4.4
+ click==8.3.1
+ colorama==0.4.6; platform_system == "Windows"
+ fastapi==0.124.0
+ ffmpy==1.0.0
+ filelock==3.20.0
+ fsspec==2025.12.0
+ gradio==6.0.2
+ gradio-client==2.0.1
+ groovy==0.1.2
+ h11==0.16.0
+ hf-xet==1.2.0; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
+ httpcore==1.0.9
+ httpx==0.28.1
+ huggingface-hub==0.36.0
+ idna==3.11
+ jinja2==3.1.6
+ markdown-it-py==4.0.0
+ markupsafe==3.0.3
+ mdurl==0.1.2
+ mpmath==1.3.0
+ networkx==3.6
+ numpy==2.3.5
+ nvidia-cublas-cu12==12.8.4.1; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-cupti-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-runtime-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cudnn-cu12==9.10.2.21; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cufft-cu12==11.3.3.83; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cufile-cu12==1.13.1.3; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-curand-cu12==10.3.9.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusolver-cu12==11.7.3.90; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparse-cu12==12.5.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparselt-cu12==0.7.1; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nccl-cu12==2.27.5; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvjitlink-cu12==12.8.93; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvshmem-cu12==3.3.20; platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvtx-cu12==12.8.90; platform_system == "Linux" and platform_machine == "x86_64"
+ orjson==3.11.5
+ packaging==25.0
+ pandas==2.3.3
+ pillow==12.0.0
+ psutil==5.9.8
+ pydantic==2.12.4
+ pydantic-core==2.41.5
+ pydub==0.25.1
+ pygments==2.19.2
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ pyyaml==6.0.3
+ regex==2025.11.3
+ requests==2.32.5
+ rich==14.2.0
+ safehttpx==0.1.7
+ safetensors==0.7.0
+ semantic-version==2.10.0
+ setuptools==80.9.0; python_version >= "3.12"
+ shellingham==1.5.4
+ six==1.17.0
+ spaces==0.44.0
+ starlette==0.50.0
+ sympy==1.14.0
+ tokenizers==0.22.1
+ tomlkit==0.13.3
+ torch==2.9.1
+ tqdm==4.67.1
+ transformers==4.57.3
+ triton==3.5.1; platform_system == "Linux" and platform_machine == "x86_64"
+ typer==0.20.0
+ typing-extensions==4.15.0
+ typing-inspection==0.4.2
+ tzdata==2025.2
+ urllib3==2.6.0
+ uvicorn==0.38.0