cdnuts committed
Commit 08a2d0b
1 Parent(s): 683b0c2

Update app.py

Files changed (1)
  1. app.py +168 -27
app.py CHANGED
@@ -1,6 +1,12 @@
 import json
+import os
+import zipfile
+from io import BytesIO
+from tempfile import NamedTemporaryFile
+import tempfile

 import gradio as gr
+import pandas as pd
 from PIL import Image
 import safetensors.torch
 import spaces
@@ -10,9 +16,53 @@ import torch
 from torchvision.transforms import transforms
 from torchvision.transforms import InterpolationMode
 import torchvision.transforms.functional as TF
+from torch.utils.data import Dataset, DataLoader
+from typing import Callable
+from functools import partial
+import spaces.config
+from spaces.zero.decorator import P, R

 torch.set_grad_enabled(False)

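+# ZeroGPU time is granted in fixed-length slots, so _dynGPU pre-registers one
+# spaces.GPU-decorated variant of fn per duration step and, at call time,
+# dispatches to the smallest variant that covers the estimated runtime.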
+def _dynGPU(
+    fn: Callable[P, R] | None, duration: Callable[P, int], min=30, max=300, step=10
+) -> Callable[P, R]:
+    if not spaces.config.Config.zero_gpu:
+        return fn
+
+    funcs = [
+        (t, spaces.GPU(duration=t)(lambda *args, **kwargs: fn(*args, **kwargs)))
+        for t in range(min, max + 1, step)
+    ]
+
+    def wrapper(*args, **kwargs):
+        requirement = duration(*args, **kwargs)
+
+        # find the function that satisfies the duration requirement
+        for t, func in funcs:
+            if t >= requirement:
+                gr.Info(f"Acquiring ZeroGPU for {t} seconds")
+                return func(*args, **kwargs)
+
+        # if no function is found, return the last one
+        gr.Info(f"Acquiring ZeroGPU for {funcs[-1][0]} seconds")
+        return funcs[-1][1](*args, **kwargs)
+
+    return wrapper
+
+
+def dynGPU(
+    fn: Callable[P, R] | None = None,
+    duration: Callable[P, int] = lambda: 60,
+    min=30,
+    max=300,
+    step=10,
+) -> Callable[P, R]:
+    if fn is None:
+        return partial(_dynGPU, duration=duration, min=min, max=max, step=step)
+    return _dynGPU(fn, duration, min, max, step)
+
+
 class Fit(torch.nn.Module):
     def __init__(
         self,
@@ -138,6 +188,8 @@ class GatedHead(torch.nn.Module):
 model.head = GatedHead(min(model.head.weight.shape), 9083)

 safetensors.torch.load_model(model, "JTP_PILOT2-2-e3-vit_so400m_patch14_siglip_384.safetensors")
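+# keep the model on the accelerator when one is present; input tensors are
+# moved to the same device before inference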
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 model.eval()

 with open("tagger_tags.json", "r") as file:
@@ -149,11 +201,11 @@ for idx, tag in enumerate(allowed_tags):

 sorted_tag_score = {}

-@spaces.GPU(duration=5)
+@spaces.GPU(duration=6)
 def run_classifier(image, threshold):
     global sorted_tag_score
     img = image.convert('RGBA')
-    tensor = transform(img).unsqueeze(0)
+    tensor = transform(img).unsqueeze(0).to(device)

     with torch.no_grad():
         probits = model(tensor)[0]
@@ -177,6 +229,83 @@ def clear_image():
     sorted_tag_score = {}
     return "", {}

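+# map-style dataset over the extracted files: yields (transformed tensor, basename)
+# pairs so each batch row can be matched back to its source image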
+class ImageDataset(Dataset):
+    def __init__(self, image_files, transform):
+        self.image_files = image_files
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.image_files)
+
+    def __getitem__(self, idx):
+        img_path = self.image_files[idx]
+        img = Image.open(img_path).convert('RGB')
+        return self.transform(img), os.path.basename(img_path)
+
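+# rough ZeroGPU budget for a batch job: about 9 s per batch of 64 images,
+# plus 3 s of setup headroom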
+from math import ceil
+
+def measure_duration(images, threshold) -> int:
+    return ceil(len(images) / 64) * 9 + 3
+
+@dynGPU(duration=measure_duration)
+def process_images(images, threshold):
+    dataset = ImageDataset(images, transform)
+
+    dataloader = DataLoader(dataset, batch_size=64, num_workers=0, pin_memory=True, drop_last=False)
+
+    all_results = []
+
+    with torch.no_grad():
+        for batch, filenames in dataloader:
+            batch = batch.to(device)
+            with torch.no_grad():
+                logits = model(batch)
+                probabilities = torch.nn.functional.sigmoid(logits)
+
+            for i, prob in enumerate(probabilities):
+                indices = torch.where(prob > threshold)[0]
+                values = prob[indices]
+
+                temp = []
+                tag_score = dict()
+                for j in range(indices.size(0)):
+                    temp.append([allowed_tags[indices[j]], values[j].item()])
+                    tag_score[allowed_tags[indices[j]]] = values[j].item()
+
+                tags = ", ".join([t[0] for t in temp])
+                all_results.append((filenames[i], tags, tag_score))
+
+    return all_results
+
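+# cheap integrity check: Image.verify() parses the file headers without fully
+# decoding the pixel data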
+def is_valid_image(file_path):
+    try:
+        with Image.open(file_path) as img:
+            img.verify()
+        return True
+    except Exception:
+        return False
+
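+# batch pipeline: unzip the upload, keep only valid images, tag them in batches,
+# and return a zip of per-image .txt tag files plus a summary dataframe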
+def process_zip(zip_file, threshold):
+    if zip_file is None:
+        return None, None
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        all_files = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir)]
+        image_files = [f for f in all_files if is_valid_image(f)]
+        results = process_images(image_files, threshold)
+
+        temp_file = NamedTemporaryFile(delete=False, suffix=".zip")
+        with zipfile.ZipFile(temp_file, "w") as zip_ref:
+            for image_name, text_no_impl, _ in results:
+                with zip_ref.open(os.path.splitext(image_name)[0] + ".txt", 'w') as file:
+                    file.write(text_no_impl.encode())
+        temp_file.seek(0)
+        df = pd.DataFrame([(os.path.basename(f), t) for f, t, _ in results], columns=['Image', 'Tags'])
+
+        return temp_file.name, df
+
 with gr.Blocks(css=".output-class { display: none; }") as demo:
     gr.Markdown("""
     ## Joint Tagger Project: JTP-PILOT² Demo **BETA**
@@ -186,31 +315,43 @@ with gr.Blocks(css=".output-class { display: none; }") as demo:

     Special thanks to Minotoro at frosting.ai for providing the compute power for this project.
     """)
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
-            threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Threshold")
-        with gr.Column():
-            tag_string = gr.Textbox(label="Tag String")
-            label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
-
-    image_input.upload(
-        fn=run_classifier,
-        inputs=[image_input, threshold_slider],
-        outputs=[tag_string, label_box]
-    )
-
-    image_input.clear(
-        fn=clear_image,
-        inputs=[],
-        outputs=[tag_string, label_box]
-    )
-
-    threshold_slider.input(
-        fn=create_tags,
-        inputs=[threshold_slider],
-        outputs=[tag_string, label_box]
-    )
+    with gr.Tabs():
+        with gr.TabItem("Single Image"):
+            with gr.Row():
+                with gr.Column():
+                    image_input = gr.Image(label="Source", sources=['upload'], type='pil', height=512, show_label=False)
+                    threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Threshold")
+                with gr.Column():
+                    tag_string = gr.Textbox(label="Tag String")
+                    label_box = gr.Label(label="Tag Predictions", num_top_classes=250, show_label=False)
+
+            image_input.upload(
+                fn=run_classifier,
+                inputs=[image_input, threshold_slider],
+                outputs=[tag_string, label_box]
+            )
+
+            threshold_slider.input(
+                fn=create_tags,
+                inputs=[threshold_slider],
+                outputs=[tag_string, label_box]
+            )
+
+        with gr.TabItem("Multiple Images"):
+            with gr.Row():
+                with gr.Column():
+                    zip_input = gr.File(label="Upload ZIP file", file_types=['.zip'])
+                    multi_threshold_slider = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.20, label="Threshold")
+                    process_button = gr.Button("Process Images")
+                with gr.Column():
+                    zip_output = gr.File(label="Download Tagged Text Files (ZIP)")
+                    dataframe_output = gr.Dataframe(label="Image Tags Summary")
+
+            process_button.click(
+                fn=process_zip,
+                inputs=[zip_input, multi_threshold_slider],
+                outputs=[zip_output, dataframe_output]
+            )

 if __name__ == "__main__":
     demo.launch()