kevinconka committed
Commit 955daea · Parent(s): c54f19a

save flagged data to HF dataset

Files changed (3):
  1. app.py +19 -19
  2. flagging.py +77 -0
  3. utils.py +36 -6
app.py CHANGED
@@ -1,13 +1,9 @@
 import gradio as gr
-from utils import load_model, load_image_from_url, inference
+from huggingface_hub import get_token
+from utils import load_model, load_image_from_url, inference, load_badges
+from flagging import myHuggingFaceDatasetSaver
 
 
-BADGES = """
-<p align="right">
-  <img alt="Static Badge" src="https://img.shields.io/badge/SEA.AI-beta-blue">
-</p>
-"""
-
 TITLE = """
 <h1> RGB Detection Demo </h1>
 <p align="center">
@@ -19,7 +15,8 @@ Give it a try! Upload an image or enter a URL to an image and click
 NOTICE = """
 See something off? Your feedback makes a difference! Let us know by
 flagging any outcomes that don't seem right. Just click on `Flag`
-to submit the image for review.
+to submit the image for review. Note that by clicking `Flag`, you
+agree to the use of your image for A.I. improvement purposes.
 """
 
 css = """
@@ -36,11 +33,12 @@ model.max_det = 100
 model.agnostic = True  # NMS class-agnostic
 
 # This callback will be used to flag images
-callback = gr.CSVLogger()
+dataset_name = "SEA-AI/crowdsourced-rgb-images"
+hf_writer = myHuggingFaceDatasetSaver(get_token(), dataset_name)
 
 with gr.Blocks(css=css) as demo:
-    gr.Markdown(value=BADGES)
-    gr.Markdown(value=TITLE)
+    badges = gr.HTML(load_badges(dataset_name, trials=1))
+    title = gr.HTML(TITLE)
 
     with gr.Row():
         with gr.Column():
@@ -80,6 +78,7 @@ with gr.Blocks(css=css) as demo:
     img_url.change(load_image_from_url, [img_url], img_input)
     submit.click(lambda image: inference(model, image), [img_input], img_output)
 
+    # event listeners with decorators
     @img_output.change(inputs=[img_output], outputs=[flag, notice])
     def show_hide(img_output):
         visible = img_output is not None
@@ -89,15 +88,16 @@ with gr.Blocks(css=css) as demo:
         }
 
     # This needs to be called prior to the first call to callback.flag()
-    callback.setup([img_input, img_url, img_output], "flagged")
+    hf_writer.setup([img_input], "flagged")
+    img_input.flag
 
    # We can choose which components to flag (in this case, we'll flag all)
-    flag.click(
-        lambda *args: callback.flag(args),
-        [img_input, img_url, img_output],
-        None,
+    flag.click(lambda: gr.Info("Thank you for contributing!")).then(
+        lambda *args: hf_writer.flag(args),
+        [img_input, flag],
+        [],
         preprocess=False,
-    ).then(lambda: gr.Info("Thank you for contributing!"))
-
+    ).then(lambda: load_badges(dataset_name), [], badges)
 
-demo.queue().launch()
+if __name__ == "__main__":
+    demo.queue().launch()
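For context on the wiring above: Gradio flagging callbacks follow a two-step contract. `setup()` registers the components to record (and the local staging directory) once; each `Flag` click then forwards the raw, un-preprocessed payloads to `flag()`, which is why the listener sets `preprocess=False`. Below is a minimal standalone sketch of that pattern, using the built-in `gr.CSVLogger` as a stand-in for `myHuggingFaceDatasetSaver`; component names are illustrative.

```python
import gradio as gr

# Stand-in for myHuggingFaceDatasetSaver; both implement the same
# FlaggingCallback interface (setup once, flag per click).
callback = gr.CSVLogger()

with gr.Blocks() as sketch:
    img = gr.Image()
    flag_btn = gr.Button("Flag")

    # Must run before the first call to callback.flag()
    callback.setup([img], flagging_dir="flagged")

    flag_btn.click(
        lambda *args: callback.flag(args),  # payloads arrive unprocessed
        [img],
        None,
        preprocess=False,
    )

if __name__ == "__main__":
    sketch.launch()
```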
flagging.py ADDED
@@ -0,0 +1,77 @@
+import json
+from collections import OrderedDict
+from pathlib import Path
+from typing import Any
+import gradio as gr
+from gradio.flagging import HuggingFaceDatasetSaver, client_utils
+import huggingface_hub
+
+class myHuggingFaceDatasetSaver(HuggingFaceDatasetSaver):
+    """
+    Custom HuggingFaceDatasetSaver to save images/audio to disk.
+    Gradio's implementation seems to have a bug.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def _deserialize_components(
+        self,
+        data_dir: Path,
+        flag_data: list[Any],
+        flag_option: str = "",
+        username: str = "",
+    ) -> tuple[dict[Any, Any], list[Any]]:
+        """Deserialize components and return the corresponding row for the flagged sample.
+
+        Images/audio are saved to disk as individual files.
+        """
+        # Components that can have a preview on dataset repos
+        file_preview_types = {gr.Audio: "Audio", gr.Image: "Image"}
+
+        # Generate the row corresponding to the flagged sample
+        features = OrderedDict()
+        row = []
+        for component, sample in zip(self.components, flag_data):
+            # Get deserialized object (will save sample to disk if applicable -file, audio, image,...-)
+            label = component.label or ""
+            save_dir = data_dir / client_utils.strip_invalid_filename_characters(label)
+            save_dir.mkdir(exist_ok=True, parents=True)
+            deserialized = component.flag(sample, save_dir)
+            if isinstance(component, gr.Image) and isinstance(sample, dict):
+                deserialized = json.loads(deserialized)['path']  # dirty hack
+
+            # Add deserialized object to row
+            features[label] = {"dtype": "string", "_type": "Value"}
+            try:
+                assert Path(deserialized).exists()
+                row.append(str(Path(deserialized).relative_to(self.dataset_dir)))
+            except (AssertionError, TypeError, ValueError):
+                deserialized = "" if deserialized is None else str(deserialized)
+                row.append(deserialized)
+
+            # If component is eligible for a preview, add the URL of the file
+            # Be mindful that images and audio can be None
+            if isinstance(component, tuple(file_preview_types)):  # type: ignore
+                for _component, _type in file_preview_types.items():
+                    if isinstance(component, _component):
+                        features[label + " file"] = {"_type": _type}
+                        break
+                if deserialized:
+                    path_in_repo = str(  # returned filepath is absolute, we want it relative to compute URL
+                        Path(deserialized).relative_to(self.dataset_dir)
+                    ).replace("\\", "/")
+                    row.append(
+                        huggingface_hub.hf_hub_url(
+                            repo_id=self.dataset_id,
+                            filename=path_in_repo,
+                            repo_type="dataset",
+                        )
+                    )
+                else:
+                    row.append("")
+        features["flag"] = {"dtype": "string", "_type": "Value"}
+        features["username"] = {"dtype": "string", "_type": "Value"}
+        row.append(flag_option)
+        row.append(username)
+        return features, row
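The override diverges from Gradio's stock `HuggingFaceDatasetSaver` only in the `gr.Image` branch: `component.flag()` can hand back a JSON-encoded file descriptor instead of a bare path, which would make the `Path(deserialized).exists()` check fail and the image land in the dataset as a string blob. A small illustration of what the `json.loads(deserialized)['path']` hack unwraps; the payload shape is inferred from this code, not from a documented API.

```python
import json
from pathlib import Path

# Inferred shape of what gr.Image.flag() returns for a dict-valued sample:
# a JSON string describing the saved file rather than the path itself.
deserialized = '{"path": "flagged/image/abc123/sample.png", "url": null}'

path = json.loads(deserialized)["path"]  # unwrap to a plain path string
print(Path(path).name)  # -> sample.png
```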
utils.py CHANGED
@@ -1,4 +1,3 @@
-import os
 import requests
 from io import BytesIO
 import numpy as np
@@ -6,14 +5,12 @@ from PIL import Image
 import yolov5
 from yolov5.utils.plots import Annotator, colors
 import gradio as gr
+from huggingface_hub import get_token
+import time
 
 
 def load_model(model_path, img_size=640):
-    HF_TOKEN = os.getenv("HF_TOKEN")
-    if HF_TOKEN is not None:  # assume SECRET variable is set
-        model = yolov5.load(model_path, hf_token=HF_TOKEN)
-    else:
-        model = yolov5.load(model_path)
+    model = yolov5.load(model_path, hf_token=get_token())
     model.img_size = img_size  # add img_size attribute
     return model
 
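`load_model` now delegates token discovery to `huggingface_hub.get_token()`, which resolves the `HF_TOKEN` environment variable or the token cached by `huggingface-cli login` and returns `None` when neither is present, so the old two-branch lookup collapses into a single call. A quick sanity check; the model id is hypothetical, and `hf_token=None` is assumed to behave like the deleted no-token branch.

```python
from huggingface_hub import get_token

token = get_token()  # HF_TOKEN env var, else cached login token, else None
print("token found" if token else "no token: private checkpoints won't load")
# model = yolov5.load("org/model-id", hf_token=token)  # hypothetical model id
```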
 
 
@@ -37,3 +34,36 @@ def inference(model, image):
         # print(f'{cls} {conf:.2f} {box}')
         annotator.box_label(box, "", color=colors(cls, True))
     return annotator.im
+
+
+def count_flagged_images(dataset_name, trials=10):
+    headers = {"Authorization": f"Bearer {get_token()}"}
+    API_URL = f"https://datasets-server.huggingface.co/size?dataset={dataset_name}"
+
+    def query():
+        response = requests.get(API_URL, headers=headers, timeout=5)
+        return response.json()
+
+    for i in range(trials):
+        try:
+            data = query()
+            if "error" not in data and data["size"]["dataset"]["num_rows"] > 0:
+                print(f"[{i+1}/{trials}] {data}")
+                return data["size"]["dataset"]["num_rows"]
+        except Exception:
+            pass
+        print(f"[{i+1}/{trials}] {data}")
+        time.sleep(5)
+
+    return 0
+
+
+def load_badges(dataset_name, trials=10):
+    n = count_flagged_images(dataset_name, trials)
+    return f"""
+    <p style="display: flex">
+        <img alt="" src="https://img.shields.io/badge/SEA.AI-beta-blue">
+        &nbsp;
+        <img alt="" src="https://img.shields.io/badge/%F0%9F%96%BC%EF%B8%8F-{n}-green">
+    </p>
+    """