store examples in named directories
clean up flagging sharing functionality
- app.py +8 -4
- gradio_patches/examples.py +13 -0
- flagging.py → gradio_patches/flagging.py +6 -228
app.py
CHANGED
@@ -36,7 +36,8 @@ from huggingface_hub import login
 from tqdm import tqdm
 
 from extrude import extrude_depth_3d
-from
+from gradio_patches.examples import Examples
+from gradio_patches.flagging import FlagMethod, HuggingFaceDatasetSaver
 from marigold_depth_estimation_lcm import MarigoldDepthConsistencyPipeline
 
 warnings.filterwarnings(
@@ -533,7 +534,7 @@ def run_demo_server(pipe, hf_writer=None):
                "Share", variant="stop", scale=1
            )
 
-
+            Examples(
                fn=process_pipe_image,
                examples=[
                    os.path.join("files", "image", name)
@@ -568,6 +569,7 @@ def run_demo_server(pipe, hf_writer=None):
                inputs=[image_input],
                outputs=[image_output_slider, image_output_files],
                cache_examples=True,
+                directory_name="examples_image",
            )
 
        with gr.Tab("Video"):
@@ -592,7 +594,7 @@ def run_demo_server(pipe, hf_writer=None):
                elem_id="download",
                interactive=False,
            )
-
+            Examples(
                fn=process_pipe_video,
                examples=[
                    os.path.join("files", "video", name)
@@ -605,6 +607,7 @@ def run_demo_server(pipe, hf_writer=None):
                inputs=[video_input],
                outputs=[video_output_video, video_output_files],
                cache_examples=True,
+                directory_name="examples_video",
            )
 
        with gr.Tab("Bas-relief (3D)"):
@@ -729,7 +732,7 @@ def run_demo_server(pipe, hf_writer=None):
                elem_id="download",
                interactive=False,
            )
-
+            Examples(
                fn=process_pipe_bas,
                examples=[
                    [
@@ -795,6 +798,7 @@ def run_demo_server(pipe, hf_writer=None):
                ],
                outputs=[bas_output_viewer, bas_output_files],
                cache_examples=True,
+                directory_name="examples_bas",
            )
 
        ### Image tab
gradio_patches/examples.py
ADDED
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+import gradio
+from gradio.utils import get_cache_folder
+
+
+class Examples(gradio.helpers.Examples):
+    def __init__(self, *args, directory_name=None, **kwargs):
+        super().__init__(*args, **kwargs, _initiated_directly=False)
+        if directory_name is not None:
+            self.cached_folder = get_cache_folder() / directory_name
+            self.cached_file = Path(self.cached_folder) / "log.csv"
+        self.create()
flagging.py → gradio_patches/flagging.py
RENAMED
@@ -1,157 +1,22 @@
 from __future__ import annotations
 
-import
+import datetime
 import json
 import time
 import uuid
-from abc import ABC, abstractmethod
 from collections import OrderedDict
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import
+from typing import Any
 
-import
+import gradio
+import gradio as gr
 import huggingface_hub
+from gradio import FlaggingCallback
 from gradio_client import utils as client_utils
-from gradio_client.documentation import document
-
-import gradio as gr
-from gradio import utils
-
-if TYPE_CHECKING:
-    from gradio.components import Component
-
-
-class FlaggingCallback(ABC):
-    """
-    An abstract class for defining the methods that any FlaggingCallback should have.
-    """
-
-    @abstractmethod
-    def setup(self, components: list[Component], flagging_dir: str):
-        """
-        This method should be overridden and ensure that everything is set up correctly for flag().
-        This method gets called once at the beginning of the Interface.launch() method.
-        Parameters:
-            components: Set of components that will provide flagged data.
-            flagging_dir: A string, typically containing the path to the directory where the flagging file should be stored (provided as an argument to Interface.__init__()).
-        """
-        pass
-
-    @abstractmethod
-    def flag(
-        self,
-        flag_data: list[Any],
-        flag_option: str = "",
-        username: str | None = None,
-    ) -> int:
-        """
-        This method should be overridden by the FlaggingCallback subclass and may contain optional additional arguments.
-        This gets called every time the <flag> button is pressed.
-        Parameters:
-            interface: The Interface object that is being used to launch the flagging interface.
-            flag_data: The data to be flagged.
-            flag_option (optional): In the case that flagging_options are provided, the flag option that is being used.
-            username (optional): The username of the user that is flagging the data, if logged in.
-        Returns:
-            (int) The total number of samples that have been flagged.
-        """
-        pass
-
-
-@document()
-class HuggingFaceDatasetSaver(FlaggingCallback):
-    """
-    A callback that saves each flagged sample (both the input and output data) to a HuggingFace dataset.
-
-    Example:
-        import gradio as gr
-        hf_writer = gr.HuggingFaceDatasetSaver(HF_API_TOKEN, "image-classification-mistakes")
-        def image_classifier(inp):
-            return {'cat': 0.3, 'dog': 0.7}
-        demo = gr.Interface(fn=image_classifier, inputs="image", outputs="label",
-                            allow_flagging="manual", flagging_callback=hf_writer)
-    Guides: using-flagging
-    """
-
-    def __init__(
-        self,
-        hf_token: str,
-        dataset_name: str,
-        private: bool = False,
-        info_filename: str = "dataset_info.json",
-        separate_dirs: bool = False,
-    ):
-        """
-        Parameters:
-            hf_token: The HuggingFace token to use to create (and write the flagged sample to) the HuggingFace dataset (defaults to the registered one).
-            dataset_name: The repo_id of the dataset to save the data to, e.g. "image-classifier-1" or "username/image-classifier-1".
-            private: Whether the dataset should be private (defaults to False).
-            info_filename: The name of the file to save the dataset info (defaults to "dataset_infos.json").
-            separate_dirs: If True, each flagged item will be saved in a separate directory. This makes the flagging more robust to concurrent editing, but may be less convenient to use.
-        """
-        self.hf_token = hf_token
-        self.dataset_id = dataset_name  # TODO: rename parameter (but ensure backward compatibility somehow)
-        self.dataset_private = private
-        self.info_filename = info_filename
-        self.separate_dirs = separate_dirs
-
-    def setup(self, components: list[Component], flagging_dir: str):
-        """
-        Params:
-        flagging_dir (str): local directory where the dataset is cloned,
-        updated, and pushed from.
-        """
-        # Setup dataset on the Hub
-        self.dataset_id = huggingface_hub.create_repo(
-            repo_id=self.dataset_id,
-            token=self.hf_token,
-            private=self.dataset_private,
-            repo_type="dataset",
-            exist_ok=True,
-        ).repo_id
-        path_glob = "**/*.jsonl" if self.separate_dirs else "data.csv"
-        huggingface_hub.metadata_update(
-            repo_id=self.dataset_id,
-            repo_type="dataset",
-            metadata={
-                "configs": [
-                    {
-                        "config_name": "default",
-                        "data_files": [{"split": "train", "path": path_glob}],
-                    }
-                ]
-            },
-            overwrite=True,
-            token=self.hf_token,
-        )
-
-        # Setup flagging dir
-        self.components = components
-        self.dataset_dir = (
-            Path(flagging_dir).absolute() / self.dataset_id.split("/")[-1]
-        )
-        self.dataset_dir.mkdir(parents=True, exist_ok=True)
-        self.infos_file = self.dataset_dir / self.info_filename
 
-        # Download remote files to local
-        remote_files = [self.info_filename]
-        if not self.separate_dirs:
-            # No separate dirs => means all data is in the same CSV file => download it to get its current content
-            remote_files.append("data.csv")
-
-        for filename in remote_files:
-            try:
-                huggingface_hub.hf_hub_download(
-                    repo_id=self.dataset_id,
-                    repo_type="dataset",
-                    filename=filename,
-                    local_dir=self.dataset_dir,
-                    token=self.hf_token,
-                )
-            except huggingface_hub.utils.EntryNotFoundError:
-                pass
 
+class HuggingFaceDatasetSaver(gradio.HuggingFaceDatasetSaver):
     def flag(
         self,
         flag_data: list[Any],
@@ -188,93 +53,6 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
             username=username or "",
         )
 
-    def _flag_in_dir(
-        self,
-        data_file: Path,
-        components_dir: Path,
-        path_in_repo: str | None,
-        flag_data: list[Any],
-        flag_option: str = "",
-        username: str = "",
-    ) -> int:
-        # Deserialize components (write images/audio to files)
-        features, row = self._deserialize_components(
-            components_dir, flag_data, flag_option, username
-        )
-
-        # Write generic info to dataset_infos.json + upload
-        with filelock.FileLock(str(self.infos_file) + ".lock"):
-            if not self.infos_file.exists():
-                self.infos_file.write_text(
-                    json.dumps({"flagged": {"features": features}})
-                )
-
-            huggingface_hub.upload_file(
-                repo_id=self.dataset_id,
-                repo_type="dataset",
-                token=self.hf_token,
-                path_in_repo=self.infos_file.name,
-                path_or_fileobj=self.infos_file,
-            )
-
-        headers = list(features.keys())
-
-        if not self.separate_dirs:
-            with filelock.FileLock(components_dir / ".lock"):
-                sample_nb = self._save_as_csv(data_file, headers=headers, row=row)
-                sample_name = str(sample_nb)
-                huggingface_hub.upload_folder(
-                    repo_id=self.dataset_id,
-                    repo_type="dataset",
-                    commit_message=f"Flagged sample #{sample_name}",
-                    path_in_repo=path_in_repo,
-                    ignore_patterns="*.lock",
-                    folder_path=components_dir,
-                    token=self.hf_token,
-                )
-        else:
-            sample_name = self._save_as_jsonl(data_file, headers=headers, row=row)
-            sample_nb = len(
-                [path for path in self.dataset_dir.iterdir() if path.is_dir()]
-            )
-            huggingface_hub.upload_folder(
-                repo_id=self.dataset_id,
-                repo_type="dataset",
-                commit_message=f"Flagged sample #{sample_name}",
-                path_in_repo=path_in_repo,
-                ignore_patterns="*.lock",
-                folder_path=components_dir,
-                token=self.hf_token,
-            )
-
-        return sample_nb
-
-    @staticmethod
-    def _save_as_csv(data_file: Path, headers: list[str], row: list[Any]) -> int:
-        """Save data as CSV and return the sample name (row number)."""
-        is_new = not data_file.exists()
-
-        with data_file.open("a", newline="", encoding="utf-8") as csvfile:
-            writer = csv.writer(csvfile)
-
-            # Write CSV headers if new file
-            if is_new:
-                writer.writerow(utils.sanitize_list_for_csv(headers))
-
-            # Write CSV row for flagged sample
-            writer.writerow(utils.sanitize_list_for_csv(row))
-
-        with data_file.open(encoding="utf-8") as csvfile:
-            return sum(1 for _ in csv.reader(csvfile)) - 1
-
-    @staticmethod
-    def _save_as_jsonl(data_file: Path, headers: list[str], row: list[Any]) -> str:
-        """Save data as JSONL and return the sample name (uuid)."""
-        Path.mkdir(data_file.parent, parents=True, exist_ok=True)
-        with open(data_file, "w") as f:
-            json.dump(dict(zip(headers, row)), f)
-        return data_file.parent.name
-
     def _deserialize_components(
         self,
         data_dir: Path,
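A hedged sketch of how the trimmed-down saver can be wired to a "Share" button follows. The HF_TOKEN environment variable, dataset repo id, and components here are illustrative assumptions rather than this Space's actual wiring; the setup(components, flagging_dir) and flag(flag_data, flag_option=...) signatures are the ones visible in the hunks above, inherited from gradio.HuggingFaceDatasetSaver.

# Hypothetical wiring sketch; token, dataset id, and components are illustrative.
import os

import gradio as gr

from gradio_patches.flagging import HuggingFaceDatasetSaver

hf_writer = HuggingFaceDatasetSaver(
    hf_token=os.environ["HF_TOKEN"],         # assumed write-access token
    dataset_name="username/shared-samples",  # illustrative dataset repo id
    private=True,
)

with gr.Blocks() as demo:
    image_input = gr.Image(type="filepath")
    image_output = gr.Image(type="filepath")
    share_button = gr.Button("Share", variant="stop")

    # Register the components whose values get uploaded, plus the local
    # staging directory for the dataset.
    hf_writer.setup([image_input, image_output], flagging_dir="flagged")

    def share(input_path, output_path):
        # flag() deserializes the component values and pushes one record
        # (CSV or JSONL, depending on separate_dirs) to the dataset repo.
        hf_writer.flag([input_path, output_path], flag_option="share")

    share_button.click(share, inputs=[image_input, image_output], outputs=[])

demo.launch()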