SchwarzXia committed
Commit 56a3a83 (parent: 87d2c55)

Support for benchmarking of diffusion models (#31)

deployment/benchmark.Dockerfile CHANGED
@@ -37,6 +37,7 @@ RUN cd /root/.local/miniconda3/lib/python3.9/site-packages \
 
 # Where all the weights downloaded from Hugging Face Hub will go to
 ENV TRANSFORMERS_CACHE=/data/leaderboard/hfcache
+ENV HF_HOME=/data/leaderboard/hfcache
 
 # So that docker exec container python scripts/benchmark.py will work
 WORKDIR /workspace/leaderboard
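Both cache variables point at /data/leaderboard/hfcache inside the container, so that directory is expected to sit on persistent storage. A hedged sketch of building the image and starting a per-GPU container (the image tag and host path are assumptions, not part of this commit; the container name follows the docker exec leaderboard{{ gpu }} pattern used in the Pegasus file below):

    docker build -t leaderboard-benchmark -f deployment/benchmark.Dockerfile .
    docker run -dit --gpus all --name leaderboard0 \
        -v /data/leaderboard:/data/leaderboard \
        leaderboard-benchmark

With the volume mounted, weights downloaded into TRANSFORMERS_CACHE / HF_HOME survive container restarts and are reused across benchmark runs.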
pegasus/benchmark_diffusion.yaml ADDED
@@ -0,0 +1,22 @@
+# This YAML dictionary will expand into 8 (models) x 6 (batch sizes) = 48 job commands,
+# where {{ model }} and {{ batch_size }} are filled in with all possible combinations.
+# {{ gpu }} is defined in `hosts.yaml`, and will be filled in when Pegasus
+# determines the specific node and gpu the generated job command will run on.
+- command:
+    - docker exec leaderboard{{ gpu }} python scripts/diffusion/benchmark.py --model {{ model }} --batch_size {{ batch_size }} --warmup
+  model:
+    - runwayml/stable-diffusion-v1-5
+    - stabilityai/stable-diffusion-xl-base-1.0
+    - stabilityai/stable-diffusion-2-1
+    - prompthero/openjourney
+    - kakaobrain/karlo-v1-alpha
+    - kandinsky-community/kandinsky-2-2-decoder
+    - CompVis/ldm-text2im-large-256
+    - SimianLuo/LCM_Dreamshaper_v7
+  batch_size:
+    - 1
+    - 2
+    - 4
+    - 8
+    - 16
+    - 32
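For reference, one of the 48 expanded job commands would look like the line below, assuming `hosts.yaml` assigns the job to GPU 0 (the model and batch size shown are just one of the combinations):

    docker exec leaderboard0 python scripts/diffusion/benchmark.py --model runwayml/stable-diffusion-v1-5 --batch_size 4 --warmup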
scripts/diffusion/benchmark.py ADDED
@@ -0,0 +1,157 @@
+import gc
+import time
+
+from diffusers import AutoPipelineForText2Image, DiffusionPipeline
+import numpy as np
+from PIL import Image
+import torch
+from transformers.trainer_utils import set_seed
+import tyro
+from zeus.monitor import ZeusMonitor
+
+from utils import get_logger, CsvHandler
+from metrics import load_prompts, calculate_clip_score
+
+# default parameters
+DEVICE = "cuda:0"
+WEIGHT_DTYPE = torch.float16
+SEED = 0
+OUTPUT_FILE = "results.csv"
+OUTPUT_IMAGES = "images/"
+
+
+def get_pipeline(model, device=DEVICE, weight_dtype=WEIGHT_DTYPE):
+    try:
+        return AutoPipelineForText2Image.from_pretrained(
+            model, torch_dtype=weight_dtype, safety_checker=None
+        ).to(device)
+    except ValueError:
+        return DiffusionPipeline.from_pretrained(
+            model, torch_dtype=weight_dtype, safety_checker=None
+        ).to(device)
+
+
+def gpu_warmup(pipeline):
+    """Warm up the GPU by running the given pipeline for 10 secs."""
+    logger = get_logger()
+    logger.info("Warming up GPU")
+    generator = torch.manual_seed(2)
+    timeout_start = time.time()
+    prompts, _ = load_prompts(1, 1)
+    while time.time() < timeout_start + 10:
+        _ = pipeline(
+            prompts, num_images_per_prompt=10, generator=generator, output_type="numpy"
+        ).images
+    logger.info("Finished warming up GPU")
+
+
+def benchmark(
+    model: str,
+    benchmark_size: int = 0,
+    batch_size: int = 1,
+    result_file: str = OUTPUT_FILE,
+    images_path: str = OUTPUT_IMAGES,
+    device: str = DEVICE,
+    seed: int = SEED,
+    weight_dtype: torch.dtype = WEIGHT_DTYPE,
+    write_header: bool = False,
+    warmup: bool = False,
+    settings: dict = {},
+) -> None:
+    """Benchmarks the given model with a set of parameters.
+
+    Args:
+        model: The name of the model to benchmark, as shown on HuggingFace.
+        benchmark_size: The number of prompts to benchmark on. If 0, benchmarks
+            the entire parti-prompts dataset.
+        batch_size: The size of each batch of prompts. When benchmarking, the
+            prompts are split into batches of this size, and prompts are fed into
+            the model in batches.
+        result_file: The path to the output csv file.
+        images_path: The path to the output images directory.
+        device: The device to run the benchmark on.
+        seed: The seed to use for the RNG.
+        weight_dtype: The weight dtype to use for the model.
+        write_header: Whether to write the header row to the output csv file;
+            recommended to be True for the first run.
+        warmup: Whether to warm up the GPU before running the benchmark;
+            recommended to be True for the first run of a model.
+        settings: Any additional settings to pass to the pipeline; supports
+            any keyword parameters accepted by the chosen model. See the HuggingFace
+            documentation on particular models for more details.
+    """
+    logger = get_logger()
+    logger.info("Running benchmark for model: " + model)
+
+    csv_handler = CsvHandler(result_file)
+    if write_header:
+        csv_handler.write_header(
+            [
+                "model",
+                "GPU",
+                "num_prompts",
+                "batch_size",
+                "clip_score",
+                "average_batch_latency(s)",
+                "throughput(image/s)",
+                "avg_energy(J)",
+                "peak_memory(GB)",
+            ]
+        )
+
+    set_seed(seed)
+    prompts, batched_prompts = load_prompts(benchmark_size, batch_size)
+    logger.info("Loaded prompts")
+
+    generator = torch.manual_seed(seed)
+    torch.cuda.set_device(device)
+    monitor = ZeusMonitor(gpu_indices=[torch.cuda.current_device()])
+    pipeline = get_pipeline(model, device=device, weight_dtype=weight_dtype)
+
+    if warmup:
+        gpu_warmup(pipeline)
+
+    torch.cuda.empty_cache()
+    gc.collect()
+    torch.cuda.reset_peak_memory_stats(device=device)
+
+    monitor.begin_window("generate")
+    images = []
+    for batch in batched_prompts:
+        image = pipeline(
+            batch, generator=generator, output_type="np", **settings
+        ).images
+        images.append(image)
+    images = np.concatenate(images)
+    result_monitor = monitor.end_window("generate")
+
+    peak_memory = torch.cuda.max_memory_allocated(device=device)
+
+    for saved_image, saved_prompt in zip(images[::10], prompts[::10]):
+        saved_image = (saved_image * 255).astype(np.uint8)
+        Image.fromarray(saved_image).save(images_path + saved_prompt + ".png")
+
+    clip_score = calculate_clip_score(images, prompts)
+
+    result = {
+        "model": model,
+        "GPU": torch.cuda.get_device_name(device),
+        "num_prompts": len(prompts),
+        "batch_size": batch_size,
+        "clip_score": clip_score,
+        "avg_batch_latency": result_monitor.time / len(batched_prompts),
+        "throughput": len(prompts) / result_monitor.time,
+        "avg_energy": result_monitor.total_energy / len(prompts),
+        "peak_memory": peak_memory,
+    }
+
+    logger.info("Results for model " + model + ":")
+    logger.info(result)
+
+    csv_handler.write_results(result)
+
+    logger.info("Finished benchmarking for " + model)
+
+
+if __name__ == "__main__":
+    tyro.cli(benchmark)
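Because the entry point is just `tyro.cli(benchmark)`, every parameter of `benchmark` is exposed as a command-line flag. A hedged sketch of a first run (mirroring the flag spelling used in `pegasus/benchmark_diffusion.yaml`; tyro derives the exact flag names from the function signature, so the spelling it accepts may differ slightly, e.g. hyphens vs. underscores):

    python scripts/diffusion/benchmark.py \
        --model stabilityai/stable-diffusion-2-1 \
        --batch_size 4 \
        --benchmark_size 64 \
        --warmup \
        --write_header

Per the docstring, `--write_header` is only needed on the very first run against a results file, and `--warmup` on the first run of each model.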
scripts/diffusion/metrics.py ADDED
@@ -0,0 +1,54 @@
+from functools import partial
+
+from datasets import load_dataset
+import torch
+from torchmetrics.functional.multimodal import clip_score
+
+
+def load_prompts(num_prompts, batch_size):
+    """Generate prompts for CLIP Score metric.
+
+    Args:
+        num_prompts (int): number of prompts to generate.
+            If num_prompts == 0, returns all prompts instead.
+        batch_size (int): batch size for prompts.
+
+    Returns:
+        A tuple (prompts, batched_prompts) where prompts is a list of prompts
+        of length num_prompts (if num_prompts != 0) or the list of all prompts
+        (if num_prompts == 0), and batched_prompts is the list of prompts,
+        batched into chunks of size batch_size each.
+    """
+    prompts = load_dataset("nateraw/parti-prompts", split="train")
+    if num_prompts == 0:
+        num_prompts = len(prompts)
+    else:
+        prompts = prompts.shuffle()
+    prompts = prompts[:num_prompts]["Prompt"]
+    batched_prompts = [
+        prompts[i : i + batch_size] for i in range(0, len(prompts), batch_size)
+    ]
+    if len(batched_prompts[-1]) < batch_size:
+        batched_prompts = batched_prompts[:-1]
+    prompts = [prompt for batch in batched_prompts for prompt in batch]
+    return prompts, batched_prompts
+
+
+def calculate_clip_score(images, prompts):
+    """Calculate CLIP Score metric.
+
+    Args:
+        images (np.ndarray): array of images.
+        prompts (list): list of prompts, assumed to be the same length as images.
+
+    Returns:
+        The CLIP score across all images and prompts as a float.
+    """
+    clip_score_fn = partial(
+        clip_score, model_name_or_path="openai/clip-vit-base-patch16"
+    )
+    images_int = (images * 255).astype("uint8")
+    clip = clip_score_fn(
+        torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts
+    ).detach()
+    return float(clip)
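A minimal sketch of how the two helpers fit together, run from scripts/diffusion/ so that `metrics` is importable; the random images are only a placeholder for real pipeline output, and the first call will download the parti-prompts dataset and the CLIP checkpoint:

    import numpy as np

    from metrics import load_prompts, calculate_clip_score

    # 16 prompts from parti-prompts, batched into chunks of 8.
    prompts, batched_prompts = load_prompts(num_prompts=16, batch_size=8)

    # Placeholder "generated" images in [0, 1], shaped (N, H, W, C) like
    # diffusers' output_type="np"; a real run would use pipeline outputs.
    images = np.random.rand(len(prompts), 256, 256, 3).astype(np.float32)

    score = calculate_clip_score(images, prompts)
    print(f"CLIP score over {len(prompts)} prompts: {score:.2f}")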
scripts/diffusion/utils.py ADDED
@@ -0,0 +1,55 @@
+import csv
+import logging
+import sys
+
+
+def get_logger(
+    level: int = logging.INFO,
+    propagate: bool = False,
+) -> logging.Logger:
+    """Get a logger with some formatting configs."""
+    logger = logging.getLogger("diffusion-benchmarks")
+    logger.propagate = propagate
+    logger.setLevel(level)
+    if not len(logger.handlers):
+        formatter = logging.Formatter(
+            "[%(asctime)s] %(message)s", datefmt="%m-%d %H:%M:%S"
+        )
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    return logger
+
+
+class CsvHandler:
+    def __init__(self, file_name, header=None):
+        self.file_name = file_name
+        self.header = header
+        self.file = None
+
+    def open_file(self):
+        self.file = open(self.file_name, mode="a", newline="\n", encoding="utf-8")
+        self.csv_writer = csv.writer(self.file)
+
+        if self.header:
+            self.csv_writer.writerow(self.header)
+
+        print(f"File '{self.file_name}' opened successfully for writing.")
+
+    def write_row(self, data):
+        self.csv_writer.writerow(data)
+
+    def close_file(self):
+        if self.file:
+            self.file.close()
+            print(f"File '{self.file_name}' closed successfully.")
+
+    def write_header(self, data):
+        self.open_file()
+        self.write_row(data)
+        self.close_file()
+
+    def write_results(self, result):
+        self.open_file()
+        self.write_row(list(result.values()))
+        self.close_file()
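CsvHandler reopens the file in append mode on every call, so the header is written once and each benchmark run then appends one row of values. A small usage sketch (the column names and values here are made up for illustration):

    from utils import CsvHandler

    handler = CsvHandler("results.csv")
    # Header goes in once; each subsequent call appends one row of dict values.
    handler.write_header(["model", "batch_size", "throughput(image/s)"])
    handler.write_results(
        {"model": "runwayml/stable-diffusion-v1-5", "batch_size": 4, "throughput": 1.23}
    )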
setup.py CHANGED
@@ -11,7 +11,19 @@ extras_require = {
         "gradio==3.39.0",
         "text_generation @ git+https://github.com/ml-energy/text_generation_energy@master",
     ],
-    "benchmark": ["zeus-ml", "fschat==0.2.23", "torch==2.0.1", "tyro", "rich"],
+    "benchmark": [
+        "zeus-ml",
+        "fschat==0.2.23",
+        "torch==2.0.1",
+        "tyro",
+        "rich",
+        "datasets==2.15.0",
+        "diffusers==0.23.1",
+        "transformers==4.35.2",
+        "accelerate==0.24.1",
+        "torchmetrics==1.2.0",
+        "pillow==10.1.0",
+    ],
     "dev": ["pytest"],
 }
 