Support for benchmarking of diffusion models (#31)
- deployment/benchmark.Dockerfile +1 -0
- pegasus/benchmark_diffusion.yaml +22 -0
- scripts/diffusion/benchmark.py +157 -0
- scripts/diffusion/metrics.py +54 -0
- scripts/diffusion/utils.py +55 -0
- setup.py +13 -1
deployment/benchmark.Dockerfile
CHANGED
@@ -37,6 +37,7 @@ RUN cd /root/.local/miniconda3/lib/python3.9/site-packages \
 
 # Where all the weights downloaded from Hugging Face Hub will go to
 ENV TRANSFORMERS_CACHE=/data/leaderboard/hfcache
+ENV HF_HOME=/data/leaderboard/hfcache
 
 # So that docker exec container python scripts/benchmark.py will work
 WORKDIR /workspace/leaderboard
pegasus/benchmark_diffusion.yaml
ADDED
@@ -0,0 +1,22 @@
# This YAML dictionary will expand into 8 (models) x 6 (batch sizes) = 48 job commands,
# where {{ model }} and {{ batch_size }} are filled in with all possible combinations.
# {{ gpu }} is defined in `hosts.yaml`, and will be filled in when Pegasus
# determines the specific node and gpu the generated job command will run on.
- command:
    - docker exec leaderboard{{ gpu }} python scripts/diffusion/benchmark.py --model {{ model }} --batch_size {{ batch_size }} --warmup
  model:
    - runwayml/stable-diffusion-v1-5
    - stabilityai/stable-diffusion-xl-base-1.0
    - stabilityai/stable-diffusion-2-1
    - prompthero/openjourney
    - kakaobrain/karlo-v1-alpha
    - kandinsky-community/kandinsky-2-2-decoder
    - CompVis/ldm-text2im-large-256
    - SimianLuo/LCM_Dreamshaper_v7
  batch_size:
    - 1
    - 2
    - 4
    - 8
    - 16
    - 32
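For intuition, the expansion described in the comment above amounts to a Cartesian product over the template variables. Below is a minimal Python sketch; it is illustrative only (not how Pegasus itself is implemented), the model list is truncated for brevity, and the hard-coded gpu index stands in for the value Pegasus fills from hosts.yaml.

from itertools import product

# Mimic the models x batch_sizes fan-out described in the YAML comment.
command = (
    "docker exec leaderboard{gpu} python scripts/diffusion/benchmark.py "
    "--model {model} --batch_size {batch_size} --warmup"
)
models = [
    "runwayml/stable-diffusion-v1-5",
    "stabilityai/stable-diffusion-xl-base-1.0",
]  # truncated; the YAML lists 8 models
batch_sizes = [1, 2, 4, 8, 16, 32]

jobs = [
    # {{ gpu }} is resolved by Pegasus at dispatch time; 0 is a placeholder here.
    command.format(gpu=0, model=m, batch_size=b)
    for m, b in product(models, batch_sizes)
]
print(len(jobs), jobs[0])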
scripts/diffusion/benchmark.py
ADDED
@@ -0,0 +1,157 @@
import gc
import time

from diffusers import AutoPipelineForText2Image, DiffusionPipeline
import numpy as np
from PIL import Image
import torch
from transformers.trainer_utils import set_seed
import tyro
from zeus.monitor import ZeusMonitor

from utils import get_logger, CsvHandler
from metrics import load_prompts, calculate_clip_score

# default parameters
DEVICE = "cuda:0"
WEIGHT_DTYPE = torch.float16
SEED = 0
OUTPUT_FILE = "results.csv"
OUTPUT_IMAGES = "images/"


def get_pipeline(model, device=DEVICE, weight_dtype=WEIGHT_DTYPE):
    try:
        return AutoPipelineForText2Image.from_pretrained(
            model, torch_dtype=weight_dtype, safety_checker=None
        ).to(device)
    except ValueError:
        return DiffusionPipeline.from_pretrained(
            model, torch_dtype=weight_dtype, safety_checker=None
        ).to(device)


def gpu_warmup(pipeline):
    """Warm up the GPU by running the given pipeline for 10 secs."""
    logger = get_logger()
    logger.info("Warming up GPU")
    generator = torch.manual_seed(2)
    timeout_start = time.time()
    prompts, _ = load_prompts(1, 1)
    while time.time() < timeout_start + 10:
        _ = pipeline(
            prompts, num_images_per_prompt=10, generator=generator, output_type="numpy"
        ).images
    logger.info("Finished warming up GPU")


def benchmark(
    model: str,
    benchmark_size: int = 0,
    batch_size: int = 1,
    result_file: str = OUTPUT_FILE,
    images_path: str = OUTPUT_IMAGES,
    device: str = DEVICE,
    seed: int = SEED,
    weight_dtype: torch.dtype = WEIGHT_DTYPE,
    write_header: bool = False,
    warmup: bool = False,
    settings: dict = {},
) -> None:
    """Benchmarks the given model with a set of parameters.

    Args:
        model: The name of the model to benchmark, as shown on HuggingFace.
        benchmark_size: The number of prompts to benchmark on. If 0, benchmarks
            the entire parti-prompts dataset.
        batch_size: The size of each batch of prompts. When benchmarking, the
            prompts are split into batches of this size, and prompts are fed into
            the model in batches.
        result_file: The path to the output csv file.
        images_path: The path to the output images directory.
        device: The device to run the benchmark on.
        seed: The seed to use for the RNG.
        weight_dtype: The weight dtype to use for the model.
        write_header: Whether to write the header row to the output csv file,
            recommended to be True for the first run.
        warmup: Whether to warm up the GPU before running the benchmark,
            recommended to be True for the first run of a model.
        settings: Any additional settings to pass to the pipeline, supports
            any keyword parameters accepted by the model chosen. See HuggingFace
            documentation on particular models for more details.
    """
    logger = get_logger()
    logger.info("Running benchmark for model: " + model)

    csv_handler = CsvHandler(result_file)
    if write_header:
        csv_handler.write_header(
            [
                "model",
                "GPU",
                "num_prompts",
                "batch_size",
                "clip_score",
                "average_batch_latency(s)",
                "throughput(image/s)",
                "avg_energy(J)",
                "peak_memory(GB)",
            ]
        )

    set_seed(seed)
    prompts, batched_prompts = load_prompts(benchmark_size, batch_size)
    logger.info("Loaded prompts")

    generator = torch.manual_seed(seed)
    torch.cuda.set_device(device)
    monitor = ZeusMonitor(gpu_indices=[torch.cuda.current_device()])
    pipeline = get_pipeline(model, device=device, weight_dtype=weight_dtype)

    if warmup:
        gpu_warmup(pipeline)

    torch.cuda.empty_cache()
    gc.collect()
    torch.cuda.reset_peak_memory_stats(device=device)

    monitor.begin_window("generate")
    images = []
    for batch in batched_prompts:
        image = pipeline(
            batch, generator=generator, output_type="np", **settings
        ).images
        images.append(image)
    images = np.concatenate(images)
    result_monitor = monitor.end_window("generate")

    peak_memory = torch.cuda.max_memory_allocated(device=device)

    for saved_image, saved_prompt in zip(images[::10], prompts[::10]):
        saved_image = (saved_image * 255).astype(np.uint8)
        Image.fromarray(saved_image).save(images_path + saved_prompt + ".png")

    clip_score = calculate_clip_score(images, prompts)

    # Use the actual number of benchmarked prompts rather than benchmark_size,
    # which is 0 when the whole dataset is used.
    num_prompts = len(prompts)
    result = {
        "model": model,
        "GPU": torch.cuda.get_device_name(device),
        "num_prompts": num_prompts,
        "batch_size": batch_size,
        "clip_score": clip_score,
        "avg_batch_latency": result_monitor.time / (num_prompts / batch_size),
        "throughput": num_prompts / result_monitor.time,
        "avg_energy": result_monitor.total_energy / num_prompts,
        "peak_memory": peak_memory / 1e9,  # bytes -> GB, matching the CSV header
    }

    logger.info("Results for model " + model + ":")
    logger.info(result)

    csv_handler.write_results(result)

    logger.info("Finished benchmarking for " + model)


if __name__ == "__main__":
    tyro.cli(benchmark)
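As a usage sketch (illustrative; the model name, prompt count, and extra settings below are examples, and the import assumes the working directory is scripts/diffusion/ so that sibling modules resolve), the entry point can also be called directly instead of through tyro's CLI:

from benchmark import benchmark  # assumes cwd is scripts/diffusion/

# Roughly equivalent to:
#   python scripts/diffusion/benchmark.py --model runwayml/stable-diffusion-v1-5 \
#       --benchmark_size 32 --batch_size 4 --warmup --write_header
benchmark(
    model="runwayml/stable-diffusion-v1-5",
    benchmark_size=32,   # 0 would benchmark the whole parti-prompts split
    batch_size=4,
    write_header=True,   # write the CSV header on the first run
    warmup=True,         # warm the GPU before measuring
    settings={"num_inference_steps": 30},  # forwarded as keyword args to the pipeline
)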
scripts/diffusion/metrics.py
ADDED
@@ -0,0 +1,54 @@
from functools import partial

from datasets import load_dataset
import torch
from torchmetrics.functional.multimodal import clip_score


def load_prompts(num_prompts, batch_size):
    """Generate prompts for CLIP Score metric.

    Args:
        num_prompts (int): number of prompts to generate.
            If num_prompts == 0, returns all prompts instead.
        batch_size (int): batch size for prompts

    Returns:
        A tuple (prompts, batched_prompts) where prompts is a list of prompts
        of length num_prompts (if num_prompts != 0) or the list of all prompts
        (if num_prompts == 0), and batched_prompts is the list of prompts,
        batched into chunks of size batch_size each.
    """
    prompts = load_dataset("nateraw/parti-prompts", split="train")
    if num_prompts == 0:
        num_prompts = len(prompts)
    else:
        prompts = prompts.shuffle()
    prompts = prompts[:num_prompts]["Prompt"]
    batched_prompts = [
        prompts[i : i + batch_size] for i in range(0, len(prompts), batch_size)
    ]
    if len(batched_prompts[-1]) < batch_size:
        batched_prompts = batched_prompts[:-1]
        prompts = [prompt for batch in batched_prompts for prompt in batch]
    return prompts, batched_prompts


def calculate_clip_score(images, prompts):
    """Calculate CLIP Score metric.

    Args:
        images (np.ndarray): array of images
        prompts (list): list of prompts, assumes same size as images

    Returns:
        The clip score across all images and prompts as a float.
    """
    clip_score_fn = partial(
        clip_score, model_name_or_path="openai/clip-vit-base-patch16"
    )
    images_int = (images * 255).astype("uint8")
    clip = clip_score_fn(
        torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts
    ).detach()
    return float(clip)
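A small usage sketch of these helpers (illustrative; the random array only demonstrates the (N, H, W, C) float-in-[0, 1] layout that the pipelines return with output_type="np", and the import assumes the working directory is scripts/diffusion/):

import numpy as np

from metrics import load_prompts, calculate_clip_score

# 8 prompts in batches of 4 -> 2 full batches, nothing dropped.
prompts, batched_prompts = load_prompts(num_prompts=8, batch_size=4)
assert len(prompts) == 8 and len(batched_prompts) == 2

# Stand-in images with the same shape/dtype the benchmark passes in.
fake_images = np.random.rand(len(prompts), 512, 512, 3).astype("float32")
score = calculate_clip_score(fake_images, prompts)
print(f"CLIP score: {score:.2f}")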
scripts/diffusion/utils.py
ADDED
@@ -0,0 +1,55 @@
import csv
import logging
import sys


def get_logger(
    level: int = logging.INFO,
    propagate: bool = False,
) -> logging.Logger:
    """Get a logger with some formatting configs."""
    logger = logging.getLogger("diffusion-benchmarks")
    logger.propagate = propagate
    logger.setLevel(level)
    if not len(logger.handlers):
        formatter = logging.Formatter(
            "[%(asctime)s] %(message)s", datefmt="%m-%d %H:%M:%S"
        )
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


class CsvHandler:
    def __init__(self, file_name, header=None):
        self.file_name = file_name
        self.header = header
        self.file = None

    def open_file(self):
        self.file = open(self.file_name, mode="a", newline="\n", encoding="utf-8")
        self.csv_writer = csv.writer(self.file)

        if self.header:
            self.csv_writer.writerow(self.header)

        print(f"File '{self.file_name}' opened successfully for writing.")

    def write_row(self, data):
        self.csv_writer.writerow(data)

    def close_file(self):
        if self.file:
            self.file.close()
            print(f"File '{self.file_name}' closed successfully.")

    def write_header(self, data):
        self.open_file()
        self.write_row(data)
        self.close_file()

    def write_results(self, result):
        self.open_file()
        self.write_row(list(result.values()))
        self.close_file()
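A short usage sketch of CsvHandler (the file name and values are examples only); each call opens the CSV in append mode and closes it again, so rows written earlier survive even if a later benchmark run crashes:

from utils import CsvHandler  # assumes cwd is scripts/diffusion/

handler = CsvHandler("results_example.csv")

# Write the header once, then append one row per benchmark run.
handler.write_header(["model", "batch_size", "throughput(image/s)"])
handler.write_results(
    {"model": "runwayml/stable-diffusion-v1-5", "batch_size": 4, "throughput": 1.73}
)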
setup.py
CHANGED
@@ -11,7 +11,19 @@ extras_require = {
         "gradio==3.39.0",
         "text_generation @ git+https://github.com/ml-energy/text_generation_energy@master",
     ],
-    "benchmark": [
+    "benchmark": [
+        "zeus-ml",
+        "fschat==0.2.23",
+        "torch==2.0.1",
+        "tyro",
+        "rich",
+        "datasets==2.15.0",
+        "diffusers==0.23.1",
+        "transformers==4.35.2",
+        "accelerate==0.24.1",
+        "torchmetrics==1.2.0",
+        "pillow==10.1.0",
+    ],
     "dev": ["pytest"],
 }