SchwarzXia committed
Commit 56a3a83 (parent: 87d2c55)

Support for benchmarking of diffusion models (#31)

deployment/benchmark.Dockerfile CHANGED
@@ -37,6 +37,7 @@ RUN cd /root/.local/miniconda3/lib/python3.9/site-packages \
 
 # Where all the weights downloaded from Hugging Face Hub will go to
 ENV TRANSFORMERS_CACHE=/data/leaderboard/hfcache
+ENV HF_HOME=/data/leaderboard/hfcache
 
 # So that docker exec container python scripts/benchmark.py will work
 WORKDIR /workspace/leaderboard
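Both cache variables point at /data/leaderboard/hfcache inside the container, so that directory is expected to sit on persistent storage. A hedged sketch of building the image and starting a per-GPU container (the image tag and host path are assumptions, not part of this commit; the container name follows the docker exec leaderboard{{ gpu }} pattern used in the Pegasus file below):

    docker build -t leaderboard-benchmark -f deployment/benchmark.Dockerfile .
    docker run -dit --gpus all --name leaderboard0 \
        -v /data/leaderboard:/data/leaderboard \
        leaderboard-benchmark

With the volume mounted, weights downloaded into TRANSFORMERS_CACHE / HF_HOME survive container restarts and are reused across benchmark runs.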
pegasus/benchmark_diffusion.yaml ADDED
@@ -0,0 +1,22 @@
+# This YAML dictionary will expand into 8 (models) x 6 (batch sizes) = 48 job commands,
+# where {{ model }} and {{ batch_size }} are filled in with all possible combinations.
+# {{ gpu }} is defined in `hosts.yaml`, and will be filled in when Pegasus
+# determines the specific node and gpu the generated job command will run on.
+- command:
+    - docker exec leaderboard{{ gpu }} python scripts/diffusion/benchmark.py --model {{ model }} --batch_size {{ batch_size }} --warmup
+  model:
+    - runwayml/stable-diffusion-v1-5
+    - stabilityai/stable-diffusion-xl-base-1.0
+    - stabilityai/stable-diffusion-2-1
+    - prompthero/openjourney
+    - kakaobrain/karlo-v1-alpha
+    - kandinsky-community/kandinsky-2-2-decoder
+    - CompVis/ldm-text2im-large-256
+    - SimianLuo/LCM_Dreamshaper_v7
+  batch_size:
+    - 1
+    - 2
+    - 4
+    - 8
+    - 16
+    - 32
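For reference, one of the 48 expanded job commands would look like the line below, assuming `hosts.yaml` assigns the job to GPU 0 (the model and batch size shown are just one of the combinations):

    docker exec leaderboard0 python scripts/diffusion/benchmark.py --model runwayml/stable-diffusion-v1-5 --batch_size 4 --warmup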
scripts/diffusion/benchmark.py ADDED
@@ -0,0 +1,157 @@
+import gc
+import time
+
+from diffusers import AutoPipelineForText2Image, DiffusionPipeline
+import numpy as np
+from PIL import Image
+import torch
+from transformers.trainer_utils import set_seed
+import tyro
+from zeus.monitor import ZeusMonitor
+
+from utils import get_logger, CsvHandler
+from metrics import load_prompts, calculate_clip_score
+
+# default parameters
+DEVICE = "cuda:0"
+WEIGHT_DTYPE = torch.float16
+SEED = 0
+OUTPUT_FILE = "results.csv"
+OUTPUT_IMAGES = "images/"
+
+
+def get_pipeline(model, device=DEVICE, weight_dtype=WEIGHT_DTYPE):
+    try:
+        return AutoPipelineForText2Image.from_pretrained(
+            model, torch_dtype=weight_dtype, safety_checker=None
+        ).to(device)
+    except ValueError:
+        return DiffusionPipeline.from_pretrained(
+            model, torch_dtype=weight_dtype, safety_checker=None
+        ).to(device)
+
+
+def gpu_warmup(pipeline):
+    """Warm up the GPU by running the given pipeline for 10 secs."""
+    logger = get_logger()
+    logger.info("Warming up GPU")
+    generator = torch.manual_seed(2)
+    timeout_start = time.time()
+    prompts, _ = load_prompts(1, 1)
+    while time.time() < timeout_start + 10:
+        _ = pipeline(
+            prompts, num_images_per_prompt=10, generator=generator, output_type="numpy"
+        ).images
+    logger.info("Finished warming up GPU")
+
+
+def benchmark(
+    model: str,
+    benchmark_size: int = 0,
+    batch_size: int = 1,
+    result_file: str = OUTPUT_FILE,
+    images_path: str = OUTPUT_IMAGES,
+    device: str = DEVICE,
+    seed: int = SEED,
+    weight_dtype: torch.dtype = WEIGHT_DTYPE,
+    write_header: bool = False,
+    warmup: bool = False,
+    settings: dict = {},
+) -> None:
+    """Benchmarks the given model with a set of parameters.
+
+    Args:
+        model: The name of the model to benchmark, as shown on HuggingFace.
+        benchmark_size: The number of prompts to benchmark on. If 0, benchmarks
+            the entire parti-prompts dataset.
+        batch_size: The size of each batch of prompts. When benchmarking, the
+            prompts are split into batches of this size, and prompts are fed into
+            the model in batches.
+        result_file: The path to the output csv file.
+        images_path: The path to the output images directory.
+        device: The device to run the benchmark on.
+        seed: The seed to use for the RNG.
+        weight_dtype: The weight dtype to use for the model.
+        write_header: Whether to write the header row to the output csv file;
+            recommended to be True for the first run.
+        warmup: Whether to warm up the GPU before running the benchmark;
+            recommended to be True for the first run of a model.
+        settings: Any additional settings to pass to the pipeline; supports
+            any keyword parameters accepted by the chosen model. See the HuggingFace
+            documentation on particular models for more details.
+    """
+    logger = get_logger()
+    logger.info("Running benchmark for model: " + model)
+
+    csv_handler = CsvHandler(result_file)
+    if write_header:
+        csv_handler.write_header(
+            [
+                "model",
+                "GPU",
+                "num_prompts",
+                "batch_size",
+                "clip_score",
+                "average_batch_latency(s)",
+                "throughput(image/s)",
+                "avg_energy(J)",
+                "peak_memory(GB)",
+            ]
+        )
+
+    set_seed(seed)
+    prompts, batched_prompts = load_prompts(benchmark_size, batch_size)
+    logger.info("Loaded prompts")
+
+    generator = torch.manual_seed(seed)
+    torch.cuda.set_device(device)
+    monitor = ZeusMonitor(gpu_indices=[torch.cuda.current_device()])
+    pipeline = get_pipeline(model, device=device, weight_dtype=weight_dtype)
+
+    if warmup:
+        gpu_warmup(pipeline)
+
+    torch.cuda.empty_cache()
+    gc.collect()
+    torch.cuda.reset_peak_memory_stats(device=device)
+
+    monitor.begin_window("generate")
+    images = []
+    for batch in batched_prompts:
+        image = pipeline(
+            batch, generator=generator, output_type="np", **settings
+        ).images
+        images.append(image)
+    images = np.concatenate(images)
+    result_monitor = monitor.end_window("generate")
+
+    peak_memory = torch.cuda.max_memory_allocated(device=device)
+
+    for saved_image, saved_prompt in zip(images[::10], prompts[::10]):
+        saved_image = (saved_image * 255).astype(np.uint8)
+        Image.fromarray(saved_image).save(images_path + saved_prompt + ".png")
+
+    clip_score = calculate_clip_score(images, prompts)
+
+    result = {
+        "model": model,
+        "GPU": torch.cuda.get_device_name(device),
+        "num_prompts": len(prompts),
+        "batch_size": batch_size,
+        "clip_score": clip_score,
+        "avg_batch_latency": result_monitor.time / len(batched_prompts),
+        "throughput": len(prompts) / result_monitor.time,
+        "avg_energy": result_monitor.total_energy / len(prompts),
+        "peak_memory": peak_memory,
+    }
+
+    logger.info("Results for model " + model + ":")
+    logger.info(result)
+
+    csv_handler.write_results(result)
+
+    logger.info("Finished benchmarking for " + model)
+
+
+if __name__ == "__main__":
+    tyro.cli(benchmark)
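Because the entry point is just `tyro.cli(benchmark)`, every parameter of `benchmark` is exposed as a command-line flag. A hedged sketch of a first run (mirroring the flag spelling used in `pegasus/benchmark_diffusion.yaml`; tyro derives the exact flag names from the function signature, so the spelling it accepts may differ slightly, e.g. hyphens vs. underscores):

    python scripts/diffusion/benchmark.py \
        --model stabilityai/stable-diffusion-2-1 \
        --batch_size 4 \
        --benchmark_size 64 \
        --warmup \
        --write_header

Per the docstring, `--write_header` is only needed on the very first run against a results file, and `--warmup` on the first run of each model.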
scripts/diffusion/metrics.py ADDED
@@ -0,0 +1,54 @@
+from functools import partial
+
+from datasets import load_dataset
+import torch
+from torchmetrics.functional.multimodal import clip_score
+
+
+def load_prompts(num_prompts, batch_size):
+    """Generate prompts for CLIP Score metric.
+
+    Args:
+        num_prompts (int): number of prompts to generate.
+            If num_prompts == 0, returns all prompts instead.
+        batch_size (int): batch size for prompts.
+
+    Returns:
+        A tuple (prompts, batched_prompts) where prompts is a list of prompts
+        of length num_prompts (if num_prompts != 0) or the list of all prompts
+        (if num_prompts == 0), and batched_prompts is the list of prompts,
+        batched into chunks of size batch_size each.
+    """
+    prompts = load_dataset("nateraw/parti-prompts", split="train")
+    if num_prompts == 0:
+        num_prompts = len(prompts)
+    else:
+        prompts = prompts.shuffle()
+    prompts = prompts[:num_prompts]["Prompt"]
+    batched_prompts = [
+        prompts[i : i + batch_size] for i in range(0, len(prompts), batch_size)
+    ]
+    if len(batched_prompts[-1]) < batch_size:
+        batched_prompts = batched_prompts[:-1]
+    prompts = [prompt for batch in batched_prompts for prompt in batch]
+    return prompts, batched_prompts
+
+
+def calculate_clip_score(images, prompts):
+    """Calculate CLIP Score metric.
+
+    Args:
+        images (np.ndarray): array of images.
+        prompts (list): list of prompts, assumed to be the same length as images.
+
+    Returns:
+        The CLIP score across all images and prompts as a float.
+    """
+    clip_score_fn = partial(
+        clip_score, model_name_or_path="openai/clip-vit-base-patch16"
+    )
+    images_int = (images * 255).astype("uint8")
+    clip = clip_score_fn(
+        torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts
+    ).detach()
+    return float(clip)
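A minimal sketch of how the two helpers fit together, run from scripts/diffusion/ so that `metrics` is importable; the random images are only a placeholder for real pipeline output, and the first call will download the parti-prompts dataset and the CLIP checkpoint:

    import numpy as np

    from metrics import load_prompts, calculate_clip_score

    # 16 prompts from parti-prompts, batched into chunks of 8.
    prompts, batched_prompts = load_prompts(num_prompts=16, batch_size=8)

    # Placeholder "generated" images in [0, 1], shaped (N, H, W, C) like
    # diffusers' output_type="np"; a real run would use pipeline outputs.
    images = np.random.rand(len(prompts), 256, 256, 3).astype(np.float32)

    score = calculate_clip_score(images, prompts)
    print(f"CLIP score over {len(prompts)} prompts: {score:.2f}")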
scripts/diffusion/utils.py ADDED
@@ -0,0 +1,55 @@
+import csv
+import logging
+import sys
+
+
+def get_logger(
+    level: int = logging.INFO,
+    propagate: bool = False,
+) -> logging.Logger:
+    """Get a logger with some formatting configs."""
+    logger = logging.getLogger("diffusion-benchmarks")
+    logger.propagate = propagate
+    logger.setLevel(level)
+    if not len(logger.handlers):
+        formatter = logging.Formatter(
+            "[%(asctime)s] %(message)s", datefmt="%m-%d %H:%M:%S"
+        )
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    return logger
+
+
+class CsvHandler:
+    def __init__(self, file_name, header=None):
+        self.file_name = file_name
+        self.header = header
+        self.file = None
+
+    def open_file(self):
+        self.file = open(self.file_name, mode="a", newline="\n", encoding="utf-8")
+        self.csv_writer = csv.writer(self.file)
+
+        if self.header:
+            self.csv_writer.writerow(self.header)
+
+        print(f"File '{self.file_name}' opened successfully for writing.")
+
+    def write_row(self, data):
+        self.csv_writer.writerow(data)
+
+    def close_file(self):
+        if self.file:
+            self.file.close()
+            print(f"File '{self.file_name}' closed successfully.")
+
+    def write_header(self, data):
+        self.open_file()
+        self.write_row(data)
+        self.close_file()
+
+    def write_results(self, result):
+        self.open_file()
+        self.write_row(list(result.values()))
+        self.close_file()
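CsvHandler reopens the file in append mode on every call, so the header is written once and each benchmark run then appends one row of values. A small usage sketch (the column names and values here are made up for illustration):

    from utils import CsvHandler

    handler = CsvHandler("results.csv")
    # Header goes in once; each subsequent call appends one row of dict values.
    handler.write_header(["model", "batch_size", "throughput(image/s)"])
    handler.write_results(
        {"model": "runwayml/stable-diffusion-v1-5", "batch_size": 4, "throughput": 1.23}
    )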
setup.py CHANGED
@@ -11,7 +11,19 @@ extras_require = {
         "gradio==3.39.0",
         "text_generation @ git+https://github.com/ml-energy/text_generation_energy@master",
     ],
-    "benchmark": ["zeus-ml", "fschat==0.2.23", "torch==2.0.1", "tyro", "rich"],
+    "benchmark": [
+        "zeus-ml",
+        "fschat==0.2.23",
+        "torch==2.0.1",
+        "tyro",
+        "rich",
+        "datasets==2.15.0",
+        "diffusers==0.23.1",
+        "transformers==4.35.2",
+        "accelerate==0.24.1",
+        "torchmetrics==1.2.0",
+        "pillow==10.1.0",
+    ],
     "dev": ["pytest"],
 }
 