"""
Trainer benchmarking harness.

Runs the command given via --base-cmd once per combination of the --variations
dimensions (optionally repeating each combination --repeat-times times), reads
--target-metric-key from <output_dir>/all_results.json after every run, and
prints a report comparing each variation against a baseline, both as
github-flavored markdown and as a console-friendly table.
"""
import argparse |
|
import datetime |
|
import io |
|
import itertools |
|
import json |
|
import math |
|
import os |
|
import platform |
|
import re |
|
import shlex |
|
import subprocess |
|
import sys |
|
from pathlib import Path |
|
from statistics import fmean |
|
|
|
import pandas as pd |
|
import torch |
|
from tqdm import tqdm |
|
|
|
import transformers |
|
|
|
|
|
nan = float("nan") |
|
|
|
|
|
class Tee: |
|
""" |
|
A helper class to tee print's output into a file. |
|
Usage: |
|
sys.stdout = Tee(filename) |
|
""" |
|
|
|
def __init__(self, filename): |
|
self.stdout = sys.stdout |
|
self.file = open(filename, "a") |
|
|
|
def __getattr__(self, attr): |
|
return getattr(self.stdout, attr) |
|
|
|
def write(self, msg): |
|
self.stdout.write(msg) |
|
|
|
self.file.write(re.sub(r"^.*\r", "", msg, 0, re.M)) |
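        # the re.sub drops everything a \r would overwrite (e.g. tqdm progress refreshes), so only each line's final text lands in the file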
|
|
|
|
|
def get_original_command(max_width=80, full_python_path=False): |
|
""" |
|
    Return the original command line string so that it can be replayed, nicely wrapped to at most `max_width` characters per line.
|
|
|
Args: |
|
max_width (`int`, `optional`, defaults to 80): |
|
The width to wrap for. |
|
full_python_path (`bool`, `optional`, defaults to `False`): |
|
Whether to replicate the full path or just the last segment (i.e. `python`). |
|
""" |
|
|
|
cmd = [] |
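    # replay env vars that critically affect the command (currently just CUDA_VISIBLE_DEVICES)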
|
|
|
|
|
env_keys = ["CUDA_VISIBLE_DEVICES"] |
|
for key in env_keys: |
|
val = os.environ.get(key, None) |
|
if val is not None: |
|
cmd.append(f"{key}={val}") |
|
|
|
|
|
python = sys.executable if full_python_path else sys.executable.split("/")[-1] |
|
cmd.append(python) |
|
|
|
|
|
cmd += list(map(shlex.quote, sys.argv)) |
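    # sys.argv already contains the script name and its arguments; quote each so the line can be replayed in a shell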
|
|
|
|
|
lines = [] |
|
current_line = "" |
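    # wrap the accumulated command into lines of at most max_width chars, each ending with a shell line continuation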
|
while len(cmd) > 0: |
|
current_line += f"{cmd.pop(0)} " |
|
if len(cmd) == 0 or len(current_line) + len(cmd[0]) + 1 > max_width - 1: |
|
lines.append(current_line) |
|
current_line = "" |
|
return "\\\n".join(lines) |
|
|
|
|
|
def get_base_command(args, output_dir): |
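    # unwrap any multi-line escaped command into a single line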
|
|
|
|
|
args.base_cmd = re.sub(r"[\\\n]+", " ", args.base_cmd) |
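    # remove any pre-set --output_dir and point the run at our own output dir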
|
|
|
|
|
    args.base_cmd = re.sub(r"--output_dir\s+[^\s]+", "", args.base_cmd)
|
args.base_cmd += f" --output_dir {output_dir}" |
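    # ensure --overwrite_output_dir is present exactly once, since every variation reuses the same output dir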
|
|
|
|
|
    args.base_cmd = re.sub(r"--overwrite_output_dir\s+", "", args.base_cmd)
|
args.base_cmd += " --overwrite_output_dir" |
|
|
|
return [sys.executable] + shlex.split(args.base_cmd) |
|
|
|
|
|
def process_run_single(id, cmd, variation, output_dir, target_metric_key, metric_keys, verbose): |
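    # flip the "if 0" below to "if 1" to debug this harness quickly: it mocks the run and returns random metrics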
|
|
|
|
|
|
|
|
|
if 0: |
|
import random |
|
from time import sleep |
|
|
|
sleep(0) |
|
return dict( |
|
{k: random.uniform(0, 100) for k in metric_keys}, |
|
**{target_metric_key: random.choice([nan, 10.31, 100.2, 55.6666, 222.22222222])}, |
|
) |
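    # the real run: launch the benchmark command and capture its output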
|
|
|
result = subprocess.run(cmd, capture_output=True, text=True) |
|
|
|
if verbose: |
|
print("STDOUT", result.stdout) |
|
print("STDERR", result.stderr) |
|
|
|
|
|
prefix = variation.replace(" ", "-") |
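    # save each run's stdout/stderr under output_dir so individual runs can be inspected later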
|
with open(Path(output_dir) / f"log.{prefix}.stdout.txt", "w") as f: |
|
f.write(result.stdout) |
|
with open(Path(output_dir) / f"log.{prefix}.stderr.txt", "w") as f: |
|
f.write(result.stderr) |
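    # a failed run reports nan for the target metric, so it gets excluded from the averages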
|
|
|
if result.returncode != 0: |
|
if verbose: |
|
print("failed") |
|
return {target_metric_key: nan} |
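    # a successful run leaves its metrics in output_dir/all_results.json; keep only the requested keys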
|
|
|
with io.open(f"{output_dir}/all_results.json", "r", encoding="utf-8") as f: |
|
metrics = json.load(f) |
|
|
|
|
|
return {k: v for k, v in metrics.items() if k in metric_keys} |
|
|
|
|
|
def process_run( |
|
id, |
|
cmd, |
|
variation_key, |
|
variation, |
|
longest_variation_len, |
|
target_metric_key, |
|
report_metric_keys, |
|
repeat_times, |
|
output_dir, |
|
verbose, |
|
): |
|
results = [] |
|
metrics = [] |
|
preamble = f"{id}: {variation:<{longest_variation_len}}" |
|
outcome = f"{preamble}: " |
|
metric_keys = set(report_metric_keys + [target_metric_key]) |
|
for i in tqdm(range(repeat_times), desc=preamble, leave=False): |
|
single_run_metrics = process_run_single( |
|
id, cmd, variation, output_dir, target_metric_key, metric_keys, verbose |
|
) |
|
result = single_run_metrics[target_metric_key] |
|
if not math.isnan(result): |
|
metrics.append(single_run_metrics) |
|
results.append(result) |
|
outcome += "✓" |
|
else: |
|
outcome += "✘" |
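    # \33[2K + \r erase the in-progress tqdm line so the final outcome line prints cleanly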
|
outcome = f"\33[2K\r{outcome}" |
|
if len(metrics) > 0: |
|
mean_metrics = {k: fmean([x[k] for x in metrics]) for k in metrics[0].keys()} |
|
mean_target = round(mean_metrics[target_metric_key], 2) |
|
results_str = f"{outcome} {mean_target}" |
|
if len(metrics) > 1: |
|
results_str += f" {tuple(round(x, 2) for x in results)}" |
|
print(results_str) |
|
mean_metrics[variation_key] = variation |
|
return mean_metrics |
|
else: |
|
print(outcome) |
|
return {variation_key: variation, target_metric_key: nan} |
|
|
|
|
|
def get_versions(): |
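    # the current cuda device's properties are used to describe all GPUs in the report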
|
properties = torch.cuda.get_device_properties(torch.device("cuda")) |
|
return f""" |
|
Datetime : {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} |
|
|
|
Software: |
|
transformers: {transformers.__version__} |
|
torch : {torch.__version__} |
|
cuda : {torch.version.cuda} |
|
python : {platform.python_version()} |
|
|
|
Hardware: |
|
{torch.cuda.device_count()} GPUs : {properties.name}, {properties.total_memory/2**30:0.2f}GB |
|
""" |
|
|
|
|
|
def process_results(results, target_metric_key, report_metric_keys, base_variation, output_dir): |
|
|
|
df = pd.DataFrame(results) |
|
variation_key = "variation" |
|
diff_key = "diff_%" |
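    # baseline ("sentinel") value for the diff column: the requested base_variation's target metric
    # if it produced a number, otherwise the smallest observed target value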
|
|
|
sentinel_value = nan |
|
if base_variation is not None and len(df[df[variation_key] == base_variation]): |
|
|
|
sentinel_value = df.loc[df[variation_key] == base_variation][target_metric_key].item() |
|
if math.isnan(sentinel_value): |
|
|
|
        sentinel_value = df.loc[df[target_metric_key].notna(), target_metric_key].min()
|
|
|
|
|
if not math.isnan(sentinel_value): |
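        # failed runs (nan target metric) get a diff of 0 rather than propagating nan through the table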
|
df[diff_key] = df.apply( |
|
lambda r: round(100 * (r[target_metric_key] - sentinel_value) / sentinel_value) |
|
if not math.isnan(r[target_metric_key]) |
|
else 0, |
|
axis="columns", |
|
) |
|
|
|
|
|
cols = [variation_key, target_metric_key, diff_key, *report_metric_keys] |
|
df = df.reindex(cols, axis="columns") |
|
|
|
|
|
df = df.rename(str.capitalize, axis="columns") |
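    # two renderings of the same table: <br> line breaks for github markdown, real newlines for the console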
|
|
|
|
|
df_github = df.rename(lambda c: c.replace("_", "<br>"), axis="columns") |
|
df_console = df.rename(lambda c: c.replace("_", "\n"), axis="columns") |
|
|
|
    report = ["", "Copy between the cut-here lines and paste as-is to github or a forum"]
|
report += ["----------8<-----------------8<--------"] |
|
report += ["*** Results:", df_github.to_markdown(index=False, floatfmt=".2f")] |
|
report += ["```"] |
|
report += ["*** Setup:", get_versions()] |
|
report += ["*** The benchmark command line was:", get_original_command()] |
|
report += ["```"] |
|
report += ["----------8<-----------------8<--------"] |
|
report += ["*** Results (console):", df_console.to_markdown(index=False, floatfmt=".2f")] |
|
|
|
print("\n\n".join(report)) |
|
|
|
|
|
def main(): |
|
parser = argparse.ArgumentParser() |
|
parser.add_argument( |
|
"--base-cmd", |
|
default=None, |
|
type=str, |
|
required=True, |
|
        help="Base command to benchmark; each variation's arguments get appended to it",
|
) |
|
parser.add_argument( |
|
"--variations", |
|
default=None, |
|
type=str, |
|
nargs="+", |
|
required=True, |
|
        help="Multi-dimensional variations; each quoted argument is one dimension of |-separated options (a leading | adds an empty 'off' option), example: '|--fp16|--bf16' '|--tf32'",
|
) |
|
parser.add_argument( |
|
"--base-variation", |
|
default=None, |
|
type=str, |
|
        help="Baseline variation to compare to. If None, the minimal target value will be used as the baseline",
|
) |
|
parser.add_argument( |
|
"--target-metric-key", |
|
default=None, |
|
type=str, |
|
required=True, |
|
help="Target metric key in output_dir/all_results.json, e.g., train_samples_per_second", |
|
) |
|
parser.add_argument( |
|
"--report-metric-keys", |
|
default="", |
|
type=str, |
|
        help="Report metric keys - other metric keys from output_dir/all_results.json to report, e.g., train_loss. Use a single space-separated argument, e.g., 'train_loss train_samples'",
|
) |
|
parser.add_argument( |
|
"--repeat-times", |
|
default=1, |
|
type=int, |
|
help="How many times to re-run each variation - an average will be reported", |
|
) |
|
parser.add_argument( |
|
"--output_dir", |
|
default="output_benchmark", |
|
type=str, |
|
        help="The output directory where all the benchmark reports will go; it is also used to override --output_dir in the script being benchmarked",
|
) |
|
parser.add_argument( |
|
"--verbose", |
|
default=False, |
|
action="store_true", |
|
help="Whether to show the outputs of each run or just the benchmark progress", |
|
) |
|
args = parser.parse_args() |
|
|
|
output_dir = args.output_dir |
|
Path(output_dir).mkdir(exist_ok=True) |
|
base_cmd = get_base_command(args, output_dir) |
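    # each --variations entry is one dimension; split it on "|" into that dimension's options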
|
|
|
|
|
dims = [list(map(str.strip, re.split(r"\|", x))) for x in args.variations] |
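    # the cartesian product over all dimensions gives the full list of variations to benchmark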
|
|
|
|
|
variations = list(map(str.strip, map(" ".join, itertools.product(*dims)))) |
|
longest_variation_len = max(len(x) for x in variations) |
|
|
|
|
|
report_metric_keys = args.report_metric_keys.split() |
|
|
|
|
|
report_fn = f"benchmark-report-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.txt" |
|
print(f"\nNote: each run's output is also logged under {output_dir}/log.*.std*.txt") |
|
print(f"and this script's output is also piped into {report_fn}") |
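    # from here on everything printed also goes into the report file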
|
|
|
sys.stdout = Tee(report_fn) |
|
|
|
print(f"\n*** Running {len(variations)} benchmarks:") |
|
print(f"Base command: {' '.join(base_cmd)}") |
|
|
|
variation_key = "variation" |
|
results = [] |
|
for id, variation in enumerate(tqdm(variations, desc="Total completion: ", leave=False)): |
|
cmd = base_cmd + variation.split() |
|
results.append( |
|
process_run( |
|
id + 1, |
|
cmd, |
|
variation_key, |
|
variation, |
|
longest_variation_len, |
|
args.target_metric_key, |
|
report_metric_keys, |
|
args.repeat_times, |
|
output_dir, |
|
args.verbose, |
|
) |
|
) |
|
|
|
process_results(results, args.target_metric_key, report_metric_keys, args.base_variation, output_dir) |
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|