Spaces:
Running
Running
File size: 3,191 Bytes
2fcb72a c113723 2fcb72a c113723 2fcb72a c5bc8e4 2fcb72a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import os
from typing import List
import pandas as pd
# Directory (relative to the current working directory) in which
# get_llm_perf_df creates and looks up its per-machine CSV files.
DATASET_DIRECTORY = "dataset"
# Disabled column-rename mapping, kept for reference only; nothing in the
# visible code reads it.
# COLUMNS_MAPPING = {
# "config.name": "Quantization",
# "config.backend.model": "Model",
# # primary measurements
# "report.prefill.throughput.value": "Prefill (tokens/s)",
# "report.decode.throughput.value": "Decode (tokens/s)",
# "report.memory": "Model Size (GB)",
# # deployment settings
# "config.backend.name": "Backend",
# "quantization": "Quantization",
# # additional information
# "#Params (B)": "Params (B)",
# }
# Sort keys passed as `by=` to DataFrame.sort_values in processed_llm_perf_df,
# in priority order.
SORTING_COLUMNS = ["Model Size (GB)", "Decode (tokens/s)", "Prefill (tokens/s)", "MMLU Accuracy"]
# Per-key sort direction, positionally paired with SORTING_COLUMNS: model size
# is sorted descending, the remaining three keys ascending.
# NOTE(review): throughput and accuracy sorted ascending puts the lowest
# values first - confirm this is the intended leaderboard order.
SORTING_ASCENDING = [False, True, True, True]
def get_raw_llm_perf_df(
    machine: str,
    backends: List[str],
    hardware_type: str,
    *,
    csv_path: str = "/Users/arnavchavan/leaderboard/benchmark_results_with_mmlu.csv",
) -> pd.DataFrame:
    """Load the raw benchmark results table from a CSV file.

    Args:
        machine: Machine identifier; used only in diagnostics and in the
            error message.
        backends: Backend names; used only to build the diagnostic URL.
        hardware_type: Hardware type; used only in diagnostics.
        csv_path: Location of the CSV to read. Defaults to the path that was
            previously hard-coded here, so existing callers are unaffected.
            NOTE(review): that default is an absolute path on a developer
            machine; deployments should pass their own location.

    Returns:
        The DataFrame parsed from ``csv_path``.

    Raises:
        ValueError: If the CSV cannot be read. The underlying exception is
            attached as ``__cause__`` so the real failure is not lost.
    """
    try:
        perf_df = pd.read_csv(csv_path)
    except Exception as err:
        # Best-effort diagnostics, kept identical to the previous output.
        print("Dataset not found for:")
        print(f" • Machine: {machine}")
        print(f" • Hardware Type: {hardware_type}")
        # NOTE(review): `backends` is a list, so its repr (brackets and
        # quotes) ends up inside this URL - confirm the intended file name.
        url = f"https://huggingface.co/datasets/nyunai/edge-llm-leaderboard/blob/main/perf-df-{hardware_type}-{machine}-{backends}.csv"
        print(f" • URL: {url}")
        raise ValueError(
            f"No datasets found for machine {machine}, check your hardware.yml config file or your datatset on huggingface"
        ) from err

    return perf_df
def processed_llm_perf_df(llm_perf_df):
    """Round the metric columns and order the rows for display.

    The input frame is not modified; a rounded, sorted copy is returned.
    Rows are ordered by SORTING_COLUMNS using the directions given in
    SORTING_ASCENDING.
    """
    # Number of decimal places kept per column.
    decimals_by_column = {
        "Prefill (tokens/s)": 3,
        "Decode (tokens/s)": 3,
        "Model Size (GB)": 3,
        "#Params (B)": 3,
        "MMLU Accuracy": 1,
    }
    rounded = llm_perf_df.round(decimals_by_column)
    return rounded.sort_values(by=SORTING_COLUMNS, ascending=SORTING_ASCENDING)
def get_llm_perf_df(
    machine: str, backends: List[str], hardware_type: str
):
    """Return the processed leaderboard table for ``machine``, using a CSV cache.

    If ``{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv`` exists it is
    read and returned as-is. Otherwise the raw data is loaded with
    get_raw_llm_perf_df, passed through processed_llm_perf_df, written to
    that file and returned.

    NOTE(review): the cache file name depends only on ``machine``, so a cached
    file is reused even when ``backends`` or ``hardware_type`` differ -
    confirm this is intended.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(DATASET_DIRECTORY, exist_ok=True)
    cache_path = f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"

    if os.path.exists(cache_path):
        return pd.read_csv(cache_path)

    print(f"Dataset machine {machine} not found, downloading...")
    llm_perf_df = processed_llm_perf_df(
        get_raw_llm_perf_df(machine, backends, hardware_type)
    )
    llm_perf_df.to_csv(cache_path, index=False)
    return llm_perf_df
|