BenchmarkBot committed
Commit: 2773294
Parent: 534ff40

remove bnb quantization

Files changed (1): app.py (+6 -3)

--- a/app.py
+++ b/app.py
@@ -12,13 +12,13 @@ LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
 LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
 OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")
 
-OLD_COLUMNS = ["model", "backend.name", "backend.torch_dtype", "backend.quantization",
+OLD_COLUMNS = ["model", "backend.name", "backend.torch_dtype",
                "generate.latency(s)", "generate.throughput(tokens/s)"]
 
-NEW_COLUMNS = ["Model", "Backend 🏭", "Load Datatype", "Quantization 🗜️",
+NEW_COLUMNS = ["Model", "Backend 🏭", "Load Datatype",
                "Latency (s) ⬇️", "Throughput (tokens/s) ⬆️"]
 
-COLUMNS_DATATYPES = ["markdown", "str", "str", "str", "number", "number"]
+COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number"]
 
 SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
 
@@ -34,6 +34,9 @@ def get_benchmark_df():
     df = pd.read_csv(
         "./llm-perf-dataset/reports/cuda_1_100/inference_report.csv")
 
+    # remove quantized models
+    df = df[df["backend.quantization"].notna()]
+
     # preprocess
     df["model"] = df["model"].apply(make_clickable_model)
 
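
For reference, a minimal, self-contained sketch of how the surviving constants and the new filter plausibly fit together after this commit; the sample rows, the NaN-means-unquantized convention, and the select/rename step are illustrative assumptions, not code from app.py. One caveat worth flagging: notna() keeps rows whose backend.quantization is set, i.e. the quantized runs, so under that NaN convention the committed line does the opposite of its "# remove quantized models" comment; the sketch uses isna(), which matches the comment and the commit message.

import pandas as pd

# Display schema after this commit (copied from the diff above).
OLD_COLUMNS = ["model", "backend.name", "backend.torch_dtype",
               "generate.latency(s)", "generate.throughput(tokens/s)"]
NEW_COLUMNS = ["Model", "Backend 🏭", "Load Datatype",
               "Latency (s) ⬇️", "Throughput (tokens/s) ⬆️"]

# Hypothetical stand-in for inference_report.csv: one plain float16 run
# and one bitsandbytes-quantized run. NaN meaning "not quantized" is an
# assumption about the dataset, not something this commit states.
df = pd.DataFrame({
    "model": ["gpt2", "gpt2"],
    "backend.name": ["pytorch", "pytorch"],
    "backend.torch_dtype": ["float16", "float16"],
    "backend.quantization": [None, "bnb"],
    "generate.latency(s)": [0.80, 1.20],
    "generate.throughput(tokens/s)": [125.0, 83.0],
})

# Keep only unquantized runs: isna() drops the "bnb" row. The committed
# line uses notna(), which would keep it instead.
df = df[df["backend.quantization"].isna()]

# Project down to the reported columns and apply the display names.
df = df[OLD_COLUMNS]
df.columns = NEW_COLUMNS
print(df)

COLUMNS_DATATYPES pairs one column type with each entry of NEW_COLUMNS (presumably for a Gradio dataframe, where "markdown" lets the model cell render the clickable link produced by make_clickable_model), which is why it also shrinks from six entries to five in this commit.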