Commit 735361b by Julien Simon
Parent(s): 33c2dab

    H100 results for SuperNova Medius and FCv1

Files changed:
- results.py                          +28  -0
- results_arcee_fcv1.py               +28 -12
- results_arcee_supernova_medius.py   +29  -5
results.py
CHANGED

@@ -76,6 +76,13 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
         "price": 10.493,
     },
+    "g6e.12xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA L40S",
+        "gpuRAM": "96 GB",
+        "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
+        "price": 5.2465,
+    },
     "g4dn.12xlarge": {
         "cloud": "AWS",
         "gpu": "4xNVIDIA T4",
@@ -104,6 +111,27 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
         "price": 98.32,
     },
+    "p5.48xlarge (4 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "4xNVIDIA H100",
+        "gpuRAM": "320GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 49.16,
+    },
+    "p5.48xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA H100",
+        "gpuRAM": "160GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 24.58,
+    },
+    "p5.48xlarge (1 GPU)": {
+        "cloud": "AWS",
+        "gpu": "1xNVIDIA H100",
+        "gpuRAM": "80GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 12.29,
+    },
     "c6i.xlarge": {
         "cloud": "AWS",
         "gpu": "-",
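The new entries price a subset of an instance's GPUs by prorating the full on-demand rate by the fraction of GPUs used (for example, 98.32 * 2/8 = 24.58 for "p5.48xlarge (2 GPUs)", and 10.493 * 2/4 = 5.2465 for "g6e.12xlarge (2 GPUs)"). A minimal sketch of that calculation, not part of the commit; the helper name is made up:

# Sketch only: prorate a full-instance hourly price to a GPU subset.
# The results reproduce the prices added in this commit.
def prorated_price(full_price: float, gpus_used: int, gpus_total: int) -> float:
    """Hourly cost attributed to a subset of the instance's GPUs."""
    return round(full_price * gpus_used / gpus_total, 4)

assert prorated_price(10.493, 2, 4) == 5.2465   # g6e.12xlarge (2 GPUs)
assert prorated_price(98.32, 4, 8) == 49.16     # p5.48xlarge (4 GPUs)
assert prorated_price(98.32, 2, 8) == 24.58     # p5.48xlarge (2 GPUs)
assert prorated_price(98.32, 1, 8) == 12.29     # p5.48xlarge (1 GPU)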
results_arcee_fcv1.py
CHANGED

@@ -4,14 +4,6 @@ results_arcee_fcv1 = {
     "name": "Arcee-FCV1",
     "modelType": "Qwen2.5 32B",
     "configurations": [
-        {
-            "instanceType": "r8g.4xlarge",
-            "quantization": "Q8_0",
-            "container": "llama.cpp 11/27/24",
-            "status": "OK",
-            "tokensPerSecond": "xxx",
-            "notes": "-fa",
-        },
         {
             "instanceType": "r8g.4xlarge",
             "quantization": "Q4_0_4_8",
@@ -45,7 +37,7 @@ results_arcee_fcv1 = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +55,34 @@ results_arcee_fcv1 = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "
+            "container": "vLLM 0.6.4.post1",
             "status": "N/A",
-            "tokensPerSecond": "
-            "notes":
+            "tokensPerSecond": "117",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "88",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "58",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "38",
+            "notes": "--tensor-parallel-size 1",
+        },
         },
     ],
 }
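Read together, the new p5.48xlarge rows trace how FCV1 throughput scales with tensor parallelism (38, 58, 88, 117 tokens per second on 1, 2, 4, and 8 H100s). An illustrative snippet, not part of the commit, that derives speedup and parallel efficiency from those figures:

# Illustrative only: tokensPerSecond values for Arcee-FCV1 on p5.48xlarge,
# copied from the entries added above, keyed by tensor-parallel size.
fcv1_h100_tps = {1: 38, 2: 58, 4: 88, 8: 117}

baseline = fcv1_h100_tps[1]
for gpus, tps in fcv1_h100_tps.items():
    speedup = tps / baseline
    efficiency = speedup / gpus
    print(f"{gpus} GPU(s): {tps} tok/s, {speedup:.2f}x speedup, {efficiency:.0%} efficiency")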
results_arcee_supernova_medius.py
CHANGED

@@ -45,7 +45,7 @@ results_arcee_supernova_medius = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +63,34 @@ results_arcee_supernova_medius = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "
-            "status": "
-            "tokensPerSecond": "
-            "notes":
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "162",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "138",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "102",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "73",
+            "notes": "--tensor-parallel-size 1",
+        },
         },
     ],
 }
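Since results.py now carries an hourly price for every GPU subset and each configuration records a tokensPerSecond figure, the two can be combined into a rough cost per million generated tokens. A minimal sketch under that assumption (the commit does not show how the Space actually aggregates these numbers); cost_per_million_tokens is a hypothetical helper:

# Hypothetical helper, not part of this commit: estimate generation cost from
# an hourly instance price and a measured tokens-per-second figure.
def cost_per_million_tokens(price_per_hour: float, tokens_per_second: float) -> float:
    tokens_per_hour = tokens_per_second * 3600
    return price_per_hour / tokens_per_hour * 1_000_000

# SuperNova Medius on one H100 slice of p5.48xlarge, using values from this commit:
# $12.29/hour and 73 tokens/second.
print(f"${cost_per_million_tokens(12.29, 73):.2f} per million tokens")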