Julien Simon committed on
Commit
754d71d
1 Parent(s): 2056bdb

Add/update Graviton4 with SuperNova Lite

Browse files
Files changed (2) hide show
  1. results.py +7 -0
  2. results_llama_supernova_lite.py +42 -1
results.py CHANGED
@@ -158,6 +158,13 @@ instance_type_mappings = {
158
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
159
  "price": 2.32,
160
  },
 
 
 
 
 
 
 
161
  "c8g.8xlarge": {
162
  "cloud": "AWS",
163
  "gpu": "None - Graviton4, 32 vCPUs",
 
158
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
159
  "price": 2.32,
160
  },
161
+ "c8g.4xlarge": {
162
+ "cloud": "AWS",
163
+ "gpu": "None - Graviton4, 16 vCPUs",
164
+ "gpuRAM": "-",
165
+ "url": "https://instances.vantage.sh/aws/ec2/c8g.4xlarge",
166
+ "price": 0.6381,
167
+ },
168
  "c8g.8xlarge": {
169
  "cloud": "AWS",
170
  "gpu": "None - Graviton4, 32 vCPUs",
results_llama_supernova_lite.py CHANGED
@@ -4,6 +4,39 @@ results_llama_supernova_lite = {
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  {
8
  "instanceType": "c7g.8xlarge",
9
  "quantization": "Q4_0_8_8",
@@ -20,12 +53,20 @@ results_llama_supernova_lite = {
20
  "tokensPerSecond": "45.5",
21
  "notes": "",
22
  },
 
 
 
 
 
 
 
 
23
  {
24
  "instanceType": "r8g.4xlarge",
25
  "quantization": "Q4_0_4_8",
26
  "container": "llama.cpp 9/11/24",
27
  "status": "OK",
28
- "tokensPerSecond": "49",
29
  "notes": "with Flash Attention",
30
  },
31
  {
 
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
7
+ {
8
+ "instanceType": "c7i.4xlarge",
9
+ "configurations": [
10
+ {
11
+ "quantization": "Q6_K",
12
+ "container": "llama.cpp 10/18/24",
13
+ "status": "OK",
14
+ "tokensPerSecond": "xxx",
15
+ "notes": "AMX enabled, Flash Attention enabled",
16
+ },
17
+ {
18
+ "quantization": "Q5_K",
19
+ "container": "llama.cpp 10/18/24",
20
+ "status": "OK",
21
+ "tokensPerSecond": "xxx",
22
+ "notes": "AMX enabled, Flash Attention enabled",
23
+ },
24
+ {
25
+ "quantization": "Q4_K",
26
+ "container": "llama.cpp 10/18/24",
27
+ "status": "OK",
28
+ "tokensPerSecond": "xxx",
29
+ "notes": "AMX enabled, Flash Attention enabled",
30
+ },
31
+ {
32
+ "quantization": "IQ4_XS",
33
+ "container": "llama.cpp 10/18/24",
34
+ "status": "OK",
35
+ "tokensPerSecond": "xxx",
36
+ "notes": "AMX enabled, Flash Attention enabled",
37
+ },
38
+ ],
39
+ },
40
  {
41
  "instanceType": "c7g.8xlarge",
42
  "quantization": "Q4_0_8_8",
 
53
  "tokensPerSecond": "45.5",
54
  "notes": "",
55
  },
56
+ {
57
+ "instanceType": "c8g.4xlarge",
58
+ "quantization": "Q4_0_4_8",
59
+ "container": "llama.cpp 11/05/24",
60
+ "status": "OK",
61
+ "tokensPerSecond": "34",
62
+ "notes": "with Flash Attention",
63
+ },
64
  {
65
  "instanceType": "r8g.4xlarge",
66
  "quantization": "Q4_0_4_8",
67
  "container": "llama.cpp 9/11/24",
68
  "status": "OK",
69
+ "tokensPerSecond": "40",
70
  "notes": "with Flash Attention",
71
  },
72
  {