Commit: 754d71d
Author: Julien Simon
Parent(s): 2056bdb

Add/update Graviton4 with SuperNova Lite

Files changed:
- results.py (+7 -0)
- results_llama_supernova_lite.py (+42 -1)
results.py CHANGED

@@ -158,6 +158,13 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
         "price": 2.32,
     },
+    "c8g.4xlarge": {
+        "cloud": "AWS",
+        "gpu": "None - Graviton4, 16 vCPUs",
+        "gpuRAM": "-",
+        "url": "https://instances.vantage.sh/aws/ec2/c8g.4xlarge",
+        "price": 0.6381,
+    },
     "c8g.8xlarge": {
         "cloud": "AWS",
         "gpu": "None - Graviton4, 32 vCPUs",
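For reference, a minimal sketch (not part of the repository) of how an instance_type_mappings entry such as the new c8g.4xlarge one could be combined with a measured throughput figure to estimate serving cost. The cost_per_million_tokens helper is hypothetical; the 0.6381 hourly price comes from the hunk above and the 34 tokens/s value from the c8g.4xlarge result added to results_llama_supernova_lite.py in this commit.

    # Hypothetical helper (not in the repo): estimate USD per 1M generated tokens
    # from the on-demand hourly price in instance_type_mappings and a measured
    # tokens-per-second figure from the results files.
    instance_type_mappings = {
        "c8g.4xlarge": {
            "cloud": "AWS",
            "gpu": "None - Graviton4, 16 vCPUs",
            "gpuRAM": "-",
            "url": "https://instances.vantage.sh/aws/ec2/c8g.4xlarge",
            "price": 0.6381,  # USD per hour, on-demand
        },
    }

    def cost_per_million_tokens(instance_type: str, tokens_per_second: float) -> float:
        """Estimated USD cost to generate one million tokens on this instance."""
        hourly_price = instance_type_mappings[instance_type]["price"]
        tokens_per_hour = tokens_per_second * 3600
        return hourly_price / tokens_per_hour * 1_000_000

    # Example with the c8g.4xlarge result added in this commit (34 tokens/s):
    print(f"{cost_per_million_tokens('c8g.4xlarge', 34):.2f} USD per 1M tokens")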
results_llama_supernova_lite.py CHANGED

@@ -4,6 +4,39 @@ results_llama_supernova_lite = {
     "name": "Llama-3.1-SuperNova-Lite",
     "modelType": "Llama 3.1 8B",
     "configurations": [
+        {
+            "instanceType": "c7i.4xlarge",
+            "configurations": [
+                {
+                    "quantization": "Q6_K",
+                    "container": "llama.cpp 10/18/24",
+                    "status": "OK",
+                    "tokensPerSecond": "xxx",
+                    "notes": "AMX enabled, Flash Attention enabled",
+                },
+                {
+                    "quantization": "Q5_K",
+                    "container": "llama.cpp 10/18/24",
+                    "status": "OK",
+                    "tokensPerSecond": "xxx",
+                    "notes": "AMX enabled, Flash Attention enabled",
+                },
+                {
+                    "quantization": "Q4_K",
+                    "container": "llama.cpp 10/18/24",
+                    "status": "OK",
+                    "tokensPerSecond": "xxx",
+                    "notes": "AMX enabled, Flash Attention enabled",
+                },
+                {
+                    "quantization": "IQ4_XS",
+                    "container": "llama.cpp 10/18/24",
+                    "status": "OK",
+                    "tokensPerSecond": "xxx",
+                    "notes": "AMX enabled, Flash Attention enabled",
+                },
+            ],
+        },
         {
             "instanceType": "c7g.8xlarge",
             "quantization": "Q4_0_8_8",
@@ -20,12 +53,20 @@ results_llama_supernova_lite = {
             "tokensPerSecond": "45.5",
             "notes": "",
         },
+        {
+            "instanceType": "c8g.4xlarge",
+            "quantization": "Q4_0_4_8",
+            "container": "llama.cpp 11/05/24",
+            "status": "OK",
+            "tokensPerSecond": "34",
+            "notes": "with Flash Attention",
+        },
         {
             "instanceType": "r8g.4xlarge",
             "quantization": "Q4_0_4_8",
             "container": "llama.cpp 9/11/24",
             "status": "OK",
-            "tokensPerSecond": "
+            "tokensPerSecond": "40",
             "notes": "with Flash Attention",
         },
         {
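After this commit the configurations list mixes two shapes: flat entries that carry instanceType and quantization side by side (c7g.8xlarge, c8g.4xlarge, r8g.4xlarge) and the new c7i.4xlarge entry, which nests its quantizations in an inner configurations list. Below is a minimal sketch of a reader that handles both shapes; the flatten helper is hypothetical and the dict only reproduces a subset of the entries shown in the hunks above.

    # Hypothetical helper (not part of the repo): flatten the mixed-shape
    # configurations list into one row per (instance type, quantization) pair.
    results_llama_supernova_lite = {
        "name": "Llama-3.1-SuperNova-Lite",
        "modelType": "Llama 3.1 8B",
        "configurations": [
            {
                # Nested form added in this commit: quantizations live in an
                # inner "configurations" list under the instance type.
                "instanceType": "c7i.4xlarge",
                "configurations": [
                    {"quantization": "Q6_K", "container": "llama.cpp 10/18/24",
                     "status": "OK", "tokensPerSecond": "xxx",
                     "notes": "AMX enabled, Flash Attention enabled"},
                ],
            },
            {
                # Flat form used by the other entries.
                "instanceType": "c8g.4xlarge",
                "quantization": "Q4_0_4_8",
                "container": "llama.cpp 11/05/24",
                "status": "OK",
                "tokensPerSecond": "34",
                "notes": "with Flash Attention",
            },
        ],
    }

    def flatten(results: dict) -> list[dict]:
        """Return one flat row per (instance type, quantization) pair."""
        rows = []
        for entry in results["configurations"]:
            if "configurations" in entry:
                # Nested form: copy the instance type into each inner result.
                for sub in entry["configurations"]:
                    rows.append({"instanceType": entry["instanceType"], **sub})
            else:
                # Flat form: the entry is already a complete row.
                rows.append(entry)
        return rows

    for row in flatten(results_llama_supernova_lite):
        print(row["instanceType"], row["quantization"], row["tokensPerSecond"])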