Julien Simon committed on
Commit
754d71d
1 Parent(s): 2056bdb

Add/update Graviton4 with SuperNova Lite

Browse files
Files changed (2) hide show
  1. results.py +7 -0
  2. results_llama_supernova_lite.py +42 -1
results.py CHANGED
@@ -158,6 +158,13 @@ instance_type_mappings = {
158
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
159
  "price": 2.32,
160
  },
 
 
 
 
 
 
 
161
  "c8g.8xlarge": {
162
  "cloud": "AWS",
163
  "gpu": "None - Graviton4, 32 vCPUs",
 
158
  "url": "https://instances.vantage.sh/aws/ec2/c7g.16xlarge",
159
  "price": 2.32,
160
  },
161
+ "c8g.4xlarge": {
162
+ "cloud": "AWS",
163
+ "gpu": "None - Graviton4, 16 vCPUs",
164
+ "gpuRAM": "-",
165
+ "url": "https://instances.vantage.sh/aws/ec2/c8g.4xlarge",
166
+ "price": 0.6381,
167
+ },
168
  "c8g.8xlarge": {
169
  "cloud": "AWS",
170
  "gpu": "None - Graviton4, 32 vCPUs",
results_llama_supernova_lite.py CHANGED
@@ -4,6 +4,39 @@ results_llama_supernova_lite = {
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  {
8
  "instanceType": "c7g.8xlarge",
9
  "quantization": "Q4_0_8_8",
@@ -20,12 +53,20 @@ results_llama_supernova_lite = {
20
  "tokensPerSecond": "45.5",
21
  "notes": "",
22
  },
 
 
 
 
 
 
 
 
23
  {
24
  "instanceType": "r8g.4xlarge",
25
  "quantization": "Q4_0_4_8",
26
  "container": "llama.cpp 9/11/24",
27
  "status": "OK",
28
- "tokensPerSecond": "49",
29
  "notes": "with Flash Attention",
30
  },
31
  {
 
4
  "name": "Llama-3.1-SuperNova-Lite",
5
  "modelType": "Llama 3.1 8B",
6
  "configurations": [
7
+ {
8
+ "instanceType": "c7i.4xlarge",
9
+ "configurations": [
10
+ {
11
+ "quantization": "Q6_K",
12
+ "container": "llama.cpp 10/18/24",
13
+ "status": "OK",
14
+ "tokensPerSecond": "xxx",
15
+ "notes": "AMX enabled, Flash Attention enabled",
16
+ },
17
+ {
18
+ "quantization": "Q5_K",
19
+ "container": "llama.cpp 10/18/24",
20
+ "status": "OK",
21
+ "tokensPerSecond": "xxx",
22
+ "notes": "AMX enabled, Flash Attention enabled",
23
+ },
24
+ {
25
+ "quantization": "Q4_K",
26
+ "container": "llama.cpp 10/18/24",
27
+ "status": "OK",
28
+ "tokensPerSecond": "xxx",
29
+ "notes": "AMX enabled, Flash Attention enabled",
30
+ },
31
+ {
32
+ "quantization": "IQ4_XS",
33
+ "container": "llama.cpp 10/18/24",
34
+ "status": "OK",
35
+ "tokensPerSecond": "xxx",
36
+ "notes": "AMX enabled, Flash Attention enabled",
37
+ },
38
+ ],
39
+ },
40
  {
41
  "instanceType": "c7g.8xlarge",
42
  "quantization": "Q4_0_8_8",
 
53
  "tokensPerSecond": "45.5",
54
  "notes": "",
55
  },
56
+ {
57
+ "instanceType": "c8g.4xlarge",
58
+ "quantization": "Q4_0_4_8",
59
+ "container": "llama.cpp 11/05/24",
60
+ "status": "OK",
61
+ "tokensPerSecond": "34",
62
+ "notes": "with Flash Attention",
63
+ },
64
  {
65
  "instanceType": "r8g.4xlarge",
66
  "quantization": "Q4_0_4_8",
67
  "container": "llama.cpp 9/11/24",
68
  "status": "OK",
69
+ "tokensPerSecond": "40",
70
  "notes": "with Flash Attention",
71
  },
72
  {