Spaces:
Running
Running
Julien Simon
committed on
Commit
•
10d93f3
1
Parent(s):
5716310
Update on SuperNova and SuperNova Lite
Browse files- results.py +2 -0
- results_arcee_supernova.py +32 -6
- results_llama_spark.py +1 -0
- results_llama_supernova_lite.py +9 -0
results.py
CHANGED
@@ -8,6 +8,7 @@ from results_arcee_scribe import results_arcee_scribe
|
|
8 |
from results_arcee_spark import results_arcee_spark
|
9 |
from results_arcee_supernova import results_arcee_supernova
|
10 |
from results_llama_spark import results_llama_spark
|
|
|
11 |
|
12 |
instance_type_mappings = {
|
13 |
"g5.xlarge": {
|
@@ -149,5 +150,6 @@ results = {
|
|
149 |
results_arcee_spark,
|
150 |
results_arcee_lite,
|
151 |
results_arcee_scribe,
|
|
|
152 |
]
|
153 |
}
|
|
|
8 |
from results_arcee_spark import results_arcee_spark
|
9 |
from results_arcee_supernova import results_arcee_supernova
|
10 |
from results_llama_spark import results_llama_spark
|
11 |
+
from results_llama_supernova_lite import results_llama_supernova_lite
|
12 |
|
13 |
instance_type_mappings = {
|
14 |
"g5.xlarge": {
|
|
|
150 |
results_arcee_spark,
|
151 |
results_arcee_lite,
|
152 |
results_arcee_scribe,
|
153 |
+
results_llama_supernova_lite,
|
154 |
]
|
155 |
}
|
results_arcee_supernova.py
CHANGED
@@ -33,18 +33,44 @@ results_arcee_supernova = {
|
|
33 |
},
|
34 |
{
|
35 |
"instanceType": "inf2.48xlarge",
|
36 |
-
"
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
},
|
42 |
{
|
43 |
"instanceType": "p4d.24xlarge",
|
44 |
"quantization": "none",
|
45 |
"container": "TGI 2.2.0",
|
46 |
"status": "OK",
|
47 |
-
"tokensPerSecond": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
"notes": "",
|
49 |
},
|
50 |
],
|
|
|
33 |
},
|
34 |
{
|
35 |
"instanceType": "inf2.48xlarge",
|
36 |
+
"configurations": [
|
37 |
+
{
|
38 |
+
"quantization": "none",
|
39 |
+
"container": "transformers-neuronx",
|
40 |
+
"status": "OK",
|
41 |
+
"tokensPerSecond": "28",
|
42 |
+
"notes": "bs=4,seqlen=4096",
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"quantization": "none",
|
46 |
+
"container": "transformers-neuronx",
|
47 |
+
"status": "OK",
|
48 |
+
"tokensPerSecond": "24",
|
49 |
+
"notes": "bs=2,seqlen=8192",
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"quantization": "none",
|
53 |
+
"container": "transformers-neuronx",
|
54 |
+
"status": "?",
|
55 |
+
"tokensPerSecond": "KO",
|
56 |
+
"notes": "OOM bs=2,seqlen=16384",
|
57 |
+
},
|
58 |
+
],
|
59 |
},
|
60 |
{
|
61 |
"instanceType": "p4d.24xlarge",
|
62 |
"quantization": "none",
|
63 |
"container": "TGI 2.2.0",
|
64 |
"status": "OK",
|
65 |
+
"tokensPerSecond": "30",
|
66 |
+
"notes": "",
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"instanceType": "p5.48xlarge",
|
70 |
+
"quantization": "none",
|
71 |
+
"container": "TGI 2.2.0",
|
72 |
+
"status": "?",
|
73 |
+
"tokensPerSecond": "?",
|
74 |
"notes": "",
|
75 |
},
|
76 |
],
|
results_llama_spark.py
CHANGED
@@ -108,6 +108,7 @@ results_llama_spark = {
|
|
108 |
{
|
109 |
"instanceType": "inf2.2xlarge",
|
110 |
"container": "transformers-neuronx 0.11.351",
|
|
|
111 |
"status": "OK",
|
112 |
"tokensPerSecond": "24",
|
113 |
"notes": "Neuron SDK 2.19.1",
|
|
|
108 |
{
|
109 |
"instanceType": "inf2.2xlarge",
|
110 |
"container": "transformers-neuronx 0.11.351",
|
111 |
+
"quantization": "none",
|
112 |
"status": "OK",
|
113 |
"tokensPerSecond": "24",
|
114 |
"notes": "Neuron SDK 2.19.1",
|
results_llama_supernova_lite.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module containing performance results for the Llama-3-Supernova-Lite model."""
|
2 |
+
|
3 |
+
results_llama_supernova_lite = {
|
4 |
+
"name": "Llama-3.1-SuperNova-Lite",
|
5 |
+
"modelType": "Llama 3.1 8B",
|
6 |
+
"configurations": [
|
7 |
+
{},
|
8 |
+
],
|
9 |
+
}
|