Julien Simon committed on
Commit 10d93f3
1 Parent(s): 5716310

Update on SuperNova and SuperNova Lite

results.py CHANGED
@@ -8,6 +8,7 @@ from results_arcee_scribe import results_arcee_scribe
 from results_arcee_spark import results_arcee_spark
 from results_arcee_supernova import results_arcee_supernova
 from results_llama_spark import results_llama_spark
+from results_llama_supernova_lite import results_llama_supernova_lite
 
 instance_type_mappings = {
     "g5.xlarge": {
@@ -149,5 +150,6 @@ results = {
         results_arcee_spark,
         results_arcee_lite,
         results_arcee_scribe,
+        results_llama_supernova_lite,
     ]
 }
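
For readers skimming the diff: each results_*.py module exports one dict that results.py collects into a single list. A minimal sanity-check sketch is shown below; the required keys are taken from results_llama_supernova_lite.py in this same commit, while the helper name and the idea of validating the collected list are assumptions, not code from this repository.

REQUIRED_KEYS = {"name", "modelType", "configurations"}

def check_model_results(model_results: list[dict]) -> None:
    # Raise early if a newly added module (e.g. results_llama_supernova_lite)
    # does not expose the keys the aggregated results expect.
    for model in model_results:
        missing = REQUIRED_KEYS - model.keys()
        if missing:
            raise ValueError(f"{model.get('name', '<unnamed>')} is missing: {sorted(missing)}")
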
results_arcee_supernova.py CHANGED
@@ -33,18 +33,44 @@ results_arcee_supernova = {
         },
         {
             "instanceType": "inf2.48xlarge",
-            "quantization": "none",
-            "container": "transformers-neuronx",
-            "status": "OK",
-            "tokensPerSecond": "28.4",
-            "notes": "bs=4,seqlen=4096",
+            "configurations": [
+                {
+                    "quantization": "none",
+                    "container": "transformers-neuronx",
+                    "status": "OK",
+                    "tokensPerSecond": "28",
+                    "notes": "bs=4,seqlen=4096",
+                },
+                {
+                    "quantization": "none",
+                    "container": "transformers-neuronx",
+                    "status": "OK",
+                    "tokensPerSecond": "24",
+                    "notes": "bs=2,seqlen=8192",
+                },
+                {
+                    "quantization": "none",
+                    "container": "transformers-neuronx",
+                    "status": "?",
+                    "tokensPerSecond": "KO",
+                    "notes": "OOM bs=2,seqlen=16384",
+                },
+            ],
         },
         {
             "instanceType": "p4d.24xlarge",
             "quantization": "none",
             "container": "TGI 2.2.0",
             "status": "OK",
-            "tokensPerSecond": "38",
+            "tokensPerSecond": "30",
+            "notes": "",
+        },
+        {
+            "instanceType": "p5.48xlarge",
+            "quantization": "none",
+            "container": "TGI 2.2.0",
+            "status": "?",
+            "tokensPerSecond": "?",
             "notes": "",
         },
     ],
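
With this change, the inf2.48xlarge entry nests several benchmark runs under its own "configurations" list, while the other instance entries stay flat. A consumer of these dicts would presumably need to handle both shapes; the sketch below is a minimal normalizer assuming nothing beyond the two shapes visible in this diff (the function name is made up).

def iter_runs(instance_entry: dict):
    # Yield one flat dict per benchmark run, whether the entry carries its
    # fields directly or nests them in a "configurations" list.
    if "configurations" in instance_entry:
        for run in instance_entry["configurations"]:
            yield {"instanceType": instance_entry["instanceType"], **run}
    else:
        yield dict(instance_entry)
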
results_llama_spark.py CHANGED
@@ -108,6 +108,7 @@ results_llama_spark = {
         {
             "instanceType": "inf2.2xlarge",
             "container": "transformers-neuronx 0.11.351",
+            "quantization": "none",
             "status": "OK",
             "tokensPerSecond": "24",
             "notes": "Neuron SDK 2.19.1",
results_llama_supernova_lite.py ADDED
@@ -0,0 +1,9 @@
+"""Module containing performance results for the Llama-3-Supernova-Lite model."""
+
+results_llama_supernova_lite = {
+    "name": "Llama-3.1-SuperNova-Lite",
+    "modelType": "Llama 3.1 8B",
+    "configurations": [
+        {},
+    ],
+}
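
The "configurations" list in the new file is only an empty placeholder. Judging by the other result files touched in this commit, a filled-in entry would carry the fields sketched below; the values are deliberate placeholders and no benchmark numbers are implied.

example_supernova_lite_entry = {
    # Hypothetical shape of a future entry, mirroring results_arcee_supernova.py.
    "instanceType": "?",
    "quantization": "?",
    "container": "?",
    "status": "?",
    "tokensPerSecond": "?",
    "notes": "",
}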