Commit 735361b by Julien Simon
Parent(s): 33c2dab

    H100 results for SuperNova Medius and FCv1

Files changed:
- results.py                          +28  -0
- results_arcee_fcv1.py               +28 -12
- results_arcee_supernova_medius.py   +29  -5
results.py
CHANGED

@@ -76,6 +76,13 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
         "price": 10.493,
     },
+    "g6e.12xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA L40S",
+        "gpuRAM": "96 GB",
+        "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
+        "price": 5.2465,
+    },
     "g4dn.12xlarge": {
         "cloud": "AWS",
         "gpu": "4xNVIDIA T4",
@@ -104,6 +111,27 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
         "price": 98.32,
     },
+    "p5.48xlarge (4 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "4xNVIDIA H100",
+        "gpuRAM": "320GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 49.16,
+    },
+    "p5.48xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA H100",
+        "gpuRAM": "160GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 24.58,
+    },
+    "p5.48xlarge (1 GPU)": {
+        "cloud": "AWS",
+        "gpu": "1xNVIDIA H100",
+        "gpuRAM": "80GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 12.29,
+    },
     "c6i.xlarge": {
         "cloud": "AWS",
         "gpu": "-",
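The new entries price a subset of an instance's GPUs by prorating the full on-demand rate by the fraction of GPUs used (for example, 98.32 * 2/8 = 24.58 for "p5.48xlarge (2 GPUs)", and 10.493 * 2/4 = 5.2465 for "g6e.12xlarge (2 GPUs)"). A minimal sketch of that calculation, not part of the commit; the helper name is made up:

# Sketch only: prorate a full-instance hourly price to a GPU subset.
# The results reproduce the prices added in this commit.
def prorated_price(full_price: float, gpus_used: int, gpus_total: int) -> float:
    """Hourly cost attributed to a subset of the instance's GPUs."""
    return round(full_price * gpus_used / gpus_total, 4)

assert prorated_price(10.493, 2, 4) == 5.2465   # g6e.12xlarge (2 GPUs)
assert prorated_price(98.32, 4, 8) == 49.16     # p5.48xlarge (4 GPUs)
assert prorated_price(98.32, 2, 8) == 24.58     # p5.48xlarge (2 GPUs)
assert prorated_price(98.32, 1, 8) == 12.29     # p5.48xlarge (1 GPU)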
results_arcee_fcv1.py
CHANGED

@@ -4,14 +4,6 @@ results_arcee_fcv1 = {
     "name": "Arcee-FCV1",
     "modelType": "Qwen2.5 32B",
     "configurations": [
-        {
-            "instanceType": "r8g.4xlarge",
-            "quantization": "Q8_0",
-            "container": "llama.cpp 11/27/24",
-            "status": "OK",
-            "tokensPerSecond": "xxx",
-            "notes": "-fa",
-        },
         {
             "instanceType": "r8g.4xlarge",
             "quantization": "Q4_0_4_8",
@@ -45,7 +37,7 @@ results_arcee_fcv1 = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +55,34 @@ results_arcee_fcv1 = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "
+            "container": "vLLM 0.6.4.post1",
             "status": "N/A",
-            "tokensPerSecond": "
-            "notes":
+            "tokensPerSecond": "117",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "88",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "58",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "38",
+            "notes": "--tensor-parallel-size 1",
+        },
         },
     ],
 }
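Read together, the new p5.48xlarge rows trace how FCV1 throughput scales with tensor parallelism (38, 58, 88, 117 tokens per second on 1, 2, 4, and 8 H100s). An illustrative snippet, not part of the commit, that derives speedup and parallel efficiency from those figures:

# Illustrative only: tokensPerSecond values for Arcee-FCV1 on p5.48xlarge,
# copied from the entries added above, keyed by tensor-parallel size.
fcv1_h100_tps = {1: 38, 2: 58, 4: 88, 8: 117}

baseline = fcv1_h100_tps[1]
for gpus, tps in fcv1_h100_tps.items():
    speedup = tps / baseline
    efficiency = speedup / gpus
    print(f"{gpus} GPU(s): {tps} tok/s, {speedup:.2f}x speedup, {efficiency:.0%} efficiency")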
results_arcee_supernova_medius.py
CHANGED

@@ -45,7 +45,7 @@ results_arcee_supernova_medius = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +63,34 @@ results_arcee_supernova_medius = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "
-            "status": "
-            "tokensPerSecond": "
-            "notes":
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "162",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "138",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "102",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "73",
+            "notes": "--tensor-parallel-size 1",
+        },
         },
     ],
 }
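Since results.py now carries an hourly price for every GPU subset and each configuration records a tokensPerSecond figure, the two can be combined into a rough cost per million generated tokens. A minimal sketch under that assumption (the commit does not show how the Space actually aggregates these numbers); cost_per_million_tokens is a hypothetical helper:

# Hypothetical helper, not part of this commit: estimate generation cost from
# an hourly instance price and a measured tokens-per-second figure.
def cost_per_million_tokens(price_per_hour: float, tokens_per_second: float) -> float:
    tokens_per_hour = tokens_per_second * 3600
    return price_per_hour / tokens_per_hour * 1_000_000

# SuperNova Medius on one H100 slice of p5.48xlarge, using values from this commit:
# $12.29/hour and 73 tokens/second.
print(f"${cost_per_million_tokens(12.29, 73):.2f} per million tokens")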