Julien Simon committed
Commit 735361b
Parent(s): 33c2dab

H100 results for SuperNova Medius and FCv1

results.py CHANGED
@@ -76,6 +76,13 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
         "price": 10.493,
     },
+    "g6e.12xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA L40S",
+        "gpuRAM": "96 GB",
+        "url": "https://instances.vantage.sh/aws/ec2/g6e.12xlarge",
+        "price": 5.2465,
+    },
     "g4dn.12xlarge": {
         "cloud": "AWS",
         "gpu": "4xNVIDIA T4",
@@ -104,6 +111,27 @@ instance_type_mappings = {
         "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
         "price": 98.32,
     },
+    "p5.48xlarge (4 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "4xNVIDIA H100",
+        "gpuRAM": "320GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 49.16,
+    },
+    "p5.48xlarge (2 GPUs)": {
+        "cloud": "AWS",
+        "gpu": "2xNVIDIA H100",
+        "gpuRAM": "160GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 24.58,
+    },
+    "p5.48xlarge (1 GPU)": {
+        "cloud": "AWS",
+        "gpu": "1xNVIDIA H100",
+        "gpuRAM": "80GB",
+        "url": "https://instances.vantage.sh/aws/ec2/p5.48xlarge",
+        "price": 12.29,
+    },
     "c6i.xlarge": {
         "cloud": "AWS",
         "gpu": "-",
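The fractional-GPU prices above appear to prorate the full on-demand rate linearly by GPU count: 98.32 × 4/8 = 49.16 down to 98.32 × 1/8 = 12.29, and likewise 10.493 × 2/4 = 5.2465 for the two-GPU g6e.12xlarge. A minimal sketch of that assumption (the helper is hypothetical, not part of results.py):

# Hypothetical helper, not part of results.py: reproduces the prorated
# prices used for the new fractional-GPU entries above.
def prorated_price(full_price: float, gpus_used: int, total_gpus: int) -> float:
    """Hourly price for a GPU subset, assuming linear proration by GPU count."""
    return round(full_price * gpus_used / total_gpus, 4)

print(prorated_price(98.32, 4, 8))   # 49.16  -> "p5.48xlarge (4 GPUs)"
print(prorated_price(98.32, 2, 8))   # 24.58  -> "p5.48xlarge (2 GPUs)"
print(prorated_price(98.32, 1, 8))   # 12.29  -> "p5.48xlarge (1 GPU)"
print(prorated_price(10.493, 2, 4))  # 5.2465 -> "g6e.12xlarge (2 GPUs)"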
results_arcee_fcv1.py CHANGED
@@ -4,14 +4,6 @@ results_arcee_fcv1 = {
     "name": "Arcee-FCV1",
     "modelType": "Qwen2.5 32B",
     "configurations": [
-        {
-            "instanceType": "r8g.4xlarge",
-            "quantization": "Q8_0",
-            "container": "llama.cpp 11/27/24",
-            "status": "OK",
-            "tokensPerSecond": "xxx",
-            "notes": "-fa",
-        },
         {
             "instanceType": "r8g.4xlarge",
             "quantization": "Q4_0_4_8",
@@ -45,7 +37,7 @@ results_arcee_fcv1 = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +55,34 @@ results_arcee_fcv1 = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "LMI 0.30+vLLM 0.6.2",
+            "container": "vLLM 0.6.4.post1",
             "status": "N/A",
-            "tokensPerSecond": "N/A",
-            "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
+            "tokensPerSecond": "117",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "88",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "58",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "38",
+            "notes": "--tensor-parallel-size 1",
         },
     ],
 }
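The notes field now records plain vLLM launch flags rather than LMI environment options. A minimal sketch of how a row like "--tensor-parallel-size 4" could be reproduced with vLLM's offline Python API; the model ID, prompt, and sampling settings are placeholders, not part of the benchmark:

# Sketch only, assuming vLLM 0.6.4.post1 as listed in the results.
from vllm import LLM, SamplingParams

llm = LLM(
    model="arcee-ai/placeholder-model-id",  # placeholder; real weights location not given here
    tensor_parallel_size=4,                 # matches the "--tensor-parallel-size 4" note
)
params = SamplingParams(max_tokens=256, temperature=0.8)
outputs = llm.generate(["Benchmark prompt goes here."], params)
print(outputs[0].outputs[0].text)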
results_arcee_supernova_medius.py CHANGED
@@ -45,7 +45,7 @@ results_arcee_supernova_medius = {
             "notes": "--tensor-parallel-size 4 --max-model-len 16384",
         },
         {
-            "instanceType": "g6e.12xlarge",
+            "instanceType": "g6e.12xlarge (2 GPUs)",
             "quantization": "None",
             "container": "vLLM 0.6.4.post1",
             "status": "OK",
@@ -63,10 +63,34 @@ results_arcee_supernova_medius = {
         {
             "instanceType": "p5.48xlarge",
             "quantization": "None",
-            "container": "LMI 0.30+vLLM 0.6.2",
-            "status": "N/A",
-            "tokensPerSecond": "N/A",
-            "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",',
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "162",
+            "notes": "--tensor-parallel-size 8",
+        },
+        {
+            "instanceType": "p5.48xlarge (4 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "138",
+            "notes": "--tensor-parallel-size 4",
+        },
+        {
+            "instanceType": "p5.48xlarge (2 GPUs)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "102",
+            "notes": "--tensor-parallel-size 2",
+        },
+        {
+            "instanceType": "p5.48xlarge (1 GPU)",
+            "quantization": "None",
+            "container": "vLLM 0.6.4.post1",
+            "status": "OK",
+            "tokensPerSecond": "73",
+            "notes": "--tensor-parallel-size 1",
         },
     ],
 }
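With per-subset prices in results.py and per-subset throughput recorded here, cost per generated token follows directly. An illustrative calculation, not part of the repo (assuming single-stream decoding throughput, values copied from the rows above):

# Illustrative only: combine hourly prices from results.py with the
# SuperNova Medius throughput above to estimate dollars per 1M tokens.
prices = {
    "p5.48xlarge": 98.32,
    "p5.48xlarge (4 GPUs)": 49.16,
    "p5.48xlarge (2 GPUs)": 24.58,
    "p5.48xlarge (1 GPU)": 12.29,
}
tokens_per_second = {
    "p5.48xlarge": 162,
    "p5.48xlarge (4 GPUs)": 138,
    "p5.48xlarge (2 GPUs)": 102,
    "p5.48xlarge (1 GPU)": 73,
}
for instance, tps in tokens_per_second.items():
    dollars_per_million = prices[instance] / (tps * 3600) * 1_000_000
    print(f"{instance}: ${dollars_per_million:.2f} per 1M tokens")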