Spaces:
Running
Running
File size: 5,258 Bytes
0c0f086 b194192 4cec6db 2ac5435 4cec6db 0c0f086 ce1f985 0c0f086 58285ac 3a66d4b 58285ac bf572e3 430197f bf572e3 430197f bf572e3 430197f bf572e3 e850022 430197f bf572e3 ea42ef4 534766c ea42ef4 bf572e3 fe190a7 10d93f3 430197f 10d93f3 430197f 10d93f3 430197f 10d93f3 430197f 10d93f3 430197f 10d93f3 fe190a7 ea42ef4 0c0f086 8639c9c 10d93f3 8639c9c 2b18bbe 8639c9c 0c0f086 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
"""Module containing performance results for the Arcee-SuperNova model."""
# Benchmark records for one model, keyed by instance type.
#
# Shape notes for readers and consumers of this mapping:
#   * "configurations" holds two kinds of entries:
#       - flat entries, which carry the measurement fields directly next to
#         "instanceType";
#       - grouped entries, which carry "instanceType" plus their own nested
#         "configurations" list of measurements.
#   * The same instance type may appear once in each form (see p4d.24xlarge
#     and p5.48xlarge below).
#   * Every value is a string, including "tokensPerSecond"; that field also
#     contains the non-numeric placeholders "-" and "???".
results_arcee_supernova = {
    "name": "Arcee-SuperNova",
    "modelType": "Llama 3.1 70B",
    "configurations": [
        # ---- Flat entries: one measurement per record -------------------
        {
            "instanceType": "c7g.16xlarge",
            "quantization": "Q4_0_8_8",
            "container": "llama.cpp 9/19/24",
            "status": "OK",
            "tokensPerSecond": "6.5",
            "notes": "",
        },
        {
            "instanceType": "r8g.16xlarge",
            "quantization": "Q4_0_4_8",
            "container": "llama.cpp 9/19/24",
            "status": "OK",
            "tokensPerSecond": "9.8",
            "notes": "With Flash Attention",
        },
        {
            "instanceType": "g5.12xlarge",
            "quantization": "awq",
            "container": "TGI 2.2.0",
            "status": "OK",
            "tokensPerSecond": "33",
            "notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384",
        },
        {
            "instanceType": "p4d.24xlarge",
            "quantization": "awq",
            "container": "TGI 2.2.0",
            "status": "OK",
            "tokensPerSecond": "58",
            "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
        },
        {
            "instanceType": "p5.48xlarge",
            "quantization": "awq",
            "container": "TGI 2.2.0",
            "status": "OK",
            "tokensPerSecond": "73",
            "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
        },
        # ---- Grouped entries: several measurements per instance type ----
        {
            "instanceType": "inf2.24xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "KO",
                    "tokensPerSecond": "-",
                    "notes": "OOM bs=2,seqlen=4096 - SDK 2.19.1",
                },
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "KO",
                    "tokensPerSecond": "-",
                    "notes": "OOM bs=2,seqlen=2048 - SDK 2.19.1",
                },
                # Status and throughput are recorded as "???" for this run.
                {
                    "quantization": "8-bit",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "???",
                    "tokensPerSecond": "???",
                    "notes": "bs=2,seqlen=8192 - SDK 2.19.1 - OPTION_LOAD_IN_8BIT=True",
                },
            ],
        },
        {
            "instanceType": "inf2.48xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "OK",
                    "tokensPerSecond": "28",
                    "notes": "bs=4,seqlen=4096 - SDK 2.19.1",
                },
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "OK",
                    "tokensPerSecond": "24",
                    "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
                },
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "KO",
                    "tokensPerSecond": "-",
                    "notes": "OOM bs=2,seqlen=16384 - SDK 2.19.1",
                },
            ],
        },
        {
            "instanceType": "trn1.32xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "LMI 0.29+transformers-neuronx 0.11.351",
                    "status": "OK",
                    "tokensPerSecond": "32",
                    "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
                },
            ],
        },
        {
            "instanceType": "p4d.24xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "30",
                    "notes": "",
                },
                {
                    "quantization": "none",
                    "container": "LMI 0.29+vLLM 0.5.5",
                    "status": "OK",
                    "tokensPerSecond": "45",
                    "notes": "",
                },
            ],
        },
        {
            "instanceType": "p5.48xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "58",
                    "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768",
                },
                {
                    "quantization": "none",
                    "container": "LMI 0.29+vLLM 0.5.5",
                    "status": "OK",
                    "tokensPerSecond": "70",
                    "notes": "",
                },
            ],
        },
    ],
}
|