"""Module containing performance results for the Arcee-SuperNova model.""" results_arcee_supernova = { "name": "Arcee-SuperNova", "modelType": "Llama 3.1 70B", "configurations": [ { "instanceType": "g5.12xlarge", "quantization": "awq", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "33", "notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384", }, { "instanceType": "inf2.24xlarge", "configurations": [ { "quantization": "none", "container": "transformers-neuronx", "status": "KO", "tokensPerSecond": "-", "notes": "OOM bs=2,seqlen=4096", }, { "quantization": "none", "container": "transformers-neuronx", "status": "KO", "tokensPerSecond": "-", "notes": "OOM bs=2,seqlen=2048", }, ], }, { "instanceType": "inf2.48xlarge", "configurations": [ { "quantization": "none", "container": "transformers-neuronx", "status": "OK", "tokensPerSecond": "28", "notes": "bs=4,seqlen=4096", }, { "quantization": "none", "container": "transformers-neuronx", "status": "OK", "tokensPerSecond": "24", "notes": "bs=2,seqlen=8192", }, { "quantization": "none", "container": "transformers-neuronx", "status": "?", "tokensPerSecond": "KO", "notes": "OOM bs=2,seqlen=16384", }, ], }, { "instanceType": "p4d.24xlarge", "quantization": "none", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "30", "notes": "", }, { "instanceType": "p5.48xlarge", "quantization": "none", "container": "TGI 2.2.0", "status": "?", "tokensPerSecond": "?", "notes": "", }, ], }