dacorvo HF staff committed on
Commit
594abb2
1 Parent(s): f99a301

Add most popular llama variants

Browse files
inference-cache-config/llama-variants.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "defog/sqlcoder-7b-2": [
3
+ {
4
+ "batch_size": 1,
5
+ "sequence_length": 4096,
6
+ "num_cores": 2,
7
+ "auto_cast_type": "fp16"
8
+ },
9
+ {
10
+ "batch_size": 4,
11
+ "sequence_length": 4096,
12
+ "num_cores": 2,
13
+ "auto_cast_type": "fp16"
14
+ }
15
+ ],
16
+ "m-a-p/OpenCodeInterpreter-DS-6.7B": [
17
+ {
18
+ "batch_size": 1,
19
+ "sequence_length": 4096,
20
+ "num_cores": 2,
21
+ "auto_cast_type": "fp16"
22
+ },
23
+ {
24
+ "batch_size": 4,
25
+ "sequence_length": 4096,
26
+ "num_cores": 2,
27
+ "auto_cast_type": "fp16"
28
+ }
29
+ ],
30
+ "ibm/labradorite-13b": [
31
+ {
32
+ "batch_size": 1,
33
+ "sequence_length": 4096,
34
+ "num_cores": 8,
35
+ "auto_cast_type": "fp16"
36
+ },
37
+ {
38
+ "batch_size": 4,
39
+ "sequence_length": 4096,
40
+ "num_cores": 8,
41
+ "auto_cast_type": "fp16"
42
+ },
43
+ {
44
+ "batch_size": 8,
45
+ "sequence_length": 4096,
46
+ "num_cores": 8,
47
+ "auto_cast_type": "fp16"
48
+ }
49
+ ],
50
+ "abacusai/Smaug-72B-v0.1": [
51
+ {
52
+ "batch_size": 1,
53
+ "sequence_length": 4096,
54
+ "num_cores": 24,
55
+ "auto_cast_type": "fp16"
56
+ },
57
+ {
58
+ "batch_size": 4,
59
+ "sequence_length": 4096,
60
+ "num_cores": 24,
61
+ "auto_cast_type": "fp16"
62
+ }
63
+ ],
64
+ "gorilla-llm/gorilla-openfunctions-v2": [
65
+ {
66
+ "batch_size": 1,
67
+ "sequence_length": 4096,
68
+ "num_cores": 2,
69
+ "auto_cast_type": "fp16"
70
+ },
71
+ {
72
+ "batch_size": 4,
73
+ "sequence_length": 4096,
74
+ "num_cores": 2,
75
+ "auto_cast_type": "fp16"
76
+ }
77
+ ],
78
+ "m-a-p/ChatMusician": [
79
+ {
80
+ "batch_size": 1,
81
+ "sequence_length": 4096,
82
+ "num_cores": 2,
83
+ "auto_cast_type": "fp16"
84
+ },
85
+ {
86
+ "batch_size": 4,
87
+ "sequence_length": 4096,
88
+ "num_cores": 2,
89
+ "auto_cast_type": "fp16"
90
+ }
91
+ ],
92
+ "LargeWorldModel/LWM-Text-Chat-1M": [
93
+ {
94
+ "batch_size": 1,
95
+ "sequence_length": 4096,
96
+ "num_cores": 2,
97
+ "auto_cast_type": "fp16"
98
+ },
99
+ {
100
+ "batch_size": 4,
101
+ "sequence_length": 4096,
102
+ "num_cores": 2,
103
+ "auto_cast_type": "fp16"
104
+ }
105
+ ],
106
+ "HuggingFaceTB/cosmo-1b": [
107
+ {
108
+ "batch_size": 1,
109
+ "sequence_length": 4096,
110
+ "num_cores": 2,
111
+ "auto_cast_type": "fp16"
112
+ },
113
+ {
114
+ "batch_size": 4,
115
+ "sequence_length": 4096,
116
+ "num_cores": 2,
117
+ "auto_cast_type": "fp16"
118
+ }
119
+ ],
120
+ "01-ai/Yi-34B-200K": [
121
+ {
122
+ "batch_size": 1,
123
+ "sequence_length": 4096,
124
+ "num_cores": 24,
125
+ "auto_cast_type": "fp16"
126
+ },
127
+ {
128
+ "batch_size": 4,
129
+ "sequence_length": 4096,
130
+ "num_cores": 24,
131
+ "auto_cast_type": "fp16"
132
+ }
133
+ ]
134
+ }