DeciCoder-1b / configuration_decicoder.py
itay-levy's picture
Upload configuration_decicoder.py with huggingface_hub (#2)
1849e15
raw
history blame
No virus
1.84 kB
from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.utils import logging
# Module-level logger obtained through the transformers logging utilities.
logger = logging.get_logger(__name__)
# Archive map is intentionally left empty here: no pretrained config entries are registered in this module.
LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
class DeciCoderConfig(LlamaConfig):
    r"""
    Configuration class for DeciCoder.

    It extends [`LlamaConfig`] with three boolean switches that choose between naive matmul attention and scaled
    dot product attention for the different inference phases. Every other keyword argument is forwarded unchanged
    to [`LlamaConfig`]; refer to its documentation for the architecture-level options.

    Args:
        naive_attention_prefill (`bool`, *optional*, defaults to `False`):
            Selects between naive matmul and scaled dot product attention during prefill.
        naive_attention_decode_batched (`bool`, *optional*, defaults to `True`):
            Selects between naive matmul and scaled dot product attention during decode when batch_size > 1.
        naive_attention_decode_single (`bool`, *optional*, defaults to `False`):
            Selects between naive matmul and scaled dot product attention during decode when batch_size == 1.
        **kwargs:
            Remaining keyword arguments, passed straight through to `LlamaConfig.__init__`.
    """

    # The class keeps the "llama" model type rather than declaring one of its own.
    model_type = "llama"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        naive_attention_prefill: bool = False,
        naive_attention_decode_batched: bool = True,
        naive_attention_decode_single: bool = False,
        **kwargs,
    ):
        # Store the attention switches on the instance before delegating to the parent constructor,
        # which receives only the keyword arguments not consumed above.
        self.naive_attention_prefill = naive_attention_prefill
        self.naive_attention_decode_batched = naive_attention_decode_batched
        self.naive_attention_decode_single = naive_attention_decode_single
        super().__init__(**kwargs)