Create model_config.py
model_config.py  ADDED  (+25 -0)
@@ -0,0 +1,25 @@
+from dataclasses import dataclass
+from transformers import PretrainedConfig
+
+@dataclass
+class OrcaleSeekConfig:
+    model_type: str = "orcaleseek"
+    vocab_size: int = 50257
+    n_embd: int = 768
+    n_layer: int = 12
+    n_head: int = 12
+    n_inner: int = 3072
+    activation_function: str = "gelu_new"
+    resid_pdrop: float = 0.1
+    embd_pdrop: float = 0.1
+    attn_pdrop: float = 0.1
+    layer_norm_epsilon: float = 1e-5
+    initializer_range: float = 0.02
+    scale_attn_weights: bool = True
+    use_cache: bool = True
+    bos_token_id: int = 50256
+    eos_token_id: int = 50256
+    architectures = ["OrcaleSeekForCausalLM"]
+
+    def to_hf_config(self):
+        return PretrainedConfig(**self.__dict__)
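For context, a minimal usage sketch (not part of the commit; it assumes transformers is installed and that the added file is importable as model_config): instantiate the dataclass, override a couple of defaults, and convert it to a generic PretrainedConfig via to_hf_config().

from model_config import OrcaleSeekConfig  # assumption: the added file is on the import path

# Build a config, overriding a couple of the dataclass defaults.
cfg = OrcaleSeekConfig(n_layer=6, n_head=8)

# Convert to a generic Hugging Face PretrainedConfig; every dataclass field
# in cfg.__dict__ is forwarded as a keyword argument.
hf_cfg = cfg.to_hf_config()

print(hf_cfg.n_layer, hf_cfg.n_head)  # expected: 6 8

One thing to note about the design: architectures has no type annotation, so the dataclass treats it as a plain class attribute rather than a field. It therefore does not appear in self.__dict__ and is not forwarded to the PretrainedConfig by to_hf_config().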