"""
Configuration for DevOps-Engineer-SLM: A Role-Based SLM for DevOps Engineer.
~1B params, LLaMA-style architecture with RoPE — supports up to 5M token context.
"""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
|
|
|
|
| @dataclass |
| class SLMConfig: |
| """All hyperparameters and paths in one place.""" |
|
|
| |
| project_dir: Path = Path(__file__).resolve().parent |
| data_dir: Path = field(default=None) |
| tokenizer_dir: Path = field(default=None) |
| checkpoint_dir: Path = field(default=None) |
|
|
| |
| domain_name: str = "DevOps Engineer" |
| domain_slug: str = "devops_engineer" |
| tokenizer_filename: str = "devops_engineer_tokenizer.json" |
|
|
| |
| vocab_size: int = 32_768 |
| min_frequency: int = 2 |
| special_tokens: list = field( |
| default_factory=lambda: [ |
| "<pad>", "<unk>", "<bos>", "<eos>", |
| "<|system|>", "<|user|>", "<|assistant|>", |
| ] |
| ) |
|
|
| |
| n_layer: int = 32 |
| n_head: int = 20 |
| n_embd: int = 1600 |
| block_size: int = 1_000_000 |
| dropout: float = 0.05 |
| bias: bool = False |
| ffn_multiplier: float = 2.667 |
|
|
| |
| max_position_embeddings: int = 5_000_000 |
| rope_theta: float = 5_000_000.0 |
|
|
| |
| sliding_window: Optional[int] = None |
|
|
| |
| gradient_checkpointing: bool = True |
|
|
| |
| batch_size: int = 1 |
| gradient_accumulation_steps: int = 16 |
| learning_rate: float = 2e-4 |
| weight_decay: float = 0.1 |
| max_epochs: int = 3 |
| dataset_stride: int = 512 |
| warmup_steps: int = 100 |
| grad_clip: float = 1.0 |
| eval_interval: int = 50 |
| eval_samples: int = 10 |
| log_interval: int = 10 |
| device: str = "auto" |
|
|
| |
| max_new_tokens: int = 5_000_000 |
| temperature: float = 0.8 |
| top_k: int = 50 |
| top_p: float = 0.9 |
|
|
| |
| hf_repo_name: str = "devops-engineer-slm-5m" |
| hf_model_card_tags: list = field(default_factory=lambda: ['devops', 'cicd', 'docker', 'kubernetes', 'infrastructure', 'slm', 'llama-style', 'rope', '5m-context', 'from-scratch', '1b-params']) |
|
|
| def __post_init__(self): |
| if self.data_dir is None: |
| self.data_dir = self.project_dir / "data" |
| if self.tokenizer_dir is None: |
| self.tokenizer_dir = self.project_dir / "tokenizer" |
| if self.checkpoint_dir is None: |
| self.checkpoint_dir = self.project_dir / "checkpoints" |
|
|
| self.data_dir.mkdir(parents=True, exist_ok=True) |
| self.tokenizer_dir.mkdir(parents=True, exist_ok=True) |
| self.checkpoint_dir.mkdir(parents=True, exist_ok=True) |
|
|
| if self.device == "auto": |
| import torch |
| if torch.cuda.is_available(): |
| self.device = "cuda" |
| elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): |
| self.device = "mps" |
| else: |
| self.device = "cpu" |
|
|
|
|
# Module-level singleton. NOTE: instantiating SLMConfig has import-time side
# effects — __post_init__ creates the data/tokenizer/checkpoint directories
# and, with device="auto", imports torch to detect cuda/mps/cpu.
cfg = SLMConfig()
|
|