| | """ |
| | neural_config.py — Configuration and hyperparameters for MLX LoRA training. |
| | """ |
| |
|
from dataclasses import dataclass, field
from pathlib import Path
import json
import os
| |
|
| |
|
@dataclass
class NeuralConfig:
    """Training hyperparameters and daemon configuration.

    Paths are resolved lazily via :meth:`resolve_paths`; call
    :meth:`ensure_dirs` before writing any artifacts to disk.
    """

    # --- Daemon / server ---
    daemon_port: int = 8766
    daemon_host: str = "0.0.0.0"

    # --- Model identity ---
    model_key: str = ""  # short identifier; namespaces the adapter directory
    model_path: str = ""
    model_architecture: str = ""

    # --- LoRA adapter hyperparameters ---
    lora_rank: int = 32
    lora_alpha: float = 32.0
    lora_targets: list = field(default_factory=lambda: ["q_proj", "v_proj", "out_proj", "down_proj"])
    lora_dropout: float = 0.0
    lora_num_layers: int = -1  # -1 presumably means "all layers" — TODO confirm against the trainer

    # --- Training schedule ---
    training_backend: str = "mlx"
    learning_rate: float = 5e-4
    min_learning_rate: float = 5e-5
    cosine_period_steps: int = 5000
    warmup_fraction: float = 0.1
    steps_per_cycle: int = 1
    batch_size: int = 0  # 0 presumably means auto-sized — TODO confirm against the trainer
    epochs_per_cycle: int = 1
    train_epochs: int = 15
    early_stop_loss: float = 0.8
    early_stop_patience: int = 2
    max_seq_len: int = 512
    gradient_clip: float = 1.0
    warmup_steps: int = 10
    auto_train: bool = True
    replay_ratio: float = 0.3

    # --- Optimizer (Adam) ---
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_eps: float = 1e-8
    weight_decay: float = 0.0

    # --- Data buffers ---
    rolling_buffer_size: int = 100
    replay_buffer_size: int = 500
    min_response_tokens: int = 10

    # --- ANE tuning ---
    ane_compile_budget: int = 110
    ane_min_tensor_dim: int = 16
    ane_seq_len: int = 16

    # --- Filesystem layout ---
    base_dir: str = "~/.jarvis/fine-tune"
    adapter_dir: str = ""   # derived from base_dir + model_key when left empty
    replay_path: str = ""   # derived from base_dir when left empty
    auto_save_interval: int = 10

    # --- LM Studio integration ---
    lms_cli_path: str = ""  # auto-discovered by resolve_paths() when left empty
    lms_api_url: str = "http://localhost:1234"

    @property
    def lora_scaling(self) -> float:
        """Effective LoRA scaling factor (alpha / rank)."""
        return self.lora_alpha / self.lora_rank

    def resolve_paths(self) -> None:
        """Expand ~ in base_dir and fill in any empty derived paths.

        Idempotent: values that are already set are left untouched.
        """
        self.base_dir = str(Path(self.base_dir).expanduser())
        if not self.adapter_dir:
            key = self.model_key or "default"
            self.adapter_dir = str(Path(self.base_dir) / "adapters" / key)
        if not self.replay_path:
            self.replay_path = str(Path(self.base_dir) / "replay.jsonl")

        # Auto-discover the LM Studio CLI if its location was not configured.
        if not self.lms_cli_path:
            candidates = [
                Path.home() / ".lmstudio" / "bin" / "lms",
                Path("/usr/local/bin/lms"),
            ]
            for c in candidates:
                if c.exists():
                    self.lms_cli_path = str(c)
                    break

    def ensure_dirs(self) -> None:
        """Create required directories (resolving paths first)."""
        self.resolve_paths()
        Path(self.base_dir).mkdir(parents=True, exist_ok=True)
        Path(self.adapter_dir).mkdir(parents=True, exist_ok=True)

    def save(self, path: str = "") -> None:
        """Save config to JSON at *path* (default: <base_dir>/config.json).

        BUGFIX: paths must be resolved *before* the default path is
        computed; otherwise a fresh instance builds the default from the
        unexpanded "~/.jarvis/fine-tune" and mkdir() creates a literal
        "~" directory in the current working directory.
        """
        self.resolve_paths()
        path = path or str(Path(self.base_dir) / "config.json")
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.__dict__, f, indent=2)

    @classmethod
    def load(cls, path: str) -> "NeuralConfig":
        """Load config from a JSON file, ignoring unknown keys."""
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        cfg = cls()
        for k, v in data.items():
            if hasattr(cfg, k):  # tolerate stale keys from older config files
                setattr(cfg, k, v)
        cfg.resolve_paths()
        return cfg

    def to_dict(self) -> dict:
        """Return a plain dict (plus derived lora_scaling) for API responses."""
        self.resolve_paths()
        d = self.__dict__.copy()
        d["lora_scaling"] = self.lora_scaling
        return d

    def update_from_dict(self, data: dict) -> None:
        """Apply a whitelisted subset of keys from an API request.

        Daemon/network and filesystem-identity fields are deliberately
        excluded so a remote caller cannot redirect ports or paths.
        """
        allowed = {
            "learning_rate", "min_learning_rate", "cosine_period_steps",
            "warmup_fraction", "steps_per_cycle", "lora_rank", "lora_alpha",
            "lora_targets", "lora_num_layers", "training_backend",
            "auto_train", "replay_ratio", "gradient_clip", "warmup_steps",
            "rolling_buffer_size", "min_response_tokens", "auto_save_interval",
            "max_seq_len", "lora_dropout", "weight_decay",
            "epochs_per_cycle", "train_epochs",
            "early_stop_loss", "early_stop_patience",
        }
        for k, v in data.items():
            if k in allowed and hasattr(self, k):
                setattr(self, k, v)
| |
|