| """ |
| Export MINDI 1.0 420M to a Hugging Face-ready model folder. |
| |
| What this script does: |
| 1) Loads your full-quality checkpoint (step_3200.pt by default). |
| 2) Builds the model architecture with the exact Component 4 config. |
| 3) Saves model weights as model.safetensors. |
| 4) Copies tokenizer files. |
| 5) Writes Hugging Face config files + custom model code. |
| 6) Writes a professional model card README. |
| 7) Writes a helper upload script with exact commands. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import shutil |
| import sys |
| from pathlib import Path |
| from typing import Any, Dict |
|
|
| import torch |
| import yaml |
| from safetensors.torch import save_file |
|
|
# Project root is the parent of the directory that contains this script.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Put the root at the front of sys.path (once) so the `src.` imports below resolve
# when the script is executed directly.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
|
|
| from src.model_architecture.code_transformer import ( |
| CodeTransformerLM, |
| ModelConfig, |
| get_model_presets, |
| ) |
|
|
|
|
| |
# Special-token ids written into config.json and generation_config.json by main().
PAD_ID, UNK_ID, BOS_ID, EOS_ID = 0, 1, 2, 3
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the export script.

    Returns:
        Namespace with ``repo_id`` (required), ``checkpoint_path``,
        ``model_config_path``, ``tokenizer_dir``, ``output_dir`` and the
        boolean ``private`` flag.
    """
    cli = argparse.ArgumentParser(description="Export MINDI 1.0 420M to Hugging Face format.")
    cli.add_argument(
        "--repo_id",
        required=True,
        help="Hugging Face repo id, for example: yourname/MINDI-1.0-420M",
    )

    # Optional path options: (flag, default, help text), registered in this order.
    path_options = (
        (
            "--checkpoint_path",
            "checkpoints/component5_420m/step_3200.pt",
            "Path to full-quality checkpoint file.",
        ),
        (
            "--model_config_path",
            "configs/component4_model_config.yaml",
            "Path to model architecture YAML config.",
        ),
        (
            "--tokenizer_dir",
            "artifacts/tokenizer/code_tokenizer_v1",
            "Path to tokenizer directory containing tokenizer.json and tokenizer_config.json.",
        ),
        (
            "--output_dir",
            "hf_release/MINDI-1.0-420M",
            "Output folder for Hugging Face package.",
        ),
    )
    for flag, default_value, help_text in path_options:
        cli.add_argument(flag, default=default_value, help=help_text)

    cli.add_argument(
        "--private",
        action="store_true",
        help="If set, helper script will create a private repo instead of public.",
    )
    return cli.parse_args()
|
|
|
|
def load_yaml(path: Path) -> Dict[str, Any]:
    """Read a YAML file whose top-level node is a mapping.

    Args:
        path: Location of the YAML file.

    Returns:
        The parsed mapping.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the document's top-level value is not a dict.
    """
    if not path.exists():
        raise FileNotFoundError(f"Config not found: {path}")

    with path.open("r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)

    if isinstance(parsed, dict):
        return parsed
    raise ValueError(f"Invalid YAML format: {path}")
|
|
|
|
def build_model_config(model_cfg_path: Path) -> ModelConfig:
    """Build the ``ModelConfig`` described by a YAML file.

    The YAML may name a ``preset`` (looked up in ``get_model_presets()``) and/or
    give explicit fields under ``model``. When both are present, the ``model``
    fields override the preset's values.

    Args:
        model_cfg_path: Path to the model architecture YAML.

    Returns:
        The resulting ``ModelConfig``.

    Raises:
        ValueError: If the preset name is unknown or ``model`` is not a mapping.
    """
    cfg = load_yaml(model_cfg_path)
    preset = cfg.get("preset")

    # A bare `model:` key parses to None; treat it like a missing section
    # instead of failing later with an opaque TypeError.
    overrides = cfg.get("model") or {}
    if not isinstance(overrides, dict):
        raise ValueError(f"'model' section must be a mapping in: {model_cfg_path}")

    if not preset:
        return ModelConfig(**overrides)

    presets = get_model_presets()
    if preset not in presets:
        raise ValueError(f"Unknown model preset: {preset}")

    # Start from a copy of the preset's fields, then apply the explicit overrides.
    merged = {**vars(presets[preset]), **overrides}
    return ModelConfig(**merged)
|
|
|
|
def extract_model_state(checkpoint_path: Path) -> Dict[str, torch.Tensor]:
    """Load a checkpoint on CPU and return its model state dictionary.

    If the loaded dict has a ``"model_state"`` entry, that entry is returned;
    otherwise the loaded dict itself is returned.

    Args:
        checkpoint_path: Path to the ``.pt`` checkpoint file.

    Returns:
        The model state dictionary.

    Raises:
        FileNotFoundError: If the checkpoint file does not exist.
        ValueError: If the payload, or the selected model state, is not a dict.
    """
    if not checkpoint_path.exists():
        raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")

    # NOTE(review): torch.load unpickles the file; only use checkpoints you trust.
    loaded = torch.load(checkpoint_path, map_location="cpu")
    if not isinstance(loaded, dict):
        raise ValueError("Unsupported checkpoint format. Expected dict payload.")

    weights = loaded["model_state"] if "model_state" in loaded else loaded
    if not isinstance(weights, dict):
        raise ValueError("Checkpoint model state is not a dictionary.")
    return weights
|
|
|
|
def write_configuration_py(output_dir: Path) -> None:
    """Write ``configuration_mindi.py`` (the ``MindiConfig`` class) into ``output_dir``.

    The generated config forwards ``tie_embeddings`` to the Hugging Face
    ``tie_word_embeddings`` setting. Without that, the Hugging Face default
    (tied) would be applied even for a model exported with untied embeddings.

    Args:
        output_dir: Existing directory that receives the generated file.
    """
    content = '''"""
Hugging Face config class for MINDI 1.0 420M.
"""

from transformers import PretrainedConfig


class MindiConfig(PretrainedConfig):
    model_type = "mindi"

    def __init__(
        self,
        vocab_size=50000,
        max_seq_len=2048,
        d_model=1152,
        n_layers=23,
        n_heads=16,
        d_ff=4608,
        dropout=0.1,
        tie_embeddings=True,
        init_std=0.02,
        rms_norm_eps=1e-5,
        bos_token_id=2,
        eos_token_id=3,
        pad_token_id=0,
        **kwargs,
    ):
        # Keep the Hugging Face weight-tying switch in sync with tie_embeddings,
        # unless the caller set tie_word_embeddings explicitly.
        kwargs.setdefault("tie_word_embeddings", tie_embeddings)
        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, pad_token_id=pad_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.d_model = d_model
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.d_ff = d_ff
        self.dropout = dropout
        self.tie_embeddings = tie_embeddings
        self.init_std = init_std
        self.rms_norm_eps = rms_norm_eps
'''
    (output_dir / "configuration_mindi.py").write_text(content, encoding="utf-8")
|
|
|
|
def write_modeling_py(output_dir: Path) -> None:
    """Write ``modeling_mindi.py`` (the ``MindiForCausalLM`` class) into ``output_dir``.

    Compared with a plain ``PreTrainedModel`` subclass, the generated model:

    * also inherits ``GenerationMixin`` so ``.generate()`` keeps working on
      transformers releases where ``PreTrainedModel`` no longer provides it;
    * casts the rotary-embedded q/k back to their input dtype, because the
      cos/sin buffers are created as float32 and would otherwise promote q/k
      to float32 when the model runs in half precision.

    Args:
        output_dir: Existing directory that receives the generated file.
    """
    content = '''"""
Hugging Face model class for MINDI 1.0 420M.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import GenerationMixin, PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast

from .configuration_mindi import MindiConfig


@dataclass
class _Cfg:
    vocab_size: int
    max_seq_len: int
    d_model: int
    n_layers: int
    n_heads: int
    d_ff: int
    dropout: float
    tie_embeddings: bool
    init_std: float
    rms_norm_eps: float

    @property
    def head_dim(self) -> int:
        if self.d_model % self.n_heads != 0:
            raise ValueError("d_model must be divisible by n_heads")
        return self.d_model // self.n_heads


class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-5) -> None:
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        norm = x.pow(2).mean(dim=-1, keepdim=True)
        x = x * torch.rsqrt(norm + self.eps)
        return self.weight * x


class RotaryEmbedding(nn.Module):
    def __init__(self, head_dim: int, max_seq_len: int) -> None:
        super().__init__()
        if head_dim % 2 != 0:
            raise ValueError("head_dim must be even for rotary embeddings")
        inv_freq = 1.0 / (10000 ** (torch.arange(0, head_dim, 2).float() / head_dim))
        t = torch.arange(max_seq_len, dtype=torch.float32)
        freqs = torch.outer(t, inv_freq)
        self.register_buffer("cos_cached", torch.cos(freqs), persistent=False)
        self.register_buffer("sin_cached", torch.sin(freqs), persistent=False)

    def forward(self, q: torch.Tensor, k: torch.Tensor, seq_len: int) -> Tuple[torch.Tensor, torch.Tensor]:
        cos = self.cos_cached[:seq_len].unsqueeze(0).unsqueeze(0)
        sin = self.sin_cached[:seq_len].unsqueeze(0).unsqueeze(0)
        return self._apply_rotary(q, cos, sin), self._apply_rotary(k, cos, sin)

    @staticmethod
    def _apply_rotary(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor:
        x1 = x[..., ::2]
        x2 = x[..., 1::2]
        xe = x1 * cos - x2 * sin
        xo = x1 * sin + x2 * cos
        # cos/sin are float32 buffers; cast back so q/k keep the same dtype as v.
        return torch.stack((xe, xo), dim=-1).flatten(-2).to(x.dtype)


class CausalSelfAttention(nn.Module):
    def __init__(self, cfg: _Cfg) -> None:
        super().__init__()
        self.n_heads = cfg.n_heads
        self.head_dim = cfg.head_dim
        self.scale = self.head_dim ** -0.5
        self.q_proj = nn.Linear(cfg.d_model, cfg.d_model, bias=False)
        self.k_proj = nn.Linear(cfg.d_model, cfg.d_model, bias=False)
        self.v_proj = nn.Linear(cfg.d_model, cfg.d_model, bias=False)
        self.o_proj = nn.Linear(cfg.d_model, cfg.d_model, bias=False)
        self.dropout = nn.Dropout(cfg.dropout)
        self.rotary = RotaryEmbedding(self.head_dim, cfg.max_seq_len)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        bsz, seq_len, _ = x.shape
        q = self.q_proj(x).view(bsz, seq_len, self.n_heads, self.head_dim).transpose(1, 2)
        k = self.k_proj(x).view(bsz, seq_len, self.n_heads, self.head_dim).transpose(1, 2)
        v = self.v_proj(x).view(bsz, seq_len, self.n_heads, self.head_dim).transpose(1, 2)
        q, k = self.rotary(q, k, seq_len=seq_len)
        out = F.scaled_dot_product_attention(
            q,
            k,
            v,
            attn_mask=None,
            dropout_p=self.dropout.p if self.training else 0.0,
            is_causal=True,
            scale=self.scale,
        )
        out = out.transpose(1, 2).contiguous().view(bsz, seq_len, -1)
        return self.o_proj(out)


class FeedForward(nn.Module):
    def __init__(self, cfg: _Cfg) -> None:
        super().__init__()
        self.fc1 = nn.Linear(cfg.d_model, cfg.d_ff, bias=False)
        self.fc2 = nn.Linear(cfg.d_ff, cfg.d_model, bias=False)
        self.dropout = nn.Dropout(cfg.dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.fc1(x)
        x = F.gelu(x, approximate="tanh")
        x = self.fc2(x)
        x = self.dropout(x)
        return x


class TransformerBlock(nn.Module):
    def __init__(self, cfg: _Cfg) -> None:
        super().__init__()
        self.norm1 = RMSNorm(cfg.d_model, cfg.rms_norm_eps)
        self.attn = CausalSelfAttention(cfg)
        self.norm2 = RMSNorm(cfg.d_model, cfg.rms_norm_eps)
        self.ffn = FeedForward(cfg)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.attn(self.norm1(x))
        x = x + self.ffn(self.norm2(x))
        return x


# GenerationMixin is listed explicitly (after PreTrainedModel) so that
# .generate() is available regardless of what PreTrainedModel inherits.
class MindiForCausalLM(PreTrainedModel, GenerationMixin):
    config_class = MindiConfig
    base_model_prefix = "mindi"
    supports_gradient_checkpointing = False

    def __init__(self, config: MindiConfig):
        super().__init__(config)
        cfg = _Cfg(
            vocab_size=config.vocab_size,
            max_seq_len=config.max_seq_len,
            d_model=config.d_model,
            n_layers=config.n_layers,
            n_heads=config.n_heads,
            d_ff=config.d_ff,
            dropout=config.dropout,
            tie_embeddings=config.tie_embeddings,
            init_std=config.init_std,
            rms_norm_eps=config.rms_norm_eps,
        )

        self.embed_tokens = nn.Embedding(cfg.vocab_size, cfg.d_model)
        self.dropout = nn.Dropout(cfg.dropout)
        self.blocks = nn.ModuleList([TransformerBlock(cfg) for _ in range(cfg.n_layers)])
        self.norm_final = RMSNorm(cfg.d_model, cfg.rms_norm_eps)
        self.lm_head = nn.Linear(cfg.d_model, cfg.vocab_size, bias=False)

        if cfg.tie_embeddings:
            self.lm_head.weight = self.embed_tokens.weight

        self.post_init()

    def _init_weights(self, module: nn.Module) -> None:
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=self.config.init_std)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=self.config.init_std)

    def get_input_embeddings(self) -> nn.Module:
        return self.embed_tokens

    def set_input_embeddings(self, value: nn.Module) -> None:
        self.embed_tokens = value

    def get_output_embeddings(self) -> nn.Module:
        return self.lm_head

    def set_output_embeddings(self, new_embeddings: nn.Module) -> None:
        self.lm_head = new_embeddings

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> CausalLMOutputWithPast:
        del attention_mask, kwargs

        x = self.embed_tokens(input_ids)
        x = self.dropout(x)

        for block in self.blocks:
            x = block(x)

        x = self.norm_final(x)
        logits = self.lm_head(x)

        loss = None
        if labels is not None:
            shift_logits = logits[:, :-1, :].contiguous()
            shift_labels = labels[:, 1:].contiguous()
            loss = F.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
                ignore_index=-100,
            )

        return CausalLMOutputWithPast(loss=loss, logits=logits)

    @torch.no_grad()
    def prepare_inputs_for_generation(self, input_ids: torch.Tensor, **kwargs):
        del kwargs
        return {"input_ids": input_ids}
'''
    (output_dir / "modeling_mindi.py").write_text(content, encoding="utf-8")
|
|
|
|
|
|
def write_tokenization_py(output_dir: Path) -> None:
    """Write ``tokenization_mindi.py`` (the ``MindiTokenizer`` class) into ``output_dir``.

    The generated tokenizer subclasses ``PreTrainedTokenizerFast`` and looks
    for ``tokenizer.json`` in the given model folder, then next to the
    generated module itself, when no ``tokenizer_file`` is supplied.

    Args:
        output_dir: Existing directory that receives the generated file.
    """
    module_source = '''"""
Hugging Face tokenizer class for MINDI 1.0 420M.
"""

from pathlib import Path
from transformers import PreTrainedTokenizerFast


class MindiTokenizer(PreTrainedTokenizerFast):
    vocab_files_names = {"tokenizer_file": "tokenizer.json"}
    model_input_names = ["input_ids", "attention_mask"]

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs):
        if kwargs.get("tokenizer_file") is None:
            local_candidate = Path(str(pretrained_model_name_or_path)) / "tokenizer.json"
            if local_candidate.exists():
                kwargs["tokenizer_file"] = str(local_candidate)
        return super().from_pretrained(pretrained_model_name_or_path, *init_inputs, **kwargs)

    def __init__(self, tokenizer_file=None, **kwargs):
        name_or_path = kwargs.pop("name_or_path", None)
        if tokenizer_file is None and name_or_path is not None:
            candidate = Path(name_or_path) / "tokenizer.json"
            if candidate.exists():
                tokenizer_file = str(candidate)
        if tokenizer_file is None:
            tokenizer_file = str(Path(__file__).resolve().parent / "tokenizer.json")
        kwargs.setdefault("bos_token", "<BOS>")
        kwargs.setdefault("eos_token", "<EOS>")
        kwargs.setdefault("unk_token", "<UNK>")
        kwargs.setdefault("pad_token", "<PAD>")
        super().__init__(tokenizer_file=tokenizer_file, **kwargs)
'''
    destination = output_dir / "tokenization_mindi.py"
    destination.write_text(module_source, encoding="utf-8")
def write_model_card(output_dir: Path, repo_id: str, num_params: int) -> None:
    """Write the model card ``README.md`` into ``output_dir``.

    Args:
        output_dir: Existing directory that receives ``README.md``.
        repo_id: Hugging Face repo id inserted into the quick-start snippet.
        num_params: Parameter count, rendered with thousands separators.
    """
    param_count = f"{num_params:,}"
    card = f'''---
license: mit
language:
- en
library_name: transformers
pipeline_tag: text-generation
tags:
- code
- python
- javascript
- local-llm
- offline
---

# MINDI 1.0 420M

MINDI 1.0 420M is a 420M-parameter coding language model focused on Python first and JavaScript second.
It is built for local, offline code generation workflows.

## Capabilities

- Code generation from natural language prompts
- Code completion
- Bug-fix suggestions
- Code explanation

## Model Details

- Parameters: {param_count}
- Architecture: Decoder-only Transformer
- Context length: 2048 tokens
- Focus languages: Python, JavaScript

## Hardware Requirements

Recommended:
- NVIDIA GPU with 8GB+ VRAM
- CUDA-enabled PyTorch

Minimum:
- CPU inference works but is slower

## Quick Start (GPU)

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

repo_id = "{repo_id}"

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
).cuda()

prompt = "Write a Python function to check if a string is a palindrome."
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=220,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )

print(tokenizer.decode(output[0], skip_special_tokens=True))
```

## Limitations

- The model can still produce syntax or logic errors.
- Generated code should always be reviewed and tested.
- Not intended for safety-critical production use without validation.

## Safety

Always run tests and static checks before using generated code in production.
'''
    readme_path = output_dir / "README.md"
    readme_path.write_text(card, encoding="utf-8")
|
|
|
|
def write_upload_helper(output_dir: Path, repo_id: str, private: bool) -> None:
    """Write ``UPLOAD_TO_HF.ps1`` with the commands that publish ``output_dir``.

    The script relies on ``huggingface-cli upload`` to create the repository
    when it is missing. The previous ``repo create`` line is dropped because
    it passed a ``--public`` flag the CLI does not define, and it created the
    repo from the bare name, ignoring the namespace part of ``repo_id``.

    Args:
        output_dir: Export folder; also the folder the script uploads.
        repo_id: Full Hugging Face repo id (``namespace/name``).
        private: When true, a newly created repo is made private.
    """
    # Public is the default visibility, so only the private case needs a flag.
    visibility_suffix = " --private" if private else ""
    script = f'''# Upload helper for MINDI 1.0 420M
# Run from PowerShell.
# The upload command creates the repository if it does not exist yet.

huggingface-cli login
huggingface-cli upload {repo_id} "{output_dir}" . --repo-type model{visibility_suffix}
'''
    helper_path = output_dir / "UPLOAD_TO_HF.ps1"
    helper_path.write_text(script, encoding="utf-8")
|
|
|
|
def write_runtime_requirements(output_dir: Path) -> None:
    """Write ``requirements_runtime.txt`` with the minimum runtime package versions.

    Args:
        output_dir: Existing directory that receives the requirements file.
    """
    pins = (
        "torch>=2.4.1",
        "transformers>=4.46.3",
        "safetensors>=0.4.5",
        "tokenizers>=0.20.1",
    )
    # One requirement per line, each line newline-terminated.
    body = "".join(f"{pin}\n" for pin in pins)
    (output_dir / "requirements_runtime.txt").write_text(body, encoding="utf-8")
|
|
|
|
def write_license(output_dir: Path) -> None:
    """Write the MIT ``LICENSE`` file into ``output_dir``.

    Args:
        output_dir: Existing directory that receives the ``LICENSE`` file.
    """
    license_text = '''MIT License

Copyright (c) 2026 MINDI 1.0 420M Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
    license_path = output_dir / "LICENSE"
    license_path.write_text(license_text, encoding="utf-8")
|
|
|
|
def main() -> None:
    """Export the checkpoint as a Hugging Face-ready model folder.

    Steps:
        1. Resolve the CLI paths against ``PROJECT_ROOT`` and validate inputs.
        2. Build the model and load the checkpoint weights (strict).
        3. Only then clear and recreate the output folder, so a failed load
           does not destroy a previous export.
        4. Write weights, config files, tokenizer files, custom code, model
           card, upload helper, runtime requirements and license.

    Raises:
        FileNotFoundError: If the tokenizer folder, ``tokenizer.json``, the
            model config or the checkpoint is missing.
        ValueError: If the output folder resolves to the project root or one
            of its ancestors, or if the config/checkpoint content is invalid.
    """
    args = parse_args()

    # Joining an absolute CLI path onto PROJECT_ROOT yields that absolute path,
    # so both relative and absolute arguments are accepted.
    ckpt_path = PROJECT_ROOT / args.checkpoint_path
    model_cfg_path = PROJECT_ROOT / args.model_config_path
    tokenizer_dir = PROJECT_ROOT / args.tokenizer_dir
    output_dir = PROJECT_ROOT / args.output_dir

    # Validate cheap preconditions before touching anything on disk.
    if not tokenizer_dir.exists():
        raise FileNotFoundError(f"Tokenizer directory not found: {tokenizer_dir}")
    tokenizer_json = tokenizer_dir / "tokenizer.json"
    if not tokenizer_json.exists():
        raise FileNotFoundError(f"Tokenizer file not found: {tokenizer_json}")

    # The output folder is deleted below; refuse to delete the project itself.
    resolved_output = output_dir.resolve()
    if resolved_output == PROJECT_ROOT or resolved_output in PROJECT_ROOT.parents:
        raise ValueError(f"Refusing to use output_dir that contains the project: {resolved_output}")

    # Build the model and load the weights before clearing the previous export.
    model_cfg = build_model_config(model_cfg_path)
    model = CodeTransformerLM(model_cfg)

    state = extract_model_state(ckpt_path)
    model.load_state_dict(state, strict=True)
    model.eval()

    # Start from an empty output folder.
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save weights. With tied embeddings lm_head.weight is dropped from the file.
    tensor_state = {k: v.detach().cpu().contiguous() for k, v in model.state_dict().items()}
    if model_cfg.tie_embeddings and "lm_head.weight" in tensor_state:
        tensor_state.pop("lm_head.weight")
    save_file(tensor_state, str(output_dir / "model.safetensors"), metadata={"format": "pt"})

    # Hugging Face model config; auto_map points at the custom code written below.
    # NOTE(review): torch_dtype is advertised as float16 while the tensors are saved
    # in whatever dtype the checkpoint holds — confirm this is intended.
    hf_config = {
        "model_type": "mindi",
        "architectures": ["MindiForCausalLM"],
        "auto_map": {
            "AutoConfig": "configuration_mindi.MindiConfig",
            "AutoModelForCausalLM": "modeling_mindi.MindiForCausalLM",
            "AutoTokenizer": [None, "tokenization_mindi.MindiTokenizer"],
        },
        "vocab_size": model_cfg.vocab_size,
        "max_seq_len": model_cfg.max_seq_len,
        "d_model": model_cfg.d_model,
        "n_layers": model_cfg.n_layers,
        "n_heads": model_cfg.n_heads,
        "d_ff": model_cfg.d_ff,
        "dropout": model_cfg.dropout,
        "tie_embeddings": model_cfg.tie_embeddings,
        "init_std": model_cfg.init_std,
        "rms_norm_eps": model_cfg.rms_norm_eps,
        "bos_token_id": BOS_ID,
        "eos_token_id": EOS_ID,
        "pad_token_id": PAD_ID,
        "torch_dtype": "float16",
        "transformers_version": "4.46.3",
    }
    (output_dir / "config.json").write_text(json.dumps(hf_config, indent=2), encoding="utf-8")

    generation_cfg = {
        "bos_token_id": BOS_ID,
        "eos_token_id": EOS_ID,
        "pad_token_id": PAD_ID,
        "max_new_tokens": 220,
        "temperature": 0.2,
        "top_p": 0.9,
        "do_sample": True,
    }
    (output_dir / "generation_config.json").write_text(json.dumps(generation_cfg, indent=2), encoding="utf-8")

    # Tokenizer vocabulary file.
    shutil.copy2(tokenizer_json, output_dir / "tokenizer.json")

    # Tokenizer config pointing at the custom tokenizer class.
    tokenizer_cfg = {
        "tokenizer_class": "MindiTokenizer",
        "model_max_length": int(model_cfg.max_seq_len),
        "bos_token": "<BOS>",
        "eos_token": "<EOS>",
        "unk_token": "<UNK>",
        "pad_token": "<PAD>",
        "tokenizer_file": "tokenizer.json",
        "auto_map": {"AutoTokenizer": [None, "tokenization_mindi.MindiTokenizer"]},
        "padding_side": "right",
        "truncation_side": "right",
    }
    (output_dir / "tokenizer_config.json").write_text(json.dumps(tokenizer_cfg, indent=2), encoding="utf-8")

    special_map = {
        "bos_token": "<BOS>",
        "eos_token": "<EOS>",
        "unk_token": "<UNK>",
        "pad_token": "<PAD>",
    }
    (output_dir / "special_tokens_map.json").write_text(json.dumps(special_map, indent=2), encoding="utf-8")

    # Custom code loaded through trust_remote_code.
    write_configuration_py(output_dir)
    write_modeling_py(output_dir)
    write_tokenization_py(output_dir)

    # Documentation and helper files.
    num_params = sum(p.numel() for p in model.parameters())
    write_model_card(output_dir, args.repo_id, num_params)
    write_upload_helper(output_dir, args.repo_id, args.private)
    write_runtime_requirements(output_dir)
    write_license(output_dir)

    print("Hugging Face package export completed.")
    print(f"Output folder: {output_dir}")
    print(f"Weights: {output_dir / 'model.safetensors'}")
    print(f"Tokenizer: {output_dir / 'tokenizer.json'}")
    print(f"Model card: {output_dir / 'README.md'}")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: report any failure in plain language and exit with status 1.
    try:
        main()
    except Exception as exc:
        failure_report = (
            "HF export failed.",
            f"What went wrong: {exc}",
            "Fix suggestion: verify checkpoint path, tokenizer path, and that safetensors/yaml are installed "
            "in your active Python environment.",
        )
        for report_line in failure_report:
            print(report_line)
        raise SystemExit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|