| """ |
| Train a US Architectural Floor Plan LLM using SFT with LoRA. |
| |
| Base model: Qwen/Qwen2.5-3B-Instruct |
| Dataset: Nithins03/us-architectural-floorplan-sft |
| Method: SFT with LoRA (rank=128, all-linear) following "LoRA Without Regret" recipe |
| Output: Nithins03/us-architectural-floorplan-llm |
| |
| Reference implementations: |
| - TRL SFT docs: https://huggingface.co/docs/trl/sft_trainer |
| - LoRA Without Regret: https://huggingface.co/docs/trl/lora_without_regret |
| - OptiScene (arxiv:2506.07570): LoRA r=16, alpha=32, lr=5e-6, 10 epochs |
| - DStruct2Design (arxiv:2407.15723): LLaMA3-8B + 8-bit + LoRA |
| """ |
|
|
| import os |
| import torch |
| from datasets import load_dataset |
| from peft import LoraConfig |
| from trl import SFTTrainer, SFTConfig |
| import trackio |
|
|
| |
| |
| |
|
|
# Hub identifiers and local paths shared by the configuration objects and main().
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"  # base checkpoint handed to SFTTrainer
DATASET_NAME = "Nithins03/us-architectural-floorplan-sft"  # read with load_dataset
OUTPUT_DIR = "./floorplan-llm-output"  # local checkpoint directory (SFTConfig.output_dir)
HUB_MODEL_ID = "Nithins03/us-architectural-floorplan-llm"  # Hub repo the run pushes to
|
|
# LoRA adapter configuration passed to SFTTrainer as `peft_config`.
peft_config = LoraConfig(
    # What is being adapted: a causal LM, with adapters on every linear layer.
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    # Adapter capacity and scaling.
    r=128,
    lora_alpha=32,
    # Regularisation; bias terms are left out of the adapter ("none").
    lora_dropout=0.05,
    bias="none",
)
|
|
# Full SFT configuration for the run; consumed by SFTTrainer in main().
training_args = SFTConfig(
    # --- Output and reproducibility ---
    output_dir=OUTPUT_DIR,
    seed=42,
    # --- Optimisation schedule ---
    num_train_epochs=5,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_grad_norm=1.0,
    # --- Batching and memory ---
    # 2 sequences per device x 4 accumulation steps = 8 sequences per
    # device for each optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=2,
    max_length=4096,
    gradient_checkpointing=True,
    bf16=True,
    # --- Sequence handling and loss masking ---
    packing=False,
    # NOTE(review): assistant_only_loss depends on the tokenizer's chat
    # template being able to produce assistant-token masks; confirm the
    # Qwen2.5 template and the installed TRL version support this.
    assistant_only_loss=True,
    # --- Evaluation and checkpointing ---
    # Evaluation and saving run on the same 500-step cadence, so every
    # saved checkpoint has an eval_loss for best-model selection.
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    # --- Logging ---
    logging_strategy="steps",
    logging_steps=25,
    logging_first_step=True,
    disable_tqdm=True,
    report_to=["trackio"],
    # --- Hub upload ---
    push_to_hub=True,
    hub_model_id=HUB_MODEL_ID,
    hub_strategy="every_save",
)
|
|
def main():
    """Fine-tune the base model with LoRA, evaluate it, and publish the result.

    Steps, in order: print a banner, start a trackio run, load the dataset's
    "train" and "test" splits, build the SFTTrainer from the module-level
    ``training_args`` and ``peft_config``, report trainable vs. total
    parameter counts, train, evaluate, save the model, and push it to the Hub.
    """
    rule = "=" * 60
    print(rule)
    print("US Architectural Floor Plan LLM Training")
    print(rule)

    # NOTE(review): training_args also lists "trackio" in report_to; confirm
    # that the Trainer's own reporting attaches to this run rather than
    # starting a second one.
    trackio.init(project="us-floorplan-llm", name="qwen2.5-3b-lora-sft")

    splits = load_dataset(DATASET_NAME)
    train_split = splits["train"]
    test_split = splits["test"]
    print(f"Train: {len(train_split)} | Test: {len(test_split)}")

    # The model is given by name; the trainer is responsible for loading it.
    trainer = SFTTrainer(
        model=MODEL_NAME,
        args=training_args,
        train_dataset=train_split,
        eval_dataset=test_split,
        peft_config=peft_config,
    )

    # Count parameters in one pass over the trainer's model.
    trainable = 0
    total = 0
    for param in trainer.model.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    print(f"Trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

    train_metrics = trainer.train().metrics
    print(f"Train loss: {train_metrics.get('train_loss', 'N/A')}")

    eval_metrics = trainer.evaluate()
    print(f"Eval loss: {eval_metrics.get('eval_loss', 'N/A')}")

    # Persist locally first, then publish the final state to the Hub.
    trainer.save_model()
    trainer.push_to_hub(commit_message="Final model after SFT training on US floor plans")
    print(f"Model pushed to: https://huggingface.co/{HUB_MODEL_ID}")
|
|
# Start training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|