| """ |
finetune_local.py — Local adaptation of Soci_FineTune_3_Incremental
| Fine-tunes Qwen2.5-0.5B-Instruct on Soci city-simulation tasks using Unsloth. |
| |
| Differences from the Colab version: |
| - No Google Drive / google.colab dependencies |
| - Local checkpoint and adapter storage in data/training/ |
| - Loads live conversation data from data/training/processed/ |
| - HF token from HF_TOKEN env var (or .env file) |
| - --debug flag for quick 1-epoch smoke test (no HF push) |
| - --resume flag to continue from saved LoRA adapters |
| |
| Usage (from project root): |
| # Debug / smoke test (fast, no push): |
| "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --debug |
| |
| # Full round-1 training on default 0.5b model + push to HF: |
| "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py |
| |
| # Fine-tune specific model sizes: |
| "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --base-model 7b |
| "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --base-model 8b |
| |
| # Resume round 2 for a specific model: |
| "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --base-model 7b --resume |
| |
| Model profiles (base model -> HF repo): |
| 0.5b -> RayMelius/soci-agent-q4 (Qwen2.5-0.5B, batch=2, seq=2048) |
| 1.5b -> RayMelius/soci-agent-1b5 (Qwen2.5-1.5B, batch=2, seq=2048) |
| 3b -> RayMelius/soci-agent-3b (Qwen2.5-3B, batch=2, seq=2048) |
7b -> RayMelius/soci-agent-7b (Qwen2.5-7B, batch=1, seq=512)
8b -> RayMelius/soci-agent-8b (Llama-3.1-8B, batch=1, seq=512)
| """ |
|
|
| from __future__ import annotations |
|
|
| import sys |
| import io |
| import os |
|
|
| |
if sys.platform == "win32":
    # Windows consoles default to a legacy code page; rewrap both standard
    # streams as UTF-8 so non-ASCII log output never raises UnicodeEncodeError
    # (unencodable characters are replaced instead).
    for _stream_name in ("stdout", "stderr"):
        _raw = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name,
                io.TextIOWrapper(_raw, encoding="utf-8", errors="replace"))
|
|
| |
| |
| |
# Turn off torch.compile / TorchInductor before torch is imported.
# setdefault keeps any value the user has already exported explicitly.
# (Presumably avoids compile-path issues with unsloth's kernels — not
# documented here; confirm before removing.)
for _compile_flag in ("TORCHINDUCTOR_DISABLE", "TORCH_COMPILE_DISABLE"):
    os.environ.setdefault(_compile_flag, "1")
|
|
| |
| |
| |
import unsloth
import transformers.utils.hub
import transformers.tokenization_utils_base


def _noop(*args, **kwargs) -> list:
    """Stand-in for transformers' ``list_repo_templates``: always report
    that a repo has no chat templates.

    NOTE(review): presumably this prevents Hub lookups for remote chat
    templates (e.g. offline/flaky-network runs) — confirm intent.
    """
    return []


# Patch both attribute paths transformers may resolve the helper through.
# (PEP 8: a named def is preferred over assigning a lambda.)
transformers.tokenization_utils_base.list_repo_templates = _noop
transformers.utils.hub.list_repo_templates = _noop
|
|
| import argparse |
| import json |
| import os |
| import shutil |
| from datetime import datetime |
| from pathlib import Path |
|
|
| |
# CLI flags.  --debug implies a tiny 20-example, 1-epoch run with no Hub push;
# --resume continues from adapters saved by a previous round (see load step).
parser = argparse.ArgumentParser(description="Soci local fine-tune")
parser.add_argument("--resume", action="store_true", help="Resume from saved LoRA adapters")
parser.add_argument("--debug", action="store_true", help="Debug/smoke-test: 1 epoch, 20 examples, no push")
parser.add_argument("--no-push", action="store_true", help="Skip HF Hub push")
parser.add_argument("--no-gguf", action="store_true", help="Skip GGUF export")
parser.add_argument("--epochs", type=int, default=None, help="Override epoch count")
parser.add_argument("--hf-repo", default=None, help="HF repo ID (overrides default)")
parser.add_argument("--base-model", default="0.5b",
                    choices=["0.5b", "1.5b", "3b", "7b", "8b"],
                    help="Base model size to fine-tune (default: 0.5b)")
args = parser.parse_args()
|
|
| |
# Per-size training profiles.  Keys:
#   model_id     - 4-bit quantized base checkpoint on the HF Hub
#   repo_name    - target repo name under the HF_USERNAME account
#   seq_len      - max sequence length (tokens) for training
#   batch        - per-device batch size
#   grad_accum   - gradient-accumulation steps (effective batch = batch * grad_accum)
#   lora_r       - LoRA rank (lora_alpha is set equal to r at attach time)
#   lora_targets - projection modules that receive LoRA adapters
_MODEL_PROFILES = {
    "0.5b": dict(
        model_id = "unsloth/Qwen2.5-0.5B-Instruct-unsloth-bnb-4bit",
        repo_name = "soci-agent-q4",
        seq_len = 2048,
        batch = 2,
        grad_accum = 4,
        lora_r = 16,
        lora_targets = ["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
    ),
    "1.5b": dict(
        model_id = "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
        repo_name = "soci-agent-1b5",
        seq_len = 2048,
        batch = 2,
        grad_accum = 4,
        lora_r = 16,
        lora_targets = ["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
    ),
    "3b": dict(
        model_id = "unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
        repo_name = "soci-agent-3b",
        seq_len = 2048,
        batch = 2,
        grad_accum = 4,
        lora_r = 16,
        lora_targets = ["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
    ),
    # Larger models trade context for memory: shorter sequences, batch 1,
    # and attention-only LoRA (q/v) at rank 8.
    "7b": dict(
        model_id = "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
        repo_name = "soci-agent-7b",
        seq_len = 512,
        batch = 1,
        grad_accum = 8,
        lora_r = 8,
        lora_targets = ["q_proj", "v_proj"],
    ),
    "8b": dict(
        model_id = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        repo_name = "soci-agent-8b",
        seq_len = 512,
        batch = 1,
        grad_accum = 8,
        lora_r = 8,
        lora_targets = ["q_proj", "v_proj"],
    ),
}
# Profile selected by --base-model (default "0.5b").
_PROFILE = _MODEL_PROFILES[args.base_model]
|
|
| |
# Directory layout: everything lives under data/training/<model-size>/ so
# different base models never clobber each other's artifacts.
TRAIN_DIR = Path("data/training")
MODEL_DIR = TRAIN_DIR / args.base_model
LORA_SAVE_DIR = MODEL_DIR / "lora_adapters"    # saved / resumable LoRA weights
DATA_ARCHIVE_DIR = MODEL_DIR / "data_archive"  # per-round example archives (replay)
GGUF_DIR = MODEL_DIR / "gguf"                  # exported GGUF quantizations
CHECKPOINTS_DIR = MODEL_DIR / "checkpoints"    # trainer checkpoints
ROUND_FILE = MODEL_DIR / "training_round.json" # round metadata read on --resume
CORE_DATA_FILE = TRAIN_DIR / "core_examples.json"                 # shared curated set
LIVE_DATA_FILE = TRAIN_DIR / "processed" / "soci_training.jsonl"  # shared live sim data


for d in [LORA_SAVE_DIR, DATA_ARCHIVE_DIR, GGUF_DIR, CHECKPOINTS_DIR]:
    d.mkdir(parents=True, exist_ok=True)


MAX_SEQ_LENGTH = _PROFILE["seq_len"]
HF_USERNAME = "RayMelius"
# An explicit --hf-repo overrides the profile's default repo.
HF_REPO_ID = args.hf_repo or f"{HF_USERNAME}/{_PROFILE['repo_name']}"
|
|
| |
| try: |
| from dotenv import load_dotenv |
| load_dotenv() |
| except ImportError: |
| pass |
| HF_TOKEN = os.environ.get("HF_TOKEN", "") |
| if not HF_TOKEN: |
| |
| env_file = Path(".env") |
| if env_file.exists(): |
| for line in env_file.read_text().splitlines(): |
| if line.startswith("HF_TOKEN="): |
| HF_TOKEN = line.split("=", 1)[1].strip().strip('"') |
|
|
| |
import torch

# Report the accelerator up front so a slow CPU run is a conscious choice.
# (Fixes mojibake "β" — originally an em dash — in the warning text.)
if not torch.cuda.is_available():
    print("[WARN] No CUDA GPU detected — training will be very slow on CPU.")
    print(" Consider running on Colab or a machine with a GPU.")
else:
    print(f"GPU : {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
|
|
| |
| |
| |
| |
| |
| import functools |
| import unsloth_zoo.fused_losses.cross_entropy_loss as _unsloth_ce |
|
|
| @functools.cache |
| def _safe_chunk_multiplier(vocab_size, target_gb=None): |
| if target_gb is None: |
| try: |
| free, _ = torch.cuda.mem_get_info(0) |
| free_gb = free / (1024 ** 3) * 0.5 |
| except Exception: |
| free_gb = 0.0 |
| target_gb = max(free_gb, 0.1) |
| if target_gb <= 1e-9: |
| target_gb = 0.1 |
| multiplier = (vocab_size * 4 / (1024 ** 3)) / target_gb |
| multiplier = multiplier / 4 |
| return multiplier |
|
|
# Swap unsloth_zoo's chunk-size heuristic for the VRAM-aware one above.
_unsloth_ce._get_chunk_multiplier = _safe_chunk_multiplier
print("Patched unsloth fused CE loss for low-VRAM GPU")
|
|
| |
# Work out which training round this is.  ROUND_FILE is written at the end
# of every run (per model size) and read back here on --resume.
RESUME = args.resume
if not RESUME:
    CURRENT_ROUND = 1
    print("Starting fresh (round 1)")
elif ROUND_FILE.exists():
    round_info = json.loads(ROUND_FILE.read_text())
    prev_round = round_info["round"]
    CURRENT_ROUND = prev_round + 1
    print(f"Resuming from round {prev_round} -> round {CURRENT_ROUND}")
    print(f"Previous loss: {round_info.get('final_loss', 'N/A')}")
else:
    # Resuming without metadata: assume exactly one prior round happened.
    CURRENT_ROUND = 2
    print("No round file found, assuming round 2")
|
|
| |
from unsloth import FastLanguageModel


# Load either the previously saved adapters (resume path, directory must be
# non-empty) or the 4-bit base model.  dtype=None lets unsloth pick the
# float precision for this GPU.
if RESUME and LORA_SAVE_DIR.exists() and any(LORA_SAVE_DIR.iterdir()):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = str(LORA_SAVE_DIR),
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = None,
        load_in_4bit = True,
    )
    print(f"Resumed LoRA adapters from {LORA_SAVE_DIR}")
else:
    if RESUME:
        # Requested a resume but nothing saved on disk: fall back to round 1.
        print(f"[WARN] No LoRA adapters at {LORA_SAVE_DIR}, starting fresh.")
        CURRENT_ROUND = 1
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = _PROFILE["model_id"],
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = None,
        load_in_4bit = True,
    )
    print(f"Fresh base model loaded (round 1): {_PROFILE['model_id']}")
|
|
| |
# Attach LoRA adapters on round 1 only; on resume rounds they came back with
# the checkpoint, so just re-enable gradient checkpointing.
if CURRENT_ROUND == 1:
    model = FastLanguageModel.get_peft_model(
        model,
        r = _PROFILE["lora_r"],
        target_modules = _PROFILE["lora_targets"],
        lora_alpha = _PROFILE["lora_r"],  # alpha set equal to rank
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",  # unsloth's checkpointing variant
        random_state = 42,
    )
    print("Fresh LoRA adapters attached")
else:
    model.gradient_checkpointing_enable()
    print(f"Resumed LoRA adapters from round {CURRENT_ROUND - 1}")


model.print_trainable_parameters()
|
|
| |
# Shared system prompt: prepended to every training example (format_example)
# and to every post-training test query (ask).
SYSTEM_PROMPT = (
    "You are the reasoning engine for Soci, an LLM-powered city population simulator. "
    "You control AI agents (NPCs) living in a city. Each agent has a persona, needs "
    "(hunger, energy, social, purpose, comfort, fun), memories, and relationships. "
    "You receive structured context and must respond ONLY with valid JSON. "
    "Never add explanation outside the JSON."
)
|
|
| |
print("\nLoading training data...")


# Core curated examples: a JSON list of {"instruction", "response"} dicts.
# (Fixes mojibake "β" — originally an em dash — in the warning text.)
core_examples: list[dict] = []
if CORE_DATA_FILE.exists():
    core_examples = json.loads(CORE_DATA_FILE.read_text(encoding="utf-8"))
    print(f" Core examples: {len(core_examples)}")
else:
    print(f" [WARN] {CORE_DATA_FILE} not found — run extract step or collect_training_data.py first")
|
|
| |
def _parse_live_example(line: str) -> dict | None:
    """Convert one chat-format JSONL line into an instruction/response pair.

    Expects ``{"messages": [system, user, assistant, ...]}``; the system
    (persona) content is folded into the instruction ahead of the user turn.
    Returns None for lines that are malformed or have fewer than 3 messages.
    """
    try:
        ex = json.loads(line)
        msgs = ex.get("messages", [])
        if len(msgs) >= 3:
            persona_ctx = msgs[0]["content"]
            user_content = msgs[1]["content"]
            return {
                "instruction": f"{persona_ctx}\n\n{user_content}",
                "response": msgs[2]["content"],
            }
    # TypeError/IndexError added: rows whose "messages" is not a list of
    # dicts previously escaped the except and crashed the whole run.
    except (json.JSONDecodeError, KeyError, TypeError, IndexError):
        pass
    return None


# Live examples captured from the running simulation (JSONL, one chat per line).
live_examples: list[dict] = []
if LIVE_DATA_FILE.exists():
    with open(LIVE_DATA_FILE, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parsed = _parse_live_example(line)
            if parsed is not None:
                live_examples.append(parsed)
print(f" Live examples: {len(live_examples)} (from Render simulation)")
|
|
| |
# Replay buffer: on incremental rounds, re-train on every earlier round's
# archived examples so prior behavior is retained.
replay_examples: list[dict] = []
if CURRENT_ROUND > 1:
    for archive_path in sorted(DATA_ARCHIVE_DIR.glob("round_*.json")):
        try:
            replay_examples.extend(
                json.loads(archive_path.read_text(encoding="utf-8"))
            )
        except Exception:
            # A corrupt archive is skipped rather than aborting training.
            pass
    print(f" Replay examples: {len(replay_examples)}")
|
|
| |
# Slot for hand-curated examples added for this specific round.  Left empty
# by default — live simulator data is picked up automatically above.
new_examples_this_round: list[dict] = [


]
if new_examples_this_round:
    print(f" New examples this round: {len(new_examples_this_round)}")
|
|
| |
# Merge all sources and drop duplicates, keyed on the first 100 chars of the
# instruction (earlier sources win: core, then live, then new, then replay).
seen: set[str] = set()
all_examples: list[dict] = []
for candidate in core_examples + live_examples + new_examples_this_round + replay_examples:
    fingerprint = candidate.get("instruction", "")[:100]
    if fingerprint in seen:
        continue
    seen.add(fingerprint)
    all_examples.append(candidate)


if args.debug:
    # Smoke test: cap the dataset so a full pipeline run finishes quickly.
    all_examples = all_examples[:20]
    print(f" DEBUG mode: using {len(all_examples)} examples")


print(f" Total (deduped): {len(all_examples)}")
|
|
| |
| from datasets import Dataset |
|
|
def format_example(ex: dict) -> dict:
    """Render one instruction/response pair through the model's chat template.

    Returns {"text": <templated string>} as expected by SFTTrainer's
    dataset_text_field="text".
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": ex["instruction"]},
        {"role": "assistant", "content": ex["response"]},
    ]
    rendered = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=False
    )
    return {"text": rendered}


dataset = Dataset.from_list(all_examples).map(format_example)
print(f"Formatted {len(dataset)} examples. Sample:")
print(dataset[0]["text"][:400])
|
|
| |
| from trl import SFTTrainer, SFTConfig |
| from unsloth import is_bfloat16_supported |
|
|
# Hyperparameters per phase: round 1 trains from scratch at full LR with a
# linear schedule; later rounds use a lower LR and cosine decay for gentler
# incremental updates.  (Fixes mojibake "β" — originally an em dash — and a
# pointless f-prefix on a placeholder-free string.)
if args.debug:
    LR, EPOCHS, WARMUP, SCHEDULER = 2e-4, 1, 2, "linear"
    print("\nDEBUG: 1 epoch smoke test")
elif CURRENT_ROUND == 1:
    LR, EPOCHS, WARMUP, SCHEDULER = 2e-4, 3, 5, "linear"
    print(f"\nRound 1: Full training — LR={LR}, epochs={EPOCHS}")
else:
    LR, EPOCHS, WARMUP, SCHEDULER = 5e-5, 2, 10, "cosine"
    print(f"\nRound {CURRENT_ROUND}: Incremental — LR={LR}, epochs={EPOCHS}")


if args.epochs is not None:
    # Explicit --epochs overrides the phase default.
    EPOCHS = args.epochs
    print(f"Epoch override: {EPOCHS}")
|
|
# Trainer setup.  Batch size and grad accumulation come from the model
# profile; precision follows GPU capability (bf16 when supported, else fp16).
# NOTE(review): dataset_text_field and max_seq_length are passed both to
# SFTTrainer directly and inside SFTConfig; unsloth's patched trainer
# apparently tolerates the duplication, but stock recent TRL may not —
# verify against the pinned trl version before changing.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = MAX_SEQ_LENGTH,
    dataset_num_proc = 2,
    args = SFTConfig(
        per_device_train_batch_size = _PROFILE["batch"],
        gradient_accumulation_steps = _PROFILE["grad_accum"],
        warmup_steps = WARMUP,
        num_train_epochs = EPOCHS,
        learning_rate = LR,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 5,
        optim = "adamw_8bit",  # 8-bit AdamW to reduce optimizer VRAM
        weight_decay = 0.01,
        lr_scheduler_type = SCHEDULER,
        seed = 42,
        output_dir = str(CHECKPOINTS_DIR),
        report_to = "none",  # no wandb/tensorboard logging
        dataset_text_field = "text",
        max_seq_length = MAX_SEQ_LENGTH,
    ),
)
|
|
# Run training, then persist adapters + tokenizer to the per-model directory
# (this is what the --resume path loads next run).
print(f"\nTraining round {CURRENT_ROUND} on {len(dataset)} examples...")
torch.cuda.empty_cache()  # release load-time allocations before training
stats = trainer.train()
print(f"\nRound {CURRENT_ROUND} complete!")
print(f" Steps: {stats.global_step} | Final loss: {stats.training_loss:.4f}")


print(f"\nSaving LoRA adapters to {LORA_SAVE_DIR}...")
model.save_pretrained(str(LORA_SAVE_DIR))
tokenizer.save_pretrained(str(LORA_SAVE_DIR))
print(" Saved.")
|
|
| |
# Persist round metadata; read back by the --resume path on the next run.
round_info = {
    "round": CURRENT_ROUND,
    "final_loss": stats.training_loss,
    "global_steps": stats.global_step,
    "total_examples": len(all_examples),
    "new_examples": len(new_examples_this_round) + len(live_examples),
    "learning_rate": LR,
    "epochs": EPOCHS,
    "timestamp": datetime.now().isoformat(),
}
ROUND_FILE.write_text(json.dumps(round_info, indent=2))
print(f" Round info: {ROUND_FILE}")


# Archive this round's new examples so later rounds can replay them.
all_new = new_examples_this_round + live_examples
if all_new:
    archive_file = DATA_ARCHIVE_DIR / f"round_{CURRENT_ROUND:03d}.json"
    archive_file.write_text(json.dumps(all_new, indent=2, ensure_ascii=False))
    print(f" Archived {len(all_new)} new examples")


# Append to the cross-round history log (shared across model sizes).
history_file = TRAIN_DIR / "training_history.jsonl"
with open(history_file, "a", encoding="utf-8") as f:
    f.write(json.dumps(round_info) + "\n")
|
|
| |
# Qualitative smoke test: sample the freshly trained model on two
# representative prompts and show whether its output parses as JSON.
print(f"\n=== Testing after Round {CURRENT_ROUND} ===\n")
FastLanguageModel.for_inference(model)


def ask(question: str, label: str = "") -> None:
    """Generate one sampled reply to `question` and pretty-print it.

    The reply is printed as formatted JSON when it parses, raw text otherwise.
    """
    chat = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]
    encoded = tokenizer.apply_chat_template(
        chat, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    )
    # Templates may return either a BatchEncoding or a bare tensor.
    input_ids = encoded.input_ids if hasattr(encoded, "input_ids") else encoded
    input_ids = input_ids.to("cuda")
    generated = model.generate(
        input_ids=input_ids, max_new_tokens=200,
        temperature=0.7, top_p=0.9, do_sample=True,
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    resp = tokenizer.decode(generated[0][input_ids.shape[1]:], skip_special_tokens=True)
    print(f"[{label}]")
    print(f"Q: {question[:100]}...")
    try:
        parsed = json.loads(resp)
        print(f"A (valid JSON):\n{json.dumps(parsed, indent=2)}")
    except Exception:
        print(f"A (raw): {resp}")
    print("-" * 60)


ask(
    "You are playing Elena Vasquez, 34, software engineer. "
    "Needs: energy=0.3, hunger=0.7. Location: office. Time: 12:30. "
    "Decide next action. JSON: {\"action\": str, \"location\": str, \"reason\": str}",
    "decide_action",
)
ask(
    "You are playing Marcus Chen talking to Zoe. "
    "Zoe says: 'Marcus, I bombed my exam.' Continue as Marcus. "
    "JSON: {\"speech\": str, \"emotion\": str}",
    "conversation_turn",
)
|
|
| |
| |
| |
# GGUF export.  Per the message below, unsloth's export path depends on a
# llama.cpp build that is not supported on Windows, so it is skipped there
# as well as in debug mode / with --no-gguf.  (Fixes mojibake "β" and a
# pointless f-prefix on a placeholder-free string.)
import platform
_on_windows = platform.system() == "Windows"
skip_gguf = args.no_gguf or args.debug or _on_windows
if _on_windows and not args.no_gguf and not args.debug:
    print("\nSkipping GGUF export (Windows — llama.cpp build not supported via unsloth on Win)")
    print(" To export GGUF manually, use llama.cpp's convert_hf_to_gguf.py")
    print(f" LoRA merged weights saved to: {GGUF_DIR}/ (after push)")


if not skip_gguf:
    print("\nExporting GGUF Q4_K_M (takes a few minutes)...")
    model.save_pretrained_gguf(str(GGUF_DIR), tokenizer, quantization_method="q4_k_m")
    gguf_files = list(GGUF_DIR.glob("*.gguf"))
    for gf in gguf_files:
        print(f" GGUF: {gf.name} ({gf.stat().st_size / 1e6:.0f} MB)")
else:
    if args.debug:
        print("\nSkipping GGUF export (debug mode)")
    gguf_files = []  # later steps (push, summary) rely on this being defined
|
|
| |
# HuggingFace Hub push: LoRA adapters (folder), any GGUF files, and round
# metadata.  Skipped in debug mode, with --no-push, or without a token.
# (Fixes mojibake "β" — originally an em dash — in the token warning.)
skip_push = args.no_push or args.debug
if skip_push:
    print("\nSkipping HF push (debug mode or --no-push)")
else:
    if not HF_TOKEN:
        print("\n[WARN] No HF_TOKEN found — skipping push.")
        print(" Set HF_TOKEN env var or add to .env file.")
    else:
        from huggingface_hub import login, HfApi
        print(f"\nPushing to HuggingFace: {HF_REPO_ID}")
        login(token=HF_TOKEN)
        api = HfApi()
        api.create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)

        # Adapters go under lora_adapters/ in the repo.
        print(" Uploading LoRA adapters...")
        api.upload_folder(
            folder_path = str(LORA_SAVE_DIR),
            repo_id = HF_REPO_ID,
            repo_type = "model",
            path_in_repo = "lora_adapters",
        )
        print(f" LoRA -> https://huggingface.co/{HF_REPO_ID}/tree/main/lora_adapters")

        # GGUF files at the repo root (list is empty when export was skipped).
        for gf in gguf_files:
            mb = gf.stat().st_size / 1e6
            print(f" Uploading {gf.name} ({mb:.0f} MB)...")
            api.upload_file(
                path_or_fileobj = str(gf),
                path_in_repo = gf.name,
                repo_id = HF_REPO_ID,
                repo_type = "model",
            )
            print(f" Done: https://huggingface.co/{HF_REPO_ID}/blob/main/{gf.name}")

        # Round metadata alongside the weights for provenance.
        api.upload_file(
            path_or_fileobj = str(ROUND_FILE),
            path_in_repo = "training_round.json",
            repo_id = HF_REPO_ID,
            repo_type = "model",
        )

        print(f"\nUpload complete! Model at: https://huggingface.co/{HF_REPO_ID}")
|
|
| |
# Final summary: tabulate every recorded round, then print next-step hints.
# (Drops pointless f-prefixes — ruff F541 — from placeholder-free strings.)
print("\n=== Training History ===\n")
if history_file.exists():
    print(f"{'Round':>6} {'Loss':>8} {'Steps':>7} {'Examples':>9} {'New':>5} {'LR':>10} {'Date':>12}")
    print("-" * 65)
    with open(history_file, encoding="utf-8") as f:
        for line in f:
            r = json.loads(line)
            date = r.get("timestamp", "")[:10]
            print(f"{r['round']:>6} {r['final_loss']:>8.4f} {r['global_steps']:>7} "
                  f"{r['total_examples']:>9} {r['new_examples']:>5} "
                  f"{r['learning_rate']:>10.1e} {date:>12}")


print("\nTo resume: python scripts/finetune_local.py --resume")
print(f"LoRA adapters: {LORA_SAVE_DIR}")
if gguf_files:
    print(f"GGUF: {gguf_files[0]}")
print("\nOllama integration:")
print(" ollama create soci-agent -f Modelfile")
print(" set SOCI_PROVIDER=ollama && set OLLAMA_MODEL=soci-agent")
|
|