# SmolFactory / config / train_smollm3_long_context.py
# Author: Tonic
# Commit: 32fca7d (verified) — "adds no think tag correctly"
"""
SmolLM3 Long-Context Training Configuration
Optimized for long-context tasks (up to 128k tokens)
"""
from config.train_smollm3 import SmolLM3Config
# Long-context (128k-token) fine-tuning preset for SmolLM3-3B.
# Memory-heavy settings (flash attention, gradient checkpointing, batch_size=1)
# compensate for the very long sequence length.
config = SmolLM3Config(
# Model configuration
model_name="HuggingFaceTB/SmolLM3-3B",
max_seq_length=131072, # 128k tokens
use_flash_attention=True,  # required in practice for attention at this sequence length
use_gradient_checkpointing=True,  # trade compute for activation memory
# Training configuration
batch_size=1, # Reduced for long sequences
gradient_accumulation_steps=8, # Increased to maintain effective batch size (1 x 8 = 8)
learning_rate=1e-5, # Lower learning rate for stability
weight_decay=0.01,
warmup_steps=200,  # NOTE(review): 200/500 = 40% of max_iters spent warming up — confirm intended
max_iters=500,
# Mixed precision
fp16=True,
bf16=False,  # NOTE(review): bf16 is usually preferred on Ampere+ GPUs — presumably fp16 chosen for wider hardware support; verify
# Logging and saving
save_steps=100,
eval_steps=50,
logging_steps=10,
# Chat template configuration
use_chat_template=True,
chat_template_kwargs={
"add_generation_prompt": True,
"no_think_system_message": True # Inject the no-think system message, i.e. DISABLE thinking for long-context tasks (per commit "adds no think tag correctly")
}
)