"""
SmolLM3 Long-Context Training Configuration
Optimized for long-context tasks (up to 128k tokens)
"""
from config.train_smollm3 import SmolLM3Config
# Long-context (128k-token) fine-tuning preset for SmolLM3-3B.
config = SmolLM3Config(
    # Model configuration
    model_name="HuggingFaceTB/SmolLM3-3B",
    max_seq_length=131072,  # 128k-token context window (1024 * 128)
    use_flash_attention=True,  # memory-efficient attention; needed at this length
    use_gradient_checkpointing=True,  # trade recompute for activation memory

    # Training configuration
    batch_size=1,  # one sequence per device step; 128k tokens dominate memory
    gradient_accumulation_steps=8,  # keeps effective batch size at 8
    learning_rate=1e-5,  # lower LR for stability on long sequences
    weight_decay=0.01,
    warmup_steps=200,
    max_iters=500,

    # Mixed precision: fp16 on, bf16 off
    fp16=True,
    bf16=False,

    # Logging / checkpoint cadence (in steps)
    save_steps=100,
    eval_steps=50,
    logging_steps=10,

    # Chat template configuration
    use_chat_template=True,
    chat_template_kwargs={
        "add_generation_prompt": True,
        # NOTE(review): original comment said "Allow thinking for long context
        # tasks", but the key name reads as *disabling* the think system
        # message — confirm the intended polarity against SmolLM3Config.
        "no_think_system_message": True,
    },
)