"""
tool_trainer_intensive.py - Intensive Training for 80% Target
This trainer implements:
1. 10+ epochs (vs 3 before)
2. Better learning rate schedule
3. Optimized training parameters
4. Progress monitoring for 80% target
"""
import torch
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import json
import time
def load_training_data(file_path="tool_pairs_massive.jsonl"):
    """Load the massive training dataset."""
    pairs = []
    with open(file_path, 'r') as f:
        for line in f:
            pairs.append(json.loads(line.strip()))
    return pairs
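
# Each JSONL record is expected to provide at least "prompt" and "chosen"
# fields (see format_training_data below); an illustrative, hypothetical line:
# {"prompt": "Get weather for Paris\n", "chosen": "{\"tool\": \"get_weather\", \"arguments\": {\"city\": \"Paris\"}}"}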
def format_training_data(pairs, tokenizer):
    """Format training data for the model."""
    formatted = []
    for pair in pairs:
        # Create training example: prompt + chosen response
        full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token
        formatted.append({"text": full_text})
    return formatted
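
# Note: only the "chosen" completion of each pair is trained on, so this is
# plain supervised fine-tuning on the preferred responses rather than any
# pairwise preference objective.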
def tokenize_function(examples, tokenizer, max_length=400):
    """Tokenize with optimized settings for intensive training."""
    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors=None
    )
    # For causal LM, labels are the same as input_ids
    tokenized["labels"] = tokenized["input_ids"]
    return tokenized
def main():
    print("πŸš€ INTENSIVE Training: SmolLM3-3B for 80% Target")
    print("=" * 60)

    # Setup device
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    print(f"βœ… Using device: {device}")
    start_time = time.time()

    # 1. Load model and tokenizer
    print("πŸ“₯ Loading SmolLM3-3B...")
    model_name = "HuggingFaceTB/SmolLM3-3B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        device_map={"": device} if device == "mps" else "auto"
    )
    print(f"βœ… Model loaded: {model.num_parameters() / 1e9:.1f}B params")
    # 2. Setup LoRA with higher rank for better capacity
    print("πŸ”© Setting up enhanced LoRA (rank 32)...")
    lora_config = LoraConfig(
        r=32,  # Increased from 16 for better capacity
        lora_alpha=64,  # Increased proportionally
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(model, lora_config)

    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"🎯 Trainable: {trainable_params:,} parameters ({100 * trainable_params / total_params:.2f}%)")
    # 3. Load massive training data
    print("πŸ“Š Loading massive training data...")
    pairs = load_training_data()
    print(f"βœ… {len(pairs)} training examples ready")

    # 4. Format and tokenize
    print("πŸ”€ Tokenizing massive dataset...")
    formatted_data = format_training_data(pairs, tokenizer)
    dataset = Dataset.from_list(formatted_data)
    tokenized_dataset = dataset.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        remove_columns=dataset.column_names
    )
    print(f"πŸ“Š Tokenized {len(tokenized_dataset)} examples")
    # 5. Setup intensive training arguments
    print("βš™οΈ Configuring intensive training...")
    training_args = TrainingArguments(
        output_dir="./smollm3_intensive",
        num_train_epochs=12,  # Much longer training
        per_device_train_batch_size=2,  # Smaller batch for stability
        gradient_accumulation_steps=4,  # Effective batch size = 8
        warmup_steps=100,  # Longer warmup
        learning_rate=3e-5,  # Slightly higher learning rate
        lr_scheduler_type="cosine",  # Better learning schedule
        weight_decay=0.01,
        logging_steps=10,
        save_steps=100,
        save_total_limit=3,
        push_to_hub=False,
        report_to="none",  # "none" disables external logging integrations (None maps to "all" in many versions)
        dataloader_pin_memory=False,
        fp16=False,  # Stability over speed
        gradient_checkpointing=True,  # Memory efficiency
        max_grad_norm=1.0,  # Gradient clipping
        adam_epsilon=1e-8,
        adam_beta1=0.9,
        adam_beta2=0.999,
    )
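    # Caveat (version-dependent): gradient_checkpointing on a PEFT-wrapped
    # model sometimes also needs model.enable_input_require_grads() for the
    # adapter gradients to flow; add it if the loss stays flat.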
    # 6. Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8,
    )
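    # With mlm=False the collator rebuilds labels from input_ids and masks pad
    # positions to -100 at collation time, so the labels copy made in
    # tokenize_function is mostly documentation; pad_to_multiple_of=8 is also
    # a near no-op here since every example is already padded to length 400.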
    # 7. Initialize intensive trainer
    print("πŸ‹οΈ Initializing intensive trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )
    # 8. Start intensive training
    print("🎯 Starting INTENSIVE training...")
    print(f"πŸ“Š Dataset: {len(pairs)} examples")
    print(f"πŸ“Š Epochs: 12 (vs 3 before)")
    print(f"πŸ“Š Learning rate: 3e-5 with cosine schedule")
    print(f"⏱️ Expected time: ~10-15 minutes")
    print("πŸ“ˆ Monitoring for dramatic improvement...")

    train_result = trainer.train()
    training_time = time.time() - start_time

    print(f"\nπŸŽ‰ INTENSIVE Training completed!")
    print(f"πŸ“Š Final loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {training_time:.1f}s")
    # 9. Save the intensively trained model
    print("πŸ’Ύ Saving intensively trained model...")
    model.save_pretrained("./smollm3_intensive")
    tokenizer.save_pretrained("./smollm3_intensive")
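    # Note: save_pretrained on the PEFT model stores only the LoRA adapter
    # weights; to reuse them, reload the SmolLM3-3B base model and attach the
    # adapter, e.g. peft.PeftModel.from_pretrained(base_model, "./smollm3_intensive").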
    # 10. Quick validation test
    print("πŸ§ͺ Quick validation test...")
    model.eval()

    test_input = "Get weather for New York"
    inputs = tokenizer(test_input, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    print(f"πŸ€– Model response: {response}")
    # Try to parse as JSON
    try:
        parsed = json.loads(response.strip())
        print(f"βœ… Valid JSON! {parsed}")
    except json.JSONDecodeError as e:
        print(f"❌ JSON error: {e}")

    print(f"\nπŸ† Intensive training complete!")
    print(f"πŸ“ˆ Ready for 80% target evaluation")

    return model, tokenizer
if __name__ == "__main__":
    model, tokenizer = main()