""" tool_trainer_intensive.py - Intensive Training for 80% Target This trainer implements: 1. 10+ epochs (vs 3 before) 2. Better learning rate schedule 3. Optimized training parameters 4. Progress monitoring for 80% target """ import torch from transformers import ( AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling ) from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training from datasets import Dataset import json import time def load_training_data(file_path="tool_pairs_massive.jsonl"): """Load the massive training dataset.""" pairs = [] with open(file_path, 'r') as f: for line in f: pairs.append(json.loads(line.strip())) return pairs def format_training_data(pairs, tokenizer): """Format training data for the model.""" formatted = [] for pair in pairs: # Create training example: prompt + chosen response full_text = pair["prompt"] + pair["chosen"] + tokenizer.eos_token formatted.append({"text": full_text}) return formatted def tokenize_function(examples, tokenizer, max_length=400): """Tokenize with optimized settings for intensive training.""" tokenized = tokenizer( examples["text"], truncation=True, padding="max_length", max_length=max_length, return_tensors=None ) # For causal LM, labels are the same as input_ids tokenized["labels"] = tokenized["input_ids"] return tokenized def main(): print("๐Ÿš€ INTENSIVE Training: SmolLM3-3B for 80% Target") print("=" * 60) # Setup device device = "mps" if torch.backends.mps.is_available() else "cpu" print(f"โœ… Using device: {device}") start_time = time.time() # 1. Load model and tokenizer print("๐Ÿ“ฅ Loading SmolLM3-3B...") model_name = "HuggingFaceTB/SmolLM3-3B" tokenizer = AutoTokenizer.from_pretrained(model_name) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float32, device_map={"": device} if device == "mps" else "auto" ) print(f"โœ… Model loaded: {model.num_parameters() / 1e9:.1f}B params") # 2. Setup LoRA with higher rank for better capacity print("๐Ÿ”ฉ Setting up enhanced LoRA (rank 32)...") lora_config = LoraConfig( r=32, # Increased from 16 for better capacity lora_alpha=64, # Increased proportionally target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], lora_dropout=0.1, bias="none", task_type="CAUSAL_LM" ) model = get_peft_model(model, lora_config) trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) total_params = sum(p.numel() for p in model.parameters()) print(f"๐ŸŽฏ Trainable: {trainable_params:,} parameters ({100 * trainable_params / total_params:.2f}%)") # 3. Load massive training data print("๐Ÿ“Š Loading massive training data...") pairs = load_training_data() print(f"โœ… {len(pairs)} training examples ready") # 4. Format and tokenize print("๐Ÿ”ค Tokenizing massive dataset...") formatted_data = format_training_data(pairs, tokenizer) dataset = Dataset.from_list(formatted_data) tokenized_dataset = dataset.map( lambda x: tokenize_function(x, tokenizer), batched=True, remove_columns=dataset.column_names ) print(f"๐Ÿ“Š Tokenized {len(tokenized_dataset)} examples") # 5. 

    # 5. Setup intensive training arguments
    print("⚙️ Configuring intensive training...")
    training_args = TrainingArguments(
        output_dir="./smollm3_intensive",
        num_train_epochs=12,              # Much longer training
        per_device_train_batch_size=2,    # Smaller batch for stability
        gradient_accumulation_steps=4,    # Effective batch size = 8
        warmup_steps=100,                 # Longer warmup
        learning_rate=3e-5,               # Slightly higher learning rate
        lr_scheduler_type="cosine",       # Better learning schedule
        weight_decay=0.01,
        logging_steps=10,
        save_steps=100,
        save_total_limit=3,
        push_to_hub=False,
        report_to="none",                 # Disable external logging integrations
        dataloader_pin_memory=False,
        fp16=False,                       # Stability over speed
        gradient_checkpointing=True,      # Memory efficiency
        max_grad_norm=1.0,                # Gradient clipping
        adam_epsilon=1e-8,
        adam_beta1=0.9,
        adam_beta2=0.999,
    )

    # With gradient checkpointing and a frozen base model, the input embeddings
    # must require grads so gradients can flow back into the LoRA adapters.
    model.enable_input_require_grads()

    # 6. Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8,
    )

    # 7. Initialize intensive trainer
    print("🏋️ Initializing intensive trainer...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator,
    )

    # 8. Start intensive training
    print("🎯 Starting INTENSIVE training...")
    print(f"📊 Dataset: {len(pairs)} examples")
    print("📊 Epochs: 12 (vs 3 before)")
    print("📊 Learning rate: 3e-5 with cosine schedule")
    print("⏱️ Expected time: ~10-15 minutes")
    print("📈 Monitoring for dramatic improvement...")

    train_result = trainer.train()

    training_time = time.time() - start_time
    print("\n🎉 INTENSIVE Training completed!")
    print(f"📊 Final loss: {train_result.training_loss:.4f}")
    print(f"⏱️ Training time: {training_time:.1f}s")

    # 9. Save the intensively trained model
    print("💾 Saving intensively trained model...")
    model.save_pretrained("./smollm3_intensive")
    tokenizer.save_pretrained("./smollm3_intensive")

    # 10. Quick validation test
    print("🧪 Quick validation test...")
    model.eval()

    test_input = "Get weather for New York"
    inputs = tokenizer(test_input, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    )
    print(f"🤖 Model response: {response}")

    # Try to parse as JSON
    try:
        parsed = json.loads(response.strip())
        print(f"✅ Valid JSON! {parsed}")
    except json.JSONDecodeError as e:
        print(f"❌ JSON error: {e}")

    print("\n🏆 Intensive training complete!")
    print("📈 Ready for 80% target evaluation")

    return model, tokenizer


if __name__ == "__main__":
    model, tokenizer = main()
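

# ---------------------------------------------------------------------------
# Follow-up sketch (not called by this script): one way the adapter saved to
# ./smollm3_intensive could be reloaded later for the 80% target evaluation.
# This is an assumed downstream workflow, not part of the training run itself;
# load_trained_adapter is a hypothetical helper name.
# ---------------------------------------------------------------------------
def load_trained_adapter(adapter_dir="./smollm3_intensive",
                         base_model_name="HuggingFaceTB/SmolLM3-3B"):
    """Reload the LoRA adapter saved by main() on top of the base model."""
    from peft import PeftModel

    tok = AutoTokenizer.from_pretrained(adapter_dir)
    base = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float32
    )
    tuned = PeftModel.from_pretrained(base, adapter_dir)
    tuned.eval()
    return tuned, tok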