"""Supervised fine-tuning of Qwen/Qwen3-0.6B with LoRA on Codeforces solutions.

The script loads the ``solutions_py_decontaminated`` configuration of
``open-r1/codeforces-cots``, keeps a shuffled subset of at most
``MAX_EXAMPLES`` rows, holds out 10% of that subset for evaluation, trains
LoRA adapters with TRL's ``SFTTrainer`` and pushes the result to the
Hugging Face Hub repository named by ``HUB_MODEL_ID``.
"""

from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

# --- Run configuration -------------------------------------------------------
BASE_MODEL = "Qwen/Qwen3-0.6B"
DATASET_NAME = "open-r1/codeforces-cots"
DATASET_CONFIG = "solutions_py_decontaminated"
MAX_EXAMPLES = 5000  # upper bound on rows kept after shuffling
EVAL_FRACTION = 0.1  # share of the subset held out for evaluation
SEED = 42  # shared by the shuffle and the train/eval split
OUTPUT_DIR = "qwen3-0.6b-codeforces-sft"
# Single source of truth for the Hub repo: used both as the push target and
# in the final URL message, so the two cannot drift apart.
HUB_MODEL_ID = "luiscosio/qwen3-0.6b-codeforces-sft"

# --- Data --------------------------------------------------------------------
print("Loading dataset...")
dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, split="train")
print(f"Dataset loaded: {len(dataset)} examples")

# Shuffle before selecting so the subset is a random sample rather than the
# first rows of the split; min() keeps this safe if the split is smaller
# than MAX_EXAMPLES.
dataset = dataset.shuffle(seed=SEED).select(range(min(MAX_EXAMPLES, len(dataset))))
print(f"Using {len(dataset)} examples")

print("Creating train/eval split...")
dataset_split = dataset.train_test_split(test_size=EVAL_FRACTION, seed=SEED)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]
print(f"Train: {len(train_dataset)} examples")
print(f"Eval: {len(eval_dataset)} examples")

# --- Training arguments ------------------------------------------------------
config = SFTConfig(
    output_dir=OUTPUT_DIR,
    push_to_hub=True,
    hub_model_id=HUB_MODEL_ID,
    hub_strategy="every_save",
    num_train_epochs=3,
    # 2 samples x 8 accumulation steps = 16 samples per optimizer step
    # on each device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    logging_steps=10,
    # Checkpointing and evaluation run on the same 100-step cadence.
    save_strategy="steps",
    save_steps=100,
    save_total_limit=3,
    eval_strategy="steps",
    eval_steps=100,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    bf16=True,
    max_length=2048,
    report_to="none",
)

# --- LoRA adapter configuration ----------------------------------------------
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# --- Train and publish -------------------------------------------------------
print("Initializing trainer...")
trainer = SFTTrainer(
    model=BASE_MODEL,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=config,
    peft_config=peft_config,
)

print("Starting training...")
trainer.train()

print("Pushing to Hub...")
trainer.push_to_hub()

print(f"Complete! Model at: https://huggingface.co/{HUB_MODEL_ID}")