papebaba committed
Commit beebee9 · verified · 1 Parent(s): 641fa02

Upload train_qwen_codeforces.py with huggingface_hub

Files changed (1):
  1. train_qwen_codeforces.py +43 -36
train_qwen_codeforces.py CHANGED
@@ -6,21 +6,17 @@ from datasets import load_dataset
 from peft import LoraConfig
 from trl import SFTTrainer, SFTConfig
 import trackio
-import os
 
 # Load dataset - 1000 examples for ~20 min training
+print("📦 Loading dataset...")
 dataset = load_dataset(
     "open-r1/codeforces-cots",
     "solutions_w_editorials_py_decontaminated",
     split="train[:1000]"
 )
-
 print(f"📊 Training on {len(dataset)} examples for 3 epochs")
 
-# Get username for hub model id
-username = os.environ.get("HF_USERNAME", "papebaba")
-
-# Configure LoRA for efficient training on T4 small
+# LoRA configuration for efficient training
 peft_config = LoraConfig(
     r=8,
     lora_alpha=16,
@@ -30,46 +26,57 @@ peft_config = LoraConfig(
     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
 )
 
-# Configure trainer - optimized for T4 small
+# Training configuration - optimized for T4 small
+config = SFTConfig(
+    # Hub settings - CRITICAL for saving results
+    output_dir="qwen-codeforces-finetuned",
+    push_to_hub=True,
+    hub_model_id="papebaba/qwen-codeforces-finetuned",
+    hub_strategy="end",
+    hub_private_repo=False,
+
+    # Training parameters
+    num_train_epochs=3,
+    per_device_train_batch_size=1,
+    gradient_accumulation_steps=8,  # Effective batch size = 8
+    learning_rate=2e-4,
+    max_length=512,  # Shorter sequences for T4 small
+
+    # Checkpointing
+    logging_steps=10,
+    save_strategy="epoch",
+    save_total_limit=1,
+
+    # Optimization for T4 small
+    gradient_checkpointing=True,
+    bf16=True,
+    max_grad_norm=1.0,
+    warmup_ratio=0.1,
+    lr_scheduler_type="cosine",
+    optim="adamw_torch",
+
+    # Trackio monitoring
+    report_to="trackio",
+    run_name="qwen-codeforces-sft-1k",
+)
+
+# Initialize trainer
+print("🎯 Initializing trainer...")
 trainer = SFTTrainer(
     model="Qwen/Qwen2.5-0.5B",
     train_dataset=dataset,
-    # No eval dataset to save ~40% memory on T4 small
+    args=config,
     peft_config=peft_config,
-    args=SFTConfig(
-        output_dir="qwen-codeforces-finetuned",
-        num_train_epochs=3,
-        per_device_train_batch_size=1,
-        gradient_accumulation_steps=8,  # Effective batch size = 8
-        gradient_checkpointing=True,
-        learning_rate=2e-4,
-        lr_scheduler_type="cosine",
-        warmup_ratio=0.1,
-        logging_steps=10,
-        save_strategy="epoch",
-        save_total_limit=1,
-        # Hub configuration
-        push_to_hub=True,
-        hub_model_id=f"{username}/qwen-codeforces-finetuned",
-        hub_strategy="end",
-        hub_private_repo=False,
-        # Trackio monitoring
-        report_to="trackio",
-        run_name="qwen-codeforces-sft-1k",
-        # Optimization for T4 small
-        bf16=True,
-        max_grad_norm=1.0,
-        optim="adamw_torch",
-        max_length=512,
-    )
 )
 
-# Train the model
+# Train
 print("🚀 Starting training on T4 small...")
 trainer.train()
 
-# Final push to hub
+# Push to Hub
 print("📤 Pushing final model to Hub...")
 trainer.push_to_hub()
 
 print("✅ Training complete!")
+print("📊 View metrics at: https://huggingface.co/spaces/papebaba/trackio")
+print("🤗 Model at: https://huggingface.co/papebaba/qwen-codeforces-finetuned")