Nitishkumar-ai committed on
Commit
b3eb082
·
1 Parent(s): f1f76e5

Fix: Explicit HF token handling and final GRPO batch alignment

Browse files
Files changed (1) hide show
  1. scripts/train_grpo.py +2 -0
scripts/train_grpo.py CHANGED
@@ -104,6 +104,7 @@ def main():
104
  wandb.init(project=WANDB_PROJECT, name=f"grpo-{MODEL_NAME.split('/')[-1]}-run1")
105
 
106
  # 1. Load Model
 
107
  print(f"Loading {MODEL_NAME} with Unsloth 4-bit...")
108
  model, tokenizer = FastLanguageModel.from_pretrained(
109
  model_name=MODEL_NAME,
@@ -111,6 +112,7 @@ def main():
111
  load_in_4bit=True,
112
  fast_inference=True,
113
  max_lora_rank=16,
 
114
  )
115
 
116
  model = FastLanguageModel.get_peft_model(
 
104
  wandb.init(project=WANDB_PROJECT, name=f"grpo-{MODEL_NAME.split('/')[-1]}-run1")
105
 
106
  # 1. Load Model
107
+ hf_token = os.getenv("HF_TOKEN")
108
  print(f"Loading {MODEL_NAME} with Unsloth 4-bit...")
109
  model, tokenizer = FastLanguageModel.from_pretrained(
110
  model_name=MODEL_NAME,
 
112
  load_in_4bit=True,
113
  fast_inference=True,
114
  max_lora_rank=16,
115
+ token=hf_token,
116
  )
117
 
118
  model = FastLanguageModel.get_peft_model(