						|  | """## Import libraries""" | 
					
						
						|  |  | 
					
						
						|  | import torch | 
					
						
						|  | from datasets import load_dataset | 
					
						
						|  | from transformers import AutoModelForCausalLM, AutoTokenizer | 
					
						
						|  | from trl import SFTConfig, SFTTrainer, setup_chat_format | 
					
						
						|  | from peft import LoraConfig | 
					
						
						|  |  | 
					
						
						|  | """# Load Dataset""" | 
					
						
						|  |  | 
					
						
						|  | dataset_name = "allenai/tulu-3-sft-personas-code" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | dataset = load_dataset(dataset_name, split="train") | 
					
						
						|  | print(f"Dataset loaded: {dataset}") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | print("\nSample data:") | 
					
						
						|  | print(dataset[0]) | 
					
						
						|  |  | 
					
						
						|  | dataset = dataset.remove_columns("prompt") | 
					
						
						|  | dataset = dataset.train_test_split(test_size=0.2) | 
					
						
						|  |  | 
					
						
						|  | print( | 
					
						
						|  | f"Train Samples: {len(dataset['train'])}\nTest Samples: {len(dataset['test'])}" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
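"""Each record carries a chat-formatted `messages` list, which is the structure
`SFTTrainer` consumes for conversational data. Schematically (hypothetical
values, not taken from the dataset):
"""

example_record = {
    "messages": [
        {"role": "user", "content": "Write a function that reverses a string."},
        {"role": "assistant", "content": "def reverse(s):\n    return s[::-1]"},
    ]
}
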
						|  | """## Configuration | 
					
						
						|  |  | 
					
						
						|  | Set up the configuration parameters for the fine-tuning process. | 
					
						
						|  | """ | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model_name = "Qwen/Qwen3-30B-A3B" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | output_dir = "./tmp/sft-model" | 
					
						
						|  | num_train_epochs = 1 | 
					
						
						|  | per_device_train_batch_size = 1 | 
					
						
						|  | gradient_accumulation_steps = 1 | 
					
						
						|  | learning_rate = 2e-4 | 
					
						
						|  |  | 
					
						
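"""For reference, the effective batch size per device is the product of the
per-device batch size and the gradient accumulation steps; the learning rate
above should be read against this value.
"""

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps
print(f"Effective batch size per device: {effective_batch_size}")
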
						|  | """## Load model and tokenizer""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model = AutoModelForCausalLM.from_pretrained( | 
					
						
						|  | model_name, | 
					
						
						|  | torch_dtype=torch.bfloat16, | 
					
						
						|  | use_cache=False, | 
					
						
						|  | device_map="auto", | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | tokenizer = AutoTokenizer.from_pretrained(model_name) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
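"""A quick sanity check on how much memory the weights occupy;
`get_memory_footprint` reports parameter and buffer memory in bytes.
"""

print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.1f} GB")
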
						|  | """## Configure PEFT (if enabled)""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | peft_config = LoraConfig( | 
					
						
						|  | r=32, | 
					
						
						|  | lora_alpha=16, | 
					
						
						|  | lora_dropout=0.05, | 
					
						
						|  | bias="none", | 
					
						
						|  | task_type="CAUSAL_LM", | 
					
						
						|  | target_modules="all-linear", | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
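"""For a linear layer of shape (d_out, d_in), LoRA with rank r trains
r * (d_in + d_out) parameters in place of d_out * d_in. A worked example with
illustrative dimensions (not Qwen3's actual layer shapes):
"""

d_in, d_out, rank = 4096, 4096, 32
full_params = d_in * d_out
lora_params = rank * (d_in + d_out)
print(f"full: {full_params:,}  lora: {lora_params:,}  ratio: {lora_params / full_params:.2%}")
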
						|  | """## Configure SFT Trainer""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | training_args = SFTConfig( | 
					
						
						|  | output_dir=output_dir, | 
					
						
						|  | num_train_epochs=num_train_epochs, | 
					
						
						|  | per_device_train_batch_size=per_device_train_batch_size, | 
					
						
						|  | gradient_accumulation_steps=gradient_accumulation_steps, | 
					
						
						|  | learning_rate=learning_rate, | 
					
						
						|  | gradient_checkpointing=True, | 
					
						
						|  | logging_steps=25, | 
					
						
						|  | save_strategy="epoch", | 
					
						
						|  | optim="adamw_torch", | 
					
						
						|  | lr_scheduler_type="cosine", | 
					
						
						|  | warmup_ratio=0.1, | 
					
						
						|  | max_length=1024, | 
					
						
						|  | packing=True, | 
					
						
						|  | eos_token=tokenizer.eos_token, | 
					
						
						|  | bf16=True, | 
					
						
						|  | fp16=False, | 
					
						
						|  | max_steps=1000, | 
					
						
						|  | report_to="wandb", | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
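"""Back-of-the-envelope training budget: with packing enabled, each optimizer
step consumes roughly a full `max_length` window per sequence in the batch.
"""

approx_tokens = (
    training_args.max_steps
    * per_device_train_batch_size
    * gradient_accumulation_steps
    * training_args.max_length
)
print(f"Approximate training tokens per device: {approx_tokens:,}")
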
						|  | """## Initialize and run the SFT Trainer""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | trainer = SFTTrainer( | 
					
						
						|  | model=model, | 
					
						
						|  | args=training_args, | 
					
						
						|  | train_dataset=dataset["train"], | 
					
						
						|  | eval_dataset=dataset["test"] if "test" in dataset else None, | 
					
						
						|  | peft_config=peft_config, | 
					
						
						|  | processing_class=tokenizer, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
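"""Because `peft_config` was passed, the trainer wraps the model in a PEFT
adapter internally, so only the LoRA weights should be trainable. A quick check
(assuming the wrapped model is exposed as `trainer.model`, as in current TRL):
"""

trainer.model.print_trainable_parameters()
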
trainer.train()

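"""The config above does not schedule in-loop evaluation, but with an eval
split attached the trainer can score it on demand (a sketch):
"""

metrics = trainer.evaluate()
print(metrics)
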
						|  | """## Save the fine-tuned model""" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | trainer.save_model(output_dir) | 
					
						
						|  |  | 
					
						
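"""A quick look at what was written; with a PEFT setup this should be the
adapter files rather than full model shards.
"""

import os

print(sorted(os.listdir(output_dir)))
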
						|  | """## Test the fine-tuned model""" | 
					
						
						|  |  | 
					
						
						|  | from peft import PeftModel, PeftConfig | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | base_model = AutoModelForCausalLM.from_pretrained( | 
					
						
						|  | model_name, trust_remote_code=True, torch_dtype=torch.bfloat16 | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model = PeftModel.from_pretrained(base_model, output_dir) | 
					
						
						|  | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) | 
					
						
						|  |  | 
					
						
						|  | prompt = """Write a function called is_palindrome that takes a single string as input and returns True if the string is a palindrome, and False otherwise. | 
					
						
						|  |  | 
					
						
						|  | Palindrome Definition: | 
					
						
						|  |  | 
					
						
						|  | A palindrome is a word, phrase, number, or other sequence of characters that reads the same forward and backward, ignoring spaces, punctuation, and capitalization. | 
					
						
						|  |  | 
					
						
						|  | Example: | 
					
						
						|  | ``` | 
					
						
						|  | is_palindrome("racecar")  # Returns True | 
					
						
						|  | is_palindrome("hello")  # Returns False | 
					
						
						|  | is_palindrome("A man, a plan, a canal: Panama")  # Returns True | 
					
						
						|  | ``` | 
					
						
						|  | """ | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(f"Formatted prompt: {formatted_prompt}")

model.eval()
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\nGenerated Response:")
print(response)

model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code")

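"""On a `PeftModel`, `push_to_hub` uploads only the adapter. To publish a
standalone checkpoint with no PEFT dependency, the adapter can be merged into
the base weights first (a sketch; the `-merged` repo name is illustrative):
"""

merged_model = model.merge_and_unload()
merged_model.push_to_hub("burtenshaw/Qwen3-30B-A3B-python-code-merged")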