import os

from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, TaskType, get_peft_model

# Load SST-2 (binary sentiment classification) and take a small subset for fast training
dataset = load_dataset("glue", "sst2")
small_train = dataset["train"].select(range(500))

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_fn(batch):
    # Truncate here; padding is applied dynamically per batch by the data collator below
    return tokenizer(batch["sentence"], truncation=True)

tokenized_train = small_train.map(tokenize_fn, batched=True)

# Load the base model with a fresh 2-class classification head
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
)

# Apply PEFT with LoRA. FIXED: target_modules is now set explicitly, as required for
# DistilBERT (its attention projections are named q_lin / v_lin)
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"],
)
model = get_peft_model(model, peft_config)

# Hugging Face token (set as a Secret in the Space settings)
hf_token = os.environ.get("HF_TOKEN") or "hf_xxx"  # Replace if needed

training_args = TrainingArguments(
    output_dir="results",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    push_to_hub=True,
    hub_model_id="NightPrince/peft-distilbert-sst2",
    hub_token=hf_token,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=DataCollatorWithPadding(tokenizer),  # pad each training batch to its longest example
)

trainer.train()
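
# Optional sanity check after training: a minimal inference sketch that runs the
# LoRA-adapted model on one SST-2 validation sentence. It reuses the `model`,
# `tokenizer`, and `dataset` objects defined above and assumes training has just
# finished in this same process; adapt as needed if you reload the adapter from the Hub.
import torch

model.eval()
sample = dataset["validation"][0]["sentence"]
inputs = tokenizer(sample, return_tensors="pt").to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits
pred = logits.argmax(dim=-1).item()
print(f"Sentence: {sample!r} -> predicted label: {pred} (0 = negative, 1 = positive)")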