| |
| import torch |
| from datasets import load_dataset |
| from transformers import ( |
| AutoTokenizer, |
| AutoModelForCausalLM, |
| Trainer, |
| TrainingArguments, |
| DataCollatorForLanguageModeling, |
| ) |
|
|
| |
| |
| |
# Hub identifier of the dataset to fine-tune on.
dataset_name = "Anthropic/hh-rlhf"

print("Loading dataset...")
ds = load_dataset(dataset_name)
|
|
| |
| |
| |
def _first_exchange(text):
    """Extract the first human prompt and assistant reply from a transcript.

    The transcript is expected to mark turns with "Human:" and "Assistant:".

    Args:
        text: Raw conversation transcript.

    Returns:
        A ``(human, assistant)`` tuple of stripped strings, or ``None`` when
        the transcript has no "Assistant:" marker or either side is empty.
    """
    prompt, marker, remainder = text.partition("Assistant:")
    if not marker:
        return None
    human = prompt.replace("Human:", "").strip()
    # A transcript may hold several turns; anything from the next "Human:"
    # marker onwards belongs to a later turn and must not leak into the reply.
    assistant = remainder.partition("Human:")[0].strip()
    if not human or not assistant:
        # An empty prompt or reply carries no training signal.
        return None
    return human, assistant


# Build {"input", "output"} pairs from the preferred ("chosen") transcripts.
train_data = []
for item in ds["train"]:
    exchange = _first_exchange(item["chosen"])
    if exchange is not None:
        human, assistant = exchange
        train_data.append({"input": human, "output": assistant})

print(f"Total training examples: {len(train_data)}")
# Guard the sample print: indexing an empty list would raise IndexError.
if train_data:
    print("Example:", train_data[0])
|
|
| |
| |
| |
# Checkpoint used for both the tokenizer and the causal language model.
model_name = "distilgpt2"
print(f"Loading model and tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fixed-length padding needs a pad token; register one if the tokenizer
# does not define it.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

model = AutoModelForCausalLM.from_pretrained(model_name)
# Keep the embedding table in sync with the tokenizer's current vocabulary
# size (it may have grown by the pad token added above).
vocabulary_size = len(tokenizer)
model.resize_token_embeddings(vocabulary_size)
|
|
| |
| |
| |
def tokenize_function(example, max_length=128):
    """Tokenize one training pair into a fixed-length sequence.

    The text is laid out as ``You: <input> AI: <output><eos>``. This mirrors
    the "You: ... AI:" prompt that ``chat()`` builds at inference time, so the
    model is trained on the same format it is later prompted with. The
    end-of-sequence token marks where a reply stops.

    Args:
        example: Mapping with string values under "input" and "output".
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer's encoding for the formatted text.
    """
    # Some tokenizers define no EOS token; fall back to appending nothing.
    eos = tokenizer.eos_token or ""
    text = f"You: {example['input']} AI: {example['output']}{eos}"
    return tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
|
|
# Tokenize every prepared pair eagerly, preserving the order of train_data.
tokenized_data = list(map(tokenize_function, train_data))
|
|
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset over a sequence of already-tokenized examples.

    Each stored example is a mapping of field name to values; indexing
    returns the same mapping with every value converted by ``torch.tensor``.
    """

    def __init__(self, data):
        # Indexable collection of per-example mappings.
        self.data = data

    def __len__(self):
        """Return the number of stored examples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors."""
        record = self.data[idx]
        tensors = {}
        for field, values in record.items():
            tensors[field] = torch.tensor(values)
        return tensors
|
|
# Wrap the tokenized examples so the Trainer can index them.
train_dataset = CustomDataset(tokenized_data)

# mlm=False turns off masked-language-model masking in the collator.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Half precision is requested only when a CUDA device is present.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir="./hh_rlhf_model",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    logging_steps=50,
    save_steps=500,
    save_total_limit=1,
    fp16=use_fp16,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

print("Starting fine-tuning...")
trainer.train()
print("✅ Training complete!")
|
|
| |
| |
| |
# (user message, model reply) pairs from earlier turns, oldest first.
conversation_history = []


def chat(user_input, max_new_tokens=100):
    """Generate a reply to ``user_input`` and record the turn in the history.

    The prompt is every earlier turn rendered as ``You: <u> AI: <a>``,
    followed by ``You: <user_input> AI:``.

    Args:
        user_input: The user's latest message.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply (independent of the prompt length).

    Returns:
        The reply text only, without the prompt or earlier turns.
    """
    turns = [f"You: {u} AI: {a}" for u, a in conversation_history]
    turns.append(f"You: {user_input} AI:")
    full_input = " ".join(turns)

    encoded = tokenizer(full_input, return_tensors="pt")

    # Keep only the most recent prompt tokens so that prompt + reply fit in
    # the model's context window; the oldest history is dropped first.
    context_limit = getattr(model.config, "max_position_embeddings", 1024)
    max_prompt_tokens = max(1, context_limit - max_new_tokens)
    # Move inputs to the model's device: the Trainer may have moved the model
    # to a GPU, and generate() fails when inputs and weights are on different
    # devices.
    input_ids = encoded["input_ids"][:, -max_prompt_tokens:].to(model.device)
    attention_mask = encoded["attention_mask"][:, -max_prompt_tokens:].to(
        model.device
    )

    # Training leaves the model in train mode; switch to eval mode so dropout
    # does not perturb generation.
    model.eval()
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        # Budget applies to new tokens only, so a long history cannot consume
        # the whole generation allowance.
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=True,
        temperature=0.7,
    )

    # generate() returns prompt + continuation; decode only the continuation,
    # otherwise the stored reply would contain the entire prompt and the
    # history would grow with every turn.
    new_tokens = output_ids[0, input_ids.shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # Drop any follow-up user turn the model may have started writing.
    response = response.split("You:", 1)[0].strip()

    conversation_history.append((user_input, response))
    return response
|
|
# Interactive loop: read a message, print the model's reply, repeat until the
# user types "exit" or closes the input stream.
print("\nAnthropic hh-rlhf chatbot ready! Type 'exit' to quit.\n")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C: leave quietly instead of dumping a traceback.
        print()
        break
    if user_input.lower() == "exit":
        break
    if not user_input:
        # Nothing to answer; prompt again without calling the model.
        continue
    print("AI:", chat(user_input))
|
|