import gradio as gr
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset


# Load the pre-trained GPT-2 model and tokenizer.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# GPT-2 has no pad token by default; reuse the EOS token so that
# padding="max_length" in encode() below does not raise an error.
tokenizer.pad_token = tokenizer.eos_token


def fine_tune_gpt2(dataset_name, epochs, learning_rate):
    """Fine-tune GPT-2 on a Hugging Face text dataset.

    This is standard supervised causal-LM fine-tuning with the Trainer
    API; no reinforcement learning (PPO) is involved.
    """
    dataset = load_dataset(dataset_name)
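    # NOTE (assumption): some Hub datasets also need a config name, e.g.
    # load_dataset("wikitext", "wikitext-2-raw-v1"). This sketch expects the
    # dataset name alone to be sufficient and the dataset to expose a
    # "text" column.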

    # Tokenize the raw text into fixed-length sequences of input IDs.
    def encode(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

    tokenized_dataset = dataset.map(encode, batched=True)
    train_dataset = tokenized_dataset["train"]

    # mlm=False selects causal language modeling: the collator copies the
    # input IDs to the labels (padding positions are masked out).
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir="./results",
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=4,
        save_steps=10_000,
        save_total_limit=2,
        learning_rate=learning_rate,
    )
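    # Optional (assumption, not in the original script): on a CUDA GPU,
    # adding fp16=True to TrainingArguments above enables mixed-precision
    # training, which is usually faster and uses less memory.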

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
    )

    trainer.train()

    return "Training Completed!"
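
# Example of calling the trainer directly (assumes the dataset has a "train"
# split and a "text" column, as "imdb" does):
#   fine_tune_gpt2("imdb", epochs=1, learning_rate=5e-5)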


def train_interface(dataset, epochs, learning_rate):
    return fine_tune_gpt2(dataset, int(epochs), float(learning_rate))


# The old gr.inputs.* namespace is deprecated/removed in current Gradio
# releases; use the top-level components (gr.Textbox, gr.Slider) instead.
gradio_interface = gr.Interface(
    fn=train_interface,
    inputs=[
        gr.Textbox(label="Dataset (e.g. 'imdb')"),
        gr.Slider(1, 10, step=1, label="Epochs"),
        gr.Textbox(label="Learning Rate (e.g. 5e-5)"),
    ],
    outputs="text",
    title="GPT-2 Fine-Tuning App",
    description="Fine-tune GPT-2 with supervised causal-LM training via a Gradio interface.",
)


if __name__ == "__main__":
    gradio_interface.launch()
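# Tip: gradio_interface.launch(share=True) also creates a temporary public URL.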