Corrigan123 committed on
Commit cdecb4d
1 Parent(s): 191e433

Update app.py with optimized training settings

Files changed (1)
  1. app.py +6 -13
app.py CHANGED
@@ -1,15 +1,9 @@
  from transformers import (GPT2Tokenizer, GPT2LMHeadModel, Trainer,
-                           TrainingArguments, DataCollatorWithPadding, GradientAccumulationScheduler)
+                           TrainingArguments, DataCollatorWithPadding)
  from datasets import load_dataset
- import torch
 
- # Assuming your hardware supports it, enable gradient checkpointing
- model_config = {
-     "gradient_checkpointing": True,  # Enable gradient checkpointing
- }
-
- # Load the GPT-2 model with gradient checkpointing enabled
- model = GPT2LMHeadModel.from_pretrained("gpt2", **model_config)
+ # Load the GPT-2 model
+ model = GPT2LMHeadModel.from_pretrained("gpt2")
 
  # Initialize the GPT-2 tokenizer with a reduced max_length
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
@@ -34,13 +28,13 @@ tokenized_datasets.set_format(type="torch", columns=["input_ids", "attention_mas
  # Use a DataCollator that dynamically pads the batches
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")
 
- # Define training arguments with a smaller batch size and optimized settings
+ # Define training arguments with optimized settings
  training_args = TrainingArguments(
      output_dir="./output",
      overwrite_output_dir=True,
      num_train_epochs=3,
      per_device_train_batch_size=2,  # Decreased batch size
-     gradient_accumulation_steps=8,  # Adjust based on your new batch size to simulate larger batches
+     gradient_accumulation_steps=8,  # Adjusted for gradient accumulation
      save_steps=10_000,
      save_total_limit=2,
      no_cuda=False,
@@ -49,8 +43,7 @@ training_args = TrainingArguments(
      warmup_steps=100,
      logging_dir='./logs',
      logging_steps=100,
-     # Enable fp16 for memory and speed improvement if your hardware supports it
-     fp16=torch.cuda.is_available(),
+     fp16=True,  # Enable fp16 for memory and speed improvement if your hardware supports it
  )
 
  trainer = Trainer(
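
For context, a minimal, self-contained sketch of the training setup this commit lands on. The pad-token assignment and the CUDA guard on fp16 are assumptions added here for illustration (the diff hardcodes fp16=True and does not show any pad-token handling, which GPT-2's tokenizer needs before DataCollatorWithPadding can pad batches). With per_device_train_batch_size=2 and gradient_accumulation_steps=8, each optimizer step sees an effective batch of 2 x 8 = 16 examples per device.

import torch
from transformers import (GPT2Tokenizer, GPT2LMHeadModel, Trainer,
                          TrainingArguments, DataCollatorWithPadding)

# Load the GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Assumption: GPT-2 ships without a pad token, so reuse EOS for dynamic padding
tokenizer.pad_token = tokenizer.eos_token

# Dynamically pad each batch to the length of its longest sequence
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

training_args = TrainingArguments(
    output_dir="./output",
    num_train_epochs=3,
    per_device_train_batch_size=2,   # small per-device batch
    gradient_accumulation_steps=8,   # 2 x 8 = effective batch of 16 per device
    fp16=torch.cuda.is_available(),  # assumption: guard fp16 so CPU-only runs still work
    logging_steps=100,
)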