Kevin Fink committed
Commit 8504394 · 1 Parent(s): afdaed1
Files changed (1)
  1. app.py +44 -33
app.py CHANGED
@@ -28,11 +28,44 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    # Set training arguments
+    training_args = TrainingArguments(
+        output_dir='./results',
+        eval_strategy="steps", # Change this to steps
+        save_strategy='steps',
+        learning_rate=lr*0.00001,
+        per_device_train_batch_size=int(batch_size),
+        per_device_eval_batch_size=int(batch_size),
+        num_train_epochs=int(num_epochs),
+        weight_decay=0.01,
+        gradient_accumulation_steps=int(grad),
+        max_grad_norm = 1.0,
+        load_best_model_at_end=True,
+        metric_for_best_model="accuracy",
+        greater_is_better=True,
+        logging_dir='./logs',
+        logging_steps=10,
+        #push_to_hub=True,
+        hub_model_id=hub_id.strip(),
+        fp16=True,
+        #lr_scheduler_type='cosine',
+        save_steps=200, # Save checkpoint every 500 steps
+        save_total_limit=3,
+    )
+
     max_length = 128
     try:
         tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
         tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
-        tokenized_datasets = concatenate_datasets([tokenized_train_dataset, tokenized_test_dataset])
+
+        # Create Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=tokenized_train_dataset,
+            eval_dataset=tokenized_test_dataset,
+            #callbacks=[LoggingCallback()],
+        )
     except:
         # Tokenize the dataset
         def tokenize_function(examples):
@@ -63,44 +96,22 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
         tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
 
+        # Create Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=tokenized_datasets['train'],
+            eval_dataset=tokenized_datasets['test'],
+            #callbacks=[LoggingCallback()],
+        )
 
-    # Set training arguments
-    training_args = TrainingArguments(
-        output_dir='./results',
-        eval_strategy="steps", # Change this to steps
-        save_strategy='steps',
-        learning_rate=lr*0.00001,
-        per_device_train_batch_size=int(batch_size),
-        per_device_eval_batch_size=int(batch_size),
-        num_train_epochs=int(num_epochs),
-        weight_decay=0.01,
-        gradient_accumulation_steps=int(grad),
-        max_grad_norm = 1.0,
-        load_best_model_at_end=True,
-        metric_for_best_model="accuracy",
-        greater_is_better=True,
-        logging_dir='./logs',
-        logging_steps=10,
-        #push_to_hub=True,
-        hub_model_id=hub_id.strip(),
-        fp16=True,
-        #lr_scheduler_type='cosine',
-        save_steps=200, # Save checkpoint every 500 steps
-        save_total_limit=3,
-    )
+
     # Check if a checkpoint exists and load it
     if os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir):
         print("Loading model from checkpoint...")
        model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
 
-    # Create Trainer
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_datasets['train'],
-        eval_dataset=tokenized_datasets['test'],
-        #callbacks=[LoggingCallback()],
-    )
+
 
     # Fine-tune the model
     trainer.train()
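
For orientation, below is a condensed sketch of how the relevant part of fine_tune_model reads after this commit: TrainingArguments is now built before the dataset cache lookup, and a Trainer is constructed in both branches of the try/except (from the cached splits on a hit, from the freshly tokenized splits on a miss). The model/LoRA setup, the real tokenize_function body, and the dataset loading step are elided by the diff, so the versions shown here are assumptions, not the file's actual code.

# Sketch only; names marked "assumed" are not taken from app.py.
import os
from datasets import load_dataset, load_from_disk
from transformers import (AutoModelForSeq2SeqLM, AutoTokenizer,
                          Trainer, TrainingArguments)

def fine_tune_model(model_name, dataset_name, hub_id, api_key,
                    num_epochs, batch_size, lr, grad):
    # PEFT/LoRA wrapping from the original file is elided here.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Training arguments are defined before the cache lookup so that
    # both branches below can pass them to Trainer.
    training_args = TrainingArguments(
        output_dir='./results',
        eval_strategy='steps',
        save_strategy='steps',
        learning_rate=lr * 0.00001,
        per_device_train_batch_size=int(batch_size),
        per_device_eval_batch_size=int(batch_size),
        num_train_epochs=int(num_epochs),
        gradient_accumulation_steps=int(grad),
        # These two require a compute_metrics function that reports "accuracy";
        # the commit keeps them but does not add one.
        load_best_model_at_end=True,
        metric_for_best_model='accuracy',
        greater_is_better=True,
        hub_model_id=hub_id.strip(),
        fp16=True,
        save_steps=200,
        save_total_limit=3,
    )

    max_length = 128
    try:
        # Cache hit: reuse tokenized splits saved by a previous run.
        tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
        tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_train_dataset,
            eval_dataset=tokenized_test_dataset,
        )
    except Exception:
        # Cache miss: tokenize from scratch, save the splits, then build the Trainer.
        def tokenize_function(examples):
            # Placeholder body; the real tokenize_function is outside this hunk.
            return tokenizer(examples['text'], truncation=True,
                             padding='max_length', max_length=max_length)

        dataset = load_dataset(dataset_name)  # assumed step, elided by the diff
        tokenized_datasets = dataset.map(tokenize_function, batched=True)
        tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
        tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets['train'],
            eval_dataset=tokenized_datasets['test'],
        )

    # Resume from an existing checkpoint directory if one is present.
    if os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir):
        print("Loading model from checkpoint...")
        model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)

    # Fine-tune the model
    trainer.train()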