Kevin Fink committed on
Commit
ee9996e
·
1 Parent(s): 82ed3c8
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -113,7 +113,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
113
  #max_length = 512
114
  # Load the dataset
115
  max_length = model.get_input_embeddings().weight.shape[0]
116
- print(f"TESTING: {os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK)}")
117
  try:
118
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
119
  if os.access(f'/data/{hub_id.strip()}_test_dataset', os.R_OK):
@@ -147,11 +147,11 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
147
  print("FOUND VALIDATION")
148
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
149
  third_third = dataset['train'].select(range(third_size*2, train_size))
150
- #dataset['train'] = third_third
151
  print(dataset)
152
  print(dataset.keys())
153
- tokenized_second_half = tokenize_function(third_third)
154
- #tokenized_second_half = dataset.map(tokenize_function)
155
  dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
156
  dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
157
  return 'THIRD THIRD LOADED'
 
113
  #max_length = 512
114
  # Load the dataset
115
  max_length = model.get_input_embeddings().weight.shape[0]
116
+
117
  try:
118
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
119
  if os.access(f'/data/{hub_id.strip()}_test_dataset', os.R_OK):
 
147
  print("FOUND VALIDATION")
148
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
149
  third_third = dataset['train'].select(range(third_size*2, train_size))
150
+ dataset['train'] = third_third
151
  print(dataset)
152
  print(dataset.keys())
153
+ #tokenized_second_half = tokenize_function(third_third)
154
+ tokenized_second_half = dataset.map(tokenize_function)
155
  dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
156
  dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
157
  return 'THIRD THIRD LOADED'