Kevin Fink committed on
Commit
f1ce0f3
·
1 Parent(s): 42338b1
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -166,7 +166,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
166
 
167
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
168
  dataset = load_dataset(dataset_name.strip())
169
- dataset['train'] = dataset['train'].select(range(15000))
170
  train_size = len(dataset['train'])
171
  third_size = train_size // 3
172
  del dataset['test']
@@ -183,9 +182,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
183
 
184
 
185
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
186
- dataset = load_dataset(dataset_name.strip())
187
- dataset['train'] = dataset['train'].select(range(15000))
188
- dataset['validation'] = dataset['validation'].select(range(2000))
189
  train_size = len(dataset['train'])
190
  third_size = train_size // 3
191
  second_third = dataset['train'].select(range(third_size, third_size*2))
@@ -201,7 +198,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
201
  except Exception as e:
202
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
203
  dataset = load_dataset(dataset_name.strip())
204
- dataset['train'] = dataset['train'].select(range(15000))
205
  train_size = len(dataset['train'])
206
  third_size = train_size // 3
207
  # Tokenize the dataset
 
166
 
167
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
168
  dataset = load_dataset(dataset_name.strip())
 
169
  train_size = len(dataset['train'])
170
  third_size = train_size // 3
171
  del dataset['test']
 
182
 
183
 
184
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
185
+ dataset = load_dataset(dataset_name.strip())
 
 
186
  train_size = len(dataset['train'])
187
  third_size = train_size // 3
188
  second_third = dataset['train'].select(range(third_size, third_size*2))
 
198
  except Exception as e:
199
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
200
  dataset = load_dataset(dataset_name.strip())
 
201
  train_size = len(dataset['train'])
202
  third_size = train_size // 3
203
  # Tokenize the dataset