Kevin Fink
committed on
Commit
·
ee9996e
1
Parent(s):
82ed3c8
dev
Browse files
app.py
CHANGED
@@ -113,7 +113,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
113 |
#max_length = 512
|
114 |
# Load the dataset
|
115 |
max_length = model.get_input_embeddings().weight.shape[0]
|
116 |
-
|
117 |
try:
|
118 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
if os.access(f'/data/{hub_id.strip()}_test_dataset', os.R_OK):
|
@@ -147,11 +147,11 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
147 |
print("FOUND VALIDATION")
|
148 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
149 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
150 |
-
|
151 |
print(dataset)
|
152 |
print(dataset.keys())
|
153 |
-
tokenized_second_half = tokenize_function(third_third)
|
154 |
-
|
155 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
|
156 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
157 |
return 'THIRD THIRD LOADED'
|
|
|
113 |
#max_length = 512
|
114 |
# Load the dataset
|
115 |
max_length = model.get_input_embeddings().weight.shape[0]
|
116 |
+
|
117 |
try:
|
118 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
if os.access(f'/data/{hub_id.strip()}_test_dataset', os.R_OK):
|
|
|
147 |
print("FOUND VALIDATION")
|
148 |
saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
149 |
third_third = dataset['train'].select(range(third_size*2, train_size))
|
150 |
+
dataset['train'] = third_third
|
151 |
print(dataset)
|
152 |
print(dataset.keys())
|
153 |
+
#tokenized_second_half = tokenize_function(third_third)
|
154 |
+
tokenized_second_half = dataset.map(tokenize_function)
|
155 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
|
156 |
dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
157 |
return 'THIRD THIRD LOADED'
|