Commit
·
58c3693
1
Parent(s):
5021159
Add random sample printing during evaluation
Browse files
main.py
CHANGED
@@ -140,7 +140,7 @@ if __name__ == "__main__":
|
|
140 |
train_dataset_shard_idx = epoch_idx % num_train_shards
|
141 |
# Get test shard depend on train shard id
|
142 |
test_dataset_shard_idx = round(train_dataset_shard_idx / (num_train_shards / num_test_shards))
|
143 |
-
num_test_sub_shard =
|
144 |
idx_sub_shard = train_dataset_shard_idx % num_test_sub_shard # loop over test shard subset
|
145 |
|
146 |
# load train shard
|
@@ -150,7 +150,7 @@ if __name__ == "__main__":
|
|
150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
151 |
'cache-train-shard-{}.arrow'.format(
|
152 |
train_dataset_shard_idx))
|
153 |
-
)
|
154 |
# load test shard subset
|
155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
156 |
'shard_{}'.format(test_dataset_shard_idx)),
|
|
|
140 |
train_dataset_shard_idx = epoch_idx % num_train_shards
|
141 |
# Get test shard depend on train shard id
|
142 |
test_dataset_shard_idx = round(train_dataset_shard_idx / (num_train_shards / num_test_shards))
|
143 |
+
num_test_sub_shard = 8 # Number of subsets each test shard is split into (default: 8)
|
144 |
idx_sub_shard = train_dataset_shard_idx % num_test_sub_shard # loop over test shard subset
|
145 |
|
146 |
# load train shard
|
|
|
150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
151 |
'cache-train-shard-{}.arrow'.format(
|
152 |
train_dataset_shard_idx))
|
153 |
+
) #.shard(1000, 0) # Remove shard split when train
|
154 |
# load test shard subset
|
155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
156 |
'shard_{}'.format(test_dataset_shard_idx)),
|