|
import os |
|
from datasets import load_from_disk, concatenate_datasets |
|
|
|
def load_and_merge_datasets(directories):
    """Load a saved dataset from each directory and concatenate them.

    Args:
        directories: Iterable of paths; each one is passed to
            ``load_from_disk`` in iteration order.

    Returns:
        The result of ``concatenate_datasets`` applied to the loaded
        datasets, in the same order as ``directories``.
    """
    # NOTE(review): presumably every directory holds a single split saved
    # with matching features, as concatenation requires -- confirm.
    loaded_parts = [load_from_disk(path) for path in directories]
    return concatenate_datasets(loaded_parts)
|
|
|
# On-disk dataset shards to be merged, listed in concatenation order.
data_directories = [
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part0",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part1",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part2",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part3",
]

# Combine all shards into one dataset.
merged_dataset = load_and_merge_datasets(data_directories)

# Upload the merged dataset to the Hub repository named below.
merged_dataset.push_to_hub("CNX-PathLLM/TCGA-WSI-Text")
|
|