CodePDE / data /extract_data_subsets.py
LDA1020's picture
feat: code release
56c4b9b verified
import h5py
import numpy as np
import os
def work(dataset_path, subset_path, subset_selection):
    """Copy a selected subset of one HDF5 dataset file into a new HDF5 file.

    Reads the ``'t-coordinate'``, ``'x-coordinate'`` and ``'tensor'`` datasets
    from ``dataset_path``, applies ``subset_selection`` to the tensor, and
    writes the three arrays to ``subset_path`` under the same keys.

    Args:
        dataset_path: Path of the HDF5 file to read.
        subset_path: Path of the HDF5 file to create (opened in ``'w'`` mode,
            so an existing file at that path is replaced).
        subset_selection: Callable that receives the full ``'tensor'`` array
            as a NumPy array and returns the part to keep.

    Note:
        The whole ``'tensor'`` dataset is materialized in memory before
        ``subset_selection`` is applied.
    """
    with h5py.File(dataset_path, 'r') as source:
        # The last time stamp is dropped; presumably this aligns the time
        # axis with the tensor's time dimension -- confirm against the data.
        time_grid = np.array(source['t-coordinate'])[:-1]
        space_grid = np.array(source['x-coordinate'])
        solution = subset_selection(np.array(source['tensor']))
        # Navier-Stokes files are structured differently: instead of
        # 'tensor' they would need 'Vx', 'density' and 'pressure' passed
        # through subset_selection in the same way.

    # Sanity check of the shapes, e.g.
    # (201,) (1024,) (100, 201, 1024) for burgers equation
    print(time_grid.shape, space_grid.shape, solution.shape)

    # Datasets are written in this exact order.
    # For Navier-Stokes data, add entries for 'Vx', 'density' and 'pressure'.
    outputs = (
        ('t-coordinate', time_grid),
        ('tensor', solution),
        ('x-coordinate', space_grid),
    )
    with h5py.File(subset_path, 'w') as target:
        for key, values in outputs:
            target.create_dataset(key, data=values)

    print(f"Subset data saved successfully at {subset_path}!")
def development_path_for(subset_path):
    """Return the path of the development subset that belongs to *subset_path*.

    ``_development`` is inserted immediately before the file extension, e.g.
    ``a/b.hdf5`` -> ``a/b_development.hdf5``. Only the extension of the final
    path component is considered, so the result always differs from
    ``subset_path`` (whatever the extension is) and directory names are never
    rewritten.

    Args:
        subset_path: Path of the primary subset file.

    Returns:
        The sibling path to use for the development subset.
    """
    stem, extension = os.path.splitext(subset_path)
    return f"{stem}_development{extension}"


def main():
    """Extract test and development subsets for every Burgers training file.

    For each regular file in the dataset directory, the first
    ``test_subset_size`` samples are written to a file of the same name in
    the subset directory, and the last ``dev_subset_size`` samples are
    written next to it with a ``_development`` suffix.
    """
    dataset_dir = '../dataset/1D/Burgers/Train'
    test_subset_size = 100
    dev_subset_size = 50
    subset_dir = '../dataset/CodePDE/Burgers'

    if not os.path.exists(subset_dir):
        print(f"Creating: {subset_dir}")
        os.makedirs(subset_dir)
    else:
        print(f"Exist: {subset_dir}")

    # Sorted so that files are processed in a reproducible order.
    for item in sorted(os.listdir(dataset_dir)):
        full_path = os.path.join(dataset_dir, item)
        if not os.path.isfile(full_path):
            continue
        print(full_path)

        subset_path = os.path.join(subset_dir, item)
        work(full_path, subset_path, lambda x: x[:test_subset_size])

        # Derived from the extension rather than a literal '.hdf5' replace,
        # so the development file can never collide with (and overwrite)
        # the subset file written just above.
        dev_path = development_path_for(subset_path)
        work(full_path, dev_path, lambda x: x[-dev_subset_size:])


if __name__ == '__main__':
    main()