# ZackBradshaw's picture
# Upload folder using huggingface_hub
# e67043b verified
# raw
# history blame contribute delete
# 920 Bytes
import random
def subsample_data(data, subsample_size):
    """
    Draw a random subset of paired data.

    ``data`` is a tuple ``(inputs, outputs)`` of two equally long lists.
    ``subsample_size`` distinct positions are chosen with ``random.sample``
    and the items found at those positions are returned as a new
    ``(inputs, outputs)`` tuple, in the order the positions were drawn, so
    every input stays aligned with its output.
    """
    all_inputs, all_outputs = data
    assert len(all_inputs) == len(all_outputs)
    chosen = random.sample(range(len(all_inputs)), subsample_size)

    def _pick(sequence):
        # Gather the items of ``sequence`` at the drawn positions.
        return [sequence[position] for position in chosen]

    return _pick(all_inputs), _pick(all_outputs)
def create_split(data, split_size):
    """
    Split data into two parts. Data is in the form of a tuple of lists.

    ``split_size`` distinct positions are drawn with ``random.sample``. The
    items at those positions form the first part, in the order they were
    drawn; every other item goes to the second part, in its original order.

    Returns ``((inputs1, outputs1), (inputs2, outputs2))``.

    Raises ``AssertionError`` when the two lists differ in length (unless
    assertions are disabled). ``random.sample`` raises ``ValueError`` when
    ``split_size`` is negative or larger than the number of items.
    """
    inputs, outputs = data
    assert len(inputs) == len(outputs)
    indices = random.sample(range(len(inputs)), split_size)
    # Membership tests against a set keep the complement pass linear; testing
    # against the list of drawn indices cost O(n * split_size), twice over.
    selected = set(indices)
    remaining = [i for i in range(len(inputs)) if i not in selected]
    inputs1 = [inputs[i] for i in indices]
    outputs1 = [outputs[i] for i in indices]
    inputs2 = [inputs[i] for i in remaining]
    outputs2 = [outputs[i] for i in remaining]
    return (inputs1, outputs1), (inputs2, outputs2)