Spaces:
Sleeping
Sleeping
File size: 642 Bytes
ef4c8c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# Tokenization/pretraining/instruction_formatter.py
class InstructionFormatter:
    """Normalize instruction-tuning samples into a fixed three-field dict."""

    @staticmethod
    def format_sample(sample: dict) -> dict:
        """Return a cleaned copy of *sample* with exactly three text fields.

        The result always contains the keys ``"instruction"``, ``"input"``
        and ``"output"``, each mapped to a whitespace-stripped string.
        This is a placeholder; customize as needed for your data.

        Args:
            sample: Mapping that may contain ``"instruction"``, ``"input"``
                and ``"output"`` entries. Any other keys are ignored.

        Returns:
            A new dict with the three fields above. A field that is missing
            or explicitly ``None`` becomes ``""``; a non-string value is
            converted with ``str()`` before stripping.
        """
        formatted = {}
        for field in ("instruction", "input", "output"):
            value = sample.get(field)
            # ``dict.get(key, "")`` only covers a *missing* key; a key that is
            # present with a ``None`` value (e.g. JSON ``null``) would still
            # reach ``.strip()`` and raise AttributeError, so handle it here.
            formatted[field] = "" if value is None else str(value).strip()
        return formatted
|