# ChatExplorer / dataset_adapters / ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969_bp.py
# thomasgauthier: "will this work?" (10c1f9c)
def transform_data(data):
    """Convert a ``messages`` record into a ``conversations`` record.

    Only the *first* message of each of the roles ``system``, ``user`` and
    ``assistant`` is kept; any further messages and any other roles are
    ignored. The kept turns are always emitted in the fixed order
    system -> human -> gpt, regardless of where they appeared in the input.

    Args:
        data: Mapping with an optional ``'messages'`` entry holding an
            iterable of mappings, each read via ``'role'`` and ``'content'``.
            A missing or ``None`` ``'messages'`` entry is treated as empty.

    Returns:
        ``{'conversations': [...]}`` where each item is
        ``{'from': <'system' | 'human' | 'gpt'>, 'value': <content>}``.

    Raises:
        KeyError: If a selected message has no ``'content'`` key.
    """
    # Source role -> output label, listed in emission order.
    role_labels = (('system', 'system'), ('user', 'human'), ('assistant', 'gpt'))
    wanted_roles = tuple(role for role, _ in role_labels)

    # ``.get('messages', [])`` would still hand back None for an explicit
    # null value, so normalise that case here instead of failing on iteration.
    messages = data.get('messages')
    if messages is None:
        messages = ()

    # One pass over the messages, remembering the first hit per wanted role.
    first_by_role = {}
    for msg in messages:
        role = msg.get('role')
        if not isinstance(role, str):
            continue
        if role in wanted_roles and role not in first_by_role:
            first_by_role[role] = msg
            if len(first_by_role) == len(wanted_roles):
                # Every wanted role has been found; later messages are unused.
                break

    conversations = []
    for role, label in role_labels:
        msg = first_by_role.get(role)
        if msg:
            conversations.append({'from': label, 'value': msg['content']})

    return {'conversations': conversations}