ChatExplorer / dataset_adapters /4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py
thomasgauthier's picture
will this work?
10c1f9c
def transform_data(data: dict) -> list:
    """Convert a ``{'messages': [...]}`` record into a list of conversation turns.

    Each incoming message is a mapping with a ``'role'`` and a ``'content'``.
    Each returned turn is ``{'from': <speaker>, 'value': <content>}`` where the
    speaker is derived from the role:

    * ``'system'``    -> ``'system'``; always kept at the front of the result,
      in the order the system messages were encountered.
    * ``'assistant'`` -> ``'gpt'``.
    * anything else (including a missing role) -> ``'human'``.

    A non-empty ``'input'`` message that directly follows a human turn is
    merged into that turn, separated by a blank line, instead of starting a
    new one.

    If the first non-system turn is from ``'gpt'``, an empty ``'human'`` turn
    is inserted in front of it so the dialogue never opens with the assistant.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing or
            ``None`` value is treated as an empty list.

    Returns:
        The list of ``{'from', 'value'}`` turns (empty when there are no
        messages).

    Raises:
        KeyError: If a message has no ``'content'`` key.
    """
    conversations = []
    # System turns always occupy conversations[:system_count]. Inserting at
    # this index (rather than at 0) keeps several system messages in their
    # original relative order.
    system_count = 0

    # `or []` also covers an explicit ``'messages': None``.
    for message in data.get('messages') or []:
        role = message.get('role')
        content = message['content']

        if role == 'system':
            conversations.insert(
                system_count, {'from': 'system', 'value': content}
            )
            system_count += 1
        elif role == 'assistant':
            conversations.append({'from': 'gpt', 'value': content})
        elif (
            role == 'input'
            and content
            and conversations
            and conversations[-1]['from'] == 'human'
        ):
            # An 'input' that follows an instruction extends the same human turn.
            conversations[-1]['value'] += '\n\n' + content
        else:
            conversations.append({'from': 'human', 'value': content})

    # Look past the leading system turns: the first real turn must not be 'gpt'.
    if (
        len(conversations) > system_count
        and conversations[system_count]['from'] == 'gpt'
    ):
        conversations.insert(system_count, {'from': 'human', 'value': ''})

    return conversations