Spaces:
Sleeping
Sleeping
File size: 1,905 Bytes
10c1f9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
def transform_data(data):
    """Convert a record with a ``messages`` list into a ``conversations`` dict.

    Each input message is a dict with ``role`` and ``content`` keys. Roles are
    renamed (``user`` -> ``human``, ``assistant`` -> ``gpt``, ``system`` stays
    ``system``) and emitted as ``{'from': ..., 'value': ...}`` entries in this
    order:

    1. every system message, in input order;
    2. user and assistant messages paired by position (first user with first
       assistant, and so on), user first;
    3. any user messages left over once the assistant messages run out.

    Assistant messages beyond the number of user messages are dropped, and
    messages with any other role are ignored.

    If ``data`` has a non-blank ``prompt``, it is prepended (followed by a
    blank line) to the first user message's value. The input ``data`` is never
    modified.

    Args:
        data: Mapping with a required ``messages`` list and an optional
            ``prompt`` string (a missing or ``None`` prompt is treated as
            empty).

    Returns:
        A dict with the single key ``conversations`` holding the list of
        converted entries.

    Raises:
        KeyError: If ``data`` has no ``messages`` key, or a message lacks
            ``role`` or a used message lacks ``content``.
    """
    # Alias table for converting input roles to output speaker names.
    role_mapping = {'user': 'human', 'assistant': 'gpt', 'system': 'system'}

    # Group messages by role in a single pass; unknown roles are skipped.
    grouped = {role: [] for role in role_mapping}
    for msg in data['messages']:
        bucket = grouped.get(msg['role'])
        if bucket is not None:
            bucket.append(msg)

    system_messages = grouped['system']
    human_messages = grouped['user']
    gpt_messages = grouped['assistant']

    # System messages always come first.
    conversations = [
        {'from': role_mapping['system'], 'value': msg['content']}
        for msg in system_messages
    ]

    # ``or ''`` also covers an explicit ``None`` prompt, which would otherwise
    # fail on ``.strip()``.
    prompt = data.get('prompt') or ''
    has_prompt = bool(prompt.strip())

    # Emit each user message followed by the assistant message at the same
    # position, if there is one. Values are built as new strings so the
    # caller's message dicts are left untouched.
    for index, user_msg in enumerate(human_messages):
        value = user_msg['content']
        if index == 0 and has_prompt:
            value = prompt + '\n\n' + value
        conversations.append({'from': role_mapping['user'], 'value': value})
        if index < len(gpt_messages):
            conversations.append({
                'from': role_mapping['assistant'],
                'value': gpt_messages[index]['content'],
            })

    # Resulting data is a dictionary with a single key "conversations".
    return {'conversations': conversations}
|