Spaces:
Sleeping
Sleeping
ChatExplorer
/
dataset_adapters
/4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py
def transform_data(data):
    """Convert a record with a ``messages`` list into ShareGPT-style turns.

    Each message is a mapping with ``'role'`` and ``'content'`` keys and is
    turned into a ``{'from': ..., 'value': ...}`` dict:

    * ``'system'``    -> ``'system'``; all system turns are placed at the
      front of the result, in the order they appear in the input.
    * ``'assistant'`` -> ``'gpt'``.
    * ``'input'`` with non-empty content is merged into the immediately
      preceding human turn (joined by a blank line) when there is one;
      otherwise it becomes its own human turn.
    * any other role (``'user'``, ``'instruction'``, ...) -> ``'human'``.

    If the first non-system turn comes from ``'gpt'``, an empty human turn is
    inserted in front of it so the dialogue always opens with the human side,
    including when a system prompt is present.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing key
            or an explicit ``None`` is treated as an empty conversation.

    Returns:
        A new list of ``{'from': str, 'value': ...}`` dicts.

    Raises:
        KeyError: If a message lacks ``'role'`` or ``'content'``.
    """
    system_turns = []
    dialogue = []

    # `or []` also covers `"messages": None`, which `.get(key, [])` does not.
    for message in data.get('messages') or []:
        role = message['role']
        content = message['content']

        if role == 'system':
            # Collected separately so several system prompts keep their
            # original order instead of being reversed by insert(0, ...).
            system_turns.append({'from': 'system', 'value': content})
        elif role == 'assistant':
            dialogue.append({'from': 'gpt', 'value': content})
        elif (
            role == 'input'
            and content
            and dialogue
            and dialogue[-1]['from'] == 'human'
        ):
            # An 'input' following an instruction extends that human turn.
            dialogue[-1]['value'] += '\n\n' + content
        else:
            dialogue.append({'from': 'human', 'value': content})

    # Decide on the first *non-system* turn, so a leading system prompt does
    # not hide a conversation that opens with the assistant.
    if dialogue and dialogue[0]['from'] == 'gpt':
        dialogue.insert(0, {'from': 'human', 'value': ''})

    return system_turns + dialogue