def transform_data(data):
    """Convert a ``{'messages': [...]}`` record into a list of from/value turns.

    Each message is a mapping with ``'role'`` and ``'content'`` keys and is
    handled in the order given:

    * ``'system'``    -> ``{'from': 'system', ...}``, inserted at the front
      of the result (so a later system message lands ahead of an earlier one).
    * ``'assistant'`` -> ``{'from': 'gpt', ...}``, appended.
    * ``'input'`` with non-empty content, arriving directly after a human
      turn -> concatenated onto that turn, separated by a blank line.
    * any other role (or an ``'input'`` that cannot be concatenated)
      -> ``{'from': 'human', ...}``, appended.

    If the first non-system turn is from ``'gpt'``, an empty human turn is
    inserted in front of it so the dialogue always opens with a human.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing or
            ``None`` value is treated as an empty list.

    Returns:
        A new list of ``{'from': ..., 'value': ...}`` dicts; the input
        messages are not modified.

    Raises:
        KeyError: If a message lacks ``'role'`` or ``'content'``.
    """
    conversations = []

    for message in data.get('messages') or []:
        role = message['role']
        content = message['content']

        if role == 'system':
            # System prompts always go to the front of the conversation.
            conversations.insert(0, {'from': 'system', 'value': content})
        elif role == 'assistant':
            # 'assistant' is taken to be 'gpt'.
            conversations.append({'from': 'gpt', 'value': content})
        elif (
            role == 'input'
            and content
            and conversations
            and conversations[-1]['from'] == 'human'
        ):
            # An 'input' right after a human turn (e.g. an instruction) is
            # merged into that turn instead of starting a new one.
            conversations[-1]['value'] += '\n\n' + content
        else:
            # Everything else ('user', 'instruction', a stand-alone 'input',
            # ...) is taken to be 'human'.
            conversations.append({'from': 'human', 'value': content})

    # Look past any leading system entries: checking only index 0 would miss
    # a conversation shaped like [system, gpt, ...].
    first_turn = next(
        (
            index
            for index, entry in enumerate(conversations)
            if entry['from'] != 'system'
        ),
        None,
    )
    if first_turn is not None and conversations[first_turn]['from'] == 'gpt':
        # Prepend an empty human message so 'gpt' never speaks first.
        conversations.insert(first_turn, {'from': 'human', 'value': ''})

    return conversations