def transform_data(data):
    """Convert a role-tagged chat record into a ``conversations`` list.

    The input messages are regrouped rather than kept in their original
    order: all system messages come first, followed by user/assistant
    turns paired positionally (first user with first assistant, and so on)
    so that the human side always speaks first in each pair.

    Args:
        data: Mapping with a ``'messages'`` list of dicts, each carrying
            ``'role'`` (``'user'``, ``'assistant'`` or ``'system'``) and
            ``'content'``. An optional ``'prompt'`` string is prepended to
            the first user message, separated by a blank line, when it
            contains non-whitespace text.

    Returns:
        ``{'conversations': [...]}`` where every entry is a dict of the
        form ``{'from': <'system' | 'human' | 'gpt'>, 'value': <content>}``.

    Raises:
        KeyError: If ``data`` has no ``'messages'`` key, or an accessed
            message lacks ``'role'`` or ``'content'``.

    Notes:
        * ``data`` and its message dicts are never modified.
        * User messages without a matching assistant reply are appended
          after the pairs; assistant messages without a matching user
          message are dropped.
        * Messages whose role is not one of the three known roles are
          ignored.
    """
    role_mapping = {'user': 'human', 'assistant': 'gpt', 'system': 'system'}

    messages = data['messages']
    system_messages = [msg for msg in messages if msg['role'] == 'system']
    human_messages = [msg for msg in messages if msg['role'] == 'user']
    gpt_messages = [msg for msg in messages if msg['role'] == 'assistant']

    # Work on a copy of the user contents so that prepending the prompt does
    # not write back into the caller's message dicts (which would also make
    # repeated calls stack the prompt again and again).
    human_values = [msg['content'] for msg in human_messages]

    # ``or ''`` also covers an explicit ``'prompt': None`` in the record.
    prompt = data.get('prompt') or ''
    if human_values and prompt.strip():
        human_values[0] = prompt + '\n\n' + human_values[0]

    conversations = [
        {'from': role_mapping['system'], 'value': msg['content']}
        for msg in system_messages
    ]

    # zip() stops at the shorter list, so only complete pairs are emitted here.
    for human_value, gpt_msg in zip(human_values, gpt_messages):
        conversations.append(
            {'from': role_mapping['user'], 'value': human_value}
        )
        conversations.append(
            {'from': role_mapping['assistant'], 'value': gpt_msg['content']}
        )

    # Trailing user messages that had no assistant reply to pair with.
    for human_value in human_values[len(gpt_messages):]:
        conversations.append(
            {'from': role_mapping['user'], 'value': human_value}
        )

    return {'conversations': conversations}