def transform_data(data):
    """Convert a flat record into a ``{'conversations': [...]}`` structure.

    Fields are read in a fixed order -- ``role_1``, ``instruction`` (only when
    that key is present), ``message_1``, ``message_2`` -- and every field with
    a truthy value becomes one ``{'from': ..., 'value': ...}`` turn. Missing
    or empty fields are skipped.

    Args:
        data: Dict of field names to string values. The keys read are
            ``role_1``, ``instruction``, ``input``, ``message_1`` and
            ``message_2``; any other keys are ignored.

    Returns:
        A dict with the single key ``'conversations'`` holding the list of
        turns, each a dict with ``'from'`` (``'human'`` or ``'gpt'``) and
        ``'value'`` (the whitespace-stripped text).
    """
    # Speaker assigned to each field; anything not listed defaults to 'human'.
    role_mapping = {
        'role_1': 'human',
        'message_1': 'human',
        'message_2': 'gpt',
    }

    # Emission order. When present, 'instruction' is placed right before
    # 'message_1' (i.e. after the role label, ahead of the messages).
    conv_order = ['role_1', 'message_1', 'message_2']
    if 'instruction' in data:
        conv_order.insert(conv_order.index('message_1'), 'instruction')

    conversation = []
    for key in conv_order:
        raw_value = data.get(key)
        if not raw_value:
            continue

        if key == 'role_1':
            # Reduce the role to a readable label: keep the text after the
            # last '.', turn underscores into spaces, capitalize.
            # Presumably values look like 'Namespace.ROLE_NAME' -- confirm
            # against the dataset.
            msg_value = raw_value.split('.')[-1].replace('_', ' ').capitalize()
        else:
            # Messages and instructions are free text and must pass through
            # untouched; running the role cleanup on an instruction would drop
            # everything up to its last period.
            msg_value = raw_value

        # An instruction's accompanying 'input' is appended to the same turn.
        if key == 'instruction' and data.get('input'):
            msg_value += ' ' + data['input']

        conversation.append({
            'from': role_mapping.get(key, 'human'),
            'value': msg_value.strip(),
        })

    return {'conversations': conversation}