File size: 1,275 Bytes
10c1f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def transform_data(data):
    """Build a ``{'conversations': [...]}`` dict from a flat record.

    The record is read by key. Recognised keys are emitted as turns in this
    fixed order, each turn being ``{'from': <speaker>, 'value': <text>}``:

    1. ``role_1``      -> ``'human'``; the value is reduced to a readable
       label (text after the last ``'.'``, underscores turned into spaces,
       then ``str.capitalize``).
    2. ``instruction`` -> ``'human'``; emitted verbatim, with ``input``
       appended after a single space when ``input`` is present and truthy.
    3. ``message_1``   -> ``'human'``; emitted verbatim.
    4. ``message_2``   -> ``'gpt'``; emitted verbatim.

    Keys that are missing or whose value is falsy are skipped. Every emitted
    value is stripped of leading/trailing whitespace.

    Args:
        data: Mapping supporting ``in`` and ``[]``; values for the keys
            above are expected to be strings.

    Returns:
        A dict with the single key ``'conversations'`` holding the list of
        turns (possibly empty).
    """
    # Speaker assigned to each recognised key.
    role_mapping = {
        'role_1': 'human',
        'instruction': 'human',
        'message_1': 'human',
        'message_2': 'gpt',
    }

    # Fixed emission order; 'instruction' (when the record has one) is placed
    # directly before 'message_1'.
    # NOTE(review): an 'output' key is never emitted by this function --
    # confirm whether records that carry one are handled elsewhere.
    conv_order = ['role_1', 'message_1', 'message_2']
    if 'instruction' in data:
        conv_order.insert(conv_order.index('message_1'), 'instruction')

    conversation = []
    for key in conv_order:
        if not (key in data and data[key]):
            continue

        if key == 'role_1':
            # Only the role identifier gets normalised into a label. Doing
            # this to free text (as was previously done for 'instruction')
            # would drop everything before the last period.
            msg_value = data[key].split('.')[-1].replace('_', ' ').capitalize()
        else:
            msg_value = data[key]
            # An instruction may come with a separate 'input' payload;
            # append it so the turn is self-contained.
            if key == 'instruction' and 'input' in data and data['input']:
                msg_value += ' ' + data['input']

        conversation.append({
            'from': role_mapping[key],
            'value': msg_value.strip(),
        })

    return {'conversations': conversation}