File size: 1,492 Bytes
10c1f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def transform_data(data: dict) -> list:
    """Convert a ``{'messages': [...]}`` record into a list of ``from``/``value`` turns.

    Each message is a mapping with ``'role'`` and ``'content'`` keys. Roles are
    mapped as follows:

    * ``'system'``    -> ``{'from': 'system', ...}``, moved to the front of the
      result (several system messages keep their original relative order).
    * ``'assistant'`` -> ``{'from': 'gpt', ...}``.
    * ``'input'``     -> merged into the immediately preceding human turn,
      separated by a blank line; if there is no preceding human turn it becomes
      a human turn of its own.
    * anything else   -> ``{'from': 'human', ...}``.

    If the first non-system turn comes from ``'gpt'``, an empty human turn is
    inserted in front of it so the dialogue always opens with the human side.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing or
            ``None`` value is treated as an empty conversation.

    Returns:
        A new list of ``{'from': ..., 'value': ...}`` dicts. The input is not
        modified.

    Raises:
        KeyError: If a message lacks the ``'role'`` or ``'content'`` key.
    """
    system_turns = []
    dialogue = []

    # ``or []`` also covers an explicit ``'messages': None``.
    for message in data.get('messages') or []:
        role = message['role']
        content = message['content']

        if role == 'system':
            # Collected separately so that system turns end up first without
            # reversing their order (repeated insert(0, ...) would do that).
            system_turns.append({'from': 'system', 'value': content})
            continue

        if role == 'assistant':
            dialogue.append({'from': 'gpt', 'value': content})
            continue

        follows_human = bool(dialogue) and dialogue[-1]['from'] == 'human'
        if role == 'input' and follows_human:
            # An 'input' extends the instruction that precedes it. An empty
            # input has nothing to add and must not create a blank extra
            # human turn.
            if content:
                dialogue[-1]['value'] += '\n\n' + content
            continue

        dialogue.append({'from': 'human', 'value': content})

    # The check is done on the non-system part, so a leading system turn does
    # not hide a dialogue that starts with 'gpt'.
    if dialogue and dialogue[0]['from'] == 'gpt':
        dialogue.insert(0, {'from': 'human', 'value': ''})

    return system_turns + dialogue