File size: 1,905 Bytes
10c1f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def transform_data(data):
    """Convert a role/content message record into a from/value conversation list.

    Args:
        data: Mapping with a ``'messages'`` list of dicts, each carrying
            ``'role'`` and ``'content'`` keys, and an optional ``'prompt'``
            string. The mapping and its messages are never modified.

    Returns:
        ``{'conversations': [...]}`` where every entry is
        ``{'from': <mapped role>, 'value': <content>}``. Entries are ordered:
        all system messages first, then user/assistant turns paired by
        position (user first), then any user messages left without a reply.

    Raises:
        KeyError: If ``'messages'`` is missing, or a message that is used
            lacks ``'role'`` or ``'content'``.

    Notes:
        * Messages are regrouped by role, so the original interleaving is not
          preserved.
        * Assistant messages beyond the number of user messages are dropped,
          so that a human turn always precedes each gpt turn.
        * Messages whose role is not system/user/assistant are ignored.
    """
    # Alias roles for conversion.
    role_mapping = {'user': 'human', 'assistant': 'gpt', 'system': 'system'}
    messages = data['messages']

    system_messages = [msg for msg in messages if msg['role'] == 'system']
    human_messages = [msg for msg in messages if msg['role'] == 'user']
    gpt_messages = [msg for msg in messages if msg['role'] == 'assistant']

    # System messages always lead the conversation.
    conversations = [
        {'from': role_mapping['system'], 'value': msg['content']}
        for msg in system_messages
    ]

    # Work on a copy of the user contents so the caller's message dicts are
    # left untouched; writing the prompt back into `data` would make a second
    # call prepend it twice.
    human_values = [msg['content'] for msg in human_messages]

    # `or ''` also covers an explicit ``'prompt': None``, which
    # ``data.get('prompt', '')`` would pass through to ``.strip()``.
    prompt = data.get('prompt') or ''
    if human_values and prompt.strip():
        # A non-blank prompt is prepended verbatim to the first user turn.
        human_values[0] = prompt + '\n\n' + human_values[0]

    # Emit each user turn followed by the assistant reply at the same index,
    # if there is one; trailing user turns are emitted on their own.
    for index, human_value in enumerate(human_values):
        conversations.append({'from': role_mapping['user'], 'value': human_value})
        if index < len(gpt_messages):
            conversations.append({
                'from': role_mapping['assistant'],
                'value': gpt_messages[index]['content'],
            })

    # Resulting data is a dictionary with a single key "conversations".
    return {'conversations': conversations}