File size: 2,661 Bytes
10c1f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# def transform_data(data):
#     conversations = []
    
#     # start with instruction or input
#     if "instruction" in data:
#         conversation = {}
#         conversation["from"] = "system"
#         conversation["value"] = data["instruction"]
#         conversations.append(conversation)

#     if "input" in data and data["input"].strip() != "":
#         if conversations:
#             # Concat the input at the end of the first message
#             conversations[0]["value"] += "\n" + data["input"]
#         else:
#             conversation = {}
#             conversation["from"] = "human"
#             conversation["value"] = data["input"]
#             conversations.append(conversation)

#     # finalize with "output"
#     if "output" in data:
#         conversation = {}
#         conversation["from"] = "gpt"
#         conversation["value"] = data["output"]
#         conversations.append(conversation)
    
#     return {"conversations": conversations}


# def transform_data(data):
#     # Initialize the final result list
#     result = []

#     # Process "instruction"
#     if "instruction" in data and data["instruction"]:
#         result.append({
#             'from': 'system',
#             'value': data["instruction"]
#         })
        
#     # Process "input"
#     if "input" in data and data["input"]:
#         # If "instruction" has already been added
#         if result:
#             # Add "input" to the end of the first message
#             result[0]['value'] += '\n' + data["input"]
#         else:
#             # If there's no "instruction", add "input" as a separate message
#             result.append({
#                 'from': 'human',
#                 'value': data["input"]
#             })

#     # Process "output"
#     if "output" in data and data["output"]:
#         result.append({
#             'from': 'gpt',
#             'value': data["output"]
#         })

#     return { 'conversations': result }


def transform_data(data):
    """Convert an instruction/input/output record into a conversation dict.

    A field is used only when its key is present in ``data`` and its value
    is truthy; missing and empty fields are skipped the same way.

    Args:
        data: Mapping that may contain the keys ``'instruction'``,
            ``'input'`` and ``'output'``.

    Returns:
        ``{'conversations': [...]}`` where each entry is a dict with the
        keys ``'from'`` and ``'value'``, in this order:

        * a ``'system'`` turn holding the instruction, with the input
          appended after a single space when both are present;
        * otherwise a ``'human'`` turn holding the input alone;
        * a ``'gpt'`` turn holding the output.
    """
    def _usable(key):
        # Absent keys and falsy values are both reported as None.
        if key in data and data[key]:
            return data[key]
        return None

    instruction = _usable('instruction')
    user_input = _usable('input')

    turns = []
    if instruction is not None:
        system_turn = {'from': 'system', 'value': instruction}
        if user_input is not None:
            # The input does not get its own turn when an instruction
            # exists; it is folded into the system message instead.
            system_turn['value'] += ' ' + user_input
        turns.append(system_turn)
    elif user_input is not None:
        turns.append({'from': 'human', 'value': user_input})

    answer = _usable('output')
    if answer is not None:
        turns.append({'from': 'gpt', 'value': answer})

    return {'conversations': turns}