ChatExplorer / dataset_adapters /87522c634adeee86c404df5141f0a2b983dff4bdde32f7d475db4cefa1dc2520.py
thomasgauthier's picture
will this work?
10c1f9c
# def transform_data(data):
# conversations = []
# # start with instruction or input
# if "instruction" in data:
# conversation = {}
# conversation["from"] = "system"
# conversation["value"] = data["instruction"]
# conversations.append(conversation)
# if "input" in data and data["input"].strip() != "":
# if conversations:
# # Concat the input at the end of the first message
# conversations[0]["value"] += "\n" + data["input"]
# else:
# conversation = {}
# conversation["from"] = "human"
# conversation["value"] = data["input"]
# conversations.append(conversation)
# # finalize with "output"
# if "output" in data:
# conversation = {}
# conversation["from"] = "gpt"
# conversation["value"] = data["output"]
# conversations.append(conversation)
# return {"conversations": conversations}
# def transform_data(data):
# # Initialize the final result list
# result = []
# # Process "instruction"
# if "instruction" in data and data["instruction"]:
# result.append({
# 'from': 'system',
# 'value': data["instruction"]
# })
# # Process "input"
# if "input" in data and data["input"]:
# # If "instruction" has already been added
# if result:
# # Add "input" to the end of the first message
# result[0]['value'] += '\n' + data["input"]
# else:
# # If there's no "instruction", add "input" as a separate message
# result.append({
# 'from': 'human',
# 'value': data["input"]
# })
# # Process "output"
# if "output" in data and data["output"]:
# result.append({
# 'from': 'gpt',
# 'value': data["output"]
# })
# return { 'conversations': result }
def transform_data(data):
    """Convert an instruction/input/output record into a conversation dict.

    The optional keys ``instruction``, ``input`` and ``output`` are read
    from *data*. A key that is missing or holds a falsy value (for example
    ``""`` or ``None``) is skipped.

    Mapping rules:

    * ``instruction`` becomes a message with ``from`` set to ``"system"``.
    * ``input`` is appended to that system message, separated by a single
      space, when an instruction message was emitted; otherwise it becomes
      its own ``"human"`` message.
    * ``output`` becomes a ``"gpt"`` message.

    Args:
        data: Mapping that may contain the keys described above.

    Returns:
        ``{'conversations': [...]}`` where each element is a dict with the
        keys ``'from'`` and ``'value'``, in the order system/human, gpt.
        The list is empty when none of the keys carries a truthy value.
    """
    # One lookup per key; missing keys and falsy values are treated alike.
    instruction = data.get('instruction')
    user_input = data.get('input')
    output = data.get('output')

    conversations = []
    if instruction:
        conversations.append({'from': 'system', 'value': instruction})
    if user_input:
        if instruction:
            # The instruction message is the last (and only) one at this
            # point, so the input is folded into it rather than emitted
            # as a separate turn.
            conversations[-1]['value'] += ' ' + user_input
        else:
            conversations.append({'from': 'human', 'value': user_input})
    if output:
        conversations.append({'from': 'gpt', 'value': output})
    return {'conversations': conversations}