Spaces:

thomasgauthier
/

ChatExplorer

Sleeping

ChatExplorer / dataset_adapters /4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py

will this work?

10c1f9c 12 months ago

1.49 kB

	def transform_data(data):
	conversations = []

	# Iterate over messages, always processing 'input' and 'instruction' before 'output'
	for message in data.get('messages', []):
	# Check if it's a 'system' message and place it first if it exists
	if message['role'] == 'system':
	conversations.insert(0, {'from': 'system', 'value': message['content']})
	elif message['role'] == 'assistant':
	# 'assistant' is taken to be 'gpt'
	conversations.append({'from': 'gpt', 'value': message['content']})
	else:
	# 'user' is taken to be 'human'
	# Add 'instruction' directly if there is no 'input' for concatenation
	if message.get('role') == 'input' and message.get('content'):
	# If there are instructions before the input, we concatenate them.
	if conversations and conversations[-1]['from'] == 'human':
	conversations[-1]['value'] += '\n\n' + message['content']
	else:
	conversations.append({'from': 'human', 'value': message['content']})
	else:
	conversations.append({'from': 'human', 'value': message['content']})

	# Check for the order of conversation entries
	if conversations and conversations[0]['from'] == 'gpt':
	# If the first message is from 'gpt', prepend a 'human' message
	conversations.insert(0, {'from': 'human', 'value': ''})

	return conversations