Spaces:

thomasgauthier
/

ChatExplorer

Sleeping

App Files Files Community

ChatExplorer / dataset_adapters /ed2b4cf199998dfb4690d6ae767d25dca1256ccd97729b257db3a37206a72969.py

thomasgauthier

will this work?

10c1f9c 8 months ago

raw

history blame contribute delete

No virus

1.91 kB

	def transform_data(data):
		"""Convert a chat record with ``messages`` into a ``conversations`` list.

		Each input message is a dict with ``role`` (``'system'``, ``'user'`` or
		``'assistant'``) and ``content``. Each output turn is a dict
		``{'from': ..., 'value': ...}`` where the role is renamed to
		``'system'``, ``'human'`` or ``'gpt'`` respectively.

		Output ordering:
		  1. every system message, in input order;
		  2. user/assistant messages interleaved by position (first user with
		     first assistant, and so on), user turn first;
		  3. user messages that have no matching assistant message.

		Assistant messages beyond the number of user messages are not emitted,
		and messages with any other role are ignored.

		If ``data`` has a non-blank ``'prompt'``, it is prepended (followed by a
		blank line) to the first user turn. The input record is never modified.

		Args:
			data: Mapping with a required ``'messages'`` list and an optional
				``'prompt'`` string (``None`` is treated as absent).

		Returns:
			A dict with the single key ``'conversations'``.

		Raises:
			KeyError: If ``'messages'`` is missing, or a message lacks
				``'role'`` or ``'content'``.
		"""
		role_mapping = {'user': 'human', 'assistant': 'gpt', 'system': 'system'}
		messages = data['messages']

		system_messages = [msg for msg in messages if msg['role'] == 'system']
		human_messages = [msg for msg in messages if msg['role'] == 'user']
		gpt_messages = [msg for msg in messages if msg['role'] == 'assistant']

		# System messages always lead the conversation.
		conversations = [
			{'from': role_mapping[msg['role']], 'value': msg['content']}
			for msg in system_messages
		]

		# `or ''` tolerates an explicit `'prompt': None` in the record.
		prompt = data.get('prompt') or ''
		prefix = prompt + '\n\n' if prompt.strip() else ''

		for index, user_msg in enumerate(human_messages):
			value = user_msg['content']
			if index == 0 and prefix:
				# Build a new string instead of writing back into the caller's
				# message dict, so repeated calls do not stack the prompt.
				value = prefix + value
			conversations.append({'from': role_mapping[user_msg['role']], 'value': value})

			# Emit the assistant reply at the same position, if there is one;
			# user messages without a reply are still kept.
			if index < len(gpt_messages):
				gpt_msg = gpt_messages[index]
				conversations.append({'from': role_mapping[gpt_msg['role']], 'value': gpt_msg['content']})

		return {'conversations': conversations}