Spaces:

hallucinations-leaderboard
/

leaderboard

Running on CPU Upgrade

leaderboard/src/backend/tasks/halueval/halueval_dialogue.yaml

update

d7b202a 10 months ago

664 Bytes

	task: halueval_dialogue
	dataset_path: pminervini/HaluEval
	dataset_name: dialogue_samples
	output_type: generate_until
	training_split: null
	validation_split: null
	test_split: data
	num_fewshot: 0
	doc_to_text: !function utils.doc_to_text_dialogue
	doc_to_target: !function utils.doc_to_target
	process_results: !function utils.process_results
	generation_kwargs:
	until:
	- "\n"
	- "."
	do_sample: false
	temperature: 0.0
	metric_list:
	- metric: em
	aggregation: mean
	higher_is_better: true
	- metric: correctness
	aggregation: mean
	higher_is_better: true
	- metric: acc
	aggregation: mean
	higher_is_better: true
	metadata:
	version: 0.0