# File: leaderboard/src/backend/tasks/halueval/halueval_dialogue.yaml
# Last commit: d7b202a ("update") by pminervini; file size: 664 bytes.
# Task configuration for `halueval_dialogue`.
# NOTE(review): the key set matches the lm-evaluation-harness task schema;
# confirm against the harness version used by this project.

# Task identifier and data source.
task: halueval_dialogue
dataset_path: pminervini/HaluEval
dataset_name: dialogue_samples

# Free-form generation task; only a test split (named `data`) is configured.
output_type: generate_until
training_split: null
validation_split: null
test_split: data
num_fewshot: 0

# Prompt construction, gold-target extraction and scoring are delegated to
# callables named in a `utils` module via the custom `!function` tag.
# NOTE(review): this tag requires the consumer's own YAML loader; a plain
# safe loader will reject it.
doc_to_text: !function utils.doc_to_text_dialogue
doc_to_target: !function utils.doc_to_target
process_results: !function utils.process_results

# Generation settings. These keys must be nested under `generation_kwargs`;
# left at top level they would leave `generation_kwargs` null.
generation_kwargs:
  # Stop sequences: a newline or a period.
  # "\n" must stay double-quoted so the escape is read as a newline.
  until:
    - "\n"
    - "."
  # Sampling disabled with temperature 0.0 (presumably greedy decoding).
  do_sample: false
  temperature: 0.0

# Reported metrics. Each list item is a mapping carrying its own
# `aggregation` and `higher_is_better`; all three are averaged, with higher
# values better.
metric_list:
  - metric: em
    aggregation: mean
    higher_is_better: true
  - metric: correctness
    aggregation: mean
    higher_is_better: true
  - metric: acc
    aggregation: mean
    higher_is_better: true

# Config version, kept as the original float value.
metadata:
  version: 0.0