# Task configuration for `halueval_qa`, a member of the `halueval` group.
# NOTE(review): presumably consumed by an evaluation harness that understands
# the custom `!function` tag — confirm against the loader in use.
group:
  - halueval
task: halueval_qa

# Dataset: the `qa_samples` configuration of `pminervini/HaluEval`.
dataset_path: pminervini/HaluEval
dataset_name: qa_samples
output_type: generate_until

# Only a test split (named `data`) is configured; training and validation
# splits are explicitly null.
training_split: null
validation_split: null
test_split: data
num_fewshot: 0

# Prompt construction, target extraction and scoring are delegated to
# functions in the `utils` module referenced by the `!function` tags.
doc_to_text: !function utils.doc_to_text_qa
doc_to_target: !function utils.doc_to_target
process_results: !function utils.process_results

# Generation settings.
# NOTE(review): `do_sample` and `temperature` are nested here on the
# assumption that they are generation arguments — confirm.
generation_kwargs:
  # Stop sequences: a newline (double-quoted so `\n` is an escape) or a period.
  until:
    - "\n"
    - "."
  do_sample: false
  temperature: 0.0

# Metrics reported for this task; each is aggregated with `mean` and is
# marked higher-is-better.
metric_list:
  - metric: em
    aggregation: mean
    higher_is_better: true
  - metric: correctness
    aggregation: mean
    higher_is_better: true
  - metric: acc
    aggregation: mean
    higher_is_better: true

# NOTE(review): `metadata` is a one-item list holding a mapping, not a plain
# mapping — verify the consumer expects this shape before changing it.
metadata:
  - version: 0.0