{ "results": { "kobest_boolq": { "acc": 0.5968660968660968, "acc_stderr": 0.013095867778461178, "macro_f1": 0.5304603080064664, "macro_f1_stderr": 0.013586196223496537 }, "kobest_copa": { "acc": 0.794, "acc_stderr": 0.012795613612786553, "macro_f1": 0.7937326775501048, "macro_f1_stderr": 0.012808081534088474 }, "kobest_hellaswag": { "acc": 0.498, "acc_stderr": 0.022382894986483524, "acc_norm": 0.59, "acc_norm_stderr": 0.022017482578127676, "macro_f1": 0.493789564405272, "macro_f1_stderr": 0.022368543611080722 }, "kobest_sentineg": { "acc": 0.9193954659949622, "acc_stderr": 0.013679912566808524, "macro_f1": 0.9191692965489158, "macro_f1_stderr": 0.013704064312276852 } }, "versions": { "kobest_boolq": 0, "kobest_copa": 0, "kobest_hellaswag": 0, "kobest_sentineg": 0 }, "config": { "model": "gpt2", "model_args": "pretrained=/home/work/DeepSpeedExamples/DeepSpeedExamples/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/output/actor", "num_fewshot": 10, "batch_size": "8", "device": "cuda:0", "no_cache": true, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }