{ "results": { "kobest_boolq": { "acc": 0.6004273504273504, "acc_stderr": 0.013076733548156574, "macro_f1": 0.5383070602365428, "macro_f1_stderr": 0.01360326947457233 }, "kobest_copa": { "acc": 0.789, "acc_stderr": 0.012909130321042099, "macro_f1": 0.7887412079797753, "macro_f1_stderr": 0.012915850547665574 }, "kobest_hellaswag": { "acc": 0.5, "acc_stderr": 0.022383074051792257, "acc_norm": 0.586, "acc_norm_stderr": 0.02204949796982787, "macro_f1": 0.49550311762225463, "macro_f1_stderr": 0.02234977157067147 }, "kobest_sentineg": { "acc": 0.924433249370277, "acc_stderr": 0.013281757903323496, "macro_f1": 0.9242944317315027, "macro_f1_stderr": 0.013286696883726102 } }, "versions": { "kobest_boolq": 0, "kobest_copa": 0, "kobest_hellaswag": 0, "kobest_sentineg": 0 }, "config": { "model": "gpt2", "model_args": "pretrained=/home/work/.folder/DeepSpeedExamples-master/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/output/actor/", "num_fewshot": 10, "batch_size": "8", "device": "cuda:0", "no_cache": true, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }