Delete eval_llm_benchmark.log
Browse files- eval_llm_benchmark.log +0 -61
eval_llm_benchmark.log
DELETED
@@ -1,61 +0,0 @@
|
|
1 |
-
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
|
2 |
-
warn("The installed version of bitsandbytes was compiled without GPU support. "
|
3 |
-
/mnt/petrelfs/wangweiyun/miniconda3/envs/internvl_eval2/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32
|
4 |
-
model path is /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-4B
|
5 |
-
11/21 15:12:04 - OpenCompass - WARNING - No previous results to reuse!
|
6 |
-
11/21 15:12:04 - OpenCompass - INFO - Reusing experiements from 20241121_151204
|
7 |
-
11/21 15:12:04 - OpenCompass - INFO - Current exp folder: /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-4B/20241121_151204
|
8 |
-
11/21 15:12:08 - OpenCompass - INFO - Partitioned into 256 tasks.
|
9 |
-
[ ] 0/256, elapsed: 0s, ETA:
|
10 |
-
11/21 16:04:08 - OpenCompass - INFO - Partitioned into 287 tasks.
|
11 |
-
[ ] 0/287, elapsed: 0s, ETA:
|
12 |
-
dataset version metric mode internvl-chat-20b
|
13 |
-
---------------------------- --------- ---------------------------- ------ -------------------
|
14 |
-
mmlu - naive_average gen 17.00
|
15 |
-
mmlu_pro - - - -
|
16 |
-
cmmlu - naive_average gen 17.71
|
17 |
-
ceval - naive_average gen 17.43
|
18 |
-
agieval - - - -
|
19 |
-
GaokaoBench - weighted_average gen 18.45
|
20 |
-
GPQA_extended - - - -
|
21 |
-
GPQA_main - - - -
|
22 |
-
GPQA_diamond - - - -
|
23 |
-
ARC-c - - - -
|
24 |
-
truthfulqa - - - -
|
25 |
-
triviaqa 2121ce score gen 14.79
|
26 |
-
triviaqa_wiki_1shot - - - -
|
27 |
-
nq 3dcea1 score gen 5.60
|
28 |
-
C3 8c358f accuracy gen 22.90
|
29 |
-
race-high 9a54b6 accuracy gen 21.47
|
30 |
-
flores_100 - - - -
|
31 |
-
winogrande b36770 accuracy gen 23.52
|
32 |
-
hellaswag e42710 accuracy gen 21.30
|
33 |
-
bbh - naive_average gen 18.67
|
34 |
-
gsm8k 1d7fe4 accuracy gen 16.00
|
35 |
-
math 393424 accuracy gen 9.30
|
36 |
-
TheoremQA 6f0af8 score gen 5.25
|
37 |
-
MathBench - - - -
|
38 |
-
openai_humaneval 8e312c humaneval_pass@1 gen 17.68
|
39 |
-
humaneval_plus - - - -
|
40 |
-
humanevalx - - - -
|
41 |
-
sanitized_mbpp a447ff score gen 16.34
|
42 |
-
mbpp_plus - - - -
|
43 |
-
mbpp_cn 6fb572 score gen 12.20
|
44 |
-
leval - - - -
|
45 |
-
leval_closed - - - -
|
46 |
-
leval_open - - - -
|
47 |
-
longbench - - - -
|
48 |
-
longbench_single-document-qa - - - -
|
49 |
-
longbench_multi-document-qa - - - -
|
50 |
-
longbench_summarization - - - -
|
51 |
-
longbench_few-shot-learning - - - -
|
52 |
-
longbench_synthetic-tasks - - - -
|
53 |
-
longbench_code-completion - - - -
|
54 |
-
teval - - - -
|
55 |
-
teval_zh - - - -
|
56 |
-
IFEval 3321a3 Prompt-level-strict-accuracy gen 21.81
|
57 |
-
IFEval 3321a3 Inst-level-strict-accuracy gen 34.17
|
58 |
-
IFEval 3321a3 Prompt-level-loose-accuracy gen 23.29
|
59 |
-
IFEval 3321a3 Inst-level-loose-accuracy gen 35.73
|
60 |
-
11/21 16:08:33 - OpenCompass - INFO - write summary to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-4B/20241121_151204/summary/summary_20241121_151204.txt
|
61 |
-
11/21 16:08:33 - OpenCompass - INFO - write csv to /mnt/petrelfs/wangweiyun/workspace_cz/InternVL/internvl_chat_dev/share_internvl/InternVL2_5-4B/20241121_151204/summary/summary_20241121_151204.csv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|