loubnabnl HF staff committed on
Commit
c47d747
1 Parent(s): 839761b

[Community Submission] Model: codefuse-ai/CodeFuse-DeepSeek-33B, Username: codefuse-admin (#51)

Browse files

- add json file (ae72fb2d7b79a4a862d123adea14928f173e17f5)
- add codefuse to the leaderboard (fb3a36264b1aac2c2a41f77cfdff8cee872ddaeb)
- Fix: use humaneval-{lang}-reworded.jsonl files for MultiPL-E instead of humaneval-{lang}-keep.jsonl files (e54d5a19705fce2c83d3a5fd1763885f0c8b5e99)
- add codefuse and rename Models Model (b9e9b1e57e890c725edf40e5872ab4c652521eb8)

Files changed (32) hide show
  1. README.md +1 -0
  2. app.py +1 -1
  3. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/codefuse-ai_CodeFuse-DeepSeek-33b_codefuse-admin.json +1 -0
  4. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_humaneval_CodeFuse-DeepSeek-33b.json +0 -0
  5. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-cpp_CodeFuse-DeepSeek-33b.json +0 -0
  6. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-d_CodeFuse-DeepSeek-33b.json +0 -0
  7. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-java_CodeFuse-DeepSeek-33b.json +0 -0
  8. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-jl_CodeFuse-DeepSeek-33b.json +0 -0
  9. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-js_CodeFuse-DeepSeek-33b.json +0 -0
  10. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-lua_CodeFuse-DeepSeek-33b.json +0 -0
  11. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-php_CodeFuse-DeepSeek-33b.json +0 -0
  12. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-r_CodeFuse-DeepSeek-33b.json +0 -0
  13. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-rkt_CodeFuse-DeepSeek-33b.json +0 -0
  14. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-rs_CodeFuse-DeepSeek-33b.json +0 -0
  15. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-swift_CodeFuse-DeepSeek-33b.json +0 -0
  16. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_humaneval_CodeFuse-DeepSeek-33b.json +44 -0
  17. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-cpp_CodeFuse-DeepSeek-33b.json +44 -0
  18. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-d_CodeFuse-DeepSeek-33b.json +44 -0
  19. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-java_CodeFuse-DeepSeek-33b.json +44 -0
  20. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-jl_CodeFuse-DeepSeek-33b.json +44 -0
  21. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-js_CodeFuse-DeepSeek-33b.json +44 -0
  22. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-lua_CodeFuse-DeepSeek-33b.json +44 -0
  23. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-php_CodeFuse-DeepSeek-33b.json +44 -0
  24. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-r_CodeFuse-DeepSeek-33b.json +44 -0
  25. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-rkt_CodeFuse-DeepSeek-33b.json +44 -0
  26. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-rs_CodeFuse-DeepSeek-33b.json +44 -0
  27. community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-swift_CodeFuse-DeepSeek-33b.json +44 -0
  28. data/code_eval_board.csv +38 -37
  29. data/raw_scores.csv +2 -1
  30. src/add_json_csv.py +3 -2
  31. src/build.py +14 -12
  32. src/utils.py +26 -25
README.md CHANGED
@@ -50,4 +50,5 @@ models:
50
  - deepseek-ai/deepseek-coder-33b-base
51
  - deepseek-ai/deepseek-coder-6.7b-instruct
52
  - deepseek-ai/deepseek-coder-33b-instruct
 
53
  ---
 
50
  - deepseek-ai/deepseek-coder-33b-base
51
  - deepseek-ai/deepseek-coder-6.7b-instruct
52
  - deepseek-ai/deepseek-coder-33b-instruct
53
+ - codefuse-ai/CodeFuse-DeepSeek-33B
54
  ---
app.py CHANGED
@@ -116,7 +116,7 @@ def filter_items(df, leaderboard_table, query):
116
 
117
 
118
  def search_table(df, leaderboard_table, query):
119
- filtered_df = df[(df["Models"].str.contains(query, case=False))]
120
  return filtered_df[leaderboard_table.columns]
121
 
122
 
 
116
 
117
 
118
  def search_table(df, leaderboard_table, query):
119
+ filtered_df = df[(df["Model"].str.contains(query, case=False))]
120
  return filtered_df[leaderboard_table.columns]
121
 
122
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/codefuse-ai_CodeFuse-DeepSeek-33b_codefuse-admin.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"results": [{"task": "multiple-r", "pass@1": 0.40372670807453415}, {"task": "humaneval", "pass@1": 0.7682926829268293}, {"task": "multiple-lua", "pass@1": 0.5279503105590062}, {"task": "multiple-php", "pass@1": 0.577639751552795}, {"task": "multiple-d", "pass@1": 0.24358974358974358}, {"task": "multiple-jl", "pass@1": 0.3836477987421384}, {"task": "multiple-cpp", "pass@1": 0.6521739130434783}, {"task": "multiple-java", "pass@1": 0.6075949367088608}, {"task": "multiple-rs", "pass@1": 0.5384615384615384}, {"task": "multiple-swift", "pass@1": 0.4936708860759494}, {"task": "multiple-js", "pass@1": 0.6645962732919255}, {"task": "multiple-rkt", "pass@1": 0.3416149068322981}], "meta": {"model": "codefuse-ai/CodeFuse-DeepSeek-33B"}}
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_humaneval_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-cpp_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-d_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-java_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-jl_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-js_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-lua_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-php_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-r_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-rkt_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-rs_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/generations_CodeFuse-DeepSeek-33b/generations_multiple-swift_CodeFuse-DeepSeek-33b.json ADDED
The diff for this file is too large to render. See raw diff
 
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_humaneval_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "humaneval": {
3
+ "pass@1": 0.7682926829268293
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n# language: Python\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "humaneval",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_humaneval_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_humaneval_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-cpp_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-cpp": {
3
+ "pass@1": 0.6521739130434783
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: C++\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-cpp",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-cpp_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-cpp_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-d_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-d": {
3
+ "pass@1": 0.24358974358974358
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-d",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-d_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-d_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-java_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-java": {
3
+ "pass@1": 0.6075949367088608
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: Java\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-java",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-java_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-java_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-jl_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-jl": {
3
+ "pass@1": 0.3836477987421384
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n# language: Julia\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-jl",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-jl_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-jl_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-js_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-js": {
3
+ "pass@1": 0.6645962732919255
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: JavaScript\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-js",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-js_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-js_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-lua_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-lua": {
3
+ "pass@1": 0.5279503105590062
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: Lua\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-lua",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-lua_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-lua_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-php_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-php": {
3
+ "pass@1": 0.577639751552795
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: PHP\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-php",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-php_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-php_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-r_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-r": {
3
+ "pass@1": 0.40372670807453415
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n# language: R\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-r",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-r_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-r_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-rkt_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-rkt": {
3
+ "pass@1": 0.3416149068322981
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n; language: Racket\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-rkt",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-rkt_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-rkt_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-rs_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-rs": {
3
+ "pass@1": 0.5384615384615384
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: Rust\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-rs",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-rs_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-rs_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/metrics_CodeFuse-DeepSeek-33b/metrics_multiple-swift_CodeFuse-DeepSeek-33b.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "multiple-swift": {
3
+ "pass@1": 0.4936708860759494
4
+ },
5
+ "config": {
6
+ "prefix": "<s>human\n// language: Swift\n",
7
+ "suffix": "\n<s>bot\n",
8
+ "add_special_tokens": false,
9
+ "do_sample": false,
10
+ "temperature": 0.2,
11
+ "top_k": 0,
12
+ "top_p": 0.95,
13
+ "n_samples": 1,
14
+ "eos": "<\uff5cend\u2581of\u2581sentence\uff5c>",
15
+ "seed": 999999999,
16
+ "model": "codefuse-ai/CodeFuse-DeepSeek-33B",
17
+ "modeltype": "causal",
18
+ "peft_model": null,
19
+ "revision": null,
20
+ "use_auth_token": true,
21
+ "trust_remote_code": true,
22
+ "tasks": "multiple-swift",
23
+ "instruction_tokens": null,
24
+ "batch_size": 1,
25
+ "max_length_generation": 2000,
26
+ "precision": "bf16",
27
+ "load_in_8bit": false,
28
+ "load_in_4bit": false,
29
+ "limit": null,
30
+ "limit_start": 0,
31
+ "postprocess": true,
32
+ "allow_code_execution": true,
33
+ "generation_only": false,
34
+ "load_generations_path": "/app/generations_multiple-swift_CodeFuse-DeepSeek-33B.json",
35
+ "load_data_path": null,
36
+ "metric_output_path": "/app/metrics_CodeFuse-DeepSeek-33B/metrics_multiple-swift_CodeFuse-DeepSeek-33B.json",
37
+ "save_generations": false,
38
+ "save_generations_path": "generations.json",
39
+ "save_references": false,
40
+ "prompt": "prompt",
41
+ "max_memory_per_gpu": null,
42
+ "check_references": false
43
+ }
44
+ }
data/code_eval_board.csv CHANGED
@@ -1,42 +1,43 @@
1
- T,Models,Size (B),Win Rate,Throughput (tokens/s),Seq_length,#Languages,humaneval-python,java,javascript,cpp,php,julia,d,Average score,lua,r,racket,rust,swift,Throughput (tokens/s) bs=50,Peak Memory (MB),models_query,Links,Submission PR
2
- 🔴,DeepSeek-Coder-33b-instruct,33.0,39.58,25.2,16384,86,80.02,52.03,65.13,62.36,52.5,42.92,17.85,49.99,50.92,39.43,31.69,55.56,49.42,,76800.0,DeepSeek-Coder-33b-instruct,https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/42
3
- 🔴,DeepSeek-Coder-7b-instruct,6.7,38.75,51.0,16384,86,80.22,53.34,65.8,59.66,59.4,38.84,21.59,48.17,47.78,38.56,20.87,47.73,44.22,,22922.0,DeepSeek-Coder-7b-instruct,https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/43
4
- 🔶,Phind-CodeLlama-34B-v2,34.0,37.04,15.1,16384,UNK,71.95,54.06,65.34,59.59,56.26,45.12,14.12,48.7,44.27,37.7,28.7,57.67,49.63,0.0,69957.0,Phind-CodeLlama-34B-v2,https://huggingface.co/phind/Phind-CodeLlama-34B-v2,
5
- 🔶,Phind-CodeLlama-34B-v1,34.0,36.12,15.1,16384,UNK,65.85,49.47,64.45,57.81,55.53,43.23,15.5,46.9,42.05,36.71,24.89,54.1,53.27,0.0,69957.0,Phind-CodeLlama-34B-v1,https://huggingface.co/phind/Phind-CodeLlama-34B-v1,
6
- 🔶,Phind-CodeLlama-34B-Python-v1,34.0,35.27,15.1,16384,UNK,70.22,48.72,66.24,55.34,52.05,44.23,13.78,45.25,39.44,37.76,18.88,49.22,47.11,0.0,69957.0,Phind-CodeLlama-34B-Python-v1,https://huggingface.co/phind/Phind-CodeLlama-34B-Python-v1,
 
7
  🔴,DeepSeek-Coder-33b-base,33.0,35.0,25.2,16384,86,52.45,43.77,51.28,51.22,41.76,32.83,17.41,38.07,36.51,26.76,23.37,43.78,35.75,,76800.0,DeepSeek-Coder-33b-base,https://huggingface.co/deepseek-ai/deepseek-coder-33b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31
8
- 🔶,WizardCoder-Python-34B-V1.0,34.0,33.96,15.1,16384,UNK,70.73,44.94,55.28,47.2,47.2,41.51,15.38,41.95,32.3,39.75,18.63,46.15,44.3,0.0,69957.0,WizardCoder-Python-34B-V1.0,https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0,
9
  🔴,DeepSeek-Coder-7b-base,6.7,31.75,51.0,16384,86,45.83,37.72,45.9,45.53,36.92,28.74,19.74,33.54,33.89,28.99,18.73,34.67,25.8,,22922.0,DeepSeek-Coder-7b-base,https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32
10
- 🔶,CodeLlama-34b-Instruct,34.0,30.96,15.1,16384,UNK,50.79,41.53,45.85,41.53,36.98,32.65,13.63,35.09,38.87,24.25,18.09,39.26,37.63,0.0,69957.0,CodeLlama-34b-Instruct,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf,
11
- 🔶,WizardCoder-Python-13B-V1.0,13.0,30.58,25.3,16384,UNK,62.19,41.77,48.45,42.86,42.24,38.99,11.54,35.94,32.92,27.33,16.15,34.62,32.28,0.0,28568.0,WizardCoder-Python-13B-V1.0,https://huggingface.co/WizardLM/WizardCoder-Python-13B-V1.0,
12
- 🟢,CodeLlama-34b,34.0,30.35,15.1,16384,UNK,45.11,40.19,41.66,41.42,40.43,31.4,15.27,33.89,37.49,22.71,16.94,38.73,35.28,0.0,69957.0,CodeLlama-34b,https://huggingface.co/codellama/CodeLlama-34b-hf,
13
- 🟢,CodeLlama-34b-Python,34.0,29.65,15.1,16384,UNK,53.29,39.46,44.72,39.09,39.78,31.37,17.29,33.87,31.9,22.35,13.19,39.67,34.3,0.0,69957.0,CodeLlama-34b-Python,https://huggingface.co/codellama/CodeLlama-34b-Python-hf,
14
- 🔶,WizardCoder-15B-V1.0,15.0,28.92,43.7,8192,86,58.12,35.77,41.91,38.95,39.34,33.98,12.14,32.07,27.85,22.53,13.39,33.74,27.06,1470.0,32414.0,WizardCoder-15B-V1.0,https://huggingface.co/WizardLM/WizardCoder-15B-V1.0,
15
- 🔶,CodeLlama-13b-Instruct,13.0,27.88,25.3,16384,UNK,50.6,33.99,40.92,36.36,32.07,32.23,16.29,31.29,31.6,20.14,16.66,32.82,31.75,0.0,28568.0,CodeLlama-13b-Instruct,https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf,
16
- 🟢,CodeLlama-13b,13.0,26.19,25.3,16384,UNK,35.07,32.23,38.26,35.81,32.57,28.01,15.78,28.35,31.26,18.32,13.63,29.72,29.54,0.0,28568.0,CodeLlama-13b,https://huggingface.co/codellama/CodeLlama-13b-hf,
17
- 🟢,CodeLlama-13b-Python,13.0,24.73,25.3,16384,UNK,42.89,33.56,40.66,36.21,34.55,30.4,9.82,28.67,29.9,18.35,12.51,29.32,25.85,0.0,28568.0,CodeLlama-13b-Python,https://huggingface.co/codellama/CodeLlama-13b-Python-hf,
18
- 🔶,CodeLlama-7b-Instruct,7.0,23.69,33.1,16384,UNK,45.65,28.77,33.11,29.03,28.55,27.58,11.81,26.45,30.47,19.7,11.81,24.27,26.66,693.0,15853.0,CodeLlama-7b-Instruct,https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf,
19
- 🟢,CodeLlama-7b,7.0,22.31,33.1,16384,UNK,29.98,29.2,31.8,27.23,25.17,25.6,11.6,24.36,30.36,18.04,11.94,25.82,25.52,693.0,15853.0,CodeLlama-7b,https://huggingface.co/codellama/CodeLlama-7b-hf,
20
- 🔴,CodeShell-7B,7.0,22.31,33.9,8194,24,34.32,30.43,33.17,28.21,30.87,22.08,8.85,24.74,22.39,20.52,17.2,24.55,24.3,639.0,18511.0,CodeShell-7B,https://huggingface.co/WisdomShell/CodeShell-7B,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16
21
- 🔶,OctoCoder-15B,15.0,21.15,44.4,8192,86,45.3,26.03,32.8,29.32,26.76,24.5,13.35,24.01,22.56,14.39,10.61,24.26,18.24,1520.0,32278.0,OctoCoder-15B,https://huggingface.co/bigcode/octocoder,
22
  🟢,Falcon-180B,180.0,20.9,,2048,,35.37,28.48,31.68,28.57,,24.53,14.1,24.08,26.71,,10.56,25.0,15.82,,,Falcon-180B,https://huggingface.co/tiiuae/falcon-180B,
23
- 🟢,CodeLlama-7b-Python,7.0,20.62,33.1,16384,UNK,40.48,29.15,36.34,30.34,1.08,28.53,8.94,23.5,26.15,18.25,9.04,26.96,26.75,693.0,15853.0,CodeLlama-7b-Python,https://huggingface.co/codellama/CodeLlama-7b-Python-hf,
24
- 🟢,StarCoder-15B,15.0,20.58,43.9,8192,86,33.57,30.22,30.79,31.55,26.08,23.02,13.57,22.74,23.89,15.5,0.07,21.84,22.74,1490.0,33461.0,StarCoder-15B,https://huggingface.co/bigcode/starcoder,
25
- 🟢,StarCoderBase-15B,15.0,20.15,43.8,8192,86,30.35,28.53,31.7,30.56,26.75,21.09,10.01,22.4,26.61,10.18,11.77,24.46,16.74,1460.0,32366.0,StarCoderBase-15B,https://huggingface.co/bigcode/starcoderbase,
26
- 🟢,CodeGeex2-6B,6.0,17.42,32.7,8192,100,33.49,23.46,29.9,28.45,25.27,20.93,8.44,21.23,15.94,14.58,11.75,20.45,22.06,982.0,14110.0,CodeGeex2-6B,https://huggingface.co/THUDM/codegeex2-6b,
27
- 🟢,StarCoderBase-7B,7.0,16.85,46.9,8192,86,28.37,24.44,27.35,23.3,22.12,21.77,8.1,20.17,23.35,14.51,11.08,22.6,15.1,1700.0,16512.0,StarCoderBase-7B,https://huggingface.co/bigcode/starcoderbase-7b,
28
- 🔶,OctoGeeX-7B,7.0,16.65,32.7,8192,100,42.28,19.33,28.5,23.93,25.85,22.94,9.77,20.79,16.19,13.66,12.02,17.94,17.03,982.0,14110.0,OctoGeeX-7B,https://huggingface.co/bigcode/octogeex,
29
- 🔶,WizardCoder-3B-V1.0,3.0,15.73,50.0,8192,86,32.92,24.34,26.16,24.94,24.83,19.6,7.91,20.15,21.75,13.64,9.44,20.56,15.7,1770.0,8414.0,WizardCoder-3B-V1.0,https://huggingface.co/WizardLM/WizardCoder-3B-V1.0,
30
- 🟢,CodeGen25-7B-multi,7.0,15.35,32.6,2048,86,28.7,26.01,26.27,25.75,21.98,19.11,8.84,20.04,23.44,11.59,10.37,21.84,16.62,680.0,15336.0,CodeGen25-7B-multi,https://huggingface.co/Salesforce/codegen25-7b-multi,
31
- 🔶,Refact-1.6B,1.6,14.85,50.0,4096,19,31.1,22.78,22.36,21.12,22.36,13.84,10.26,17.86,15.53,13.04,4.97,18.59,18.35,2340.0,5376.0,Refact-1.6B,https://huggingface.co/smallcloudai/Refact-1_6B-fim,
32
  🔴,DeepSeek-Coder-1b-base,1.0,14.42,,16384,UNK,32.13,27.16,28.46,27.96,22.75,15.17,9.91,19.46,19.44,11.4,9.58,18.13,11.39,,,DeepSeek-Coder-1b-base,https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33
33
- 🟢,StarCoderBase-3B,3.0,11.65,50.0,8192,86,21.5,19.25,21.32,19.43,18.55,16.1,4.97,15.29,18.04,10.1,7.87,16.32,9.98,1770.0,8414.0,StarCoderBase-3B,https://huggingface.co/bigcode/starcoderbase-3b,
34
- 🔶,WizardCoder-1B-V1.0,1.1,10.35,71.4,8192,86,23.17,19.68,19.13,15.94,14.71,13.85,4.64,13.89,15.52,10.01,6.51,13.91,9.59,2360.0,4586.0,WizardCoder-1B-V1.0,https://huggingface.co/WizardLM/WizardCoder-1B-V1.0,
35
- 🟢,Replit-2.7B,2.7,8.54,42.2,2048,20,20.12,21.39,20.18,20.37,16.14,1.24,6.41,11.62,2.11,7.2,3.22,15.19,5.88,577.0,7176.0,Replit-2.7B,https://huggingface.co/replit/replit-code-v1-3b,
36
- 🟢,CodeGen25-7B-mono,7.0,8.15,34.1,2048,86,33.08,19.75,23.22,18.62,16.75,4.65,4.32,12.1,6.75,4.41,4.07,7.83,1.71,687.0,15336.0,CodeGen25-7B-mono,https://huggingface.co/Salesforce/codegen25-7b-mono,
37
- 🟢,StarCoderBase-1.1B,1.1,8.12,71.4,8192,86,15.17,14.2,13.38,11.68,9.94,11.31,4.65,9.81,12.52,5.73,5.03,10.24,3.92,2360.0,4586.0,StarCoderBase-1.1B,https://huggingface.co/bigcode/starcoderbase-1b,
38
- 🟢,CodeGen-16B-Multi,16.0,7.08,17.2,2048,6,19.26,22.2,19.15,21.0,8.37,0.0,7.68,9.89,8.5,6.45,0.66,4.21,1.25,0.0,32890.0,CodeGen-16B-Multi,https://huggingface.co/Salesforce/codegen-16B-multi,
39
  🟢,Phi-1,1.0,6.25,,2048,1,51.22,10.76,19.25,14.29,12.42,0.63,7.05,12.15,6.21,6.21,3.11,4.49,10.13,,4941.0,Phi-1,https://huggingface.co/microsoft/phi-1,
40
- 🟢,StableCode-3B,3.0,6.04,30.2,16384,7,20.2,19.54,18.98,20.77,3.95,0.0,4.77,8.1,5.14,0.8,0.008,2.03,0.98,718.0,15730.0,StableCode-3B,https://huggingface.co/stabilityai/stablecode-completion-alpha-3b,
41
- 🟢,DeciCoder-1B,1.0,5.81,54.6,2048,3,19.32,15.3,17.85,6.87,2.01,0.0,6.08,5.86,0.0,0.1,0.47,1.72,0.63,2490.0,4436.0,DeciCoder-1B,https://huggingface.co/Deci/DeciCoder-1b,
42
- 🟢,SantaCoder-1.1B,1.1,4.58,50.8,2048,3,18.12,15.0,15.47,6.2,1.5,0.0,0.0,4.92,0.1,0.0,0.0,2.0,0.7,2270.0,4602.0,SantaCoder-1.1B,https://huggingface.co/bigcode/santacoder,
 
1
+ T,Model,Size (B),Win Rate,Throughput (tokens/s),Seq_length,#Languages,humaneval-python,java,javascript,cpp,php,julia,d,Average score,lua,r,racket,rust,swift,Throughput (tokens/s) bs=50,Peak Memory (MB),models_query,Links,Submission PR
2
+ 🔴,CodeFuse-DeepSeek-33b,33.0,40.83,17.5,16384,86,76.83,60.76,66.46,65.22,57.76,38.36,24.36,51.69,52.8,40.37,34.16,53.85,49.37,,75833.0,CodeFuse-DeepSeek-33b,https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/51
3
+ 🔴,DeepSeek-Coder-33b-instruct,33.0,39.83,25.2,16384,86,80.02,52.03,65.13,62.36,52.5,42.92,17.85,49.99,50.92,39.43,31.69,55.56,49.42,,76800.0,DeepSeek-Coder-33b-instruct,https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/42
4
+ 🔴,DeepSeek-Coder-7b-instruct,6.7,38.92,51.0,16384,86,80.22,53.34,65.8,59.66,59.4,38.84,21.59,48.17,47.78,38.56,20.87,47.73,44.22,,22922.0,DeepSeek-Coder-7b-instruct,https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/43
5
+ 🔶,Phind-CodeLlama-34B-v2,34.0,37.35,15.1,16384,UNK,71.95,54.06,65.34,59.59,56.26,45.12,14.12,48.7,44.27,37.7,28.7,57.67,49.63,0.0,69957.0,Phind-CodeLlama-34B-v2,https://huggingface.co/phind/Phind-CodeLlama-34B-v2,
6
+ 🔶,Phind-CodeLlama-34B-v1,34.0,36.42,15.1,16384,UNK,65.85,49.47,64.45,57.81,55.53,43.23,15.5,46.9,42.05,36.71,24.89,54.1,53.27,0.0,69957.0,Phind-CodeLlama-34B-v1,https://huggingface.co/phind/Phind-CodeLlama-34B-v1,
7
+ 🔶,Phind-CodeLlama-34B-Python-v1,34.0,35.42,15.1,16384,UNK,70.22,48.72,66.24,55.34,52.05,44.23,13.78,45.25,39.44,37.76,18.88,49.22,47.11,0.0,69957.0,Phind-CodeLlama-34B-Python-v1,https://huggingface.co/phind/Phind-CodeLlama-34B-Python-v1,
8
  🔴,DeepSeek-Coder-33b-base,33.0,35.0,25.2,16384,86,52.45,43.77,51.28,51.22,41.76,32.83,17.41,38.07,36.51,26.76,23.37,43.78,35.75,,76800.0,DeepSeek-Coder-33b-base,https://huggingface.co/deepseek-ai/deepseek-coder-33b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31
9
+ 🔶,WizardCoder-Python-34B-V1.0,34.0,34.12,15.1,16384,UNK,70.73,44.94,55.28,47.2,47.2,41.51,15.38,41.95,32.3,39.75,18.63,46.15,44.3,0.0,69957.0,WizardCoder-Python-34B-V1.0,https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0,
10
  🔴,DeepSeek-Coder-7b-base,6.7,31.75,51.0,16384,86,45.83,37.72,45.9,45.53,36.92,28.74,19.74,33.54,33.89,28.99,18.73,34.67,25.8,,22922.0,DeepSeek-Coder-7b-base,https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32
11
+ 🔶,CodeLlama-34b-Instruct,34.0,31.04,15.1,16384,UNK,50.79,41.53,45.85,41.53,36.98,32.65,13.63,35.09,38.87,24.25,18.09,39.26,37.63,0.0,69957.0,CodeLlama-34b-Instruct,https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf,
12
+ 🔶,WizardCoder-Python-13B-V1.0,13.0,30.73,25.3,16384,UNK,62.19,41.77,48.45,42.86,42.24,38.99,11.54,35.94,32.92,27.33,16.15,34.62,32.28,0.0,28568.0,WizardCoder-Python-13B-V1.0,https://huggingface.co/WizardLM/WizardCoder-Python-13B-V1.0,
13
+ 🟢,CodeLlama-34b,34.0,30.42,15.1,16384,UNK,45.11,40.19,41.66,41.42,40.43,31.4,15.27,33.89,37.49,22.71,16.94,38.73,35.28,0.0,69957.0,CodeLlama-34b,https://huggingface.co/codellama/CodeLlama-34b-hf,
14
+ 🟢,CodeLlama-34b-Python,34.0,29.73,15.1,16384,UNK,53.29,39.46,44.72,39.09,39.78,31.37,17.29,33.87,31.9,22.35,13.19,39.67,34.3,0.0,69957.0,CodeLlama-34b-Python,https://huggingface.co/codellama/CodeLlama-34b-Python-hf,
15
+ 🔶,WizardCoder-15B-V1.0,15.0,29.0,43.7,8192,86,58.12,35.77,41.91,38.95,39.34,33.98,12.14,32.07,27.85,22.53,13.39,33.74,27.06,1470.0,32414.0,WizardCoder-15B-V1.0,https://huggingface.co/WizardLM/WizardCoder-15B-V1.0,
16
+ 🔶,CodeLlama-13b-Instruct,13.0,27.96,25.3,16384,UNK,50.6,33.99,40.92,36.36,32.07,32.23,16.29,31.29,31.6,20.14,16.66,32.82,31.75,0.0,28568.0,CodeLlama-13b-Instruct,https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf,
17
+ 🟢,CodeLlama-13b,13.0,26.27,25.3,16384,UNK,35.07,32.23,38.26,35.81,32.57,28.01,15.78,28.35,31.26,18.32,13.63,29.72,29.54,0.0,28568.0,CodeLlama-13b,https://huggingface.co/codellama/CodeLlama-13b-hf,
18
+ 🟢,CodeLlama-13b-Python,13.0,24.81,25.3,16384,UNK,42.89,33.56,40.66,36.21,34.55,30.4,9.82,28.67,29.9,18.35,12.51,29.32,25.85,0.0,28568.0,CodeLlama-13b-Python,https://huggingface.co/codellama/CodeLlama-13b-Python-hf,
19
+ 🔶,CodeLlama-7b-Instruct,7.0,23.77,33.1,16384,UNK,45.65,28.77,33.11,29.03,28.55,27.58,11.81,26.45,30.47,19.7,11.81,24.27,26.66,693.0,15853.0,CodeLlama-7b-Instruct,https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf,
20
+ 🔴,CodeShell-7B,7.0,22.38,33.9,8194,24,34.32,30.43,33.17,28.21,30.87,22.08,8.85,24.74,22.39,20.52,17.2,24.55,24.3,639.0,18511.0,CodeShell-7B,https://huggingface.co/WisdomShell/CodeShell-7B,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16
21
+ 🟢,CodeLlama-7b,7.0,22.38,33.1,16384,UNK,29.98,29.2,31.8,27.23,25.17,25.6,11.6,24.36,30.36,18.04,11.94,25.82,25.52,693.0,15853.0,CodeLlama-7b,https://huggingface.co/codellama/CodeLlama-7b-hf,
22
+ 🔶,OctoCoder-15B,15.0,21.23,44.4,8192,86,45.3,26.03,32.8,29.32,26.76,24.5,13.35,24.01,22.56,14.39,10.61,24.26,18.24,1520.0,32278.0,OctoCoder-15B,https://huggingface.co/bigcode/octocoder,
23
  🟢,Falcon-180B,180.0,20.9,,2048,,35.37,28.48,31.68,28.57,,24.53,14.1,24.08,26.71,,10.56,25.0,15.82,,,Falcon-180B,https://huggingface.co/tiiuae/falcon-180B,
24
+ 🟢,CodeLlama-7b-Python,7.0,20.69,33.1,16384,UNK,40.48,29.15,36.34,30.34,1.08,28.53,8.94,23.5,26.15,18.25,9.04,26.96,26.75,693.0,15853.0,CodeLlama-7b-Python,https://huggingface.co/codellama/CodeLlama-7b-Python-hf,
25
+ 🟢,StarCoder-15B,15.0,20.65,43.9,8192,86,33.57,30.22,30.79,31.55,26.08,23.02,13.57,22.74,23.89,15.5,0.07,21.84,22.74,1490.0,33461.0,StarCoder-15B,https://huggingface.co/bigcode/starcoder,
26
+ 🟢,StarCoderBase-15B,15.0,20.23,43.8,8192,86,30.35,28.53,31.7,30.56,26.75,21.09,10.01,22.4,26.61,10.18,11.77,24.46,16.74,1460.0,32366.0,StarCoderBase-15B,https://huggingface.co/bigcode/starcoderbase,
27
+ 🟢,CodeGeex2-6B,6.0,17.5,32.7,8192,100,33.49,23.46,29.9,28.45,25.27,20.93,8.44,21.23,15.94,14.58,11.75,20.45,22.06,982.0,14110.0,CodeGeex2-6B,https://huggingface.co/THUDM/codegeex2-6b,
28
+ 🟢,StarCoderBase-7B,7.0,16.92,46.9,8192,86,28.37,24.44,27.35,23.3,22.12,21.77,8.1,20.17,23.35,14.51,11.08,22.6,15.1,1700.0,16512.0,StarCoderBase-7B,https://huggingface.co/bigcode/starcoderbase-7b,
29
+ 🔶,OctoGeeX-7B,7.0,16.73,32.7,8192,100,42.28,19.33,28.5,23.93,25.85,22.94,9.77,20.79,16.19,13.66,12.02,17.94,17.03,982.0,14110.0,OctoGeeX-7B,https://huggingface.co/bigcode/octogeex,
30
+ 🔶,WizardCoder-3B-V1.0,3.0,15.81,50.0,8192,86,32.92,24.34,26.16,24.94,24.83,19.6,7.91,20.15,21.75,13.64,9.44,20.56,15.7,1770.0,8414.0,WizardCoder-3B-V1.0,https://huggingface.co/WizardLM/WizardCoder-3B-V1.0,
31
+ 🟢,CodeGen25-7B-multi,7.0,15.42,32.6,2048,86,28.7,26.01,26.27,25.75,21.98,19.11,8.84,20.04,23.44,11.59,10.37,21.84,16.62,680.0,15336.0,CodeGen25-7B-multi,https://huggingface.co/Salesforce/codegen25-7b-multi,
32
+ 🔶,Refact-1.6B,1.6,14.92,50.0,4096,19,31.1,22.78,22.36,21.12,22.36,13.84,10.26,17.86,15.53,13.04,4.97,18.59,18.35,2340.0,5376.0,Refact-1.6B,https://huggingface.co/smallcloudai/Refact-1_6B-fim,
33
  🔴,DeepSeek-Coder-1b-base,1.0,14.42,,16384,UNK,32.13,27.16,28.46,27.96,22.75,15.17,9.91,19.46,19.44,11.4,9.58,18.13,11.39,,,DeepSeek-Coder-1b-base,https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base,https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33
34
+ 🟢,StarCoderBase-3B,3.0,11.73,50.0,8192,86,21.5,19.25,21.32,19.43,18.55,16.1,4.97,15.29,18.04,10.1,7.87,16.32,9.98,1770.0,8414.0,StarCoderBase-3B,https://huggingface.co/bigcode/starcoderbase-3b,
35
+ 🔶,WizardCoder-1B-V1.0,1.1,10.42,71.4,8192,86,23.17,19.68,19.13,15.94,14.71,13.85,4.64,13.89,15.52,10.01,6.51,13.91,9.59,2360.0,4586.0,WizardCoder-1B-V1.0,https://huggingface.co/WizardLM/WizardCoder-1B-V1.0,
36
+ 🟢,Replit-2.7B,2.7,8.62,42.2,2048,20,20.12,21.39,20.18,20.37,16.14,1.24,6.41,11.62,2.11,7.2,3.22,15.19,5.88,577.0,7176.0,Replit-2.7B,https://huggingface.co/replit/replit-code-v1-3b,
37
+ 🟢,CodeGen25-7B-mono,7.0,8.23,34.1,2048,86,33.08,19.75,23.22,18.62,16.75,4.65,4.32,12.1,6.75,4.41,4.07,7.83,1.71,687.0,15336.0,CodeGen25-7B-mono,https://huggingface.co/Salesforce/codegen25-7b-mono,
38
+ 🟢,StarCoderBase-1.1B,1.1,8.19,71.4,8192,86,15.17,14.2,13.38,11.68,9.94,11.31,4.65,9.81,12.52,5.73,5.03,10.24,3.92,2360.0,4586.0,StarCoderBase-1.1B,https://huggingface.co/bigcode/starcoderbase-1b,
39
+ 🟢,CodeGen-16B-Multi,16.0,7.15,17.2,2048,6,19.26,22.2,19.15,21.0,8.37,0.0,7.68,9.89,8.5,6.45,0.66,4.21,1.25,0.0,32890.0,CodeGen-16B-Multi,https://huggingface.co/Salesforce/codegen-16B-multi,
40
  🟢,Phi-1,1.0,6.25,,2048,1,51.22,10.76,19.25,14.29,12.42,0.63,7.05,12.15,6.21,6.21,3.11,4.49,10.13,,4941.0,Phi-1,https://huggingface.co/microsoft/phi-1,
41
+ 🟢,StableCode-3B,3.0,6.12,30.2,16384,7,20.2,19.54,18.98,20.77,3.95,0.0,4.77,8.1,5.14,0.8,0.008,2.03,0.98,718.0,15730.0,StableCode-3B,https://huggingface.co/stabilityai/stablecode-completion-alpha-3b,
42
+ 🟢,DeciCoder-1B,1.0,5.88,54.6,2048,3,19.32,15.3,17.85,6.87,2.01,0.0,6.08,5.86,0.0,0.1,0.47,1.72,0.63,2490.0,4436.0,DeciCoder-1B,https://huggingface.co/Deci/DeciCoder-1b,
43
+ 🟢,SantaCoder-1.1B,1.1,4.65,50.8,2048,3,18.12,15.0,15.47,6.2,1.5,0.0,0.0,4.92,0.1,0.0,0.0,2.0,0.7,2270.0,4602.0,SantaCoder-1.1B,https://huggingface.co/bigcode/santacoder,
data/raw_scores.csv CHANGED
@@ -1,4 +1,4 @@
1
- Models,Size (B),Throughput (tokens/s),Seq_length,#Languages,humaneval-python,java,javascript,cpp,php,julia,d,lua,r,racket,rust,swift,Throughput (tokens/s) bs=50,Peak Memory (MB)
2
  CodeGen-16B-Multi,16.0,17.2,2048,6,19.26,22.2,19.15,21.0,8.37,0.0,7.68,8.5,6.45,0.66,4.21,1.25,0.0,32890
3
  StarCoder-15B,15.0,43.9,8192,86,33.57,30.22,30.79,31.55,26.08,23.02,13.57,23.89,15.5,0.07,21.84,22.74,1490.0,33461
4
  StarCoderBase-15B,15.0,43.8,8192,86,30.35,28.53,31.7,30.56,26.75,21.09,10.01,26.61,10.18,11.77,24.46,16.74,1460.0,32366
@@ -40,3 +40,4 @@ DeepSeek-Coder-7b-base,6.7,51.0,16384,86,45.83,37.72,45.9,45.53,36.92,28.74,19.7
40
  DeepSeek-Coder-33b-base,33,25.2,16384,86,52.45,43.77,51.28,51.22,41.76,32.83,17.41,36.51,26.76,23.37,43.78,35.75,,76800
41
  DeepSeek-Coder-7b-instruct,6.7,51.0,16384,86,80.22,53.34,65.8,59.66,59.4,38.84,21.59,47.78,38.56,20.87,47.73,44.22,,22922
42
  DeepSeek-Coder-33b-instruct,33,25.2,16384,86,80.02,52.03,65.13,62.36,52.5,42.92,17.85,50.92,39.43,31.69,55.56,49.42,,76800
 
 
1
+ Model,Size (B),Throughput (tokens/s),Seq_length,#Languages,humaneval-python,java,javascript,cpp,php,julia,d,lua,r,racket,rust,swift,Throughput (tokens/s) bs=50,Peak Memory (MB)
2
  CodeGen-16B-Multi,16.0,17.2,2048,6,19.26,22.2,19.15,21.0,8.37,0.0,7.68,8.5,6.45,0.66,4.21,1.25,0.0,32890
3
  StarCoder-15B,15.0,43.9,8192,86,33.57,30.22,30.79,31.55,26.08,23.02,13.57,23.89,15.5,0.07,21.84,22.74,1490.0,33461
4
  StarCoderBase-15B,15.0,43.8,8192,86,30.35,28.53,31.7,30.56,26.75,21.09,10.01,26.61,10.18,11.77,24.46,16.74,1460.0,32366
 
40
  DeepSeek-Coder-33b-base,33,25.2,16384,86,52.45,43.77,51.28,51.22,41.76,32.83,17.41,36.51,26.76,23.37,43.78,35.75,,76800
41
  DeepSeek-Coder-7b-instruct,6.7,51.0,16384,86,80.22,53.34,65.8,59.66,59.4,38.84,21.59,47.78,38.56,20.87,47.73,44.22,,22922
42
  DeepSeek-Coder-33b-instruct,33,25.2,16384,86,80.02,52.03,65.13,62.36,52.5,42.92,17.85,50.92,39.43,31.69,55.56,49.42,,76800
43
+ CodeFuse-DeepSeek-33b,33,17.5,16384,86,76.83,60.76,66.46,65.22,57.76,38.36,24.36,52.8,40.37,34.16,53.85,49.37,,75833
src/add_json_csv.py CHANGED
@@ -16,9 +16,10 @@ mapping = {
16
  "multiple-d": "d",
17
  "multiple-swift": "swift"
18
  }
19
- BASE_PATH = "/fsx/loubna/pr/bigcode-models-leaderboard"
20
  # JSON Data (replace this with your actual loaded JSON)
21
- json_path = f"{BASE_PATH}/community_results/deepseek-ai_deepseek-coder-33b-instruct_zqh11/deepseek-ai_deepseek-coder-33b-instruct_zqh11.json"
 
22
  with open(json_path, "r") as f:
23
  json_data = json.load(f)
24
  parsed_data = json_data['results']
 
16
  "multiple-d": "d",
17
  "multiple-swift": "swift"
18
  }
19
+ BASE_PATH = "/fsx/loubna/projects/bigcode-models-leaderboard"
20
  # JSON Data (replace this with your actual loaded JSON)
21
+
22
+ json_path = f"{BASE_PATH}/community_results/codefuse-ai_codefuse-deepseek-33b_codefuse-admin/codefuse-ai_CodeFuse-DeepSeek-33b_codefuse-admin.json"
23
  with open(json_path, "r") as f:
24
  json_data = json.load(f)
25
  parsed_data = json_data['results']
src/build.py CHANGED
@@ -23,7 +23,7 @@ def add_model_readme(df):
23
  df = pd.read_csv("data/raw_scores.csv")
24
  COLS = df.columns.to_list()
25
  # add column models_query with same values a smodels at the end of columns
26
- df.insert(len(COLS), "models_query", df["Models"])
27
  print(f"all cols {df.columns.to_list()}")
28
  # average score
29
  mean_columns = df.iloc[:,5:-3]
@@ -39,7 +39,7 @@ for col in df.columns[6:-2]:
39
  df[col + " rank"] = len(df) - (df[col + " rank"] - 1)
40
  df["Win Rate"] = df.iloc[:, old_size:].mean(axis=1).round(2)
41
  df = df.drop(df.columns[old_size:-1], axis=1)
42
- df = df[["Models", "Size (B)", "Win Rate"] + df.columns[2:-1].tolist()]
43
 
44
  # sort with regard to column win rate
45
  df = df.sort_values(by=["Win Rate"], ascending=False)
@@ -79,27 +79,29 @@ links = {
79
  "DeepSeek-Coder-33b-base": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-base",
80
  "DeepSeek-Coder-7b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
81
  "DeepSeek-Coder-33b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
 
82
  }
83
 
84
  codellamas = ['CodeLlama-7b', 'CodeLlama-7b-Python', 'CodeLlama-7b-Instruct', 'CodeLlama-13b', 'CodeLlama-13b-Python', 'CodeLlama-13b-Instruct', 'CodeLlama-34b', 'CodeLlama-34b-Python', 'CodeLlama-34b-Instruct']
85
  for codellama in codellamas:
86
  links[codellama] = f"https://huggingface.co/codellama/{codellama}-hf"
87
 
88
- df["Links"] = df["Models"].map(links)
89
 
90
  df.insert(0, "T", "🟢")
91
  patterns = ["WizardCoder", "Octo", "Instruct", "Phind", "Refact"]
92
- df.loc[df["Models"].str.contains('|'.join(patterns)), "T"] = "🔶"
93
- df.loc[df["Models"].str.contains('|'.join(patterns)), "T"] = "🔶"
94
- df.loc[df["Models"].str.contains('|'.join(["CodeShell", "DeepSeek"])), "T"] = "🔴"
95
  # add clumn submission_pr with empty fiels except for CodeShell with link AA
96
  df["Submission PR"] = ""
97
- df.loc[df["Models"].str.contains('|'.join(["CodeShell"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16"
98
- df.loc[df["Models"].str.contains('|'.join(["DeepSeek-Coder-1b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33"
99
- df.loc[df["Models"].str.contains('|'.join(["DeepSeek-Coder-7b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32"
100
- df.loc[df["Models"].str.contains('|'.join(["DeepSeek-Coder-33b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31"
101
- df.loc[df["Models"].str.contains('|'.join(["DeepSeek-Coder-7b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/43"
102
- df.loc[df["Models"].str.contains('|'.join(["DeepSeek-Coder-33b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/42"
 
103
 
104
 
105
  # print first 5 rows and 10 cols
 
23
  df = pd.read_csv("data/raw_scores.csv")
24
  COLS = df.columns.to_list()
25
  # add column models_query with same values a smodels at the end of columns
26
+ df.insert(len(COLS), "models_query", df["Model"])
27
  print(f"all cols {df.columns.to_list()}")
28
  # average score
29
  mean_columns = df.iloc[:,5:-3]
 
39
  df[col + " rank"] = len(df) - (df[col + " rank"] - 1)
40
  df["Win Rate"] = df.iloc[:, old_size:].mean(axis=1).round(2)
41
  df = df.drop(df.columns[old_size:-1], axis=1)
42
+ df = df[["Model", "Size (B)", "Win Rate"] + df.columns[2:-1].tolist()]
43
 
44
  # sort with regard to column win rate
45
  df = df.sort_values(by=["Win Rate"], ascending=False)
 
79
  "DeepSeek-Coder-33b-base": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-base",
80
  "DeepSeek-Coder-7b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
81
  "DeepSeek-Coder-33b-instruct": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
82
+ "CodeFuse-DeepSeek-33b": "https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B",
83
  }
84
 
85
  codellamas = ['CodeLlama-7b', 'CodeLlama-7b-Python', 'CodeLlama-7b-Instruct', 'CodeLlama-13b', 'CodeLlama-13b-Python', 'CodeLlama-13b-Instruct', 'CodeLlama-34b', 'CodeLlama-34b-Python', 'CodeLlama-34b-Instruct']
86
  for codellama in codellamas:
87
  links[codellama] = f"https://huggingface.co/codellama/{codellama}-hf"
88
 
89
+ df["Links"] = df["Model"].map(links)
90
 
91
  df.insert(0, "T", "🟢")
92
  patterns = ["WizardCoder", "Octo", "Instruct", "Phind", "Refact"]
93
+ df.loc[df["Model"].str.contains('|'.join(patterns)), "T"] = "🔶"
94
+ df.loc[df["Model"].str.contains('|'.join(patterns)), "T"] = "🔶"
95
+ df.loc[df["Model"].str.contains('|'.join(["CodeShell", "DeepSeek", "CodeFuse"])), "T"] = "🔴"
96
  # add clumn submission_pr with empty fiels except for CodeShell with link AA
97
  df["Submission PR"] = ""
98
+ df.loc[df["Model"].str.contains('|'.join(["CodeShell"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16"
99
+ df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-1b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33"
100
+ df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-7b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32"
101
+ df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-33b-base"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31"
102
+ df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-7b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/43"
103
+ df.loc[df["Model"].str.contains('|'.join(["DeepSeek-Coder-33b-instruct"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/42"
104
+ df.loc[df["Model"].str.contains('|'.join(["CodeFuse"])), "Submission PR"] = "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/51"
105
 
106
 
107
  # print first 5 rows and 10 cols
src/utils.py CHANGED
@@ -22,7 +22,7 @@ def fields(raw_class):
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn: # Auto evals column
24
  model_type_symbol = ColumnContent("T", "str", True)
25
- model = ColumnContent("Models", "markdown", True)
26
  win_rate = ColumnContent("Win Rate", "number", True)
27
  average = ColumnContent("Average score", "number", False)
28
  humaneval_python = ColumnContent("humaneval-python", "number", True)
@@ -43,8 +43,8 @@ class AutoEvalColumn: # Auto evals column
43
  peak_memory = ColumnContent("Peak Memory (MB)", "number", False)
44
  seq_length = ColumnContent("Seq_length", "number", False)
45
  link = ColumnContent("Links", "str", False)
46
- dummy = ColumnContent("Models", "str", True)
47
- pr = ColumnContent("Submission PR", "str", False)
48
 
49
 
50
  def model_hyperlink(link, model_name):
@@ -52,8 +52,8 @@ def model_hyperlink(link, model_name):
52
 
53
 
54
  def make_clickable_names(df):
55
- df["Models"] = df.apply(
56
- lambda row: model_hyperlink(row["Links"], row["Models"]), axis=1
57
  )
58
  return df
59
 
@@ -65,24 +65,25 @@ def plot_throughput(df, bs=1):
65
 
66
  df["symbol"] = 2 # Triangle
67
  df["color"] = ""
68
- df.loc[df["Models"].str.contains("StarCoder|SantaCoder"), "color"] = "orange"
69
- df.loc[df["Models"].str.contains("CodeGen"), "color"] = "pink"
70
- df.loc[df["Models"].str.contains("Replit"), "color"] = "purple"
71
- df.loc[df["Models"].str.contains("WizardCoder"), "color"] = "peru"
72
- df.loc[df["Models"].str.contains("CodeGeex"), "color"] = "cornflowerblue"
73
- df.loc[df["Models"].str.contains("StableCode"), "color"] = "cadetblue"
74
- df.loc[df["Models"].str.contains("OctoCoder"), "color"] = "lime"
75
- df.loc[df["Models"].str.contains("OctoGeeX"), "color"] = "wheat"
76
- df.loc[df["Models"].str.contains("Deci"), "color"] = "salmon"
77
- df.loc[df["Models"].str.contains("CodeLlama"), "color"] = "palevioletred"
78
- df.loc[df["Models"].str.contains("CodeGuru"), "color"] = "burlywood"
79
- df.loc[df["Models"].str.contains("Phind"), "color"] = "crimson"
80
- df.loc[df["Models"].str.contains("Falcon"), "color"] = "dimgray"
81
- df.loc[df["Models"].str.contains("Refact"), "color"] = "yellow"
82
- df.loc[df["Models"].str.contains("Phi"), "color"] = "gray"
83
- df.loc[df["Models"].str.contains("CodeShell"), "color"] = "lightskyblue"
84
- df.loc[df["Models"].str.contains("CodeShell"), "color"] = "lightskyblue"
85
- df.loc[df["Models"].str.contains("DeepSeek"), "color"] = "lightgreen"
 
86
  fig = go.Figure()
87
 
88
  for i in df.index:
@@ -96,7 +97,7 @@ def plot_throughput(df, bs=1):
96
  color=df.loc[i, "color"],
97
  symbol=df.loc[i, "symbol"],
98
  ),
99
- name=df.loc[i, "Models"],
100
  hovertemplate="<b>%{text}</b><br><br>"
101
  + f"{throughput_column}: %{{x}}<br>"
102
  + "Average Score: %{y}<br>"
@@ -105,7 +106,7 @@ def plot_throughput(df, bs=1):
105
  + "<br>"
106
  + "Human Eval (Python): "
107
  + str(df.loc[i, "humaneval-python"]),
108
- text=[df.loc[i, "Models"]],
109
  showlegend=True,
110
  )
111
  )
 
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn: # Auto evals column
24
  model_type_symbol = ColumnContent("T", "str", True)
25
+ model = ColumnContent("Model", "markdown", True)
26
  win_rate = ColumnContent("Win Rate", "number", True)
27
  average = ColumnContent("Average score", "number", False)
28
  humaneval_python = ColumnContent("humaneval-python", "number", True)
 
43
  peak_memory = ColumnContent("Peak Memory (MB)", "number", False)
44
  seq_length = ColumnContent("Seq_length", "number", False)
45
  link = ColumnContent("Links", "str", False)
46
+ dummy = ColumnContent("Model", "str", True)
47
+ pr = ColumnContent("Submission PR", "markdown", False)
48
 
49
 
50
  def model_hyperlink(link, model_name):
 
52
 
53
 
54
  def make_clickable_names(df):
55
+ df["Model"] = df.apply(
56
+ lambda row: model_hyperlink(row["Links"], row["Model"]), axis=1
57
  )
58
  return df
59
 
 
65
 
66
  df["symbol"] = 2 # Triangle
67
  df["color"] = ""
68
+ df.loc[df["Model"].str.contains("StarCoder|SantaCoder"), "color"] = "orange"
69
+ df.loc[df["Model"].str.contains("CodeGen"), "color"] = "pink"
70
+ df.loc[df["Model"].str.contains("Replit"), "color"] = "purple"
71
+ df.loc[df["Model"].str.contains("WizardCoder"), "color"] = "peru"
72
+ df.loc[df["Model"].str.contains("CodeGeex"), "color"] = "cornflowerblue"
73
+ df.loc[df["Model"].str.contains("StableCode"), "color"] = "cadetblue"
74
+ df.loc[df["Model"].str.contains("OctoCoder"), "color"] = "lime"
75
+ df.loc[df["Model"].str.contains("OctoGeeX"), "color"] = "wheat"
76
+ df.loc[df["Model"].str.contains("Deci"), "color"] = "salmon"
77
+ df.loc[df["Model"].str.contains("CodeLlama"), "color"] = "palevioletred"
78
+ df.loc[df["Model"].str.contains("CodeGuru"), "color"] = "burlywood"
79
+ df.loc[df["Model"].str.contains("Phind"), "color"] = "crimson"
80
+ df.loc[df["Model"].str.contains("Falcon"), "color"] = "dimgray"
81
+ df.loc[df["Model"].str.contains("Refact"), "color"] = "yellow"
82
+ df.loc[df["Model"].str.contains("Phi"), "color"] = "gray"
83
+ df.loc[df["Model"].str.contains("CodeShell"), "color"] = "lightskyblue"
84
+ df.loc[df["Model"].str.contains("CodeShell"), "color"] = "lightskyblue"
85
+ df.loc[df["Model"].str.contains("DeepSeek"), "color"] = "lightgreen"
86
+ df.loc[df["Model"].str.contains("CodeFuse"), "color"] = "olive"
87
  fig = go.Figure()
88
 
89
  for i in df.index:
 
97
  color=df.loc[i, "color"],
98
  symbol=df.loc[i, "symbol"],
99
  ),
100
+ name=df.loc[i, "Model"],
101
  hovertemplate="<b>%{text}</b><br><br>"
102
  + f"{throughput_column}: %{{x}}<br>"
103
  + "Average Score: %{y}<br>"
 
106
  + "<br>"
107
  + "Human Eval (Python): "
108
  + str(df.loc[i, "humaneval-python"]),
109
+ text=[df.loc[i, "Model"]],
110
  showlegend=True,
111
  )
112
  )