djstrong committed on
Commit
ab0f36d
1 Parent(s): a196a30

upgrade transformers

Browse files
Files changed (28) hide show
  1. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23a0fd5602af5241fb2f8d005924180b/metadata.json +1 -0
  2. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{cc0c3d4788195b117302c7e49e5e6d2f → 23a0fd5602af5241fb2f8d005924180b}/output.pkl +2 -2
  3. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/metadata.json +1 -0
  4. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/output.pkl +3 -0
  5. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{74c03d080f7935bad0054064b2595518 → 2ec974c87eca0a06aea982a02ac46031}/output.pkl +2 -2
  6. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/3d66489be6398e5b5b21a6b0a08f15b3/output.pkl +3 -0
  7. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/metadata.json +1 -0
  8. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/output.pkl +3 -0
  9. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/74c03d080f7935bad0054064b2595518/metadata.json +0 -1
  10. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/7bae5d318de18eac6d87e1ec54218733/metadata.json +1 -0
  11. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{81c700664b9416c1564a63e2eeb7b859 → 7bae5d318de18eac6d87e1ec54218733}/output.pkl +2 -2
  12. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/metadata.json +1 -0
  13. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/output.pkl +3 -0
  14. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/97bf2401f2957f6381272e0e8b844a60/metadata.json +1 -0
  15. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{d04e5f9d53ccdec3f2df6f97b1e7e77c → 97bf2401f2957f6381272e0e8b844a60}/output.pkl +2 -2
  16. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/metadata.json +1 -0
  17. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/output.pkl +3 -0
  18. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/metadata.json +1 -0
  19. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/output.pkl +3 -0
  20. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/cc0c3d4788195b117302c7e49e5e6d2f/metadata.json +0 -1
  21. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d04e5f9d53ccdec3f2df6f97b1e7e77c/metadata.json +0 -1
  22. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/metadata.json +1 -0
  23. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/output.pkl +3 -0
  24. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e175e10ba74320a4086e347a0e1a6862/output.pkl +3 -0
  25. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/metadata.json +1 -0
  26. is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/output.pkl +3 -0
  27. requirements.txt +1 -1
  28. src/leaderboard/read_evals.py +14 -4
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23a0fd5602af5241fb2f8d005924180b/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.0002968311309814453, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001_e2'", "revision": "''", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799950.899789}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{cc0c3d4788195b117302c7e49e5e6d2f → 23a0fd5602af5241fb2f8d005924180b}/output.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:735bea8c0653447e3de3846c47a8d053ad3d295bb28f26a3a8b60f64f0dbd139
3
- size 352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1fb7b246f8007481abf14bc208069cd51769a48f64455abc1f83d41c9c93d9
3
+ size 213
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.1410212516784668, "input_args": {"model_name": "'piotr-ai/polanka-7b-v0.1'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799919.9540784}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a20e156a5e4fd6ca69b0093e4568506780fa865d012ffefedc4a608cc3c4d9
3
+ size 2050
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{74c03d080f7935bad0054064b2595518 → 2ec974c87eca0a06aea982a02ac46031}/output.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:220dd8434da1cc471c5ca72e3ea60ace7f2ccc1c7d36a66344cd9f1f78f28bce
3
- size 382
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4411e5d8c00ef326aabc5d5009e0e7cfff43b3dcfa542e783b32b2acd39203de
3
+ size 220
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/3d66489be6398e5b5b21a6b0a08f15b3/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:936397f16e56c7fe947168df4949eb52522087e41be00e0468b26d41be6bcf80
3
+ size 1941
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.2852437496185303, "input_args": {"model_name": "'google/gemma-2-2b-it'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799938.6140876}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:451a1a0a65280fcc7988e008e7602fbe8ce816d2ad9e06407bef92e696cf09d6
3
+ size 1951
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/74c03d080f7935bad0054064b2595518/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"duration": 0.0005774497985839844, "input_args": {"model_name": "'model=mistralai/Mistral-Large-Instruct-2407'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432813.2754002}
 
 
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/7bae5d318de18eac6d87e1ec54218733/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.0002970695495605469, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001-sft7a12k_e3'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799950.961776}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{81c700664b9416c1564a63e2eeb7b859 → 7bae5d318de18eac6d87e1ec54218733}/output.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:083238917fb67de1dadfef99c9b738a75508358938fae27ce2c82fe044052b0a
3
- size 394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40faeafe06c3c74a55e5bc8fef1b2b6a94b87214d5a10c16f00e88942e28a38
3
+ size 222
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 1.3236052989959717, "input_args": {"model_name": "'THUDM/glm-4-9b-chat'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799927.7191012}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:992a9ffe516d3a3b1218edb22f8cb8a0bcb13ff4b95485bd7223a2227b86b557
3
+ size 2562
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/97bf2401f2957f6381272e0e8b844a60/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.21419787406921387, "input_args": {"model_name": "'gpt-3.5-turbo-instruct'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799910.6515977}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{d04e5f9d53ccdec3f2df6f97b1e7e77c → 97bf2401f2957f6381272e0e8b844a60}/output.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e522a77821c652a20e95c402bc415be9b8314f44824d30c22be7f90d20c45a1
3
- size 352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ca0aa1866b90f59e18d92eb331a61a4388338ade36adecaeba8a4461bc0342
3
+ size 338
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.16406464576721191, "input_args": {"model_name": "'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799917.5419252}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42866bd579e2954212d3ad7b9dae26befb527144295d782075ef90b5650e9bb4
3
+ size 19962
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.2595674991607666, "input_args": {"model_name": "'gpt-4-turbo-2024-04-09'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799926.336528}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05fe4b62096374aa9f869cc12c8cbf796a8b4a5e62701c22b4eedd0f01fb2f70
3
+ size 338
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/cc0c3d4788195b117302c7e49e5e6d2f/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"duration": 0.0007023811340332031, "input_args": {"model_name": "'model=gpt-3.5-turbo-instruct'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432787.2373662}
 
 
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d04e5f9d53ccdec3f2df6f97b1e7e77c/metadata.json DELETED
@@ -1 +0,0 @@
1
- {"duration": 0.0002880096435546875, "input_args": {"model_name": "'model=gpt-4-turbo-2024-04-09'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432800.258018}
 
 
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.20917677879333496, "input_args": {"model_name": "'Nexusflow/Athene-70B'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799943.0636876}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607b128531bc5ccbd39fdac4330c4d3a622235bf77985dc8ca5a4f8e7784bf14
3
+ size 1777
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e175e10ba74320a4086e347a0e1a6862/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcad58286697cca342fbb5964762265c57f099076db96920553d22f2595e79d5
3
+ size 1791
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"duration": 0.00033974647521972656, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001_e2'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799951.1384425}
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/output.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1fb7b246f8007481abf14bc208069cd51769a48f64455abc1f83d41c9c93d9
3
+ size 213
requirements.txt CHANGED
@@ -11,6 +11,6 @@ pandas==2.0.0
11
  python-dateutil==2.8.2
12
  requests==2.28.2
13
  tqdm==4.65.0
14
- transformers[cpu]==4.35.2
15
  git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
16
  joblib
 
11
  python-dateutil==2.8.2
12
  requests==2.28.2
13
  tqdm==4.65.0
14
+ transformers[cpu]==4.44.0
15
  git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
16
  joblib
src/leaderboard/read_evals.py CHANGED
@@ -68,6 +68,7 @@ class EvalResult:
68
  org_and_model = org_and_model.replace("models/hf_v7_e2", "APT3-1B-Instruct-e2")
69
 
70
  org_and_model = re.sub(r"^pretrained=", "", org_and_model)
 
71
  org_and_model = org_and_model.replace(",trust_remote_code=True", "")
72
  org_and_model = org_and_model.replace(",parallelize=True", "")
73
  org_and_model = org_and_model.replace(",tokenizer_backend=huggingface", "")
@@ -76,10 +77,19 @@ class EvalResult:
76
  org_and_model = re.sub(",prefix_token_id=\d+", "", org_and_model)
77
  org_and_model = re.sub("/$", "", org_and_model)
78
 
79
- if org_and_model=='speakleash/mistral_7B-v2/spkl-only-e1_333887a5':
80
- org_and_model='speakleash/Bielik-7B-v0.1'
81
- elif org_and_model=='speakleash/mistral_7B-v2/spkl-only_sft_v2/e1_base/spkl-only_v10wa_7e6-e2_bbc67e89':
82
- org_and_model='speakleash/Bielik-7B-Instruct-v0.1'
 
 
 
 
 
 
 
 
 
83
 
84
  if chat_template:
85
  org_and_model += ",chat"
 
68
  org_and_model = org_and_model.replace("models/hf_v7_e2", "APT3-1B-Instruct-e2")
69
 
70
  org_and_model = re.sub(r"^pretrained=", "", org_and_model)
71
+ org_and_model = re.sub(r"^model=", "", org_and_model)
72
  org_and_model = org_and_model.replace(",trust_remote_code=True", "")
73
  org_and_model = org_and_model.replace(",parallelize=True", "")
74
  org_and_model = org_and_model.replace(",tokenizer_backend=huggingface", "")
 
77
  org_and_model = re.sub(",prefix_token_id=\d+", "", org_and_model)
78
  org_and_model = re.sub("/$", "", org_and_model)
79
 
80
+ model_mapping={
81
+ 'speakleash/mistral_7B-v2/spkl-only-e1_333887a5':'speakleash/Bielik-7B-v0.1',
82
+ 'speakleash/mistral_7B-v2/spkl-only_sft_v2/e1_base/spkl-only_v10wa_7e6-e2_bbc67e89':'speakleash/Bielik-7B-Instruct-v0.1',
83
+ 'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,API': 'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,API'
84
+ }
85
+ #map org_and_model using model_mapping
86
+ if org_and_model in model_mapping:
87
+ org_and_model=model_mapping[org_and_model]
88
+ # if org_and_model=='speakleash/mistral_7B-v2/spkl-only-e1_333887a5':
89
+ # org_and_model='speakleash/Bielik-7B-v0.1'
90
+ # elif org_and_model=='speakleash/mistral_7B-v2/spkl-only_sft_v2/e1_base/spkl-only_v10wa_7e6-e2_bbc67e89':
91
+ # org_and_model='speakleash/Bielik-7B-Instruct-v0.1'
92
+
93
 
94
  if chat_template:
95
  org_and_model += ",chat"