Spaces:
Sleeping
Sleeping
meg-huggingface
committed on
Commit
•
f712ac1
1
Parent(s):
29ae773
Removing limitgit add src/envs.py
Browse files
- src/backend/run_eval_suite.py +5 -2
- src/envs.py +2 -1
src/backend/run_eval_suite.py
CHANGED
@@ -5,7 +5,7 @@ from datetime import datetime
|
|
5 |
|
6 |
from lm_eval import tasks, evaluator, utils
|
7 |
|
8 |
-
from src.envs import
|
9 |
from src.backend.manage_requests import EvalRequest
|
10 |
|
11 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
@@ -13,7 +13,10 @@ logging.getLogger("openai").setLevel(logging.WARNING)
|
|
13 |
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
|
14 |
if limit:
|
15 |
print(
|
16 |
-
"WARNING:
|
|
|
|
|
|
|
17 |
)
|
18 |
|
19 |
task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
|
|
|
5 |
|
6 |
from lm_eval import tasks, evaluator, utils
|
7 |
|
8 |
+
from src.envs import API
|
9 |
from src.backend.manage_requests import EvalRequest
|
10 |
|
11 |
logging.getLogger("openai").setLevel(logging.WARNING)
|
|
|
13 |
def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
|
14 |
if limit:
|
15 |
print(
|
16 |
+
"!!!!!! WARNING: NOT A FULL EVALUATION !!!!! Just looking at %d."
|
17 |
+
"The `LIMIT` environment variable and/or `limit` in `run_evaluation` "
|
18 |
+
"SHOULD ONLY BE USED FOR TESTING. "
|
19 |
+
"REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT." % limit
|
20 |
)
|
21 |
|
22 |
task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
|
src/envs.py
CHANGED
@@ -13,7 +13,8 @@ TOKEN = os.environ.get("TOKEN")
|
|
13 |
# Change to your org name
|
14 |
OWNER = "Bias-Leaderboard"
|
15 |
DEVICE = "cuda:0" # "cpu" or "cuda:0" if you add compute
|
16 |
-
|
|
|
17 |
# ----------------------------------
|
18 |
|
19 |
# Define some input/output variables.
|
|
|
13 |
# Change to your org name
|
14 |
OWNER = "Bias-Leaderboard"
|
15 |
DEVICE = "cuda:0" # "cpu" or "cuda:0" if you add compute
|
16 |
+
# !!!! None for actual evaluations. !!!!! Use a low val like 20 for debugging
|
17 |
+
LIMIT = None # 20
|
18 |
# ----------------------------------
|
19 |
|
20 |
# Define some input/output variables.
|