Clémentine committed on
Commit
1d6da9d
·
1 Parent(s): 6902167

force cleanup on failure

Browse files
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -3,9 +3,9 @@ import os
3
  import logging
4
  from datetime import datetime
5
 
6
- from lighteval.main_accelerate import main
7
 
8
- from src.envs import RESULTS_REPO, CACHE_PATH
9
  from src.backend.manage_requests import EvalRequest
10
 
11
  logging.getLogger("openai").setLevel(logging.WARNING)
@@ -14,32 +14,42 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
14
  if limit:
15
  print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
16
 
17
- results = main(
18
- endpoint_model_name=f"{eval_request.model}_{eval_request.precision}".lower(),
19
- accelerator= accelerator,
20
- vendor= vendor,
21
- region= region,
22
- instance_size= instance_size,
23
- instance_type= instance_type,
24
- max_samples= limit,
25
- job_id= str(datetime.now()),
26
- push_results_to_hub= True,
27
- save_details= True,
28
- push_details_to_hub= True,
29
- public_run= False,
30
- cache_dir= CACHE_PATH,
31
- results_org= RESULTS_REPO,
32
- output_dir= local_dir,
33
- override_batch_size= batch_size,
34
- custom_tasks= "custom_tasks.py",
35
- tasks= task_names
36
- )
37
-
38
- results["config"]["model_dtype"] = eval_request.precision
39
- results["config"]["model_name"] = eval_request.model
40
- results["config"]["model_sha"] = eval_request.revision
41
-
42
- dumped = json.dumps(results, indent=2)
43
- print(dumped)
 
 
 
 
 
 
 
 
 
 
44
 
45
  return results
 
3
  import logging
4
  from datetime import datetime
5
 
6
+ from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
7
 
8
+ from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN
9
  from src.backend.manage_requests import EvalRequest
10
 
11
  logging.getLogger("openai").setLevel(logging.WARNING)
 
14
  if limit:
15
  print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
16
 
17
+ args = {
18
+ "endpoint_model_name":f"{eval_request.model}_{eval_request.precision}".lower(),
19
+ "accelerator": accelerator,
20
+ "vendor": vendor,
21
+ "region": region,
22
+ "instance_size": instance_size,
23
+ "instance_type": instance_type,
24
+ "max_samples": limit,
25
+ "job_id": str(datetime.now()),
26
+ "push_results_to_hub": True,
27
+ "save_details": True,
28
+ "push_details_to_hub": True,
29
+ "public_run": False,
30
+ "cache_dir": CACHE_PATH,
31
+ "results_org": RESULTS_REPO,
32
+ "output_dir": local_dir,
33
+ "override_batch_size": batch_size,
34
+ "custom_tasks": "custom_tasks.py",
35
+ "tasks": task_names
36
+ }
37
+
38
+ try:
39
+ results = main(args)
40
+
41
+ results["config"]["model_dtype"] = eval_request.precision
42
+ results["config"]["model_name"] = eval_request.model
43
+ results["config"]["model_sha"] = eval_request.revision
44
+
45
+ dumped = json.dumps(results, indent=2)
46
+ print(dumped)
47
+ except Exception: # if eval failed, we force a cleanup
48
+ env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)
49
+
50
+ model_config = create_model_config(args=args, accelerator=accelerator)
51
+ model, _ = load_model(config=model_config, env_config=env_config)
52
+ model.cleanup()
53
+
54
 
55
  return results