Commit 2324bf1 (1 parent: 222cf2a), committed by ayushi0430

add local run mode
Files changed:
- app.py +18 -21
- main_backend.py +53 -34
- scripts/create_request_file.py +10 -8
- src/about.py +2 -0
- src/backend/run_eval_suite.py +30 -23
- src/leaderboard/read_evals.py +4 -2
- start.sh +17 -1
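
The commit threads a single RUN_MODE flag through the frontend, the backend worker, and the request script, so every Hub interaction (snapshot downloads, queue updates, result uploads) can be bypassed for a fully local run. src/envs.py is not part of this diff; a minimal sketch of what it presumably gains, assuming the flag is read from the environment that start.sh exports:

    # Hypothetical addition to src/envs.py (the file is not shown in this commit).
    import os

    RUN_MODE = os.getenv("RUN_MODE", "")  # "LOCAL" disables all Hub sync; anything else keeps it on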
app.py
CHANGED
@@ -1,7 +1,3 @@
-# import os
-# os.environ['CURL_CA_BUNDLE'] = ''
-#
-#
 import subprocess
 
 subprocess.run(["python", "scripts/fix_harness_import.py"])
@@ -33,7 +29,7 @@ from src.display.utils import (
     WeightType,
     Precision
 )
-from src.envs import API, DEVICE, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.envs import API, DEVICE, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN, RUN_MODE
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
@@ -46,21 +42,22 @@ def launch_backend():
     _ = subprocess.run(["python", "main_backend.py"])
 
 
-try:
-    print(f"Downloading {EVAL_REQUESTS_PATH}")
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
-        token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(f"Downloading {EVAL_RESULTS_PATH}")
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
+if RUN_MODE != "LOCAL":
+    try:
+        print(f"Downloading {EVAL_REQUESTS_PATH}")
+        snapshot_download(
+            repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+            token=TOKEN
+        )
+    except Exception:
+        restart_space()
+    try:
+        print(f"Downloading {EVAL_RESULTS_PATH}")
+        snapshot_download(
+            repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        )
+    except Exception:
+        restart_space()
 
 _ = subprocess.run(["python", "main_backend.py"])
 
@@ -359,4 +356,4 @@ scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.add_job(launch_backend, "interval", seconds=100) # will only allow one job to be run at the same time
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch(share=True)
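
With the guard in place, a LOCAL run skips both snapshot_download calls, so the request and result folders must already exist on disk. A minimal sketch of that setup, with folder names assumed (the actual EVAL_REQUESTS_PATH and EVAL_RESULTS_PATH values come from src.envs, which this diff does not show):

    import os

    os.environ["RUN_MODE"] = "LOCAL"  # app.py will then skip both snapshot_download calls

    # Assumed folder names standing in for EVAL_REQUESTS_PATH / EVAL_RESULTS_PATH:
    for path in ("eval-queue", "eval-results"):
        os.makedirs(path, exist_ok=True)

The switch to launch(share=True) additionally prints a public Gradio URL, which makes the UI reachable when it runs inside the container built by start.sh.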
main_backend.py
CHANGED
@@ -1,18 +1,22 @@
 import logging
+import os
 import pprint
 
 from huggingface_hub import snapshot_download
 import subprocess
+
 subprocess.run(["python", "scripts/fix_harness_import.py"])
 
 logging.getLogger("openai").setLevel(logging.WARNING)
 
 from src.backend.run_eval_suite import run_evaluation
-from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
+from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, EvalRequest
 from src.backend.sort_queue import sort_models_by_priority
 
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
+from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, \
+    LIMIT, TOKEN, RUN_MODE
 from src.about import Tasks, NUM_FEWSHOT
+
 TASKS_HARNESS = [task.value.benchmark for task in Tasks]
 
 logging.basicConfig(level=logging.ERROR)
@@ -23,9 +27,11 @@ RUNNING_STATUS = "RUNNING"
 FINISHED_STATUS = "FINISHED"
 FAILED_STATUS = "FAILED"
 
+
 # TODO: uncomment
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+if RUN_MODE != "LOCAL":
+    snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
 
 def run_auto_eval():
     current_pending_status = [PENDING_STATUS]
@@ -33,21 +39,33 @@ def run_auto_eval():
     # pull the eval dataset from the hub and parse any eval requests
     # check completed evals and set them to finished
 
-    check_completed_evals(
-        api=API,
-        checked_status=RUNNING_STATUS,
-        completed_status=FINISHED_STATUS,
-        failed_status=FAILED_STATUS,
-        hf_repo=QUEUE_REPO,
-        local_dir=EVAL_REQUESTS_PATH_BACKEND,
-        hf_repo_results=RESULTS_REPO,
-        local_dir_results=EVAL_RESULTS_PATH_BACKEND
-    )
-    # Get all eval request that are PENDING, if you want to run other evals, change this parameter
-    eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO,
-                                      local_dir=EVAL_REQUESTS_PATH_BACKEND)
-    # Sort the evals by priority (first submitted first run)
-    eval_requests = sort_models_by_priority(api=API, models=eval_requests)
+    if RUN_MODE != "LOCAL":
+        check_completed_evals(
+            api=API,
+            checked_status=RUNNING_STATUS,
+            completed_status=FINISHED_STATUS,
+            failed_status=FAILED_STATUS,
+            hf_repo=QUEUE_REPO,
+            local_dir=EVAL_REQUESTS_PATH_BACKEND,
+            hf_repo_results=RESULTS_REPO,
+            local_dir_results=EVAL_RESULTS_PATH_BACKEND
+        )
+        # Get all eval request that are PENDING, if you want to run other evals, change this parameter
+        eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO,
+                                          local_dir=EVAL_REQUESTS_PATH_BACKEND)
+        # Sort the evals by priority (first submitted first run)
+        eval_requests = sort_models_by_priority(api=API, models=eval_requests)
+
+    else:
+        local_model_name = os.getenv("LOCAL_MODEL_NAME", "hf-internal-testing/tiny-random-gpt2")
+        sample_request = {
+            "model": local_model_name, "json_filepath": "", "base_model": "", "revision": "main",
+            "private": False,
+            "precision": "bfloat16", "weight_type": "Original", "status": "PENDING",
+            "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0,
+            "params": 0.1, "license": "custom"
+        }
+        eval_requests = [EvalRequest(**sample_request)]
 
     print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
 
@@ -57,27 +75,28 @@ def run_auto_eval():
     eval_request = eval_requests[0]
     pp.pprint(eval_request)
 
-    set_eval_request(
-        api=API,
-        eval_request=eval_request,
-        set_to_status=RUNNING_STATUS,
-        hf_repo=QUEUE_REPO,
-        local_dir=EVAL_REQUESTS_PATH_BACKEND,
-    )
+    if RUN_MODE != "LOCAL":
+        set_eval_request(
+            api=API,
+            eval_request=eval_request,
+            set_to_status=RUNNING_STATUS,
+            hf_repo=QUEUE_REPO,
+            local_dir=EVAL_REQUESTS_PATH_BACKEND,
+        )
 
     run_evaluation(
-        eval_request=eval_request,
-        task_names=TASKS_HARNESS,
-        num_fewshot=NUM_FEWSHOT,
+        eval_request=eval_request,
+        task_names=TASKS_HARNESS,
+        num_fewshot=NUM_FEWSHOT,
         local_dir=EVAL_RESULTS_PATH_BACKEND,
         results_repo=RESULTS_REPO,
-        batch_size=1,
-        device=DEVICE,
-        no_cache=True,
+        batch_size=1,
+        device=DEVICE,
+        no_cache=True,
         limit=LIMIT
-    )
+    )
     logging.info("Shopping finished")
 
 
 if __name__ == "__main__":
-    run_auto_eval()
+    run_auto_eval()
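
In the LOCAL branch the worker fabricates a single pending request instead of pulling the queue repo. The same construction as a standalone snippet, with the field set copied from the diff (whether EvalRequest accepts exactly these keywords is defined in manage_requests.py, which is not shown):

    from src.backend.manage_requests import EvalRequest

    sample_request = {
        "model": "hf-internal-testing/tiny-random-gpt2",  # overridden by LOCAL_MODEL_NAME
        "json_filepath": "", "base_model": "", "revision": "main", "private": False,
        "precision": "bfloat16", "weight_type": "Original", "status": "PENDING",
        "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\U0001F7E2 : pretrained",
        "likes": 0, "params": 0.1, "license": "custom",
    }
    eval_requests = [EvalRequest(**sample_request)]

Here "\U0001F7E2" is the green-circle emoji that the diff spells as the JSON escape \ud83d\udfe2. Everything Hub-side (check_completed_evals, get_eval_requests, sort_models_by_priority, set_eval_request) is skipped in this branch, so results land only under EVAL_RESULTS_PATH_BACKEND.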
scripts/create_request_file.py
CHANGED
@@ -7,7 +7,7 @@ from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
-from src.envs import TOKEN, EVAL_REQUESTS_PATH, QUEUE_REPO
+from src.envs import TOKEN, EVAL_REQUESTS_PATH, QUEUE_REPO, RUN_MODE
 
 precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ", "float32")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
@@ -34,6 +34,7 @@ def get_model_size(model_info, precision: str):
 def main():
     api = HfApi()
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
     snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
 
     model_name = click.prompt("Enter model name")
@@ -90,13 +91,14 @@ def main():
         with open(out_path, "w") as f:
             f.write(json.dumps(eval_entry))
 
-        api.upload_file(
-            path_or_fileobj=out_path,
-            path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
-            repo_id=QUEUE_REPO,
-            repo_type="dataset",
-            commit_message=f"Add {model_name} to eval queue",
-        )
+        if RUN_MODE != "LOCAL":
+            api.upload_file(
+                path_or_fileobj=out_path,
+                path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
+                repo_id=QUEUE_REPO,
+                repo_type="dataset",
+                commit_message=f"Add {model_name} to eval queue",
+            )
     else:
         click.echo("aborting...")
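
The new guard wraps an upload whose path_in_repo strips the local queue prefix from out_path. A worked example of that expression, with an illustrative EVAL_REQUESTS_PATH and a hypothetical request filename (the real name is assembled earlier in the script):

    EVAL_REQUESTS_PATH = "eval-queue"                    # illustrative value
    out_path = "eval-queue/org/model_eval_request.json"  # hypothetical filename

    # Everything after "<EVAL_REQUESTS_PATH>/" becomes the path inside the dataset repo:
    path_in_repo = out_path.split(f"{EVAL_REQUESTS_PATH}/")[1]
    assert path_in_repo == "org/model_eval_request.json"

In LOCAL mode the JSON is still written to disk, but no commit is pushed to QUEUE_REPO.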
src/about.py
CHANGED
@@ -16,6 +16,8 @@ class Tasks(Enum):
     # task1 = Task("logiqa", "acc_norm", "LogiQA")
     response_subjective_score = Task("response_subjective_score", "response_subjective_score", "Subjective Response Score")
     product_id_precision_score = Task("product_id_precision_score", "product_id_precision_score", "Product ID Precision Score")
+    mmlu_anatomy = Task("mmlu_flan_n_shot_generative_anatomy", "mmlu_flan_n_shot_generative_anatomy", "MMLU (anatomy)")
+    mmly_astronomy = Task("mmlu_flan_n_shot_generative_astronomy", "mmlu_flan_n_shot_generative_astronomy", "MMLU (astronomy)")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
|
src/backend/run_eval_suite.py
CHANGED
@@ -4,7 +4,7 @@ import logging
 from datetime import datetime
 from lm_eval import tasks, evaluator, utils
 
-from src.envs import RESULTS_REPO, API
+from src.envs import RESULTS_REPO, API, RUN_MODE
 from src.backend.manage_requests import EvalRequest
 from src.backend.lamini_evaluator import LaminiEvaluator
 from src.backend.harness_evaluator import HarnessEvaluator
@@ -19,20 +19,20 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
             "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
         )
 
-
-
     lamini_evaluator = LaminiEvaluator(eval_request.model, eval_request.revision, eval_request.precision,
                                        batch_size, device, no_cache, limit, write_out=True,
                                        output_base_path='logs')
    lamini_results = lamini_evaluator.evaluate()
 
     ## task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
+    task_names = ["mmlu_flan_n_shot_generative_anatomy", "mmlu_flan_n_shot_generative_astronomy"]
     print(f"Selected Tasks: {task_names}")
     harness_evaluator = HarnessEvaluator(eval_request.model, eval_request.revision, eval_request.precision,
-                                         batch_size, device, no_cache, limit, write_out=True,
-                                         output_base_path='logs')
+                                         batch_size, device, no_cache, limit, write_out=True,
+                                         output_base_path='logs')
     results = harness_evaluator.evaluate(task_names)
     results_trimmed = {
+        "config": results["config"],
         "results": {
             "mmlu_flan_n_shot_generative_astronomy": results["results"]["mmlu_flan_n_shot_generative_astronomy"],
             "mmlu_flan_n_shot_generative_anatomy": results["results"]["mmlu_flan_n_shot_generative_anatomy"],
@@ -40,29 +40,36 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
             "product_id_precision_score": lamini_results["results"]["product_id_precision_score"],
         }
     }
-    results_trimmed["config"]["model_dtype"] = eval_request.precision
-    results_trimmed["config"]["model_name"] = eval_request.model
-    results_trimmed["config"]["model_sha"] = eval_request.revision
+    results_trimmed["config"]["model_dtype"] = eval_request.precision
+    results_trimmed["config"]["model_name"] = eval_request.model
+    results_trimmed["config"]["model_sha"] = eval_request.revision
 
     output = json.dumps(results_trimmed, indent=4)
     print("output:", output)
 
-    output_path = os.path.join(local_dir, *eval_request.model.split("/"),
-                               f"results_{datetime.now()}.json")
-    os.makedirs(os.path.dirname(output_path), exist_ok=True)
-    print(f"output_path: {output_path}, exists: {os.path.exists(output_path)}")
-
-    with open(output_path, "w") as f:
-        f.write(output)
-
+    if RUN_MODE == "LOCAL":
+        output_path = os.path.join(local_dir, *eval_request.model.split("/"))
+        os.makedirs(output_path)
+        print(f"output_path: {output_path}, exists: {os.path.exists(output_path)}")
+        with open(f"{output_path}/results_{datetime.now()}.json", "w") as f:
+            f.write(output)
+    else:
+        output_path = os.path.join(local_dir, *eval_request.model.split("/"),
+                                   f"results_{datetime.now()}.json")
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        print(f"output_path: {output_path}, exists: {os.path.exists(output_path)}")
+
+        with open(output_path, "w") as f:
+            f.write(output)
 
     # TODO: uncomment
-    API.upload_file(
-        path_or_fileobj=output_path,
-        path_in_repo=f"{eval_request.model}/results_{datetime.now()}.json",
-        repo_id=results_repo,
-        repo_type="dataset",
-    )
-    print(f"upload to {results_repo} complete")
+    if RUN_MODE != "LOCAL":
+        API.upload_file(
+            path_or_fileobj=output_path,
+            path_in_repo=f"{eval_request.model}/results_{datetime.now()}.json",
+            repo_id=results_repo,
+            repo_type="dataset",
+        )
+        print(f"upload to {results_repo} complete")
 
     return results
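
Assembled, the trimmed results file that run_evaluation now writes (and, outside local mode, uploads to the results repo) has this shape; the per-task payloads are placeholders, since the harness and Lamini result structures are not shown, and the response_subjective_score entry falls between the two hunks and is assumed:

    results_trimmed = {
        "config": {
            # copied from results["config"], then overwritten:
            "model_dtype": "bfloat16",  # eval_request.precision
            "model_name": "org/model",  # eval_request.model
            "model_sha": "main",        # eval_request.revision
        },
        "results": {
            "mmlu_flan_n_shot_generative_astronomy": {"...": "placeholder"},
            "mmlu_flan_n_shot_generative_anatomy": {"...": "placeholder"},
            "response_subjective_score": {"...": "placeholder"},  # assumed, between hunks
            "product_id_precision_score": {"...": "placeholder"},
        },
    }

One behavioral difference between the two branches: the LOCAL path calls os.makedirs(output_path) without exist_ok=True, so a second local run for the same model raises FileExistsError unless the folder is removed first.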
src/leaderboard/read_evals.py
CHANGED
@@ -3,7 +3,7 @@ import json
 import math
 import os
 from dataclasses import dataclass
-from src.envs import EVAL_RESULTS_PATH_BACKEND
+from src.envs import EVAL_RESULTS_PATH_BACKEND, RUN_MODE, EVAL_REQUESTS_PATH_BACKEND
 import dateutil
 import numpy as np
 
@@ -156,7 +156,9 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     #TODO: comment
-
+    if RUN_MODE == "LOCAL":
+        results_path = EVAL_RESULTS_PATH_BACKEND
+        requests_path = EVAL_REQUESTS_PATH_BACKEND
     model_result_filepaths = []
     print("get_raw_eval_results - results_path", results_path)
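
Distilled, the guard pins the UI's read paths to the backend's own folders whenever RUN_MODE is LOCAL, regardless of what the caller passes; a runnable sketch with assumed folder names (the real ones come from src.envs):

    import os

    def resolve_paths(results_path: str, requests_path: str) -> tuple[str, str]:
        # Mirrors the new guard: local mode redirects both paths to the
        # backend-side folders.
        if os.getenv("RUN_MODE") == "LOCAL":
            return "eval-results-bk", "eval-queue-bk"  # assumed backend folder names
        return results_path, requests_path

    print(resolve_paths("eval-results", "eval-queue"))

This is what lets the leaderboard table populate from the files the local backend just wrote, without any Hub round-trip.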
start.sh
CHANGED
@@ -1,2 +1,18 @@
+#!/bin/bash
+
+for ARGUMENT in "$@"
+do
+  KEY=$(echo $ARGUMENT | cut -f1 -d=)
+
+  KEY_LENGTH=${#KEY}
+  VALUE="${ARGUMENT:$KEY_LENGTH+1}"
+
+  export "$KEY"="$VALUE"
+done
+
+
+echo "Run mode is: $RUN_MODE"
+echo "Model passed is: $LOCAL_MODEL_NAME"
+
 docker buildx build --platform=linux/amd64 -t ldr .
-docker run -it --rm -p 7860:7860 --platform=linux/amd64 -e TOKEN=$TOKEN ldr python app.py
+docker run -it --rm -p 7860:7860 --platform=linux/amd64 -e TOKEN=$TOKEN -e RUN_MODE=$RUN_MODE -e LOCAL_MODEL_NAME=$LOCAL_MODEL_NAME ldr python app.py
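
The argument loop exports each KEY=VALUE argument as an environment variable before the container starts, so a complete local run is one command (model and token values illustrative):

    # Evaluate a small model end to end without touching the Hub:
    ./start.sh RUN_MODE=LOCAL LOCAL_MODEL_NAME=hf-internal-testing/tiny-random-gpt2 TOKEN=dummy

    # Hub-synced run: omit RUN_MODE (or set it to anything other than LOCAL):
    ./start.sh TOKEN=<your-hf-token>

Slicing with ${ARGUMENT:$KEY_LENGTH+1} rather than a second cut keeps any further = characters inside the value intact.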