# leaderboard/adhoc.py — ad-hoc evaluation runner for the leaderboard backend.
import asyncio
from src.backend.manage_requests import EvalRequest
from src.envs import LIMIT, EVAL_RESULTS_PATH_BACKEND, RESULTS_REPO, DEVICE, LOCAL_MODEL_NAME
from src.backend.run_eval_suite import run_evaluation
from src.about import HarnessTasks
async def run_adhoc_eval(eval_request: EvalRequest):
    """Run the full harness benchmark suite for a single evaluation request.

    Collects every benchmark declared on the HarnessTasks enum and forwards
    the request to run_evaluation with the backend's configured paths,
    device, and sample limit.
    """
    harness_task_names = [task.value.benchmark for task in HarnessTasks]
    await run_evaluation(
        eval_request=eval_request,
        task_names=harness_task_names,
        num_fewshot=0,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        results_repo=RESULTS_REPO,
        batch_size=1,
        device=DEVICE,
        no_cache=True,
        limit=LIMIT,
    )
def main():
    """Build an EvalRequest for the locally configured model and run it.

    The field values mirror a previously observed request payload
    (see the submitted_time / model_type literals); only the model name
    comes from the environment via LOCAL_MODEL_NAME.
    """
    request_fields = {
        "model": LOCAL_MODEL_NAME,
        "json_filepath": "",
        "base_model": "",
        "revision": "main",
        "private": False,
        "precision": "bfloat16",
        "weight_type": "Original",
        "status": "PENDING",
        "submitted_time": "2023-11-21T18:10:08Z",
        "model_type": "\ud83d\udfe2 : pretrained",
        "likes": 0,
        "params": 0.1,
        "license": "custom",
    }
    eval_request = EvalRequest(**request_fields)
    print(f"eval_request: {eval_request}")
    # Drive the async evaluation to completion from this sync entry point.
    asyncio.run(run_adhoc_eval(eval_request))
# Script entry point: run the ad-hoc evaluation when executed directly.
if __name__ == "__main__":
    main()