Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # Use specific python image
2
- FROM registry.hf.space/pingandpasquale-moe-llm-gpu-poor-leaderboard:latest
3
 
4
  RUN pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ moe-infinity --no-cache-dir
5
  # To fix pydantic version
 
1
  # Use specific python image
2
+ FROM registry.hf.space/sparse-generative-ai-open-moe-llm-leaderboard:latest
3
 
4
  RUN pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ moe-infinity --no-cache-dir
5
  # To fix pydantic version
cli/create_request_file.py CHANGED
@@ -9,7 +9,7 @@ from colorama import Fore
9
  from huggingface_hub import HfApi, snapshot_download
10
 
11
  EVAL_REQUESTS_PATH = "eval-queue"
12
- QUEUE_REPO = "PingAndPasquale/requests"
13
 
14
  precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
15
  model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
 
9
  from huggingface_hub import HfApi, snapshot_download
10
 
11
  EVAL_REQUESTS_PATH = "eval-queue"
12
+ QUEUE_REPO = "sparse-generative-ai/requests"
13
 
14
  precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
15
  model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
src/backend/run_eval_suite.py CHANGED
@@ -12,8 +12,7 @@ from src.backend.tasks.cnndm.task_v2 import CNNDMv2
12
  from src.backend.tasks.selfcheckgpt.task import SelfCheckGPT
13
 
14
  from src.backend.huggingface_generate_until import HFLMwithChatTemplate
15
- from src.backend.moe_infinity import MoEHFLM
16
-
17
 
18
  def run_evaluation(
19
  eval_request: EvalRequest,
 
12
  from src.backend.tasks.selfcheckgpt.task import SelfCheckGPT
13
 
14
  from src.backend.huggingface_generate_until import HFLMwithChatTemplate
15
+ from src.backend.moe_infinity import MoEHFLM # MoEInfinity
 
16
 
17
  def run_evaluation(
18
  eval_request: EvalRequest,
src/envs.py CHANGED
@@ -5,15 +5,15 @@ from huggingface_hub import HfApi
5
  # clone / pull the lmeh eval data
6
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
7
 
8
- # REPO_ID = "pminervini/PingAndPasquale"
9
- REPO_ID = "PingAndPasquale/MOE-LLM-GPU-Poor-Leaderboard"
10
 
11
- QUEUE_REPO = "PingAndPasquale/requests"
12
  QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
13
- RESULTS_REPO = "PingAndPasquale/results"
14
 
15
- PRIVATE_QUEUE_REPO = "PingAndPasquale/private-requests"
16
- PRIVATE_RESULTS_REPO = "PingAndPasquale/private-results"
17
 
18
  IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
19
 
@@ -26,7 +26,7 @@ EVAL_REQUESTS_PATH_OPEN_LLM = os.path.join(CACHE_PATH, "eval-queue-open-llm")
26
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
27
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
28
 
29
- PATH_TO_COLLECTION = "PingAndPasquale/llm-leaderboard-best-models-652d6c7965a4619fb5c27a03"
30
 
31
  # Rate limit variables
32
  RATE_LIMIT_PERIOD = 7
 
5
  # clone / pull the lmeh eval data
6
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
7
 
8
+ # REPO_ID = "pminervini/sparse-generative-ai"
9
+ REPO_ID = "sparse-generative-ai/open-moe-llm-leaderboard"
10
 
11
+ QUEUE_REPO = "sparse-generative-ai/requests"
12
  QUEUE_REPO_OPEN_LLM = "open-llm-leaderboard/requests"
13
+ RESULTS_REPO = "sparse-generative-ai/results"
14
 
15
+ PRIVATE_QUEUE_REPO = "sparse-generative-ai/private-requests"
16
+ PRIVATE_RESULTS_REPO = "sparse-generative-ai/private-results"
17
 
18
  IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
19
 
 
26
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
27
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
28
 
29
+ PATH_TO_COLLECTION = "sparse-generative-ai/llm-leaderboard-best-models-652d6c7965a4619fb5c27a03"
30
 
31
  # Rate limit variables
32
  RATE_LIMIT_PERIOD = 7
src/submission/submit.py CHANGED
@@ -113,13 +113,14 @@ def add_new_eval(
113
  }
114
 
115
  # Check for duplicate submission
116
- if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
117
  return styled_warning("This model has been already submitted.")
118
 
119
  print("Creating eval file")
120
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
121
  os.makedirs(OUT_DIR, exist_ok=True)
122
- out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
 
123
 
124
  with open(out_path, "w") as f:
125
  f.write(json.dumps(eval_entry))
 
113
  }
114
 
115
  # Check for duplicate submission
116
+ if f"{model}_{revision}_{precision}_{inference_framework}" in REQUESTED_MODELS:
117
  return styled_warning("This model has been already submitted.")
118
 
119
  print("Creating eval file")
120
  OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
121
  os.makedirs(OUT_DIR, exist_ok=True)
122
+ # out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
123
+ out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}_{inference_framework}.json"
124
 
125
  with open(out_path, "w") as f:
126
  f.write(json.dumps(eval_entry))