Zhu Jiajun (jz28583) Claude Opus 4.7 (1M context) committed on
Commit
5ead61d
·
1 Parent(s): d094faf

Add GT_BYPASS_KEY for unlimited submissions + dry mode

Browse files

An X-Bypass-Key request header matching the server's GT_BYPASS_KEY env var
(compared via hmac.compare_digest) unlocks two things:
- skip the per-IP, per-task daily quota gate
- with form param dry=1, score against GT but skip the leaderboard insert
(and skip the submission archive)

graphtestbed/submit.py reads the GRAPHTESTBED_BYPASS_KEY env var and, when it is
set, sends it as X-Bypass-Key on every POST; the --server-dry CLI flag adds
dry=1 to the form and exits with an error if the key is not set.

push_to_space.sh now overlays server/space/Dockerfile at root in addition to
README (HF Docker SDK looks for Dockerfile at the repo root).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

graphtestbed/submit.py CHANGED
@@ -64,7 +64,10 @@ def validate_submission(task: str, csv_path: Path) -> dict:
64
  }
65
 
66
 
67
- def submit(task: str, csv_path: Path, agent: str, dry_run: bool = False) -> None:
 
 
 
68
  info = validate_submission(task, csv_path)
69
  print(f"✓ Schema OK (rows={info['n_rows']}, sha256={info['sha256'][:12]}...)")
70
 
@@ -78,13 +81,29 @@ def submit(task: str, csv_path: Path, agent: str, dry_run: bool = False) -> None
78
  except ImportError:
79
  raise SystemExit("Missing dependency: pip install requests")
80
 
81
- print(f" → POST {API_URL}/submit task={task} agent={agent}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  try:
83
  with csv_path.open("rb") as f:
84
  resp = requests.post(
85
  f"{API_URL}/submit",
86
- data={"task": task, "agent": agent},
87
  files={"file": (csv_path.name, f, "text/csv")},
 
88
  timeout=TIMEOUT_SEC,
89
  )
90
  except requests.exceptions.ConnectionError as e:
@@ -124,8 +143,12 @@ def main() -> None:
124
  help="Identifier for this agent (e.g. autopipe-v0.4)")
125
  ap.add_argument("--dry-run", action="store_true",
126
  help="Validate schema locally, don't POST")
 
 
 
127
  args = ap.parse_args()
128
- submit(args.task, args.file, args.agent, dry_run=args.dry_run)
 
129
 
130
 
131
  if __name__ == "__main__":
 
64
  }
65
 
66
 
67
+ def submit(
68
+ task: str, csv_path: Path, agent: str,
69
+ dry_run: bool = False, server_dry: bool = False,
70
+ ) -> None:
71
  info = validate_submission(task, csv_path)
72
  print(f"✓ Schema OK (rows={info['n_rows']}, sha256={info['sha256'][:12]}...)")
73
 
 
81
  except ImportError:
82
  raise SystemExit("Missing dependency: pip install requests")
83
 
84
+ headers = {}
85
+ bypass_key = os.environ.get("GRAPHTESTBED_BYPASS_KEY", "").strip()
86
+ if bypass_key:
87
+ headers["X-Bypass-Key"] = bypass_key
88
+ elif server_dry:
89
+ raise SystemExit(
90
+ "--server-dry needs GRAPHTESTBED_BYPASS_KEY exported (the server "
91
+ "will only honor dry mode for clients holding the bypass key)."
92
+ )
93
+
94
+ data = {"task": task, "agent": agent}
95
+ if server_dry:
96
+ data["dry"] = "1"
97
+
98
+ print(f" → POST {API_URL}/submit task={task} agent={agent}"
99
+ f"{' [bypass]' if bypass_key else ''}{' [dry]' if server_dry else ''}")
100
  try:
101
  with csv_path.open("rb") as f:
102
  resp = requests.post(
103
  f"{API_URL}/submit",
104
+ data=data,
105
  files={"file": (csv_path.name, f, "text/csv")},
106
+ headers=headers,
107
  timeout=TIMEOUT_SEC,
108
  )
109
  except requests.exceptions.ConnectionError as e:
 
143
  help="Identifier for this agent (e.g. autopipe-v0.4)")
144
  ap.add_argument("--dry-run", action="store_true",
145
  help="Validate schema locally, don't POST")
146
+ ap.add_argument("--server-dry", action="store_true",
147
+ help="Score on the server but don't insert into the "
148
+ "leaderboard. Requires GRAPHTESTBED_BYPASS_KEY.")
149
  args = ap.parse_args()
150
+ submit(args.task, args.file, args.agent,
151
+ dry_run=args.dry_run, server_dry=args.server_dry)
152
 
153
 
154
  if __name__ == "__main__":
server/api.py CHANGED
@@ -50,6 +50,7 @@ MANIFEST_PATH = Path(os.environ.get(
50
  Path(__file__).resolve().parents[1] / "datasets" / "manifest.yaml",
51
  ))
52
  QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
 
53
  MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
54
 
55
 
@@ -154,6 +155,14 @@ def submit():
154
  file = request.files.get("file")
155
  ip = request.headers.get("X-Forwarded-For", request.remote_addr or "unknown")
156
 
 
 
 
 
 
 
 
 
157
  if not (task and agent and file):
158
  return jsonify({"error": "form fields required: task, agent, file"}), 400
159
 
@@ -162,12 +171,15 @@ def submit():
162
  return jsonify({"error": f"unknown task '{task}'", "known": sorted(manifest)}), 404
163
  cfg = manifest[task]
164
 
165
- quota = _quota_remaining(task, ip)
166
- if quota <= 0:
167
- return jsonify({
168
- "error": f"quota exceeded ({QUOTA_PER_DAY}/day per IP per task)",
169
- "task": task,
170
- }), 429
 
 
 
171
 
172
  raw = file.read()
173
  sub_sha = hashlib.sha256(raw).hexdigest()
@@ -192,21 +204,22 @@ def submit():
192
  run_id = uuid.uuid4().hex[:12]
193
  now = dt.datetime.now(dt.timezone.utc).isoformat()
194
  conn = _db()
195
- conn.execute(
196
- "INSERT INTO submissions VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
197
- (run_id, task, agent, scored["primary"],
198
- json.dumps(scored["secondary"]), sub_sha, scored["n_rows"], ip, now),
199
- )
200
- conn.commit()
 
201
 
202
- # Archive the raw CSV when GT_ARCHIVE_DIR is configured, so the deploy
203
- # host can later prove what each scored entry was. Filename embeds the
204
- # agent + run_id so multiple submissions don't collide.
205
- if ARCHIVE_DIR is not None:
206
- safe_agent = "".join(c if c.isalnum() or c in "-_." else "_" for c in agent)
207
- out = ARCHIVE_DIR / task / f"{safe_agent}-{run_id}.csv"
208
- out.parent.mkdir(parents=True, exist_ok=True)
209
- out.write_bytes(raw)
210
 
211
  # Rank = how many distinct agents have a strictly better best-score on
212
  # this task. The just-inserted row contributes to that count only if the
@@ -231,7 +244,9 @@ def submit():
231
  "secondary": scored["secondary"],
232
  "n_rows": scored["n_rows"],
233
  "leaderboard_rank": rank,
234
- "quota_remaining": quota - 1,
 
 
235
  "submitted_at": now,
236
  })
237
 
 
50
  Path(__file__).resolve().parents[1] / "datasets" / "manifest.yaml",
51
  ))
52
  QUOTA_PER_DAY = int(os.environ.get("GT_QUOTA", "5"))
53
+ BYPASS_KEY = os.environ.get("GT_BYPASS_KEY", "").strip() or None
54
  MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB hard cap
55
 
56
 
 
155
  file = request.files.get("file")
156
  ip = request.headers.get("X-Forwarded-For", request.remote_addr or "unknown")
157
 
158
+ # Bypass: maintainer/CI key skips quota and (optionally with dry=1) the
159
+ # leaderboard insert. Compared with hmac.compare_digest to avoid timing
160
+ # leaks against the hex-string secret.
161
+ sent_key = request.headers.get("X-Bypass-Key", "").strip()
162
+ bypass = bool(BYPASS_KEY and sent_key
163
+ and __import__("hmac").compare_digest(sent_key, BYPASS_KEY))
164
+ dry = bypass and request.form.get("dry") == "1"
165
+
166
  if not (task and agent and file):
167
  return jsonify({"error": "form fields required: task, agent, file"}), 400
168
 
 
171
  return jsonify({"error": f"unknown task '{task}'", "known": sorted(manifest)}), 404
172
  cfg = manifest[task]
173
 
174
+ if bypass:
175
+ quota = -1
176
+ else:
177
+ quota = _quota_remaining(task, ip)
178
+ if quota <= 0:
179
+ return jsonify({
180
+ "error": f"quota exceeded ({QUOTA_PER_DAY}/day per IP per task)",
181
+ "task": task,
182
+ }), 429
183
 
184
  raw = file.read()
185
  sub_sha = hashlib.sha256(raw).hexdigest()
 
204
  run_id = uuid.uuid4().hex[:12]
205
  now = dt.datetime.now(dt.timezone.utc).isoformat()
206
  conn = _db()
207
+ if not dry:
208
+ conn.execute(
209
+ "INSERT INTO submissions VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
210
+ (run_id, task, agent, scored["primary"],
211
+ json.dumps(scored["secondary"]), sub_sha, scored["n_rows"], ip, now),
212
+ )
213
+ conn.commit()
214
 
215
+ # Archive the raw CSV when GT_ARCHIVE_DIR is configured, so the deploy
216
+ # host can later prove what each scored entry was. Filename embeds the
217
+ # agent + run_id so multiple submissions don't collide.
218
+ if ARCHIVE_DIR is not None:
219
+ safe_agent = "".join(c if c.isalnum() or c in "-_." else "_" for c in agent)
220
+ out = ARCHIVE_DIR / task / f"{safe_agent}-{run_id}.csv"
221
+ out.parent.mkdir(parents=True, exist_ok=True)
222
+ out.write_bytes(raw)
223
 
224
  # Rank = how many distinct agents have a strictly better best-score on
225
  # this task. The just-inserted row contributes to that count only if the
 
244
  "secondary": scored["secondary"],
245
  "n_rows": scored["n_rows"],
246
  "leaderboard_rank": rank,
247
+ "quota_remaining": "unlimited" if bypass else (quota - 1),
248
+ "bypass": bypass,
249
+ "dry": dry,
250
  "submitted_at": now,
251
  })
252
 
server/space/README.md CHANGED
@@ -43,6 +43,7 @@ Test labels live only in the companion private dataset repo
43
  | `GT_DATASET_REPO` | no | `lanczos/graphtestbed-gt` | private dataset holding GT + leaderboard backups |
44
  | `GT_BACKUP_INTERVAL` | no | `60` | seconds between sqlite → dataset-repo pushes |
45
  | `GT_QUOTA` | no | `5` | submissions/day/IP/task |
 
46
 
47
  ## Persistence
48
 
 
43
  | `GT_DATASET_REPO` | no | `lanczos/graphtestbed-gt` | private dataset holding GT + leaderboard backups |
44
  | `GT_BACKUP_INTERVAL` | no | `60` | seconds between sqlite → dataset-repo pushes |
45
  | `GT_QUOTA` | no | `5` | submissions/day/IP/task |
46
+ | `GT_BYPASS_KEY` | no | — | shared secret; clients sending it as the `X-Bypass-Key` header skip the quota and may pass `dry=1` to be scored without inserting into the leaderboard or archiving the submission |
47
 
48
  ## Persistence
49
 
server/space/push_to_space.sh CHANGED
@@ -18,9 +18,12 @@ trap 'git checkout "$BRANCH" >/dev/null 2>&1 || true; \
18
  git branch -D "$TEMP" >/dev/null 2>&1 || true' EXIT
19
 
20
  git checkout -b "$TEMP"
 
 
21
  cp server/space/README.md README.md
22
- git add README.md
23
- git commit --no-verify -m "deploy: overlay server/space/README.md as Space root"
 
24
  git push -f space "$TEMP:main"
25
  echo
26
  echo "pushed to space/main"
 
18
  git branch -D "$TEMP" >/dev/null 2>&1 || true' EXIT
19
 
20
  git checkout -b "$TEMP"
21
+ # HF Docker SDK looks for Dockerfile at the repo root; our canonical copy
22
+ # lives in server/space/. Overlay both for the deploy.
23
  cp server/space/README.md README.md
24
+ cp server/space/Dockerfile Dockerfile
25
+ git add README.md Dockerfile
26
+ git commit --no-verify -m "deploy: overlay server/space/{README,Dockerfile} at root"
27
  git push -f space "$TEMP:main"
28
  echo
29
  echo "pushed to space/main"