JustinTX commited on
Commit
d7b3a74
·
verified ·
1 Parent(s): d28330f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ccevolve/baselines/ac1/theta_evolve.py +0 -0
  2. ccevolve/baselines/ac1/ttt_discover.py +0 -0
  3. ccevolve/baselines/openevolve_evaluation/__pycache__/ac1.cpython-313.pyc +0 -0
  4. ccevolve/baselines/openevolve_evaluation/__pycache__/circle_packing.cpython-313.pyc +0 -0
  5. ccevolve/baselines/openevolve_evaluation/__pycache__/erdos.cpython-313.pyc +0 -0
  6. ccevolve/baselines/openevolve_evaluation/ac1.py +87 -0
  7. ccevolve/baselines/openevolve_evaluation/circle_packing.py +340 -0
  8. ccevolve/baselines/openevolve_evaluation/erdos.py +76 -0
  9. ccevolve/baselines/publish_results/ac1/result.json +0 -0
  10. ccevolve/baselines/publish_results/circle_packing/result.json +1 -0
  11. ccevolve/baselines/publish_results/erdos/result.json +1 -0
  12. ccevolve/baselines/results/alphaevolve_ac/__pycache__/alphaevolve_v2.cpython-313.pyc +0 -0
  13. ccevolve/baselines/results/alphaevolve_ac/__pycache__/ttt_discover.cpython-313.pyc +0 -0
  14. ccevolve/baselines/results/alphaevolve_ac/alphaevolve_v2.py +32 -0
  15. ccevolve/baselines/results/alphaevolve_ac/autoevolve.py +0 -0
  16. ccevolve/baselines/results/alphaevolve_ac/ttt_discover.py +37 -0
  17. ccevolve/baselines/results/circle_packing/__pycache__/autoevolve.cpython-313.pyc +0 -0
  18. ccevolve/baselines/results/circle_packing/__pycache__/thetaevolve.cpython-313.pyc +0 -0
  19. ccevolve/baselines/results/circle_packing/autoevolve.py +219 -0
  20. ccevolve/baselines/results/circle_packing/thetaevolve.py +230 -0
  21. ccevolve/baselines/results/erdos_min_overlap/__pycache__/autoevolve.cpython-313.pyc +0 -0
  22. ccevolve/baselines/results/erdos_min_overlap/__pycache__/ttt_discover.cpython-313.pyc +0 -0
  23. ccevolve/baselines/results/erdos_min_overlap/autoevolve.py +153 -0
  24. ccevolve/baselines/results/erdos_min_overlap/ttt_discover.py +23 -0
  25. ccevolve/baselines/results/erdos_min_overlap/yyq/run_slsqp_n750.py +285 -0
  26. ccevolve/baselines/shinkaevolve/.githooks/pre-push +14 -0
  27. ccevolve/baselines/shinkaevolve/.github/workflows/ci.yml +41 -0
  28. ccevolve/baselines/shinkaevolve/.github/workflows/claude-code-review.yml +44 -0
  29. ccevolve/baselines/shinkaevolve/.github/workflows/claude.yml +50 -0
  30. ccevolve/baselines/shinkaevolve/.gitignore +193 -0
  31. ccevolve/baselines/shinkaevolve/LICENSE +201 -0
  32. ccevolve/baselines/shinkaevolve/README.md +402 -0
  33. ccevolve/baselines/shinkaevolve/configs/cluster/gcp.yaml +6 -0
  34. ccevolve/baselines/shinkaevolve/configs/cluster/local.yaml +6 -0
  35. ccevolve/baselines/shinkaevolve/configs/cluster/remote.yaml +1 -0
  36. ccevolve/baselines/shinkaevolve/configs/config.yaml +17 -0
  37. ccevolve/baselines/shinkaevolve/configs/database/island_large.yaml +21 -0
  38. ccevolve/baselines/shinkaevolve/configs/database/island_medium.yaml +15 -0
  39. ccevolve/baselines/shinkaevolve/configs/database/island_small.yaml +13 -0
  40. ccevolve/baselines/shinkaevolve/configs/evolution/large_budget.yaml +39 -0
  41. ccevolve/baselines/shinkaevolve/configs/evolution/medium_budget.yaml +40 -0
  42. ccevolve/baselines/shinkaevolve/configs/evolution/small_budget.yaml +19 -0
  43. ccevolve/baselines/shinkaevolve/configs/task/circle_packing.yaml +37 -0
  44. ccevolve/baselines/shinkaevolve/configs/task/novelty_generator.yaml +52 -0
  45. ccevolve/baselines/shinkaevolve/configs/variant/circle_packing_example.yaml +8 -0
  46. ccevolve/baselines/shinkaevolve/configs/variant/default.yaml +1 -0
  47. ccevolve/baselines/shinkaevolve/configs/variant/novelty_generator_example.yaml +8 -0
  48. ccevolve/baselines/shinkaevolve/docs/agentic_usage.md +193 -0
  49. ccevolve/baselines/shinkaevolve/docs/async_evolution.md +71 -0
  50. ccevolve/baselines/shinkaevolve/docs/configuration.md +388 -0
ccevolve/baselines/ac1/theta_evolve.py ADDED
File without changes
ccevolve/baselines/ac1/ttt_discover.py ADDED
File without changes
ccevolve/baselines/openevolve_evaluation/__pycache__/ac1.cpython-313.pyc ADDED
Binary file (3.6 kB). View file
 
ccevolve/baselines/openevolve_evaluation/__pycache__/circle_packing.cpython-313.pyc ADDED
Binary file (12.6 kB). View file
 
ccevolve/baselines/openevolve_evaluation/__pycache__/erdos.cpython-313.pyc ADDED
Binary file (3.57 kB). View file
 
ccevolve/baselines/openevolve_evaluation/ac1.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===--------------------------------------------------------------------------------------===#
2
+ #
3
+ # This file implements the evaluator for the first autocorrelation inequality problem.
4
+ #
5
+ # ===--------------------------------------------------------------------------------------===#
6
+ #
7
+ # Some of the code in this file is adapted from:
8
+ #
9
+ # google-deepmind/alphaevolve_results:
10
+ # Licensed under the Apache License v2.0.
11
+ #
12
+ # ===--------------------------------------------------------------------------------------===#
13
+
14
+ import sys
15
+ import os
16
+ from importlib import __import__
17
+ import time
18
+ import numpy as np
19
+
20
+ # known bounds
21
+ BENCHMARK = 1.5052939684401607
22
+
23
+
24
+ def verify_autocorrelation_solution(f_values: np.ndarray, c1_achieved: float, n_points: int):
25
+ """Verify the autocorrelation solution for UPPER BOUND optimization"""
26
+
27
+ # Check shape
28
+ if f_values.shape != (n_points,):
29
+ raise ValueError(f"Expected function values shape {(n_points,)}. Got {f_values.shape}.")
30
+
31
+ # Check non-negativity
32
+ if np.any(f_values < 0.0):
33
+ raise ValueError("Function must be non-negative.")
34
+
35
+ # Recompute C1 to verify
36
+ dx = 0.5 / n_points
37
+ f_nonneg = np.maximum(f_values, 0.0)
38
+
39
+ # Compute the FULL autoconvolution
40
+ autoconv = np.convolve(f_nonneg, f_nonneg, mode="full") * dx
41
+
42
+ # The rest of the calculation can be simplified as we now take the max over the whole result
43
+ integral_sq = (np.sum(f_nonneg) * dx) ** 2
44
+
45
+ if integral_sq < 1e-8:
46
+ raise ValueError("Function integral is too small.")
47
+
48
+ # The max of the full autoconv is the correct value
49
+ computed_c1 = float(np.max(autoconv / integral_sq))
50
+
51
+ # Verify consistency
52
+ delta = abs(computed_c1 - c1_achieved)
53
+ if delta > 1e-6:
54
+ raise ValueError(
55
+ f"C1 mismatch: reported {c1_achieved:.6f}, computed {computed_c1:.6f}, delta: {delta:.6f}"
56
+ )
57
+
58
+
59
+ def evaluate(program_path: str):
60
+ try:
61
+ abs_program_path = os.path.abspath(program_path)
62
+ program_dir = os.path.dirname(abs_program_path)
63
+ module_name = os.path.splitext(os.path.basename(program_path))[0]
64
+
65
+ try:
66
+ sys.path.insert(0, program_dir)
67
+ program = __import__(module_name)
68
+ start_time = time.time()
69
+ f_values, c1_achieved, loss, n_points = program.run()
70
+ end_time = time.time()
71
+ eval_time = end_time - start_time
72
+ except Exception as err:
73
+ raise err
74
+ finally:
75
+ if program_dir in sys.path:
76
+ sys.path.remove(program_dir)
77
+
78
+ verify_autocorrelation_solution(f_values, c1_achieved, n_points)
79
+ return {
80
+ "c1": float(c1_achieved),
81
+ "combined_score": BENCHMARK / float(c1_achieved),
82
+ "loss": float(loss),
83
+ "n_points": int(n_points),
84
+ "eval_time": float(eval_time),
85
+ }
86
+ except Exception as e:
87
+ return {"combined_score": 0.0, "error": str(e)}
ccevolve/baselines/openevolve_evaluation/circle_packing.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluator for circle packing example (n=26) with improved timeout handling
3
+ """
4
+
5
+ import importlib.util
6
+ import numpy as np
7
+ import time
8
+ import os
9
+ import signal
10
+ import subprocess
11
+ import tempfile
12
+ import traceback
13
+ import sys
14
+ import pickle
15
+
16
+
17
+ class TimeoutError(Exception):
18
+ pass
19
+
20
+
21
+ def timeout_handler(signum, frame):
22
+ """Handle timeout signal"""
23
+ raise TimeoutError("Function execution timed out")
24
+
25
+
26
+ def validate_packing(centers, radii):
27
+ """
28
+ Validate that circles don't overlap and are inside the unit square
29
+
30
+ Args:
31
+ centers: np.array of shape (n, 2) with (x, y) coordinates
32
+ radii: np.array of shape (n) with radius of each circle
33
+
34
+ Returns:
35
+ True if valid, False otherwise
36
+ """
37
+ n = centers.shape[0]
38
+
39
+ # Check for NaN values
40
+ if np.isnan(centers).any():
41
+ print("NaN values detected in circle centers")
42
+ return False
43
+
44
+ if np.isnan(radii).any():
45
+ print("NaN values detected in circle radii")
46
+ return False
47
+
48
+ # Check if radii are nonnegative and not nan
49
+ for i in range(n):
50
+ if radii[i] < 0:
51
+ print(f"Circle {i} has negative radius {radii[i]}")
52
+ return False
53
+ elif np.isnan(radii[i]):
54
+ print(f"Circle {i} has nan radius")
55
+ return False
56
+
57
+ # Check if circles are inside the unit square
58
+ for i in range(n):
59
+ x, y = centers[i]
60
+ r = radii[i]
61
+ if x - r < -1e-6 or x + r > 1 + 1e-6 or y - r < -1e-6 or y + r > 1 + 1e-6:
62
+ print(f"Circle {i} at ({x}, {y}) with radius {r} is outside the unit square")
63
+ return False
64
+
65
+ # Check for overlaps
66
+ for i in range(n):
67
+ for j in range(i + 1, n):
68
+ dist = np.sqrt(np.sum((centers[i] - centers[j]) ** 2))
69
+ if dist < radii[i] + radii[j] - 1e-6: # Allow for tiny numerical errors
70
+ print(f"Circles {i} and {j} overlap: dist={dist}, r1+r2={radii[i]+radii[j]}")
71
+ return False
72
+
73
+ return True
74
+
75
+
76
+ def run_with_timeout(program_path, timeout_seconds=20):
77
+ """
78
+ Run the program in a separate process with timeout
79
+ using a simple subprocess approach
80
+
81
+ Args:
82
+ program_path: Path to the program file
83
+ timeout_seconds: Maximum execution time in seconds
84
+
85
+ Returns:
86
+ centers, radii, sum_radii tuple from the program
87
+ """
88
+ # Create a temporary file to execute
89
+ with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
90
+ # Write a script that executes the program and saves results
91
+ script = f"""
92
+ import sys
93
+ import numpy as np
94
+ import os
95
+ import pickle
96
+ import traceback
97
+
98
+ # Add the directory to sys.path
99
+ sys.path.insert(0, os.path.dirname('{program_path}'))
100
+
101
+ # Debugging info
102
+ print(f"Running in subprocess, Python version: {{sys.version}}")
103
+ print(f"Program path: {program_path}")
104
+
105
+ try:
106
+ # Import the program
107
+ spec = __import__('importlib.util').util.spec_from_file_location("program", '{program_path}')
108
+ program = __import__('importlib.util').util.module_from_spec(spec)
109
+ spec.loader.exec_module(program)
110
+
111
+ # Run the packing function
112
+ print("Calling run_packing()...")
113
+ centers, radii, sum_radii = program.run_packing()
114
+ print(f"run_packing() returned successfully: sum_radii = {{sum_radii}}")
115
+
116
+ # Save results to a file
117
+ results = {{
118
+ 'centers': centers,
119
+ 'radii': radii,
120
+ 'sum_radii': sum_radii
121
+ }}
122
+
123
+ with open('{temp_file.name}.results', 'wb') as f:
124
+ pickle.dump(results, f)
125
+ print(f"Results saved to {temp_file.name}.results")
126
+
127
+ except Exception as e:
128
+ # If an error occurs, save the error instead
129
+ print(f"Error in subprocess: {{str(e)}}")
130
+ traceback.print_exc()
131
+ with open('{temp_file.name}.results', 'wb') as f:
132
+ pickle.dump({{'error': str(e)}}, f)
133
+ print(f"Error saved to {temp_file.name}.results")
134
+ """
135
+ temp_file.write(script.encode())
136
+ temp_file_path = temp_file.name
137
+
138
+ results_path = f"{temp_file_path}.results"
139
+
140
+ try:
141
+ # Run the script with timeout
142
+ process = subprocess.Popen(
143
+ [sys.executable, temp_file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE
144
+ )
145
+
146
+ try:
147
+ stdout, stderr = process.communicate(timeout=timeout_seconds)
148
+ exit_code = process.returncode
149
+
150
+ # Always print output for debugging purposes
151
+ print(f"Subprocess stdout: {stdout.decode()}")
152
+ if stderr:
153
+ print(f"Subprocess stderr: {stderr.decode()}")
154
+
155
+ # Still raise an error for non-zero exit codes, but only after printing the output
156
+ if exit_code != 0:
157
+ raise RuntimeError(f"Process exited with code {exit_code}")
158
+
159
+ # Load the results
160
+ if os.path.exists(results_path):
161
+ with open(results_path, "rb") as f:
162
+ results = pickle.load(f)
163
+
164
+ # Check if an error was returned
165
+ if "error" in results:
166
+ raise RuntimeError(f"Program execution failed: {results['error']}")
167
+
168
+ return results["centers"], results["radii"], results["sum_radii"]
169
+ else:
170
+ raise RuntimeError("Results file not found")
171
+
172
+ except subprocess.TimeoutExpired:
173
+ # Kill the process if it times out
174
+ process.kill()
175
+ process.wait()
176
+ raise TimeoutError(f"Process timed out after {timeout_seconds} seconds")
177
+
178
+ finally:
179
+ # Clean up temporary files
180
+ if os.path.exists(temp_file_path):
181
+ os.unlink(temp_file_path)
182
+ if os.path.exists(results_path):
183
+ os.unlink(results_path)
184
+
185
+
186
+ def evaluate(program_path):
187
+ """
188
+ Evaluate the program by running it once and checking the sum of radii
189
+
190
+ Args:
191
+ program_path: Path to the program file
192
+
193
+ Returns:
194
+ Dictionary of metrics
195
+ """
196
+ # Target value from the paper
197
+ TARGET_VALUE = 2.635 # AlphaEvolve result for n=26
198
+
199
+ try:
200
+ # For constructor-based approaches, a single evaluation is sufficient
201
+ # since the result is deterministic
202
+ start_time = time.time()
203
+
204
+ # Use subprocess to run with timeout
205
+ centers, radii, reported_sum = run_with_timeout(
206
+ program_path, timeout_seconds=600 # Single timeout
207
+ )
208
+
209
+ end_time = time.time()
210
+ eval_time = end_time - start_time
211
+
212
+ # Ensure centers and radii are numpy arrays
213
+ if not isinstance(centers, np.ndarray):
214
+ centers = np.array(centers)
215
+ if not isinstance(radii, np.ndarray):
216
+ radii = np.array(radii)
217
+
218
+ # Check for NaN values before validation
219
+ if np.isnan(centers).any() or np.isnan(radii).any():
220
+ print("NaN values detected in solution")
221
+ return {
222
+ "sum_radii": 0.0,
223
+ "target_ratio": 0.0,
224
+ "validity": 0.0,
225
+ "eval_time": float(time.time() - start_time),
226
+ "combined_score": 0.0,
227
+ }
228
+
229
+ # Validate solution
230
+ valid = validate_packing(centers, radii)
231
+
232
+ # Check shape and size
233
+ shape_valid = centers.shape == (26, 2) and radii.shape == (26,)
234
+ if not shape_valid:
235
+ print(
236
+ f"Invalid shapes: centers={centers.shape}, radii={radii.shape}, expected (26, 2) and (26,)"
237
+ )
238
+ valid = False
239
+
240
+ # Calculate sum
241
+ sum_radii = np.sum(radii) if valid else 0.0
242
+
243
+ # Make sure reported_sum matches the calculated sum
244
+ if abs(sum_radii - reported_sum) > 1e-6:
245
+ print(f"Warning: Reported sum {reported_sum} doesn't match calculated sum {sum_radii}")
246
+
247
+ # Target ratio (how close we are to the target)
248
+ target_ratio = sum_radii / TARGET_VALUE if valid else 0.0
249
+
250
+ # Validity score
251
+ validity = 1.0 if valid else 0.0
252
+
253
+ # Combined score - higher is better
254
+ combined_score = target_ratio * validity
255
+
256
+ print(
257
+ f"Evaluation: valid={valid}, sum_radii={sum_radii:.6f}, target={TARGET_VALUE}, ratio={target_ratio:.6f}, time={eval_time:.2f}s"
258
+ )
259
+
260
+ return {
261
+ "sum_radii": float(sum_radii),
262
+ "target_ratio": float(target_ratio),
263
+ "validity": float(validity),
264
+ "eval_time": float(eval_time),
265
+ "combined_score": float(combined_score),
266
+ }
267
+
268
+ except Exception as e:
269
+ print(f"Evaluation failed completely: {str(e)}")
270
+ traceback.print_exc()
271
+ return {
272
+ "sum_radii": 0.0,
273
+ "target_ratio": 0.0,
274
+ "validity": 0.0,
275
+ "eval_time": 0.0,
276
+ "combined_score": 0.0,
277
+ }
278
+
279
+
280
+ # Stage-based evaluation for cascade evaluation
281
+ def evaluate_stage1(program_path):
282
+ """
283
+ First stage evaluation - quick validation check
284
+ """
285
+ try:
286
+ # Use the simplified subprocess approach
287
+ try:
288
+ centers, radii, sum_radii = run_with_timeout(program_path, timeout_seconds=600)
289
+
290
+ # Ensure centers and radii are numpy arrays
291
+ if not isinstance(centers, np.ndarray):
292
+ centers = np.array(centers)
293
+ if not isinstance(radii, np.ndarray):
294
+ radii = np.array(radii)
295
+
296
+ # Validate solution (shapes and constraints)
297
+ shape_valid = centers.shape == (26, 2) and radii.shape == (26,)
298
+ if not shape_valid:
299
+ print(f"Invalid shapes: centers={centers.shape}, radii={radii.shape}")
300
+ return {"validity": 0.0, "error": "Invalid shapes"}
301
+
302
+ valid = validate_packing(centers, radii)
303
+
304
+ # Calculate sum
305
+ actual_sum = np.sum(radii) if valid else 0.0
306
+
307
+ # Target from paper
308
+ target = 2.635
309
+
310
+ # Simple combined score for stage 1
311
+ combined_score = (actual_sum / target) if valid else 0.0
312
+
313
+ # Return evaluation metrics
314
+ return {
315
+ "validity": 1.0 if valid else 0.0,
316
+ "sum_radii": float(actual_sum),
317
+ "target_ratio": float(actual_sum / target if valid else 0.0),
318
+ "combined_score": float(combined_score),
319
+ }
320
+
321
+ except TimeoutError as e:
322
+ print(f"Stage 1 evaluation timed out: {e}")
323
+ return {"validity": 0.0, "combined_score": 0.0, "error": "Timeout"}
324
+ except Exception as e:
325
+ print(f"Stage 1 evaluation failed: {e}")
326
+ print(traceback.format_exc())
327
+ return {"validity": 0.0, "combined_score": 0.0, "error": str(e)}
328
+
329
+ except Exception as e:
330
+ print(f"Stage 1 evaluation failed completely: {e}")
331
+ print(traceback.format_exc())
332
+ return {"validity": 0.0, "combined_score": 0.0, "error": str(e)}
333
+
334
+
335
+ def evaluate_stage2(program_path):
336
+ """
337
+ Second stage evaluation - full evaluation
338
+ """
339
+ # Full evaluation as in the main evaluate function
340
+ return evaluate(program_path)
ccevolve/baselines/openevolve_evaluation/erdos.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===--------------------------------------------------------------------------------------===#
2
+ #
3
+ # This file implements the evaluator for the erdos minimum overlap problem.
4
+ #
5
+ # ===--------------------------------------------------------------------------------------===#
6
+ #
7
+ # Some of the code in this file is adapted from:
8
+ #
9
+ # google-deepmind/alphaevolve_results:
10
+ # Licensed under the Apache License v2.0.
11
+ #
12
+ # ===--------------------------------------------------------------------------------------===#
13
+
14
+ import sys
15
+ import os
16
+ from importlib import __import__
17
+ import time
18
+ import numpy as np
19
+
20
+ # Known bounds
21
+ BENCHMARK = 0.38092303510845016
22
+
23
+
24
+ def verify_c5_solution(h_values: np.ndarray, c5_achieved: float, n_points: int):
25
+ """Verifies the C5 upper bound solution."""
26
+
27
+ if h_values.shape != (n_points,):
28
+ raise ValueError(f"Expected h shape ({n_points},), got {h_values.shape}")
29
+
30
+ # Verify h(x) in [0, 1] constraint
31
+ if np.any(h_values < 0) or np.any(h_values > 1):
32
+ raise ValueError(f"h(x) is not in [0, 1]. Range: [{h_values.min()}, {h_values.max()}]")
33
+
34
+ # Verify integral of h = 1 constraint
35
+ dx = 2.0 / n_points
36
+ integral_h = np.sum(h_values) * dx
37
+ if not np.isclose(integral_h, 1.0, atol=1e-3):
38
+ raise ValueError(f"Integral of h is not close to 1. Got: {integral_h:.6f}")
39
+
40
+ # Re-calculate the C5 bound using np.correlate
41
+ j_values = 1.0 - h_values
42
+ correlation = np.correlate(h_values, j_values, mode="full") * dx
43
+ computed_c5 = np.max(correlation)
44
+
45
+ # Check for consistency
46
+ if not np.isclose(computed_c5, c5_achieved, atol=1e-4):
47
+ raise ValueError(f"C5 mismatch: reported {c5_achieved:.6f}, computed {computed_c5:.6f}")
48
+
49
+
50
+ def evaluate(program_path: str):
51
+ try:
52
+ abs_program_path = os.path.abspath(program_path)
53
+ program_dir = os.path.dirname(abs_program_path)
54
+ module_name = os.path.splitext(os.path.basename(program_path))[0]
55
+
56
+ try:
57
+ sys.path.insert(0, program_dir)
58
+ program = __import__(module_name)
59
+ start_time = time.time()
60
+ h_values, c5_bound, n_points = program.run()
61
+ end_time = time.time()
62
+ eval_time = end_time - start_time
63
+ finally:
64
+ if program_dir in sys.path:
65
+ sys.path.remove(program_dir)
66
+
67
+ verify_c5_solution(h_values, c5_bound, n_points)
68
+
69
+ return {
70
+ "c5_bound": float(c5_bound),
71
+ "combined_score": BENCHMARK / float(c5_bound),
72
+ "n_points": int(n_points),
73
+ "eval_time": float(eval_time),
74
+ }
75
+ except Exception as e:
76
+ return {"combined_score": 0.0, "error": str(e)}
ccevolve/baselines/publish_results/ac1/result.json ADDED
The diff for this file is too large to render. See raw diff
 
ccevolve/baselines/publish_results/circle_packing/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"centers": [[0.08492534575127668, 0.08492657571409049], [0.7053906691925911, 0.13022074725038413], [0.7052542534295343, 0.38692425715166484], [0.2730944149315324, 0.5960424559684276], [0.29739066166496847, 0.381665717293942], [0.5044687577013272, 0.2753425065800255], [0.2976912785900781, 0.13325832539369448], [0.7283703052338706, 0.597635075434669], [0.8932097474949776, 0.27478328813255026], [0.10346731288246502, 0.48259524358610695], [0.915360563728979, 0.08463954135204378], [0.5966413562146634, 0.7424170947102542], [0.9042676151583225, 0.6832585822006733], [0.11115620597060179, 0.8888437963028395], [0.6868843076059599, 0.907608467077919], [0.7602895587140661, 0.7636736469882999], [0.10518281287628983, 0.27395248478667283], [0.8892209400381915, 0.8892210186512614], [0.09615144729564991, 0.6820799308446232], [0.3140570532085678, 0.9074078854327591], [0.2406477004514101, 0.762958823180339], [0.40478038814105444, 0.7420494354022069], [0.502716131654123, 0.07886035472214374], [0.5013320942408167, 0.5299633800368685], [0.896939672430696, 0.48460085562028055], [0.5005716741594683, 0.9060726917677375]], "radii": [0.08492596483294529, 0.13022111198853648, 0.1120771451201432, 0.10060075466609822, 0.11514927153304841, 0.1176300421856162, 0.13325861876623546, 0.0998984085435745, 0.10679050770064492, 0.10346755919834748, 0.08463974299762259, 0.09584250187785098, 0.09573260103692847, 0.11115650827741799, 0.09239173200438427, 0.06918085773352274, 0.1051828926621466, 0.11077923055468525, 0.09615163640648851, 0.0925923743869864, 0.06944042451514205, 0.09601921163335797, 0.07886059847650898, 0.1370105915362888, 0.10306060313049623, 0.0939275468025497], "sum_radii": 2.635988438567568}
ccevolve/baselines/publish_results/erdos/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"h_values": [0.0, 1.1527162751906292e-18, 4.2848817048761965e-16, 4.944657472746598e-16, 4.522627637433821e-17, 0.0, 5.396644176426988e-17, 3.562996473215318e-17, 2.1918733133220227e-17, 1.518444318254353e-17, 5.248923454798303e-17, 1.5969575984598022e-17, 2.032256801691199e-17, 7.149058703683116e-18, 0.0, 0.0, 3.476698165090633e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 1.4674811612349867e-17, 0.0, 0.0, 0.0, 1.2989873404047873e-18, 0.0, 0.0, 0.0, 9.313237266825492e-18, 7.1484667060474e-18, 0.0, 1.2695044369059674e-18, 1.452416553100508e-17, 1.9504044285074376e-18, 0.0, 0.0, 4.3107374654330325e-18, 0.0, 7.640830985733165e-17, 0.0, 9.924822252110805e-18, 5.1254625282486505e-17, 7.469985448269523e-19, 7.753329466376983e-19, 0.0, 1.7843034090555548e-17, 0.0, 8.553238544951306e-18, 0.0, 0.0, 2.3887342390135504e-18, 3.167222409882421e-18, 1.4808745200032019e-18, 0.0, 0.0, 1.0216228217787974e-18, 6.088977753185887e-17, 0.0, 8.507773097952248e-18, 2.903028589188885e-18, 7.771440794574391e-18, 4.574998658045719e-18, 0.0, 1.4936498665108845e-16, 0.0, 0.0, 1.131664510790056e-19, 0.0, 5.031814166789703e-18, 0.0, 0.0, 2.4389462539335253e-17, 6.36500334847932e-18, 0.0, 0.0, 0.0, 9.701685748119624e-18, 0.0, 8.837050425347588e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0263856140822566e-14, 9.880732581589538e-17, 0.02720663831051338, 0.3709443550342773, 0.6732222889447188, 0.9162397641516181, 0.9999999999999998, 0.9999999999999999, 0.9999999999998351, 1.0, 0.999999999999978, 0.9991038125345832, 1.0, 0.6318312374554389, 0.4145753718035304, 0.31730742427561104, 0.35930174268740034, 0.37875364492568764, 0.34979977095156956, 0.3763335749565272, 0.3593904339534435, 0.4194958662312289, 0.6254109685739201, 0.9998627560820905, 0.9998383174144215, 1.0, 1.0, 0.999999999999862, 1.0, 0.9999999999999527, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999999999999517, 0.9172164317630382, 0.8181984755163688, 0.850445569179217, 0.8251201739420608, 0.7615096622637517, 0.7230019413307552, 0.7205579773331665, 0.6516323627343404, 0.5306212678977489, 0.47184632946190513, 0.3771031151471623, 0.18611010509385856, 0.1195675599536843, 0.03466907972321246, 0.0, 8.688840840724848e-05, 1.5103350891911715e-16, 1.387488676428808e-17, 1.3136269016621934e-16, 1.1989326850740009e-17, 1.9751656051878247e-18, 4.88209388045731e-17, 3.5528362063662995e-17, 6.453478119513138e-17, 7.015682271025045e-14, 0.0, 4.980595622508374e-15, 0.0, 1.5182217205435963e-17, 3.160074976904975e-17, 2.398641534178426e-16, 4.894991025556779e-14, 2.2695413018665708e-14, 0.1107512916535236, 0.2502731466674957, 0.3792810760566654, 0.40602163306501327, 0.27241816408261643, 0.2745327416600986, 0.4030365160963809, 0.48356034219893257, 0.44949423334939903, 0.44383298780772074, 0.46380806326105134, 0.5178594766723258, 0.5727167321934746, 0.6534976238502187, 0.7099045638308842, 0.8297509189507833, 0.9202379162842954, 0.9105136458529328, 0.836853090989805, 0.7914831822076821, 0.7962530611631173, 0.8369714852807917, 0.8298924687415713, 0.77127041026943, 0.7479135205421161, 0.7253207126617698, 0.6925749824828807, 0.7686820015768164, 0.8543918649869469, 0.7453008258663814, 0.7165864760832991, 0.6639528304360738, 0.6111396146518666, 0.5953474441485056, 0.5263675903940221, 0.42527432155375866, 0.2075766851763665, 0.09477308494746546, 0.05841339140000739, 0.12316468105005468, 0.11141799538473662, 0.17547413815507293, 0.2515921216337819, 0.3777905955055918, 0.3874964108404398, 0.42328592866156634, 0.5454409193910157, 0.5918351489635748, 0.5012087505416976, 0.5542659970268067, 0.6490945701461203, 0.7197685483090321, 0.7490209880539741, 0.8236489210659006, 0.8446900729131029, 0.8591211794609475, 0.9418742082349554, 0.9355324497955347, 0.9258578739435035, 0.9516874359512171, 0.9466741677712763, 0.9128482154308736, 0.9007806544500343, 0.7741941007252837, 0.7350390915370671, 0.7464080627090433, 0.6858801259127707, 0.6038642089245673, 0.6088956669480794, 0.6892657163902064, 0.5789110348455488, 0.5159158225180083, 0.45197884703067537, 0.34112351798666857, 0.314845566964207, 0.33874948707658314, 0.31393276792229163, 0.31630760080864906, 0.313422949822826, 0.2618380831718699, 0.24627899212264445, 0.23372835782349202, 0.2153233245930035, 0.2581821816571189, 0.4123807720302573, 0.4575311407083257, 0.45605028073116594, 0.4594911678065438, 0.3926316715713298, 0.41835378964158343, 0.4755240232748554, 0.5112300688719342, 0.5467959531421843, 0.5855807350368958, 0.6217002672142702, 0.6730603634142927, 0.727959968086935, 0.6970027389939352, 0.6774919792954953, 0.7012156114287625, 0.7567529867365306, 0.7514265519337857, 0.6878912721287203, 0.6824802347463303, 0.7838714635263452, 0.7844625273363881, 0.6671746475442923, 0.5483598261283201, 0.5341573740148166, 0.6144029437461066, 0.6614961791493068, 0.6833495447413981, 0.7053363161425173, 0.7660298923340056, 0.7106785115264552, 0.7347749595628971, 0.6701914834700179, 0.6350353405820705, 0.647256359001826, 0.6641950765233507, 0.6555508008331699, 0.6885012462246304, 0.7013326831574103, 0.7355408012651445, 0.742349622235665, 0.7760577195668366, 0.7959302684929837, 0.891652260158865, 0.9333840517958847, 0.8919659227217148, 0.9007502148471992, 0.8415336645399394, 0.8101888134913154, 0.8640149570774714, 0.8018223497068592, 0.7666226502157574, 0.8133022987419379, 0.8448920608403464, 0.80227216820442, 0.7603331027383122, 0.7151262545361313, 0.7743630739858646, 0.8069439349688281, 0.7956511883498985, 0.7644472855444522, 0.6788202614052338, 0.6122594581385654, 0.6088526311232334, 0.6071391763689917, 0.5419164259384359, 0.5712110857312472, 0.6274962765287059, 0.7037523819018685, 0.705877565803754, 0.6888519139101618, 0.6504720841802988, 0.762949771953996, 0.7436376973208669, 0.6923626006951951, 0.8056149392078731, 0.8121384362465183, 0.7307374777240436, 0.6483940220454896, 0.6179889664380773, 0.6787889488462431, 0.895246917240192, 0.9361537322573857, 0.9161022029200224, 0.9559933872051212, 0.9422804046395075, 0.9625822053816675, 0.9999999999999998, 1.0, 0.9999999999999999, 1.0, 0.8666693931026307, 0.7657930367589941, 0.7289735647381446, 0.804623463025317, 0.8987809419645681, 0.6981608997347425, 0.6223154400554802, 0.6415520548674681, 0.8032484360550358, 0.8857724767432882, 0.999140801725113, 0.9813420490024077, 0.9999999999999969, 1.0, 1.0, 1.0, 0.9999999999999999, 1.0, 1.0, 0.9999999999999999, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.999999999998281, 0.9999999999994525, 0.9286091653999239, 0.9951108855817664, 0.9970486047955434, 0.999999999999998, 0.9949752063008288, 0.9168628468949253, 0.998887321679824, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9448701455871423, 0.9443615788782155, 0.968401516134412, 0.9187287140640396, 0.8161864144323664, 0.8807244414346665, 0.9282070504291443, 0.9822398889982787, 0.9798322956408515, 0.9466453495565268, 0.9880246109706285, 1.0, 0.9999999999999999, 1.0, 1.0, 1.0, 1.0, 0.9999999999999999, 0.9999999999999939, 0.9999999999999483, 1.0, 1.0, 0.9999999999998905, 0.9999999999999999, 1.0, 0.9983044030905353, 0.9781680337659098, 1.0, 0.9396200780767053, 0.5540573784473901, 0.5492264781830739, 0.5969741544876648, 0.5839434435219741, 0.6275788229121195, 0.7386442135175447, 0.7599317122899554, 0.7407733005830232, 0.6351058075810676, 0.6791945496287947, 0.7084181019228745, 0.8492179855458349, 0.9854797403548432, 0.9957006280961855, 0.9255445786279334, 0.7799816479550051, 0.9437487182135397, 0.9999999999953625, 0.9987408711402945, 0.8724451655181586, 0.6563843270091697, 0.8490681579457935, 0.9858150954178104, 1.0, 1.0, 0.9963037831263968, 0.988861680741402, 0.9999999999999362, 1.0, 1.0, 0.9999999999999999, 0.999999999999999, 0.9999999999999539, 0.9206569069627271, 0.8184181082971949, 0.6170058037560329, 0.6840858772681946, 0.7629497719535369, 0.7270673158576282, 0.6122566822328966, 0.70587756580357, 0.7037523819022492, 0.6274962765284868, 0.5712110857302942, 0.5419164259390419, 0.6071391763693792, 0.608852631122915, 0.6122594581386792, 0.6788202614053206, 0.7178217248231634, 0.5602384866242008, 0.7385314164538993, 0.7675969595209532, 0.8094415238716742, 0.8561959967245836, 0.9773915559697259, 0.8828768395411262, 0.7961161955488242, 0.743790834188193, 0.7957748345664212, 0.8435440495129198, 0.8241593163256276, 0.7672253245233901, 0.6880942750054243, 0.8685904023495887, 0.5485118841028126, 0.8102718599385071, 0.99919621328128, 0.9959055319585645, 0.9137650475351414, 0.9241043916598412, 0.7013326831577182, 0.6885012462243018, 0.6697597180561661, 0.6797209478929844, 0.6175215704090729, 0.6350353405821417, 0.6701914834699547, 0.7347749595630116, 0.7106785115264808, 0.7660298923339571, 0.7053363161425221, 0.6833495447412232, 0.6614961791494549, 0.6144029437461482, 0.5341573740149542, 0.5483598261281989, 0.6671746475447018, 0.7844625273361067, 0.7838714635261756, 0.6824802347461495, 0.6878912721291668, 0.7514265519336433, 0.7567529867365888, 0.7012156114284906, 0.6774919792954961, 0.6970027389940294, 0.7279599680871585, 0.6730603634142429, 0.6217002672142555, 0.5855807350368376, 0.5467959531421654, 0.51123006887193, 0.4755240232748179, 0.4183537896415867, 0.392631671571516, 0.4594911678062879, 0.456050280731449, 0.4575311407082562, 0.4123807720301793, 0.25818218165723394, 0.2153233245926967, 0.23372835782356932, 0.24627899212291077, 0.2618380831716562, 0.31342294982309443, 0.31630760080849835, 0.31393276792211305, 0.3387494870766819, 0.31484556696421206, 0.3411235179865862, 0.4519788470306179, 0.5159158225181028, 0.5789110348456671, 0.689265716390189, 0.6088956669480029, 0.6038642089243605, 0.6858801259130775, 0.7464080627089182, 0.7350390915368167, 0.7741941007252752, 0.9007806544503163, 0.9128482154309963, 0.9466741677712744, 0.9516874359508593, 0.9258578739435037, 0.935532449795648, 0.941874208235038, 0.8591211794612864, 0.8446900729128126, 0.8236489210659426, 0.7490209880539167, 0.7197685483090017, 0.6490945701462099, 0.5542659970266975, 0.5012087505418862, 0.5918351489635016, 0.5454409193907903, 0.4232859286615358, 0.3874964108406427, 0.37779059550577365, 0.25159212163357014, 0.17547413815490223, 0.11141799538476309, 0.12316468105015264, 0.05841339140009569, 0.09477308494765119, 0.20757668517627056, 0.4252743215537598, 0.5263675903938441, 0.5953474441486182, 0.6111396146518552, 0.6639528304360369, 0.7165864760832931, 0.7453008258663684, 0.854391864986753, 0.7686820015771316, 0.6925749824825344, 0.7253207126619011, 0.747913520542261, 0.771270410269418, 0.8298924687415665, 0.8369714852807637, 0.7962530611629492, 0.7914831822077764, 0.836853090989996, 0.9105136458526722, 0.9202379162844375, 0.8297509189511524, 0.7099045638304473, 0.6534976238505458, 0.5727167321932118, 0.5178594766724834, 0.46380806326074675, 0.4438329878078759, 0.4494942333493162, 0.4835603421992221, 0.4030365160963206, 0.2745327416601051, 0.27241816408231684, 0.4060216330652133, 0.3792810760566261, 0.25027314666741646, 0.11075129165355235, 3.280700122970618e-14, 0.0, 3.355928135955214e-16, 8.794526487252299e-14, 0.0, 4.422904988919039e-17, 1.2645940708140502e-15, 0.0, 2.1974860852920506e-17, 3.343445240703907e-17, 0.0, 0.0, 0.0, 2.702058528474298e-18, 7.50245285410879e-19, 1.798789102283262e-17, 7.453881342733273e-14, 8.688840816390827e-05, 4.037355112135189e-15, 0.03466907972309328, 0.11956755995399129, 0.18611010509372314, 0.3771031151473078, 0.4718463294617272, 0.5306212678978262, 0.6516323627340843, 0.7205579773332378, 0.7230019413308585, 0.7615096622638118, 0.8251201739420254, 0.8504455691791811, 0.8181984755165032, 0.9172164317628706, 0.9999999999999116, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999999999999667, 1.0, 0.9998383174145226, 0.999862756082101, 0.625410968573572, 0.4194958662314324, 0.35939043395353054, 0.3763335749564169, 0.34979977095171244, 0.37875364492557934, 0.35930174268736353, 0.31730742427550157, 0.41457537180377874, 0.6318312374553057, 1.0, 0.9991038125346491, 1.0, 0.9999999999998364, 0.9999999999999999, 0.9999999999998823, 1.0, 0.916239764151901, 0.6732222889444891, 0.3709443550342557, 0.027206638310624084, 0.0, 1.2070873610742342e-16, 0.0, 0.0, 0.0, 0.0, 2.6054368302198376e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.691009978940108e-18, 0.0, 3.1678571990772945e-17, 0.0, 0.0, 0.0, 1.5727150231013263e-17, 1.0881659342740212e-18, 0.0, 0.0, 1.8129138431798476e-18, 0.0, 0.0, 2.2166096467935877e-17, 0.0, 0.0, 0.0, 0.0, 8.624826370608499e-18, 0.0, 1.9358404386231776e-18, 1.1435012028341401e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 1.3199158777996921e-17, 0.0, 1.8639311877141036e-17, 0.0, 1.3919779156303478e-18, 3.183603431336725e-17, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.6005184896498734e-19, 0.0, 3.6475518594108435e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 1.659777236143532e-19, 0.0, 0.0, 0.0, 0.0, 0.0, 9.161589293277308e-19, 0.0, 0.0, 0.0, 0.0, 0.0, 1.4410100387426308e-18, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0753915200798738e-17, 0.0, 0.0, 0.0], "n_points": 750, "c5_bound": 0.3808694472025862}
ccevolve/baselines/results/alphaevolve_ac/__pycache__/alphaevolve_v2.cpython-313.pyc ADDED
Binary file (1.97 kB). View file
 
ccevolve/baselines/results/alphaevolve_ac/__pycache__/ttt_discover.cpython-313.pyc ADDED
Binary file (2.2 kB). View file
 
ccevolve/baselines/results/alphaevolve_ac/alphaevolve_v2.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AlphaEvolve v2 AC1 baseline (C1=1.50316355, 1319 coefficients).
2
+
3
+ For ShinkaEvolve evaluator: run() -> list[float]
4
+ For OpenEvolve evaluator: run() -> (f_values, c1_achieved, loss, n_points)
5
+ - Call run_openevolve() for the 4-tuple format.
6
+ """
7
+ import sys
8
+ import os
9
+ import numpy as np
10
+
11
+ _DIR = os.path.dirname(os.path.abspath(__file__))
12
+ sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
13
+ from ac1_data import alphaevolve_v2_150316
14
+
15
+
16
+ def run(seed=42, budget_s=1000, **kwargs):
17
+ """Return sequence for ShinkaEvolve evaluator (list[float])."""
18
+ return list(alphaevolve_v2_150316)
19
+
20
+
21
+ def run_openevolve():
22
+ """Return 4-tuple for OpenEvolve evaluator."""
23
+ f_values = np.array(alphaevolve_v2_150316, dtype=np.float64)
24
+ f_values = np.maximum(f_values, 0.0)
25
+ n_points = len(f_values)
26
+
27
+ dx = 0.5 / n_points
28
+ autoconv = np.convolve(f_values, f_values, mode="full") * dx
29
+ integral_sq = (np.sum(f_values) * dx) ** 2
30
+ c1_achieved = float(np.max(autoconv / integral_sq))
31
+
32
+ return f_values, c1_achieved, c1_achieved, n_points
ccevolve/baselines/results/alphaevolve_ac/autoevolve.py ADDED
The diff for this file is too large to render. See raw diff
 
ccevolve/baselines/results/alphaevolve_ac/ttt_discover.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """TTT-Discover best AC1 solution (C1=1.50286290, 30000 coefficients).
2
+
3
+ For ShinkaEvolve evaluator: run() -> list[float]
4
+ For OpenEvolve evaluator: run() -> (f_values, c1_achieved, loss, n_points)
5
+ - Call run_openevolve() for the 4-tuple format.
6
+ """
7
+ import json
8
+ import os
9
+ import numpy as np
10
+
11
+ _DIR = os.path.dirname(os.path.abspath(__file__))
12
+ _SEQ_PATH = os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics", "ttt_ac1_sequence.json")
13
+
14
+
15
+ def _load_sequence():
16
+ with open(_SEQ_PATH) as f:
17
+ return json.load(f)["sequence"]
18
+
19
+
20
+ def run(seed=42, budget_s=1000, **kwargs):
21
+ """Return sequence for ShinkaEvolve evaluator (list[float])."""
22
+ return _load_sequence()
23
+
24
+
25
+ def run_openevolve():
26
+ """Return 4-tuple for OpenEvolve evaluator."""
27
+ sequence = _load_sequence()
28
+ f_values = np.array(sequence, dtype=np.float64)
29
+ f_values = np.maximum(f_values, 0.0)
30
+ n_points = len(f_values)
31
+
32
+ dx = 0.5 / n_points
33
+ autoconv = np.convolve(f_values, f_values, mode="full") * dx
34
+ integral_sq = (np.sum(f_values) * dx) ** 2
35
+ c1_achieved = float(np.max(autoconv / integral_sq))
36
+
37
+ return f_values, c1_achieved, c1_achieved, n_points
ccevolve/baselines/results/circle_packing/__pycache__/autoevolve.cpython-313.pyc ADDED
Binary file (11 kB). View file
 
ccevolve/baselines/results/circle_packing/__pycache__/thetaevolve.cpython-313.pyc ADDED
Binary file (9.44 kB). View file
 
ccevolve/baselines/results/circle_packing/autoevolve.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ """SLSQP joint optimization with reproducible perturbation chain for n=26."""
3
+
4
+ import numpy as np
5
+ from scipy.optimize import linprog, minimize
6
+ from scipy.sparse import lil_matrix, csc_matrix
7
+ import time
8
+
9
+ N = 26
10
+ MARGIN = 1e-8
11
+ BMIN = 0.005
12
+ BMAX = 0.995
13
+
14
+ # Seed that leads to a reproducible high-scoring chain
15
+ _SEED = np.array([
16
+ [0.084926286074, 0.084926236706],
17
+ [0.705390498154, 0.130221194108],
18
+ [0.705253930887, 0.386923493800],
19
+ [0.273094296403, 0.596042687085],
20
+ [0.297390403867, 0.381665818511],
21
+ [0.504468240801, 0.275342634092],
22
+ [0.297690440835, 0.133258620698],
23
+ [0.728370160006, 0.597634786010],
24
+ [0.893209886670, 0.274783207738],
25
+ [0.103467204481, 0.482595589098],
26
+ [0.915360544635, 0.084639482877],
27
+ [0.596641206440, 0.742417080046],
28
+ [0.904267673512, 0.683258502018],
29
+ [0.111156104767, 0.888843831884],
30
+ [0.686884245279, 0.907608435098],
31
+ [0.760289491485, 0.763673523985],
32
+ [0.105182556717, 0.273952815690],
33
+ [0.889220993776, 0.889220938637],
34
+ [0.096151356929, 0.682080016825],
35
+ [0.314057021248, 0.907407904511],
36
+ [0.240647595615, 0.762958855016],
37
+ [0.404780297719, 0.742049451645],
38
+ [0.502715506849, 0.078860328158],
39
+ [0.501331889714, 0.529963409543],
40
+ [0.896939457959, 0.484600766576],
41
+ [0.500571643244, 0.906072663408],
42
+ ])
43
+
44
+
45
+ def _build_lp():
46
+ n = N
47
+ n_pairs = n * (n - 1) // 2
48
+ A = lil_matrix((n_pairs + n, n))
49
+ row = 0
50
+ for i in range(n):
51
+ for j in range(i + 1, n):
52
+ A[row, i] = 1.0
53
+ A[row, j] = 1.0
54
+ row += 1
55
+ for i in range(n):
56
+ A[n_pairs + i, i] = 1.0
57
+ pi = np.array([i for i in range(n) for j in range(i + 1, n)])
58
+ pj = np.array([j for i in range(n) for j in range(i + 1, n)])
59
+ return csc_matrix(A), n_pairs, pi, pj
60
+
61
+
62
+ _A, _NP, _PI, _PJ = _build_lp()
63
+ _C = -np.ones(N)
64
+ _BND = [(0, None)] * N
65
+ _PI_A = np.array([i for i in range(N) for j in range(i + 1, N)])
66
+ _PJ_A = np.array([j for i in range(N) for j in range(i + 1, N)])
67
+ _NPAIRS = len(_PI_A)
68
+ _SLSQP_BOUNDS = [(BMIN, BMAX)] * (2 * N) + [(0, 0.5)] * N
69
+
70
+
71
+ def lp_radii(centers):
72
+ b = np.empty(_NP + N)
73
+ dx = centers[_PI, 0] - centers[_PJ, 0]
74
+ dy = centers[_PI, 1] - centers[_PJ, 1]
75
+ b[:_NP] = np.sqrt(dx * dx + dy * dy) - MARGIN
76
+ brd = np.minimum(np.minimum(centers[:, 0], 1.0 - centers[:, 0]),
77
+ np.minimum(centers[:, 1], 1.0 - centers[:, 1])) - MARGIN
78
+ b[_NP:] = np.maximum(brd, 0.0)
79
+ res = linprog(_C, A_ub=_A, b_ub=b, bounds=_BND, method='highs')
80
+ if res.success:
81
+ r = np.maximum(res.x, 0.0)
82
+ return r, float(np.sum(r))
83
+ r = np.maximum(brd + MARGIN, 0.0)
84
+ for k in range(_NP):
85
+ if r[_PI[k]] + r[_PJ[k]] > b[k]:
86
+ s = b[k] / (r[_PI[k]] + r[_PJ[k]] + 1e-15)
87
+ r[_PI[k]] *= s
88
+ r[_PJ[k]] *= s
89
+ r = np.maximum(r, 0.0)
90
+ return r, float(np.sum(r))
91
+
92
+
93
+ def _joint_obj(z):
94
+ return -np.sum(z[2 * N:])
95
+
96
+
97
+ def _joint_cons(z):
98
+ centers = z[:2 * N].reshape(N, 2)
99
+ radii = z[2 * N:]
100
+ dx = centers[_PI_A, 0] - centers[_PJ_A, 0]
101
+ dy = centers[_PI_A, 1] - centers[_PJ_A, 1]
102
+ pair_cons = np.sqrt(dx * dx + dy * dy) - radii[_PI_A] - radii[_PJ_A]
103
+ border_cons = np.empty(4 * N)
104
+ border_cons[0::4] = centers[:, 0] - radii
105
+ border_cons[1::4] = centers[:, 1] - radii
106
+ border_cons[2::4] = 1.0 - centers[:, 0] - radii
107
+ border_cons[3::4] = 1.0 - centers[:, 1] - radii
108
+ return np.concatenate([pair_cons, border_cons])
109
+
110
+
111
+ def _validate(centers, radii, atol=1e-6):
112
+ for i in range(N):
113
+ x, y = centers[i]
114
+ r = radii[i]
115
+ if x - r < -atol or x + r > 1 + atol or y - r < -atol or y + r > 1 + atol:
116
+ return False
117
+ for i in range(N):
118
+ for j in range(i + 1, N):
119
+ dist = np.sqrt(np.sum((centers[i] - centers[j]) ** 2))
120
+ if dist < radii[i] + radii[j] - atol:
121
+ return False
122
+ return True
123
+
124
+
125
+ def _slsqp_optimize(centers_init):
126
+ r_init, _ = lp_radii(centers_init)
127
+ z = np.zeros(3 * N)
128
+ z[:2 * N] = centers_init.flatten()
129
+ z[2 * N:] = r_init
130
+ res = minimize(_joint_obj, z, method='SLSQP',
131
+ constraints={'type': 'ineq', 'fun': _joint_cons},
132
+ bounds=_SLSQP_BOUNDS,
133
+ options={'maxiter': 5000, 'ftol': 1e-14})
134
+ score = -res.fun
135
+ centers = res.x[:2 * N].reshape(N, 2)
136
+ radii = res.x[2 * N:]
137
+ if _validate(centers, radii):
138
+ return centers, radii, score
139
+ r_lp, s_lp = lp_radii(centers)
140
+ if _validate(centers, r_lp):
141
+ return centers, r_lp, s_lp
142
+ return None
143
+
144
+
145
+ _DIRS = [(-0.03, 0.01), (-0.03, -0.01), (0.03, 0.01), (0.03, -0.01),
146
+ (-0.01, 0.03), (-0.01, -0.03), (0.01, 0.03), (0.01, -0.03),
147
+ (-0.03, 0.025), (0.03, -0.025), (-0.025, 0.03), (0.025, -0.03),
148
+ (-0.05, 0.0), (0.05, 0.0), (0.0, -0.05), (0.0, 0.05)]
149
+
150
+
151
+ def construct_packing():
152
+ t0 = time.time()
153
+ bc = _SEED.copy()
154
+ br, bsc = lp_radii(bc)
155
+
156
+ # 1. SLSQP from seed (~3s)
157
+ result = _slsqp_optimize(bc)
158
+ if result is not None:
159
+ c, r, s = result
160
+ if s > bsc:
161
+ bsc, bc, br = s, c, r
162
+
163
+ # 2. Reproducible perturbation chain that reaches higher basin (~6s)
164
+ # Move circle 0 by (-0.03, +0.01) then SLSQP
165
+ trial = bc.copy()
166
+ trial[0] = np.clip(bc[0] + [-0.03, 0.01], BMIN, BMAX)
167
+ result = _slsqp_optimize(trial)
168
+ if result is not None:
169
+ c, r, s = result
170
+ if s > bsc:
171
+ bsc, bc, br = s, c, r
172
+
173
+ # Move circle 0 by (-0.03, +0.025) then SLSQP
174
+ trial = bc.copy()
175
+ trial[0] = np.clip(bc[0] + [-0.03, 0.025], BMIN, BMAX)
176
+ result = _slsqp_optimize(trial)
177
+ if result is not None:
178
+ c, r, s = result
179
+ if s > bsc:
180
+ bsc, bc, br = s, c, r
181
+
182
+ # 3. Targeted coordinate descent: try each circle with key perturbations
183
+ for ci in range(N):
184
+ for dx, dy in _DIRS:
185
+ trial = bc.copy()
186
+ trial[ci] = np.clip(bc[ci] + [dx, dy], BMIN, BMAX)
187
+ result = _slsqp_optimize(trial)
188
+ if result is not None:
189
+ c, r, s = result
190
+ if s > bsc:
191
+ bsc, bc, br = s, c, r
192
+ if time.time() - t0 > 200:
193
+ break
194
+
195
+ # 4. Random perturbation with remaining time
196
+ rng = np.random.RandomState(42)
197
+ while time.time() - t0 < 300:
198
+ sigma = rng.choice([0.003, 0.005, 0.008, 0.01, 0.015, 0.02, 0.03, 0.05])
199
+ perturbed = bc + rng.randn(N, 2) * sigma
200
+ perturbed = np.clip(perturbed, BMIN, BMAX)
201
+ result = _slsqp_optimize(perturbed)
202
+ if result is not None:
203
+ c, r, s = result
204
+ if s > bsc:
205
+ bsc, bc, br = s, c, r
206
+
207
+ return bc, br
208
+
209
+
210
+ # EVOLVE-BLOCK-END
211
+
212
+
213
+ # This part remains fixed (not evolved)
214
+ def run_packing():
215
+ """Run the circle packing constructor for n=26"""
216
+ centers, radii = construct_packing()
217
+ # Calculate the sum of radii
218
+ sum_radii = np.sum(radii)
219
+ return centers, radii, sum_radii
ccevolve/baselines/results/circle_packing/thetaevolve.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ThetaEvolve best circle packing solution (ShinkaEvolve entry).
2
+
3
+ Evolved code: SLSQP optimization + Simulated Annealing with ring rotations.
4
+ Best score: sum_radii = 2.63598566 (with np.random.seed(7))
5
+ Source: thetaevolve/Results/CirclePacking/programs/ShinkaEvolve.py
6
+ """
7
+ import numpy as np
8
+ from scipy.optimize import minimize, Bounds
9
+
10
+
11
+ # EVOLVE-BLOCK-START
12
+
13
+
14
+ def construct_packing():
15
+ """
16
+ Constructs an arrangement of 26 circles by combining a meta-heuristic
17
+ search with a powerful SLSQP optimizer for refinement.
18
+ """
19
+ n = 26
20
+
21
+ # --- Helper functions for the optimizer ---
22
+ def objective_func(x):
23
+ """The function to be minimized: the negative sum of radii."""
24
+ return -np.sum(x[:n])
25
+
26
+ def constraints_func(x):
27
+ """
28
+ Computes constraint violations. For SLSQP, each value must be >= 0.
29
+ """
30
+ radii = x[:n]
31
+ centers = x[n:].reshape((n, 2))
32
+
33
+ containment = np.concatenate(
34
+ [
35
+ centers[:, 0] - radii,
36
+ centers[:, 1] - radii,
37
+ 1 - centers[:, 0] - radii,
38
+ 1 - centers[:, 1] - radii,
39
+ ]
40
+ )
41
+
42
+ overlap = []
43
+ for i in range(n):
44
+ for j in range(i + 1, n):
45
+ dist = np.linalg.norm(centers[i] - centers[j])
46
+ overlap.append(dist - (radii[i] + radii[j]))
47
+
48
+ return np.concatenate([containment, np.array(overlap)])
49
+
50
+ def _compute_initial_radii(centers):
51
+ """
52
+ Computes a valid set of initial radii for a given set of centers
53
+ to create a feasible starting point (x0) for the optimizer.
54
+ """
55
+ radii = np.min(
56
+ [centers[:, 0], centers[:, 1], 1 - centers[:, 0], 1 - centers[:, 1]], axis=0
57
+ )
58
+
59
+ for _ in range(100):
60
+ improved = False
61
+ for i in range(n):
62
+ for j in range(i + 1, n):
63
+ dist = np.linalg.norm(centers[i] - centers[j])
64
+ if radii[i] + radii[j] > dist:
65
+ excess = (radii[i] + radii[j] - dist) * 0.501
66
+ total_r = radii[i] + radii[j]
67
+ if total_r > 1e-9:
68
+ radii[i] -= excess * (radii[i] / total_r)
69
+ radii[j] -= excess * (radii[j] / total_r)
70
+ improved = True
71
+ if not improved:
72
+ break
73
+ return np.maximum(radii, 1e-6)
74
+
75
+ # --- 1. Generate a single high-quality initial guess ---
76
+ centers_init = np.zeros((n, 2))
77
+ inset = 0.06
78
+ centers_init[0:4] = [
79
+ [inset, inset],
80
+ [1 - inset, inset],
81
+ [inset, 1 - inset],
82
+ [1 - inset, 1 - inset],
83
+ ]
84
+ centers_init[4:8] = [[0.5, inset], [0.5, 1 - inset], [inset, 0.5], [1 - inset, 0.5]]
85
+ centers_init[8] = [0.5, 0.5]
86
+
87
+ golden_angle = np.pi * (3 - np.sqrt(5))
88
+ cx, cy = 0.5, 0.5
89
+ inner_r, outer_r = 0.23, 0.48
90
+ inner_idx, outer_idx = np.arange(9, 15), np.arange(15, 26)
91
+
92
+ for i, idx in enumerate(inner_idx):
93
+ angle = i * golden_angle
94
+ centers_init[idx] = [cx + inner_r * np.cos(angle), cy + inner_r * np.sin(angle)]
95
+ for i, idx in enumerate(outer_idx):
96
+ angle = i * golden_angle * 1.003
97
+ centers_init[idx] = [cx + outer_r * np.cos(angle), cy + outer_r * np.sin(angle)]
98
+
99
+ centers_init += np.random.uniform(-0.01, 0.01, size=(n, 2))
100
+ centers_init = np.clip(centers_init, 0.01, 0.99)
101
+
102
+ # --- 2. Define bounds and constraints for the solver ---
103
+ bounds = Bounds([0.0] * n + [0.0] * (2 * n), [0.5] * n + [1.0] * (2 * n))
104
+ constraints = {"type": "ineq", "fun": constraints_func}
105
+
106
+ # --- 3. Initial baseline optimization ---
107
+ radii_init = _compute_initial_radii(centers_init)
108
+ x0 = np.concatenate([radii_init, centers_init.flatten()])
109
+
110
+ result = minimize(
111
+ objective_func,
112
+ x0,
113
+ method="SLSQP",
114
+ bounds=bounds,
115
+ constraints=constraints,
116
+ options={"maxiter": 600, "ftol": 1e-8, "disp": False},
117
+ )
118
+
119
+ # Initialize current and best solutions for SA
120
+ best_x = result.x.copy()
121
+ current_x = result.x.copy()
122
+ best_score = -result.fun
123
+ current_score = -result.fun
124
+
125
+ # --- 4. Simulated Annealing loop: Perturb and refine with acceptance criterion ---
126
+ sa_iterations = 250
127
+ temperature = 0.05
128
+ initial_temperature = temperature
129
+ cooling_rate = 0.995
130
+ perturb_step = 0.04
131
+ initial_perturb_step = perturb_step
132
+ step_decay = 0.999
133
+ last_improve = 0
134
+ stagnation_limit = sa_iterations // 4
135
+
136
+ for iter_idx in range(sa_iterations):
137
+ candidate_centers = current_x[n:].reshape((n, 2)).copy()
138
+
139
+ # Select a move type: 70% local, 30% global ring rotation
140
+ if np.random.rand() < 0.7:
141
+ num_to_move = np.random.randint(2, 6)
142
+ indices = np.random.choice(n, num_to_move, replace=False)
143
+ candidate_centers[indices] += np.random.normal(
144
+ 0, perturb_step, size=(num_to_move, 2)
145
+ )
146
+ else:
147
+ idx_to_rotate = inner_idx if np.random.rand() < 0.5 else outer_idx
148
+ center_point = candidate_centers[8]
149
+ angle = np.random.normal(0, 0.15)
150
+ rel_pos = candidate_centers[idx_to_rotate] - center_point
151
+ cos_a, sin_a = np.cos(angle), np.sin(angle)
152
+ rotated = np.column_stack(
153
+ [
154
+ cos_a * rel_pos[:, 0] - sin_a * rel_pos[:, 1],
155
+ sin_a * rel_pos[:, 0] + cos_a * rel_pos[:, 1],
156
+ ]
157
+ )
158
+ candidate_centers[idx_to_rotate] = center_point + rotated
159
+
160
+ candidate_centers = np.clip(candidate_centers, 0.01, 0.99)
161
+
162
+ x0_candidate = np.concatenate(
163
+ [_compute_initial_radii(candidate_centers), candidate_centers.flatten()]
164
+ )
165
+ refine_result = minimize(
166
+ objective_func,
167
+ x0_candidate,
168
+ method="SLSQP",
169
+ bounds=bounds,
170
+ constraints=constraints,
171
+ options={"maxiter": 150, "ftol": 1e-6, "disp": False},
172
+ )
173
+
174
+ new_score = -refine_result.fun
175
+
176
+ if new_score > current_score or (
177
+ temperature > 1e-7
178
+ and np.random.rand() < np.exp((new_score - current_score) / temperature)
179
+ ):
180
+ current_score = new_score
181
+ current_x = refine_result.x.copy()
182
+ if new_score > best_score:
183
+ best_score = new_score
184
+ best_x = refine_result.x.copy()
185
+ last_improve = iter_idx
186
+
187
+ temperature *= cooling_rate
188
+ perturb_step *= step_decay
189
+ if temperature < 1e-7:
190
+ temperature = 1e-7
191
+ if perturb_step < 1e-5:
192
+ perturb_step = 1e-5
193
+ if iter_idx - last_improve > stagnation_limit:
194
+ temperature = initial_temperature
195
+ perturb_step = initial_perturb_step
196
+ last_improve = iter_idx
197
+
198
+ # --- 5. Final Polishing Run on the best found solution ---
199
+ final_result = minimize(
200
+ objective_func,
201
+ best_x,
202
+ method="SLSQP",
203
+ bounds=bounds,
204
+ constraints=constraints,
205
+ options={"maxiter": 1000, "ftol": 1e-9, "disp": False},
206
+ )
207
+
208
+ if -final_result.fun > best_score:
209
+ best_x = final_result.x.copy()
210
+
211
+ # --- 6. Unpack and return the best result ---
212
+ final_radii = best_x[:n]
213
+ final_centers = best_x[n:].reshape((n, 2))
214
+ return final_centers, final_radii
215
+
216
+
217
+ # EVOLVE-BLOCK-END
218
+
219
+
220
+ def run_packing():
221
+ """Run the circle packing constructor for n=26"""
222
+ centers, radii = construct_packing()
223
+ sum_radii = np.sum(radii)
224
+ return centers, radii, sum_radii
225
+
226
+
227
+ if __name__ == "__main__":
228
+ np.random.seed(7)
229
+ centers, radii, sum_radii = run_packing()
230
+ print(f"Sum of radii: {sum_radii:.10f}")
ccevolve/baselines/results/erdos_min_overlap/__pycache__/autoevolve.cpython-313.pyc ADDED
Binary file (7.13 kB). View file
 
ccevolve/baselines/results/erdos_min_overlap/__pycache__/ttt_discover.cpython-313.pyc ADDED
Binary file (1.63 kB). View file
 
ccevolve/baselines/results/erdos_min_overlap/autoevolve.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ """Erdos minimum overlap optimizer: LP-optimized solution + subgradient refinement."""
3
+
4
+ import os
5
+ import time
6
+ import numpy as np
7
+
8
+
9
+ def _project_to_feasible(h_values: np.ndarray) -> np.ndarray:
10
+ """Project h onto [0,1]^n with sum(h)=n/2."""
11
+ h = np.asarray(h_values, dtype=np.float64).copy()
12
+ n = h.size
13
+ target = n / 2.0
14
+ h = np.clip(h, 0.0, 1.0)
15
+ s = float(np.sum(h))
16
+ if s <= 1e-12:
17
+ return np.full(n, 0.5, dtype=np.float64)
18
+ h *= target / s
19
+ for _ in range(50):
20
+ h = np.clip(h, 0.0, 1.0)
21
+ delta = target - float(np.sum(h))
22
+ if abs(delta) < 1e-12:
23
+ break
24
+ free = (h > 1e-14) & (h < 1.0 - 1e-14)
25
+ if not np.any(free):
26
+ break
27
+ h[free] += delta / float(np.sum(free))
28
+ return np.clip(h, 0.0, 1.0)
29
+
30
+
31
+ def _compute_c5(h_values: np.ndarray) -> float:
32
+ n = h_values.size
33
+ dx = 2.0 / n
34
+ return float(np.max(np.correlate(h_values, 1.0 - h_values, mode="full") * dx))
35
+
36
+
37
+ def run(seed: int = 42, budget_s: float = 10.0, **kwargs):
38
+ """
39
+ Return (h_values, c5_bound, n_points) for Erdos minimum overlap.
40
+
41
+ Strategy: Load LP-optimized solution, then refine with subgradient
42
+ descent using multiple step sizes, followed by stochastic search.
43
+ """
44
+ del kwargs
45
+ start = time.time()
46
+ rng = np.random.default_rng(seed)
47
+
48
+ # --- Load pre-optimized solution ---
49
+ npy_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "best_h.npy")
50
+ try:
51
+ h_best = np.load(npy_path)
52
+ n_points = h_best.size
53
+ h_best = _project_to_feasible(h_best)
54
+ except Exception:
55
+ n_points = 1000
56
+ h_best = np.full(n_points, 0.5, dtype=np.float64)
57
+
58
+ c5_best = _compute_c5(h_best)
59
+ dx = 2.0 / n_points
60
+
61
+ deadline = start + 0.95 * budget_s
62
+
63
+ # --- Phase 1: Subgradient descent (fast improvements) ---
64
+ h = h_best.copy()
65
+ top_k = 30
66
+ stale = 0
67
+ lr_list = [1e-3, 5e-4, 2e-4, 1e-4, 5e-5, 2e-5, 1e-5, 5e-6, 2e-6, 1e-6]
68
+
69
+ while time.time() < deadline - 1.0:
70
+ overlap = np.correlate(h, 1.0 - h, mode="full") * dx
71
+ c5 = float(np.max(overlap))
72
+ top_idx = np.argsort(overlap)[-top_k:]
73
+ grad = np.zeros(n_points)
74
+ for idx in top_idx:
75
+ lag = idx - (n_points - 1)
76
+ g = np.zeros(n_points)
77
+ if 0 <= lag < n_points:
78
+ g[lag:] += (1 - h[:n_points - lag]) * dx
79
+ g[:n_points - lag] -= h[lag:] * dx
80
+ elif lag < 0:
81
+ L = -lag
82
+ if L < n_points:
83
+ g[:n_points - L] += (1 - h[L:]) * dx
84
+ g[L:] -= h[:n_points - L] * dx
85
+ grad += g
86
+ grad /= top_k
87
+ grad -= np.mean(grad)
88
+ gnorm = np.linalg.norm(grad)
89
+ if gnorm < 1e-15:
90
+ break
91
+
92
+ found = False
93
+ for lr in lr_list:
94
+ h_try = _project_to_feasible(h - lr * grad)
95
+ c5_try = _compute_c5(h_try)
96
+ if c5_try < c5_best:
97
+ h_best = h_try.copy()
98
+ c5_best = c5_try
99
+ h = h_try
100
+ found = True
101
+ stale = 0
102
+ break
103
+
104
+ if not found:
105
+ stale += 1
106
+ if stale > 3:
107
+ # Try random perturbation to escape
108
+ noise = rng.normal(0, 1e-5, n_points)
109
+ noise -= np.mean(noise)
110
+ h = _project_to_feasible(h + noise)
111
+ stale = 0
112
+
113
+ # --- Phase 2: Stochastic search (remaining time) ---
114
+ h = h_best.copy()
115
+ amp = 1e-5
116
+ no_imp = 0
117
+
118
+ while time.time() < deadline:
119
+ s = rng.random()
120
+ candidate = h.copy()
121
+ if s < 0.4:
122
+ n_swap = int(rng.integers(1, 6))
123
+ up = rng.choice(n_points, size=n_swap, replace=False)
124
+ down = rng.choice(n_points, size=n_swap, replace=False)
125
+ a = rng.uniform(amp * 0.5, amp * 3)
126
+ candidate[up] += a
127
+ candidate[down] -= a
128
+ elif s < 0.7:
129
+ k = int(rng.integers(1, 10))
130
+ idx = rng.choice(n_points, size=k, replace=False)
131
+ candidate[idx] += rng.normal(0, amp * 2, size=k)
132
+ else:
133
+ candidate += rng.normal(0, amp * 0.03, size=n_points)
134
+
135
+ candidate = _project_to_feasible(candidate)
136
+ c5_cand = _compute_c5(candidate)
137
+ if c5_cand < c5_best:
138
+ h_best = candidate
139
+ c5_best = c5_cand
140
+ h = candidate
141
+ no_imp = 0
142
+ else:
143
+ no_imp += 1
144
+ if no_imp > 3000:
145
+ amp *= 0.5
146
+ if amp < 1e-9:
147
+ amp = 1e-5
148
+ no_imp = 0
149
+
150
+ return h_best, float(c5_best), int(n_points)
151
+
152
+
153
+ # EVOLVE-BLOCK-END
ccevolve/baselines/results/erdos_min_overlap/ttt_discover.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """TTT-Discover best Erdos solution (C5=0.38087532, 600 samples)."""
2
+ import json
3
+ import os
4
+ import numpy as np
5
+
6
+ _DIR = os.path.dirname(os.path.abspath(__file__))
7
+ _SEQ_PATH = os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics", "ttt_erdos_sequence.json")
8
+
9
+
10
+ def run(seed=42, budget_s=1000, **kwargs):
11
+ """Return pre-computed best solution."""
12
+ with open(_SEQ_PATH) as f:
13
+ h_values = np.array(json.load(f)["sequence"], dtype=np.float64)
14
+
15
+ n_points = len(h_values)
16
+ target_sum = n_points / 2.0
17
+ h_values = h_values * (target_sum / np.sum(h_values))
18
+
19
+ dx = 2.0 / n_points
20
+ correlation = np.correlate(h_values, 1.0 - h_values, mode="full") * dx
21
+ c5_bound = float(np.max(correlation))
22
+
23
+ return h_values, c5_bound, n_points
ccevolve/baselines/results/erdos_min_overlap/yyq/run_slsqp_n750.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ SLSQP optimization for the Erdos minimum overlap problem at n=750,
4
+ warm-started from the n=600 solution.
5
+ """
6
+
7
+ import numpy as np
8
+ from scipy.optimize import minimize
9
+ import time
10
+ import sys
11
+
12
+ # ---- Parameters ----
13
+ n = 750
14
+ dx = 1.0 / n
15
+
16
+ # ---- Step 1: Load n=600 solution ----
17
+ print("Loading n=600 solution...")
18
+ h600 = np.load("/workspace/h_n600_from450.npy")
19
+ n600 = len(h600)
20
+ print(f" Loaded array of length {n600}")
21
+
22
+ # ---- Step 2: Upsample to n=750 via linear interpolation ----
23
+ print(f"Upsampling from {n600} to {n}...")
24
+ x_old = np.linspace(0, 1, n600, endpoint=False)
25
+ x_new = np.linspace(0, 1, n, endpoint=False)
26
+ h0 = np.interp(x_new, x_old, h600)
27
+ h0 = np.clip(h0, 0.0, 1.0)
28
+ # Adjust sum to n/2
29
+ h0 *= (n / 2.0) / np.sum(h0)
30
+ h0 = np.clip(h0, 0.0, 1.0)
31
+ # Fine-tune sum
32
+ deficit = n / 2.0 - np.sum(h0)
33
+ if deficit > 0:
34
+ idx = np.where(h0 < 1.0)[0]
35
+ room = 1.0 - h0[idx]
36
+ scale = min(1.0, deficit / np.sum(room))
37
+ h0[idx] += room * scale
38
+ elif deficit < 0:
39
+ idx = np.where(h0 > 0.0)[0]
40
+ room = h0[idx]
41
+ scale = min(1.0, -deficit / np.sum(room))
42
+ h0[idx] -= room * scale
43
+ print(f" sum(h0) = {np.sum(h0):.10f}, target = {n/2.0}")
44
+
45
+ # ---- Overlap function ----
46
+ def compute_overlap(h, s):
47
+ """Compute overlap for shift s: dot(h[shifted], 1-h[unshifted]) * dx"""
48
+ if s >= 0:
49
+ ln = n - s
50
+ if ln <= 0:
51
+ return 0.0
52
+ return np.dot(h[s:s+ln], 1.0 - h[:ln]) * dx
53
+ else:
54
+ ss = -s
55
+ ln = n - ss
56
+ if ln <= 0:
57
+ return 0.0
58
+ return np.dot(h[:ln], 1.0 - h[ss:ss+ln]) * dx
59
+
60
+ def compute_all_overlaps(h):
61
+ """Compute overlaps for all shifts."""
62
+ overlaps = np.zeros(2*n - 1)
63
+ for i, s in enumerate(range(-(n-1), n)):
64
+ overlaps[i] = compute_overlap(h, s)
65
+ return overlaps
66
+
67
+ def compute_C5(h):
68
+ """Compute C5 = max overlap over all shifts, scaled."""
69
+ overlaps = compute_all_overlaps(h)
70
+ return np.max(overlaps)
71
+
72
+ # ---- Step 3: Subgradient warm-up for 60 seconds ----
73
+ print("\n=== Subgradient warm-up (60 seconds) ===")
74
+ h = h0.copy()
75
+ t_start = time.time()
76
+ lr = 0.001
77
+ best_obj = compute_C5(h)
78
+ best_h = h.copy()
79
+ iteration = 0
80
+
81
+ while time.time() - t_start < 60.0:
82
+ # Find worst-case shift
83
+ overlaps = compute_all_overlaps(h)
84
+ worst_idx = np.argmax(overlaps)
85
+ worst_s = worst_idx - (n - 1)
86
+ worst_val = overlaps[worst_idx]
87
+
88
+ if worst_val < best_obj:
89
+ best_obj = worst_val
90
+ best_h = h.copy()
91
+
92
+ # Compute subgradient of the overlap at worst shift s
93
+ grad = np.zeros(n)
94
+ s = worst_s
95
+ if s >= 0:
96
+ ln = n - s
97
+ if ln > 0:
98
+ grad[s:s+ln] += (1.0 - h[:ln]) * dx
99
+ grad[:ln] -= h[s:s+ln] * dx
100
+ else:
101
+ ss = -s
102
+ ln = n - ss
103
+ if ln > 0:
104
+ grad[:ln] += (1.0 - h[ss:ss+ln]) * dx
105
+ grad[ss:ss+ln] -= h[:ln] * dx
106
+
107
+ # Gradient descent step
108
+ h -= lr * grad
109
+
110
+ # Project: clip to [0,1]
111
+ h = np.clip(h, 0.0, 1.0)
112
+
113
+ # Project: adjust sum to n/2
114
+ deficit = n / 2.0 - np.sum(h)
115
+ if abs(deficit) > 1e-12:
116
+ if deficit > 0:
117
+ idx = np.where(h < 1.0)[0]
118
+ if len(idx) > 0:
119
+ room = 1.0 - h[idx]
120
+ scale = min(1.0, deficit / np.sum(room))
121
+ h[idx] += room * scale
122
+ else:
123
+ idx = np.where(h > 0.0)[0]
124
+ if len(idx) > 0:
125
+ room = h[idx]
126
+ scale = min(1.0, -deficit / np.sum(room))
127
+ h[idx] -= room * scale
128
+
129
+ iteration += 1
130
+ if iteration % 100 == 0:
131
+ elapsed = time.time() - t_start
132
+ print(f" iter {iteration:6d} | worst overlap = {worst_val:.12f} | best = {best_obj:.12f} | shift = {worst_s:+5d} | t = {elapsed:.1f}s")
133
+
134
+ elapsed = time.time() - t_start
135
+ print(f" Subgradient done: {iteration} iterations in {elapsed:.1f}s")
136
+ print(f" Best C5 = {best_obj:.12f}")
137
+
138
+ h = best_h.copy()
139
+
140
+ # ---- Step 4: SLSQP optimization ----
141
+ print(f"\n=== SLSQP optimization (n={n}, {2*n-1} inequality constraints) ===")
142
+ print("Setting up constraints...")
143
+
144
+ # Initial t value
145
+ t0 = compute_C5(h) + 1e-8
146
+ x0 = np.concatenate([h, [t0]])
147
+ print(f" Initial t = {t0:.12f}")
148
+
149
+ # Bounds
150
+ bounds = [(0.0, 1.0)] * n + [(None, None)]
151
+
152
+ # Equality constraint: sum(h) = n/2
153
+ def eq_fun(x):
154
+ return np.sum(x[:n]) - n / 2.0
155
+
156
+ def eq_jac(x):
157
+ j = np.zeros(n + 1)
158
+ j[:n] = 1.0
159
+ return j
160
+
161
+ eq_constraint = {'type': 'eq', 'fun': eq_fun, 'jac': eq_jac}
162
+
163
+ # Inequality constraints: t - overlap_s >= 0 for each shift s
164
+ def make_ineq(s):
165
+ def fun(x):
166
+ hh = x[:n]
167
+ t = x[n]
168
+ if s >= 0:
169
+ ln = n - s
170
+ if ln > 0:
171
+ return t - np.dot(hh[s:s+ln], 1.0 - hh[:ln]) * dx
172
+ else:
173
+ return t
174
+ else:
175
+ ss = -s
176
+ ln = n - ss
177
+ if ln > 0:
178
+ return t - np.dot(hh[:ln], 1.0 - hh[ss:ss+ln]) * dx
179
+ else:
180
+ return t
181
+
182
+ def jac(x):
183
+ hh = x[:n]
184
+ g = np.zeros(n + 1)
185
+ g[n] = 1.0
186
+ if s >= 0:
187
+ ln = n - s
188
+ if ln > 0:
189
+ g[s:s+ln] -= (1.0 - hh[:ln]) * dx
190
+ g[:ln] += hh[s:s+ln] * dx
191
+ else:
192
+ ss = -s
193
+ ln = n - ss
194
+ if ln > 0:
195
+ g[:ln] -= (1.0 - hh[ss:ss+ln]) * dx
196
+ g[ss:ss+ln] += hh[:ln] * dx
197
+ return g
198
+
199
+ return {'type': 'ineq', 'fun': fun, 'jac': jac}
200
+
201
+ print("Building inequality constraints...")
202
+ t_build_start = time.time()
203
+ ineq_constraints = []
204
+ shifts = list(range(-(n-1), n))
205
+ for s in shifts:
206
+ ineq_constraints.append(make_ineq(s))
207
+ print(f" Built {len(ineq_constraints)} inequality constraints in {time.time() - t_build_start:.1f}s")
208
+
209
+ all_constraints = [eq_constraint] + ineq_constraints
210
+
211
+ # Objective: minimize t
212
+ def objective(x):
213
+ return x[n]
214
+
215
+ def obj_jac(x):
216
+ g = np.zeros(n + 1)
217
+ g[n] = 1.0
218
+ return g
219
+
220
+ # Callback for progress
221
+ iter_count = [0]
222
+ last_print = [time.time()]
223
+
224
+ last_save = [time.time()]
225
+
226
+ def callback(x):
227
+ iter_count[0] += 1
228
+ now = time.time()
229
+ if now - last_print[0] >= 30.0: # Print every 30 seconds
230
+ t_val = x[n]
231
+ sum_h = np.sum(x[:n])
232
+ overlaps = compute_all_overlaps(x[:n])
233
+ max_ov = np.max(overlaps)
234
+ print(f" SLSQP iter {iter_count[0]:5d} | t = {t_val:.12f} | max_overlap = {max_ov:.12f} | sum(h) = {sum_h:.6f}")
235
+ sys.stdout.flush()
236
+ last_print[0] = now
237
+ # Save intermediate result every 5 minutes
238
+ if now - last_save[0] >= 300.0:
239
+ h_snap = np.clip(x[:n], 0.0, 1.0)
240
+ np.save("/workspace/h_n750_from600.npy", h_snap)
241
+ last_save[0] = now
242
+
243
+ print("Starting SLSQP optimization...")
244
+ print(f" maxiter=5000, ftol=1e-15")
245
+ sys.stdout.flush()
246
+
247
+ t_opt_start = time.time()
248
+ result = minimize(
249
+ objective,
250
+ x0,
251
+ jac=obj_jac,
252
+ method='SLSQP',
253
+ bounds=bounds,
254
+ constraints=all_constraints,
255
+ callback=callback,
256
+ options={'maxiter': 5000, 'ftol': 1e-15, 'disp': True}
257
+ )
258
+ t_opt_end = time.time()
259
+
260
+ print(f"\n=== SLSQP finished in {t_opt_end - t_opt_start:.1f}s ===")
261
+ print(f" Status: {result.status} - {result.message}")
262
+ print(f" Iterations: {result.nit}")
263
+ print(f" Final t = {result.x[n]:.15f}")
264
+
265
+ h_opt = result.x[:n]
266
+ h_opt = np.clip(h_opt, 0.0, 1.0)
267
+
268
+ # ---- Save result ----
269
+ np.save("/workspace/h_n750_from600.npy", h_opt)
270
+ print(f"\nSaved to /workspace/h_n750_from600.npy")
271
+
272
+ # ---- Compute and print C5 ----
273
+ C5 = compute_C5(h_opt)
274
+ print(f"\n{'='*50}")
275
+ print(f" C5 (max overlap) = {C5:.15f}")
276
+ print(f" sum(h) = {np.sum(h_opt):.10f} (target: {n/2.0})")
277
+ print(f"{'='*50}")
278
+
279
+ # Also print the worst few shifts
280
+ overlaps = compute_all_overlaps(h_opt)
281
+ worst_indices = np.argsort(overlaps)[-10:][::-1]
282
+ print("\nTop 10 worst shifts:")
283
+ for idx in worst_indices:
284
+ s = idx - (n - 1)
285
+ print(f" shift {s:+5d}: overlap = {overlaps[idx]:.15f}")
ccevolve/baselines/shinkaevolve/.githooks/pre-push ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ repo_root="$(git rev-parse --show-toplevel)"
5
+ cd "$repo_root"
6
+
7
+ echo "[pre-push] ruff"
8
+ uv run ruff check tests --exclude tests/file.py
9
+
10
+ echo "[pre-push] mypy"
11
+ uv run mypy --follow-imports=skip --ignore-missing-imports tests/test_*.py tests/conftest.py
12
+
13
+ echo "[pre-push] pytest + coverage"
14
+ uv run --with pytest-cov pytest -q --cov=shinka --cov-report=term-missing
ccevolve/baselines/shinkaevolve/.github/workflows/ci.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ checks:
9
+ name: Ruff, Mypy, Pytest
10
+ runs-on: ubuntu-latest
11
+ timeout-minutes: 20
12
+
13
+ steps:
14
+ - name: Checkout
15
+ uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.11"
21
+
22
+ - name: Set up uv
23
+ uses: astral-sh/setup-uv@v5
24
+
25
+ - name: Install project
26
+ run: uv sync --dev
27
+
28
+ - name: Ruff
29
+ run: uv run ruff check tests --exclude tests/file.py
30
+
31
+ - name: Mypy
32
+ run: uv run mypy --follow-imports=skip --ignore-missing-imports tests/test_*.py tests/conftest.py
33
+
34
+ - name: Pytest + Coverage
35
+ run: uv run --with pytest-cov pytest -q --cov=shinka --cov-report=term-missing --cov-report=xml:coverage.xml
36
+
37
+ - name: Upload Coverage XML
38
+ uses: actions/upload-artifact@v4
39
+ with:
40
+ name: coverage-xml
41
+ path: coverage.xml
ccevolve/baselines/shinkaevolve/.github/workflows/claude-code-review.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize, ready_for_review, reopened]
6
+ # Optional: Only run on specific file changes
7
+ # paths:
8
+ # - "src/**/*.ts"
9
+ # - "src/**/*.tsx"
10
+ # - "src/**/*.js"
11
+ # - "src/**/*.jsx"
12
+
13
+ jobs:
14
+ claude-review:
15
+ # Optional: Filter by PR author
16
+ # if: |
17
+ # github.event.pull_request.user.login == 'external-contributor' ||
18
+ # github.event.pull_request.user.login == 'new-developer' ||
19
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
20
+
21
+ runs-on: ubuntu-latest
22
+ permissions:
23
+ contents: read
24
+ pull-requests: read
25
+ issues: read
26
+ id-token: write
27
+
28
+ steps:
29
+ - name: Checkout repository
30
+ uses: actions/checkout@v4
31
+ with:
32
+ fetch-depth: 1
33
+
34
+ - name: Run Claude Code Review
35
+ id: claude-review
36
+ uses: anthropics/claude-code-action@v1
37
+ with:
38
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
39
+ plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
40
+ plugins: 'code-review@claude-code-plugins'
41
+ prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
42
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
43
+ # or https://code.claude.com/docs/en/cli-reference for available options
44
+
ccevolve/baselines/shinkaevolve/.github/workflows/claude.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Claude Code
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ pull_request_review_comment:
7
+ types: [created]
8
+ issues:
9
+ types: [opened, assigned]
10
+ pull_request_review:
11
+ types: [submitted]
12
+
13
+ jobs:
14
+ claude:
15
+ if: |
16
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20
+ runs-on: ubuntu-latest
21
+ permissions:
22
+ contents: read
23
+ pull-requests: read
24
+ issues: read
25
+ id-token: write
26
+ actions: read # Required for Claude to read CI results on PRs
27
+ steps:
28
+ - name: Checkout repository
29
+ uses: actions/checkout@v4
30
+ with:
31
+ fetch-depth: 1
32
+
33
+ - name: Run Claude Code
34
+ id: claude
35
+ uses: anthropics/claude-code-action@v1
36
+ with:
37
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
38
+
39
+ # This is an optional setting that allows Claude to read CI results on PRs
40
+ additional_permissions: |
41
+ actions: read
42
+
43
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
44
+ # prompt: 'Update the pull request description to include a summary of changes.'
45
+
46
+ # Optional: Add claude_args to customize behavior and configuration
47
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
48
+ # or https://code.claude.com/docs/en/cli-reference for available options
49
+ # claude_args: '--allowed-tools Bash(gh pr:*)'
50
+
ccevolve/baselines/shinkaevolve/.gitignore ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ .DS_Store
6
+ results/
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # UV
100
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ uv.lock
104
+
105
+ # poetry
106
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110
+ #poetry.lock
111
+
112
+ # pdm
113
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114
+ #pdm.lock
115
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116
+ # in version control.
117
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
118
+ .pdm.toml
119
+ .pdm-python
120
+ .pdm-build/
121
+
122
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123
+ __pypackages__/
124
+
125
+ # Celery stuff
126
+ celerybeat-schedule
127
+ celerybeat.pid
128
+
129
+ # SageMath parsed files
130
+ *.sage.py
131
+
132
+ # Environments
133
+ .env
134
+ .venv
135
+ env/
136
+ venv/
137
+ ENV/
138
+ env.bak/
139
+ venv.bak/
140
+
141
+ # Spyder project settings
142
+ .spyderproject
143
+ .spyproject
144
+
145
+ # Rope project settings
146
+ .ropeproject
147
+
148
+ # mkdocs documentation
149
+ /site
150
+
151
+ # mypy
152
+ .mypy_cache/
153
+ .dmypy.json
154
+ dmypy.json
155
+
156
+ # Pyre type checker
157
+ .pyre/
158
+
159
+ # pytype static type analyzer
160
+ .pytype/
161
+
162
+ # Cython debug symbols
163
+ cython_debug/
164
+
165
+ # PyCharm
166
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
167
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
168
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
169
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
170
+ #.idea/
171
+
172
+ # Ruff stuff:
173
+ .ruff_cache/
174
+
175
+ # PyPI configuration file
176
+ .pypirc
177
+
178
+ # Local environment variants (keep shareable template files tracked)
179
+ .env.*
180
+ !.env.example
181
+
182
+ # SQLite artifacts generated by local runs
183
+ *.db
184
+ *.sqlite
185
+ *.sqlite3
186
+ *.db-journal
187
+ *.sqlite-journal
188
+ *.sqlite-wal
189
+ *.sqlite-shm
190
+
191
+ # Local editor metadata
192
+ .idea/
193
+ .vscode/
ccevolve/baselines/shinkaevolve/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2020 Rémi Louf
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
ccevolve/baselines/shinkaevolve/README.md ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align="center">
2
+ <a href="shinka/favicon.png?raw=true"><img src="shinka/favicon.png?raw=true" width="180" /></a><br>
3
+ <b><code>ShinkaEvolve</code>: Towards Open-Ended and Sample-Efficient Program Evolution 🧬</b><br>
4
+ </h1>
5
+
6
+ <p align="center">
7
+ <img src="https://img.shields.io/badge/python-%3E%3D3.10-blue" />
8
+ <a href="https://github.com/SakanaAI/ShinkaEvolve/blob/master/LICENSE.md"><img src="https://img.shields.io/badge/license-Apache2.0-blue.svg" /></a>
9
+ <a href="https://github.com/astral-sh/ruff"><img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json" /></a>
10
+ <a href="https://arxiv.org/abs/2509.19349"><img src="http://img.shields.io/badge/paper-arxiv.2509.19349-B31B1B.svg" /></a>
11
+ <a href="https://sakana.ai/shinka-evolve/"><img src="https://img.shields.io/badge/Blog%20%7C%20SakanaAI-0A66C2.svg" /></a>
12
+ <a href="https://colab.research.google.com/github/SakanaAI/ShinkaEvolve/blob/main/examples/shinka_tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" /></a>
13
+ </p>
14
+
15
+
16
+ [`shinka`](https://sakana.ai/shinka-evolve/) is a framework that combines Large Language Models (LLMs) with evolutionary algorithms to drive scientific discovery. By leveraging the creative capabilities of LLMs and the optimization power of evolutionary search, `shinka` enables automated exploration and improvement of scientific code. The system is inspired by the [AI Scientist](https://sakana.ai/ai-scientist/), [AlphaEvolve](https://deepmind.google/discover/blog/alphaevolve-a-gemini-powered-coding-agent-for-designing-advanced-algorithms/) and the [Darwin Goedel Machine](https://sakana.ai/dgm/): It maintains a population of programs that evolve over generations, with an ensemble of LLMs acting as intelligent mutation operators that suggest code improvements.
17
+
18
+ ---
19
+
20
+ **Mar 2026 Update**: Refactored evolution API and unified runner `ShinkaEvolveRunner` (replacing legacy `EvolutionRunner`/`AsyncEvolutionRunner` references).
21
+
22
+ **Feb 2026 Update**: Added [agent skill files](docs/agentic_usage.md) for using `shinka` within coding agents for task generation ([`shinka-setup`](skills/shinka-setup/SKILL.md)), evolution ([`shinka-run`](skills/shinka-run/SKILL.md)), and result inspection ([`shinka-inspect`](skills/shinka-inspect/SKILL.md)).
23
+
24
+ **Jan 2026 Update**: ShinkaEvolve was accepted at ICLR 2026 and we have [released v1.1](docs/release_notes.md) with many new features.
25
+
26
+ **Nov 2025 Update**: Rob gave several public talks about our ShinkaEvolve effort ([Official](https://x.com/SakanaAILabs/status/1989352976792846356?s=20), [AutoML Seminar](https://www.youtube.com/watch?v=dAOIer_1INo)).
27
+
28
+ **Oct 2025 Update** ShinkaEvolve supported Team Unagi in winning the [ICFP 2025 Programming Contest](https://sakana.ai/icfp-2025/).
29
+
30
+ ---
31
+
32
+ The framework supports **parallel evaluation of candidates** locally or on a Slurm cluster. It maintains an archive of successful solutions, enabling knowledge transfer between different evolutionary islands. `shinka` is particularly well-suited for scientific tasks where there is a verifier available and the goal is to optimize performance metrics while maintaining code correctness and readability.
33
+
34
+ ![](docs/media/conceptual.png)
35
+
36
+ ## Documentation 📝
37
+
38
+ | Guide | Description | What You'll Learn |
39
+ |-------|-------------|-------------------|
40
+ | 🚀 **[Getting Started](docs/getting_started.md)** | Installation, basic usage, and examples | Setup, first evolution run, core concepts |
41
+ | 📓 **[Tutorial](examples/shinka_tutorial.ipynb)** | Interactive walkthrough of Shinka features | Hands-on examples, configuration, best practices |
42
+ | ⚙️ **[Configuration](docs/configuration.md)** | Comprehensive configuration reference | All config options, optimization settings, advanced features |
43
+ | 🎨 **[WebUI](docs/webui.md)** | Interactive visualization and monitoring | Real-time tracking, result analysis, debugging tools |
44
+ | ⚡ **[Async Evolution](docs/async_evolution.md)** | High-performance async pipeline (5-10x speedup) | Concurrent processing, proposal/eval concurrency tuning |
45
+ | 🧠 **[Local LLM](docs/support_local_llm.md)** | How to connect and use local LLMs with Shinka | Running open-source models, integration tips, performance notes |
46
+ | 🤖 **[Agentic Usage](docs/agentic_usage.md)** | Run Shinka with Claude/Codex skills | CLI install, skill placement, setup/run workflows |
47
+
48
+
49
+ ## Installation & Quick Start 🚀
50
+
51
+ ```bash
52
+ # Clone repository
53
+ git clone https://github.com/SakanaAI/ShinkaEvolve
54
+
55
+ # Install uv if you haven't already
56
+ curl -LsSf https://astral.sh/uv/install.sh | sh
57
+
58
+ # Create environment and install Shinka
59
+ cd ShinkaEvolve
60
+ uv venv --python 3.11
61
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
62
+ uv pip install -e .
63
+
64
+ # Run your first evolution experiment
65
+ shinka_launch variant=circle_packing_example
66
+ ```
67
+
68
+ For detailed installation instructions and usage examples, see the [Getting Started Guide](docs/getting_started.md).
69
+
70
+ ## Examples 📖
71
+
72
+ | Example | Description | Environment Setup |
73
+ |---------|-------------|-------------------|
74
+ | ⭕ [Circle Packing](examples/circle_packing) | Optimize circle packing to maximize radii. | `LocalJobConfig` |
75
+ | 🎮 [Game 2048](examples/game_2048) | Optimize a policy for the Game of 2048. | `LocalJobConfig` |
76
+ | ∑ [Julia Prime Counting](examples/julia_prime_counting) | Optimize a Julia solver for prime-count queries. | `LocalJobConfig` |
77
+ | ✨ [Novelty Generator](examples/novelty_generator) | Generate creative, surprising outputs (e.g., ASCII art). | `LocalJobConfig` |
78
+
79
+
80
+ ## `shinka` Run with Python API 🐍
81
+
82
+ For the simplest setup with default settings, you only need to specify the evaluation program:
83
+
84
+ ```python
85
+ from shinka.core import ShinkaEvolveRunner, EvolutionConfig
86
+ from shinka.database import DatabaseConfig
87
+ from shinka.launch import LocalJobConfig
88
+
89
+ # Minimal - only specify what's required
90
+ job_conf = LocalJobConfig(eval_program_path="evaluate.py")
91
+ db_conf = DatabaseConfig()
92
+ evo_conf = EvolutionConfig(init_program_path="initial.py")
93
+
94
+ runner = ShinkaEvolveRunner(
95
+ evo_config=evo_conf,
96
+ job_config=job_conf,
97
+ db_config=db_conf,
98
+ max_evaluation_jobs=2,
99
+ max_proposal_jobs=1, # sync-like proposal behavior
100
+ )
101
+ runner.run()
102
+ ```
103
+
104
+ <details>
105
+ <summary><strong>EvolutionConfig Parameters</strong> (click to expand)</summary>
106
+
107
+ Class defaults below come from `shinka/core/config.py` (`EvolutionConfig`). Hydra presets and CLI overrides can replace these values.
108
+
109
+ | Key | Default Value | Type | Explanation |
110
+ |-----|---------------|------|-------------|
111
+ | `task_sys_msg` | `None` | `Optional[str]` | System message describing the optimization task |
112
+ | `patch_types` | `["diff"]` | `List[str]` | Types of patches to generate: "diff", "full", "cross" |
113
+ | `patch_type_probs` | `[1.0]` | `List[float]` | Probabilities for each patch type |
114
+ | `num_generations` | `10` | `int` | Number of evolution generations to run |
115
+ | `max_proposal_jobs` | `1` | `int` | Maximum number of concurrent proposal generation jobs |
116
+ | `max_db_workers` | `4` | `int` | Maximum number of async DB worker threads |
117
+ | `max_patch_resamples` | `3` | `int` | Max times to resample a patch if it fails |
118
+ | `max_patch_attempts` | `5` | `int` | Max attempts to generate a valid patch |
119
+ | `job_type` | `"local"` | `str` | Job execution type: "local", "slurm_docker", "slurm_conda" |
120
+ | `language` | `"python"` | `str` | Programming language for evolution |
121
+ | `llm_models` | `["azure-gpt-4.1-mini"]` | `List[str]` | List of LLM models for code generation |
122
+ | `llm_dynamic_selection` | `None` | `Optional[Union[str, BanditBase]]` | Dynamic model selection strategy |
123
+ | `llm_dynamic_selection_kwargs` | `{}` | `dict` | Kwargs for dynamic selection |
124
+ | `llm_kwargs` | `{}` | `dict` | Additional kwargs for LLM calls |
125
+ | `meta_rec_interval` | `None` | `Optional[int]` | Interval for meta-recommendations |
126
+ | `meta_llm_models` | `None` | `Optional[List[str]]` | LLM models for meta-recommendations |
127
+ | `meta_llm_kwargs` | `{}` | `dict` | Kwargs for meta-recommendation LLMs |
128
+ | `meta_max_recommendations` | `5` | `int` | Max number of meta-recommendations |
129
+ | `sample_single_meta_rec` | `True` | `bool` | Sample a single recommendation from meta output when enabled |
130
+ | `embedding_model` | `None` | `Optional[str]` | Model for code embeddings |
131
+ | `init_program_path` | `"initial.py"` | `Optional[str]` | Path to initial program to evolve |
132
+ | `results_dir` | `None` | `Optional[str]` | Directory to save results (auto-generated if None) |
133
+ | `max_novelty_attempts` | `3` | `int` | Max attempts for novelty generation |
134
+ | `code_embed_sim_threshold` | `1.0` | `float` | Similarity threshold for code embeddings |
135
+ | `novelty_llm_models` | `None` | `Optional[List[str]]` | LLM models for novelty judgment |
136
+ | `novelty_llm_kwargs` | `{}` | `dict` | Kwargs for novelty LLMs |
137
+ | `use_text_feedback` | `False` | `bool` | Whether to use text feedback in evolution |
138
+ | `max_api_costs` | `None` | `Optional[float]` | Total API budget cap (USD); async runner stops new proposals at cap |
139
+ | `inspiration_sort_order` | `"ascending"` | `str` | Inspiration ordering (`"ascending"`, `"chronological"`, `"none"`) |
140
+ | `evolve_prompts` | `False` | `bool` | Enable meta-prompt evolution loop |
141
+ | `prompt_patch_types` | `["diff", "full"]` | `List[str]` | Patch formats used for prompt evolution |
142
+ | `prompt_patch_type_probs` | `[0.7, 0.3]` | `List[float]` | Sampling probabilities for prompt patch formats |
143
+ | `prompt_evolution_interval` | `None` | `Optional[int]` | Prompt-evolution cadence in generations (`None` disables periodic updates) |
144
+ | `prompt_archive_size` | `10` | `int` | Size of system-prompt archive |
145
+ | `prompt_llm_models` | `None` | `Optional[List[str]]` | LLM models for prompt evolution (`None` falls back to `llm_models`) |
146
+ | `prompt_llm_kwargs` | `{}` | `dict` | Extra kwargs for prompt-evolution LLM calls |
147
+ | `prompt_ucb_exploration_constant` | `1.0` | `float` | UCB exploration constant for prompt sampling |
148
+ | `prompt_epsilon` | `0.1` | `float` | Epsilon-greedy exploration probability for prompt sampling |
149
+ | `prompt_evo_top_k_programs` | `3` | `int` | Number of top programs used as context in prompt evolution |
150
+ | `prompt_percentile_recompute_interval` | `20` | `int` | Generations between prompt percentile recomputations |
151
+
152
+ </details>
153
+
154
+ <details>
155
+ <summary><strong>DatabaseConfig Parameters</strong> (click to expand)</summary>
156
+
157
+ Class defaults below come from `shinka/database/dbase.py` (`DatabaseConfig`). Hydra presets and CLI overrides can replace these values.
158
+
159
+ | Key | Default Value | Type | Explanation |
160
+ |-----|---------------|------|-------------|
161
+ | `db_path` | `None` | `Optional[str]` | Database file path (auto-generated if None) |
162
+ | `num_islands` | `4` | `int` | Number of evolution islands for diversity |
163
+ | `archive_size` | `100` | `int` | Global archive size cap |
164
+ | `elite_selection_ratio` | `0.3` | `float` | Proportion of elite programs for inspiration |
165
+ | `num_archive_inspirations` | `5` | `int` | Number of archive programs to use as inspiration |
166
+ | `num_top_k_inspirations` | `2` | `int` | Number of top-k programs for inspiration |
167
+ | `migration_interval` | `10` | `int` | Generations between island migrations |
168
+ | `migration_rate` | `0.1` | `float` | Proportion of island population to migrate |
169
+ | `island_elitism` | `True` | `bool` | Keep best programs on their original islands |
170
+ | `enforce_island_separation` | `True` | `bool` | Enforce full separation between islands |
171
+ | `island_selection_strategy` | `"uniform"` | `str` | Island sampler (`"uniform"`, `"equal"`, `"proportional"`, `"weighted"`) |
172
+ | `enable_dynamic_islands` | `False` | `bool` | Enable stagnation-triggered island spawning |
173
+ | `stagnation_threshold` | `100` | `int` | Generations without improvement before spawning a new island |
174
+ | `island_spawn_strategy` | `"initial"` | `str` | New-island seed strategy (`"initial"`, `"best"`, `"archive_random"`) |
175
+ | `island_spawn_subtree_size` | `1` | `int` | Number of programs copied when spawning an island |
176
+ | `parent_selection_strategy` | `"power_law"` | `str` | Parent selection: "weighted", "power_law", "beam_search" |
177
+ | `exploitation_alpha` | `1.0` | `float` | Power-law exponent (0=uniform, 1=power-law) |
178
+ | `exploitation_ratio` | `0.2` | `float` | Chance to pick parent from archive |
179
+ | `parent_selection_lambda` | `10.0` | `float` | Sharpness of sigmoid for weighted selection |
180
+ | `num_beams` | `5` | `int` | Number of beams for beam search selection |
181
+ | `archive_selection_strategy` | `"fitness"` | `str` | Archive replacement strategy (`"fitness"` or `"crowding"`) |
182
+ | `archive_criteria` | `{"combined_score": 1.0}` | `Dict[str, float]` | Weighted ranking criteria used by fitness archive updates |
183
+
184
+ </details>
185
+
186
+ <details>
187
+ <summary><strong>JobConfig Parameters</strong> (click to expand)</summary>
188
+
189
+ **LocalJobConfig** (for local execution):
190
+ | Key | Default Value | Type | Explanation |
191
+ |-----|---------------|------|-------------|
192
+ | `eval_program_path` | `"evaluate.py"` | `Optional[str]` | Path to evaluation script |
193
+ | `extra_cmd_args` | `{}` | `Dict[str, Any]` | Additional command line arguments |
194
+ | `time` | `None` | `Optional[str]` | Time limit for job execution |
195
+ | `conda_env` | `None` | `Optional[str]` | Conda environment to run jobs in |
196
+
197
+ **SlurmDockerJobConfig** (for SLURM with Docker):
198
+ | Key | Default Value | Type | Explanation |
199
+ |-----|---------------|------|-------------|
200
+ | `eval_program_path` | `"evaluate.py"` | `Optional[str]` | Path to evaluation script |
201
+ | `extra_cmd_args` | `{}` | `Dict[str, Any]` | Additional command line arguments |
202
+ | `image` | `"ubuntu:latest"` | `str` | Docker image to use |
203
+ | `image_tar_path` | `None` | `Optional[str]` | Path to Docker image tar file |
204
+ | `docker_flags` | `""` | `str` | Additional Docker flags |
205
+ | `partition` | `"gpu"` | `str` | SLURM partition to use |
206
+ | `time` | `"01:00:00"` | `str` | Job time limit |
207
+ | `cpus` | `1` | `int` | Number of CPUs to request |
208
+ | `gpus` | `1` | `int` | Number of GPUs to request |
209
+ | `mem` | `"8G"` | `Optional[str]` | Memory to request |
210
+
211
+ **SlurmCondaJobConfig** (for SLURM with Conda):
212
+ | Key | Default Value | Type | Explanation |
213
+ |-----|---------------|------|-------------|
214
+ | `eval_program_path` | `"evaluate.py"` | `Optional[str]` | Path to evaluation script |
215
+ | `extra_cmd_args` | `{}` | `Dict[str, Any]` | Additional command line arguments |
216
+ | `conda_env` | `""` | `str` | Conda environment name |
217
+ | `modules` | `[]` | `Optional[List[str]]` | Environment modules to load |
218
+ | `partition` | `"gpu"` | `str` | SLURM partition to use |
219
+ | `time` | `"01:00:00"` | `str` | Job time limit |
220
+ | `cpus` | `1` | `int` | Number of CPUs to request |
221
+ | `gpus` | `1` | `int` | Number of GPUs to request |
222
+ | `mem` | `"8G"` | `Optional[str]` | Memory to request |
223
+
224
+ </details>
225
+
226
+ ### Evaluation Setup & Initial Solution 🏃
227
+
228
+ To use `ShinkaEvolveRunner`, you need two key files: The **`evaluate.py`** script defines how to test and score your programs - it runs multiple evaluations, validates results, and aggregates them into metrics that guide the `shinka` evolution loop. The **`initial.py`** file contains your starting solution with the core algorithm that will be iteratively improved by LLMs across generations.
229
+
230
+ <table>
231
+ <tr>
232
+ <td width="50%">
233
+
234
+ **`evaluate.py` - Evaluation Script**
235
+
236
+ ```python
237
+ from shinka.core import run_shinka_eval
238
+
239
+ def main(program_path: str,
240
+ results_dir: str):
241
+ metrics, correct, err = run_shinka_eval(
242
+ program_path=program_path,
243
+ results_dir=results_dir,
244
+ experiment_fn_name="run_experiment",
245
+ num_runs=3, # Multi-evals to aggreg.
246
+ run_workers=1, # >1 enables per-run process parallelism
247
+ get_experiment_kwargs=get_kwargs,
248
+ aggregate_metrics_fn=aggregate_fn,
249
+ validate_fn=validate_fn, # Optional
250
+ )
251
+
252
+ def get_kwargs(run_idx: int) -> dict:
253
+ return {"param1": "value", "param2": 42}
254
+
255
+ def aggregate_fn(results: list) -> dict:
256
+ score = results[0]
257
+ text = results[1]
258
+ return {
259
+ "combined_score": float(score),
260
+ "public": {...}, # shinka-visible
261
+ "private": {...}, # shinka-invisible
262
+ "extra_data": {...}, # store as pkl
263
+ "text_feedback": text, # str fb
264
+ }
265
+
266
+ if __name__ == "__main__":
267
+ # argparse program path & dir
268
+ main(program_path, results_dir)
269
+ ```
270
+
271
+ </td>
272
+ <td width="50%">
273
+
274
+ **`initial.py` - Starting Solution**
275
+
276
+ ```python
277
+ # EVOLVE-BLOCK-START
278
+ def advanced_algo():
279
+ # This will be evolved
280
+ return solution
281
+ # EVOLVE-BLOCK-END
282
+
283
+ def run_experiment(**kwargs):
284
+ """Main called by evaluator"""
285
+ result = solve_problem(kwargs)
286
+ return result
287
+
288
+ def solve_problem(params):
289
+ solution = advanced_algo()
290
+ return solution
291
+ ```
292
+
293
+ **Key Points:**
294
+ - Eval name matches `experiment_fn_name`
295
+ - Use `EVOLVE-BLOCK-START` and `EVOLVE-BLOCK-END` to mark evolution sections
296
+ - Return format matches validation expectations
297
+ - Dependencies must be available in env
298
+ - Results can be unpacked for metrics
299
+ - Auto-stores several results in `results_dir`
300
+ - Can add text feedback in `shinka` loop
301
+ - Higher `combined_score` values indicate better performance (maximization)
302
+
303
+ </td>
304
+ </tr>
305
+ </table>
306
+
307
+
308
+ ## `shinka` Launcher with Hydra 🚀
309
+
310
+ `shinka` Launcher utilizes [Hydra](https://hydra.cc/) to configure and launch evolutionary experiments effortlessly. It supports concise configuration via Hydra's powerful override syntax, making it easy to manage and iterate scientific explorations.
311
+
312
+ ```bash
313
+ # Run with pre-configured variant
314
+ shinka_launch variant=circle_packing_example
315
+
316
+ # Run with custom parameters
317
+ shinka_launch \
318
+ task=circle_packing \
319
+ database=island_large \
320
+ evolution=small_budget \
321
+ cluster=local \
322
+ evo_config.num_generations=20
323
+ ```
324
+
325
+ For comprehensive configuration options and advanced usage, see the [Configuration Guide](docs/configuration.md).
326
+
327
+ ## `shinka_run` Agent CLI 🤖
328
+
329
+ `shinka_run` is a task-directory launcher for async evolution. It is designed for agent workflows and does not require Hydra config files.
330
+
331
+ ```bash
332
+ # Inspect full interface (detailed help)
333
+ shinka_run --help
334
+
335
+ # Minimal run
336
+ shinka_run \
337
+ --task-dir examples/circle_packing \
338
+ --results_dir results/circle_agent_run \
339
+ --num_generations 20
340
+
341
+ # Run with keyword overrides
342
+ shinka_run \
343
+ --task-dir examples/circle_packing \
344
+ --results_dir results/circle_agent_custom \
345
+ --num_generations 50 \
346
+ --max-evaluation-jobs 6 \
347
+ --set db.num_islands=3 \
348
+ --set job.time=00:10:00 \
349
+ --set evo.llm_models='["gpt-5-mini","gpt-5-nano"]'
350
+
351
+ # Load optional YAML config (relative to --task-dir), then override via --set
352
+ shinka_run \
353
+ --task-dir examples/circle_packing \
354
+ --config-fname shinka_small.yaml \
355
+ --results_dir results/circle_agent_from_yaml \
356
+ --num_generations 50 \
357
+ --set db.num_islands=3
358
+ ```
359
+
360
+ `--task-dir` must contain `evaluate.py` and `initial.<ext>`.
361
+ `--config-fname` can define `evo/db/job` (or `evo_config/db_config/job_config`) plus `max_evaluation_jobs/max_proposal_jobs/max_db_workers` and `verbose/debug`.
362
+ Precedence: config YAML < `--set` < authoritative flags.
363
+ `--results_dir` and `--num_generations` are authoritative and always override config/`--set` values for `evo.results_dir` and `evo.num_generations`.
364
+
365
+
366
+ ## Interactive WebUI 🎨
367
+
368
+ Monitor your evolution experiments in real-time with Shinka's interactive web interface! The WebUI provides live visualization of the evolutionary process, genealogy trees, and performance metrics.
369
+
370
+ ![WebUI Screenshot](docs/media/webui.png)
371
+
372
+ ### Quick Start
373
+
374
+ Launch the WebUI alongside your evolution experiment:
375
+
376
+ ```bash
377
+ # Start your evolution experiment
378
+ shinka_launch variant=circle_packing_example
379
+
380
+ # In another terminal, launch the WebUI
381
+ shinka_visualize --port 8888 --open
382
+ ```
383
+
384
+ For detailed WebUI documentation, see the [WebUI Guide](docs/webui.md).
385
+
386
+ ## Related Open-Source Projects 🧑‍🔧
387
+
388
+ - [OpenEvolve](https://github.com/codelion/openevolve): An open-source implementation of AlphaEvolve
389
+ - [LLM4AD](https://github.com/Optima-CityU/llm4ad): A Platform for Algorithm Design with Large Language Model
390
+
391
+ ## Citation ✍️
392
+
393
+ If you use `ShinkaEvolve` in your research, please cite it as follows:
394
+
395
+ ```
396
+ @article{lange2025shinka,
397
+ title={ShinkaEvolve: Towards Open-Ended And Sample-Efficient Program Evolution},
398
+ author={Lange, Robert Tjarko and Imajuku, Yuki and Cetin, Edoardo},
399
+ journal={arXiv preprint arXiv:2509.19349},
400
+ year={2025}
401
+ }
402
+ ```
ccevolve/baselines/shinkaevolve/configs/cluster/gcp.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ defaults:
2
+ - remote
3
+ - _self_
4
+
5
+ distributed_job_config:
6
+ partition: "a3,aisci"
ccevolve/baselines/shinkaevolve/configs/cluster/local.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ job_config:
2
+ _target_: shinka.launch.LocalJobConfig
3
+ eval_program_path: ${distributed_job_config.eval_program_path}
4
+
5
+ evo_config:
6
+ job_type: "local"
ccevolve/baselines/shinkaevolve/configs/cluster/remote.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ job_config: ${distributed_job_config}
ccevolve/baselines/shinkaevolve/configs/config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - database@_global_: island_small
4
+ - evolution@_global_: small_budget
5
+ - task@_global_: circle_packing
6
+ - cluster@_global_: local
7
+ - variant@_global_: circle_packing_example
8
+
9
+ verbose: false
10
+ results_dir: results
11
+ run_name: ${now:%Y.%m.%d}${now:%H%M%S}
12
+
13
+ output_dir: ${results_dir}/${exp_name}/${run_name}${variant_suffix}
14
+
15
+ hydra:
16
+ run:
17
+ dir: ${output_dir}
ccevolve/baselines/shinkaevolve/configs/database/island_large.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from circle packing example TODO: update
2
+ db_config:
3
+ _target_: shinka.database.DatabaseConfig
4
+ db_path: "evolution_db.sqlite"
5
+ num_islands: 5
6
+ archive_size: 40
7
+ # inspiration parameters
8
+ elite_selection_ratio: 0.3
9
+ num_archive_inspirations: 4
10
+ num_top_k_inspirations: 2
11
+ # island migration parameters
12
+ migration_interval: 10
13
+ migration_rate: 0.1
14
+ island_elitism: true
15
+ # parent selection parameters
16
+ parent_selection_strategy: "weighted"
17
+ # power-law parent selection parameters
18
+ exploitation_alpha: 1.0
19
+ exploitation_ratio: 0.2
20
+ # weighted tree parent selection parameters
21
+ parent_selection_lambda: 10.0
ccevolve/baselines/shinkaevolve/configs/database/island_medium.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from mad example TODO: update
2
+ db_config:
3
+ _target_: shinka.database.DatabaseConfig
4
+ db_path: "evolution_db.sqlite"
5
+ num_islands: 2
6
+ archive_size: 40
7
+ exploitation_ratio: 0.2
8
+ elite_selection_ratio: 0.3
9
+ num_archive_inspirations: 4
10
+ num_top_k_inspirations: 2
11
+ migration_interval: 10
12
+ migration_rate: 0.0
13
+ island_elitism: true
14
+ parent_selection_strategy: "weighted"
15
+ parent_selection_lambda: 10.0
ccevolve/baselines/shinkaevolve/configs/database/island_small.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from mad example TODO: update
2
+ db_config:
3
+ _target_: shinka.database.DatabaseConfig
4
+ db_path: "evolution_db.sqlite"
5
+ num_islands: 2
6
+ archive_size: 20
7
+ exploitation_ratio: 0.2
8
+ elite_selection_ratio: 0.3
9
+ num_archive_inspirations: 4
10
+ num_top_k_inspirations: 2
11
+ migration_interval: 10
12
+ migration_rate: 0.1
13
+ island_elitism: true
ccevolve/baselines/shinkaevolve/configs/evolution/large_budget.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_evaluation_jobs: 6
2
+
3
+ evo_config:
4
+ _target_: shinka.core.EvolutionConfig
5
+ patch_types:
6
+ - "diff"
7
+ - "full"
8
+ - "cross"
9
+ patch_type_probs:
10
+ - 0.4
11
+ - 0.4
12
+ - 0.2
13
+ num_generations: 300
14
+ max_proposal_jobs: 1
15
+ max_db_workers: 4
16
+ max_patch_resamples: 3
17
+ max_patch_attempts: 3
18
+ llm_models:
19
+ - "gpt-4.1"
20
+ - "gpt-4.1-mini"
21
+ - "gpt-4.1-nano"
22
+ - "us.anthropic.claude-sonnet-4-20250514-v1:0"
23
+ - "o4-mini"
24
+ llm_dynamic_selection: ucb
25
+ llm_kwargs:
26
+ temperatures:
27
+ - 0.0
28
+ - 0.5
29
+ - 1.0
30
+ max_tokens: 16384
31
+ meta_rec_interval: 10
32
+ meta_llm_models:
33
+ - "gpt-4.1"
34
+ meta_llm_kwargs:
35
+ temperatures:
36
+ - 0.0
37
+ embedding_model: "text-embedding-3-small"
38
+ results_dir: ${output_dir}
39
+
ccevolve/baselines/shinkaevolve/configs/evolution/medium_budget.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_evaluation_jobs: 10
2
+
3
+ evo_config:
4
+ _target_: shinka.core.EvolutionConfig
5
+ patch_types:
6
+ - "diff"
7
+ - "full"
8
+ - "cross"
9
+ patch_type_probs:
10
+ - 0.6
11
+ - 0.3
12
+ - 0.1
13
+ num_generations: 100
14
+ max_proposal_jobs: 1
15
+ max_db_workers: 4
16
+ max_patch_resamples: 3
17
+ max_patch_attempts: 3
18
+ llm_models:
19
+ - "gemini-2.5-pro"
20
+ - "gemini-2.5-flash"
21
+ - "gpt-4.1-mini"
22
+ - "gpt-4.1-nano"
23
+ - "us.anthropic.claude-sonnet-4-20250514-v1:0"
24
+ - "o4-mini"
25
+ llm_dynamic_selection: ucb
26
+ llm_kwargs:
27
+ temperatures:
28
+ - 0.0
29
+ - 0.5
30
+ - 1.0
31
+ max_tokens: 16384
32
+ meta_rec_interval: 10
33
+ meta_llm_models:
34
+ - "gpt-4.1"
35
+ meta_llm_kwargs:
36
+ temperatures:
37
+ - 0.0
38
+ embedding_model: "text-embedding-3-small"
39
+ results_dir: ${output_dir}
40
+
ccevolve/baselines/shinkaevolve/configs/evolution/small_budget.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ max_evaluation_jobs: 1
2
+
3
+ evo_config:
4
+ _target_: shinka.core.EvolutionConfig
5
+ patch_types:
6
+ - "diff"
7
+ - "full"
8
+ patch_type_probs:
9
+ - 0.5
10
+ - 0.5
11
+ num_generations: 20
12
+ max_proposal_jobs: 1
13
+ max_db_workers: 4
14
+ max_patch_attempts: 10
15
+ llm_models:
16
+ - "gpt-4.1"
17
+ llm_dynamic_selection: null
18
+ embedding_model: "text-embedding-3-small"
19
+ results_dir: ${output_dir}
ccevolve/baselines/shinkaevolve/configs/task/circle_packing.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ evaluate_function:
2
+ _target_: examples.circle_packing.evaluate.main
3
+ program_path: ???
4
+ results_dir: ???
5
+
6
+ distributed_job_config:
7
+ _target_: shinka.launch.SlurmCondaJobConfig
8
+ modules:
9
+ - "cuda/12.4"
10
+ - "cudnn/8.9.7"
11
+ - "hpcx/2.20"
12
+ eval_program_path: "shinka/eval_hydra.py"
13
+ conda_env: "shinka"
14
+ time: "00:10:00"
15
+ cpus: 1
16
+ gpus: 0
17
+ mem: "8G"
18
+
19
+ evo_config:
20
+ task_sys_msg: |
21
+ You are an expert mathematician specializing in circle packing problems and computational geometry. The best known result for the sum of radii when packing 26 circles in a unit square is 2.635.
22
+
23
+ Key insights to explore:
24
+ 1. The optimal arrangement likely involves variable-sized circles
25
+ 2. A pure hexagonal arrangement may not be optimal due to edge effects
26
+ 3. The densest known circle packings often use a hybrid approach
27
+ 4. The optimization routine is critically important - simple physics-based models with carefully tuned parameters
28
+ 5. Consider strategic placement of circles at square corners and edges
29
+ 6. Adjusting the pattern to place larger circles at the center and smaller at the edges
30
+ 7. The math literature suggests special arrangements for specific values of n
31
+
32
+ Be creative and try to find a new solution.
33
+ language: "python"
34
+ init_program_path: "examples/circle_packing/initial.py"
35
+ job_type: "slurm_conda"
36
+
37
+ exp_name: "shinka_circle_packing"
ccevolve/baselines/shinkaevolve/configs/task/novelty_generator.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ novelty_generator_number_of_samples: 20
2
+
3
+ evaluate_function:
4
+ _target_: examples.novelty_generator.evaluate.evaluate_with_lm_judge
5
+ program_path: ???
6
+ results_dir: ???
7
+ lm_input_and_output_processors:
8
+ _target_: examples.novelty_generator.lm_judge_prompt.make_lm_input_and_output_processors
9
+ number_of_samples: ${novelty_generator_number_of_samples}
10
+ llm_judge_names:
11
+ - "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0"
12
+ llm_judge_kwargs:
13
+ temperatures: 0.0
14
+ max_tokens: 8196
15
+ reasoning_efforts: "low"
16
+ model_sample_probs: null
17
+ output_model: null
18
+ verbose: true
19
+ limit_max_characters: 1000000
20
+ num_samples: ${novelty_generator_number_of_samples}
21
+ seed: 42
22
+
23
+ distributed_job_config:
24
+ _target_: shinka.launch.SlurmCondaJobConfig
25
+ modules:
26
+ - "cuda/12.4"
27
+ - "cudnn/8.9.7"
28
+ - "hpcx/2.20"
29
+ eval_program_path: "shinka/eval_hydra.py"
30
+ conda_env: "shinka"
31
+ time: "00:10:00"
32
+ cpus: 1
33
+ gpus: 0
34
+ mem: "8G"
35
+
36
+ evo_config:
37
+ task_sys_msg: |
38
+ Make a python function that takes as input a random integer and produces a piece of art that is cool and novel. Depending on its input, each output should be diverse from all other outputs produced with different inputs. Please, call this function "def generate_novelty(rng: int) -> str"
39
+
40
+ Different judges will evaluate how 1) diverse, 2) meaningful, and 3) inspirational the generated outputs are for different random seeds. These three criteria will be used to assign your function a "final_novelty_score" for each judge. Only functions excelling across all three dimensions will achieve a high "final_novelty_score".
41
+
42
+ Now bring out your creativity, it's time to surprise us!
43
+ language: "python"
44
+ init_program_path:
45
+ _target_: shinka.utils.add_evolve_markers
46
+ save_dir: ${output_dir}
47
+ initial_file_path: examples/novelty_generator/initial.py
48
+ insert_start: generate_novelty
49
+ insert_end: generate_novelty
50
+ job_type: "slurm_conda"
51
+
52
+ exp_name: "shinka_novelty_generator_llm_judge"
ccevolve/baselines/shinkaevolve/configs/variant/circle_packing_example.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - override /database@_global_: island_large
3
+ - override /evolution@_global_: large_budget
4
+ - override /task@_global_: circle_packing
5
+ - override /cluster@_global_: local
6
+ - _self_
7
+
8
+ variant_suffix: "_example"
ccevolve/baselines/shinkaevolve/configs/variant/default.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ variant_suffix: "_default"
ccevolve/baselines/shinkaevolve/configs/variant/novelty_generator_example.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - override /database@_global_: island_medium
3
+ - override /evolution@_global_: medium_budget
4
+ - override /task@_global_: novelty_generator
5
+ - override /cluster@_global_: local
6
+ - _self_
7
+
8
+ variant_suffix: "_example"
ccevolve/baselines/shinkaevolve/docs/agentic_usage.md ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agentic Usage Guide
2
+
3
+ This guide shows how to run Shinka with coding agents using the project skills:
4
+
5
+ - `shinka-setup`: scaffold task files (`evaluate.py`, `initial.<ext>`, optional run config)
6
+ - `shinka-run`: launch and iterate evolution batches via `shinka_run`
7
+ - `shinka-inspect`: load top-performing programs into a compact context bundle
8
+
9
+ It covers:
10
+ - installing Shinka
11
+ - installing Claude Code and/or Codex CLI
12
+ - copying skill files to the right skill directories
13
+ - running a practical setup -> run -> inspect loop
14
+
15
+ ## 1) Install Shinka
16
+
17
+ From a clean machine:
18
+
19
+ ```bash
20
+ git clone https://github.com/SakanaAI/ShinkaEvolve.git
21
+ cd ShinkaEvolve
22
+
23
+ uv venv --python 3.11
24
+ source .venv/bin/activate
25
+ uv pip install -e .
26
+ ```
27
+
28
+ Set API keys (example):
29
+
30
+ ```bash
31
+ cp .env.example .env 2>/dev/null || true
32
+ # Edit .env and add OPENAI_API_KEY / ANTHROPIC_API_KEY as needed
33
+ ```
34
+
35
+ ## 2) Install Agent CLI(s)
36
+
37
+ Install one or both.
38
+
39
+ ### Claude Code
40
+
41
+ ```bash
42
+ npm install -g @anthropic-ai/claude-code
43
+ claude --version
44
+ ```
45
+
46
+ ### Codex CLI
47
+
48
+ ```bash
49
+ npm install -g @openai/codex
50
+ codex --version
51
+ ```
52
+
53
+ ## 3) Copy Skills to Agent Skill Folders
54
+
55
+ Skill source files in this repo:
56
+
57
+ - `skills/shinka-setup/SKILL.md`
58
+ - `skills/shinka-run/SKILL.md`
59
+ - `skills/shinka-inspect/SKILL.md`
60
+ - optional helper scripts for setup skill:
61
+ - `skills/shinka-setup/scripts/run_evo.py`
62
+ - `skills/shinka-setup/scripts/shinka.yaml`
63
+ - helper script for inspect skill:
64
+ - `skills/shinka-inspect/scripts/inspect_best_programs.py`
65
+
66
+ ### Claude Code skill path
67
+
68
+ ```bash
69
+ mkdir -p ~/.claude/skills/shinka-setup ~/.claude/skills/shinka-run ~/.claude/skills/shinka-inspect
70
+ cp skills/shinka-setup/SKILL.md ~/.claude/skills/shinka-setup/SKILL.md
71
+ cp -R skills/shinka-setup/scripts ~/.claude/skills/shinka-setup/
72
+ cp skills/shinka-run/SKILL.md ~/.claude/skills/shinka-run/SKILL.md
73
+ cp skills/shinka-inspect/SKILL.md ~/.claude/skills/shinka-inspect/SKILL.md
74
+ cp -R skills/shinka-inspect/scripts ~/.claude/skills/shinka-inspect/
75
+ ```
76
+
77
+ ### Codex skill path
78
+
79
+ ```bash
80
+ mkdir -p ~/.codex/skills/shinka-setup ~/.codex/skills/shinka-run ~/.codex/skills/shinka-inspect
81
+ cp skills/shinka-setup/SKILL.md ~/.codex/skills/shinka-setup/SKILL.md
82
+ cp -R skills/shinka-setup/scripts ~/.codex/skills/shinka-setup/
83
+ cp skills/shinka-run/SKILL.md ~/.codex/skills/shinka-run/SKILL.md
84
+ cp skills/shinka-inspect/SKILL.md ~/.codex/skills/shinka-inspect/SKILL.md
85
+ cp -R skills/shinka-inspect/scripts ~/.codex/skills/shinka-inspect/
86
+ ```
87
+
88
+ ## 4) Setup Skill Walkthrough (`shinka-setup`)
89
+
90
+ Ask the agent to scaffold a new task directory and evaluator contract.
91
+
92
+ Example prompt:
93
+
94
+ ```text
95
+ Use shinka-setup to scaffold a new task in examples/my_task.
96
+ Language: python.
97
+ Goal: maximize <metric>.
98
+ ```
99
+
100
+ Illustration (setup flow):
101
+
102
+ ![Claude setup step 1](media/claude_setup_1.png)
103
+
104
+ ![Claude setup step 2](media/claude_setup_2.png)
105
+
106
+ Expected output:
107
+ - `initial.<ext>` with evolve block
108
+ - `evaluate.py` producing `metrics.json` + `correct.json`
109
+ - optional `run_evo.py` / `shinka.yaml` scaffolds when requested
110
+
111
+ ## 5) Run Skill Walkthrough (`shinka-run`)
112
+
113
+ Use `shinka_run` for agent-driven evolution loops.
114
+
115
+ Minimal batch:
116
+
117
+ ```bash
118
+ shinka_run \
119
+ --task-dir examples/my_task \
120
+ --results_dir results/my_task_agent \
121
+ --num_generations 10
122
+ ```
123
+
124
+ With core knobs via `--set`:
125
+
126
+ ```bash
127
+ shinka_run \
128
+ --task-dir examples/my_task \
129
+ --results_dir results/my_task_agent \
130
+ --num_generations 20 \
131
+ --set evo.max_api_costs=0.5 \
132
+ --set evo.llm_models='["gpt-5-mini","gpt-5-nano"]' \
133
+ --set db.num_islands=3 \
134
+ --set db.parent_selection_strategy=weighted
135
+ ```
136
+
137
+ Illustration (run flow):
138
+
139
+ ![Claude run step 1](media/claude_run_1.png)
140
+
141
+ ![Claude run step 2](media/claude_run_2.png)
142
+
143
+ ## 6) Inspect Skill Walkthrough (`shinka-inspect`)
144
+
145
+ Use `shinka-inspect` after one or more batches to generate an agent-ready context file.
146
+
147
+ Minimal:
148
+
149
+ ```bash
150
+ python skills/shinka-inspect/scripts/inspect_best_programs.py \
151
+ --results-dir results/my_task_agent \
152
+ --k 5
153
+ ```
154
+
155
+ With filters and explicit output:
156
+
157
+ ```bash
158
+ python skills/shinka-inspect/scripts/inspect_best_programs.py \
159
+ --results-dir results/my_task_agent \
160
+ --k 8 \
161
+ --min-generation 10 \
162
+ --max-code-chars 5000 \
163
+ --out results/my_task_agent/inspect/top_programs.md
164
+ ```
165
+
166
+ Output:
167
+ - default file: `results/my_task_agent/shinka_inspect_context.md`
168
+ - contains ranking + code snippets for top programs
169
+ - designed to be loaded directly into coding-agent context
170
+
171
+ ## 7) Batch Iteration Rules (Important)
172
+
173
+ When using `shinka-run` skill:
174
+
175
+ - unless user explicitly requests fully autonomous execution, ask for config confirmation between batches
176
+ - keep `--results_dir` the same across continuation batches so prior state can reload
177
+ - change `--results_dir` only when intentionally forking a new run
178
+
179
+ ## 8) Quick Validation Checklist
180
+
181
+ Before first run:
182
+
183
+ - `shinka_run --help` works
184
+ - task dir has `evaluate.py` + `initial.<ext>`
185
+ - API keys are available in environment
186
+ - skill files are installed under `~/.claude/skills` and/or `~/.codex/skills`
187
+
188
+ After each batch:
189
+
190
+ - check run artifacts/logs under the chosen `results_dir`
191
+ - review score and correctness trend
192
+ - run `shinka-inspect` and review the generated context markdown
193
+ - choose next batch config (budget, models, islands, attempts, generations)
ccevolve/baselines/shinkaevolve/docs/async_evolution.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Async Evolution Pipeline
2
+
3
+ Shinka runs evolution through `ShinkaEvolveRunner`.
4
+ Use proposal concurrency to control throughput and emulate prior sync behavior.
5
+
6
+ ## Quick Start
7
+
8
+ ```python
9
+ from shinka.core import ShinkaEvolveRunner, EvolutionConfig
10
+ from shinka.launch import LocalJobConfig
11
+ from shinka.database import DatabaseConfig
12
+
13
+
14
+ evo_config = EvolutionConfig(
15
+ num_generations=50,
16
+ max_proposal_jobs=1, # sync-like proposal behavior
17
+ llm_models=["gpt-5-mini"],
18
+ )
19
+
20
+ runner = ShinkaEvolveRunner(
21
+ evo_config=evo_config,
22
+ job_config=LocalJobConfig(eval_program_path="evaluate.py"),
23
+ db_config=DatabaseConfig(),
24
+ )
25
+
26
+ runner.run()
27
+ ```
28
+
29
+ In async contexts (for example notebooks/async apps), use:
30
+
31
+ ```python
32
+ await runner.run_async()
33
+ ```
34
+
35
+ ## Concurrency Knobs
36
+
37
+ - `max_evaluation_jobs`: max concurrent evaluation jobs.
38
+ - `max_proposal_jobs`: max concurrent proposal generation jobs.
39
+ - `max_db_workers`: max async database worker threads.
40
+
41
+ `max_proposal_jobs=1` gives sequential proposal generation behavior.
42
+
43
+ ## ShinkaEvolveRunner Parameters
44
+
45
+ ```python
46
+ ShinkaEvolveRunner(
47
+ evo_config=EvolutionConfig(...),
48
+ job_config=JobConfig(...),
49
+ db_config=DatabaseConfig(...),
50
+ verbose=True,
51
+ max_evaluation_jobs=2,
52
+ max_proposal_jobs=None, # defaults to evo_config.max_proposal_jobs
53
+ max_db_workers=None, # defaults to evo_config.max_db_workers
54
+ )
55
+ ```
56
+
57
+ ## Recommended Settings
58
+
59
+ | Scale | max_evaluation_jobs | max_proposal_jobs |
60
+ |-------|-------------------|-------------------|
61
+ | Sequential-like | 1-4 | 1 |
62
+ | Small | <= 10 | 2-5 |
63
+ | Medium | 10-50 | 5-10 |
64
+ | Large | 50+ | 10-20 |
65
+
66
+ ## Troubleshooting
67
+
68
+ - Too many requests: reduce `max_proposal_jobs`.
69
+ - Memory pressure: lower `max_proposal_jobs` and `max_evaluation_jobs`.
70
+ - DB contention: lower `max_db_workers`.
71
+ - File I/O errors: ensure `aiofiles` installed.
ccevolve/baselines/shinkaevolve/docs/configuration.md ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Shinka Configuration Guide ⚙️
2
+
3
+ This document is synced to the current code + config files in this repo.
4
+
5
+ ## Default Layers (Source of Truth)
6
+
7
+ Configuration values are resolved in this order (later wins):
8
+
9
+ 1. Dataclass defaults in code:
10
+ - `shinka/core/config.py` (`EvolutionConfig`)
11
+ - `shinka/database/dbase.py` (`DatabaseConfig`)
12
+ - `shinka/launch/scheduler.py` (`LocalJobConfig`, `SlurmDockerJobConfig`, `SlurmCondaJobConfig`)
13
+ 2. Hydra preset YAMLs in `configs/`
14
+ 3. Task/cluster/variant overrides from Hydra composition
15
+ 4. CLI overrides (`shinka_launch ... key=value`, or `shinka_run --set ...`)
16
+ 5. Authoritative `shinka_run` flags (`--results_dir`, `--num_generations`)
17
+
18
+ ## Runtime Config Objects
19
+
20
+ ### EvolutionConfig (`shinka.core.EvolutionConfig`)
21
+
22
+ | Parameter | Type | Default | Description |
23
+ |-----------|------|---------|-------------|
24
+ | `task_sys_msg` | `Optional[str]` | `None` | Task-specific system prompt. |
25
+ | `patch_types` | `List[str]` | `['diff']` | Patch formats; supports `diff`, `full`, `cross`. |
26
+ | `patch_type_probs` | `List[float]` | `[1.0]` | Sampling probabilities for `patch_types` (must sum to 1). |
27
+ | `num_generations` | `int` | `10` | Target number of generations. |
28
+ | `max_proposal_jobs` | `int` | `1` | Max concurrent proposal-generation tasks. |
29
+ | `max_db_workers` | `int` | `4` | Max async DB worker threads. |
30
+ | `max_patch_resamples` | `int` | `3` | Max patch resample loops per novelty attempt. |
31
+ | `max_patch_attempts` | `int` | `5` | Max attempts to produce a syntactically valid patch. |
32
+ | `job_type` | `str` | `'local'` | Job backend: `local`, `slurm_docker`, `slurm_conda`. |
33
+ | `language` | `str` | `'python'` | Language tag for prompts + file handling. |
34
+ | `llm_models` | `List[str]` | `['azure-gpt-4.1-mini']` | Mutation model pool. |
35
+ | `llm_dynamic_selection` | `Optional[Union[str, BanditBase]]` | `None` | Dynamic model selection (`fixed`, `ucb`, `ucb1`, `thompson`, or bandit object). |
36
+ | `llm_dynamic_selection_kwargs` | `dict` | `{}` | kwargs forwarded to selected bandit. |
37
+ | `llm_kwargs` | `dict` | `{}` | kwargs forwarded to LLM calls. |
38
+ | `meta_rec_interval` | `Optional[int]` | `None` | Generation interval for meta recommendations. |
39
+ | `meta_llm_models` | `Optional[List[str]]` | `None` | Model pool for meta-recommendations. |
40
+ | `meta_llm_kwargs` | `dict` | `{}` | kwargs for meta-recommendation LLM calls. |
41
+ | `meta_max_recommendations` | `int` | `5` | Max recommendations produced per meta step. |
42
+ | `sample_single_meta_rec` | `bool` | `True` | Whether to sample one recommendation when multiple exist. |
43
+ | `embedding_model` | `Optional[str]` | `None` | Embedding model for code similarity. |
44
+ | `init_program_path` | `Optional[str]` | `'initial.py'` | Initial program path. |
45
+ | `results_dir` | `Optional[str]` | `None` | Results directory; auto-assigned when `None`. |
46
+ | `max_novelty_attempts` | `int` | `3` | Max novelty loops per generation. |
47
+ | `code_embed_sim_threshold` | `float` | `1.0` | Similarity threshold used by novelty checks. |
48
+ | `novelty_llm_models` | `Optional[List[str]]` | `None` | Optional novelty-judge model pool. |
49
+ | `novelty_llm_kwargs` | `dict` | `{}` | kwargs for novelty-judge LLM calls. |
50
+ | `use_text_feedback` | `bool` | `False` | Include text feedback in mutation prompts. |
51
+ | `max_api_costs` | `Optional[float]` | `None` | API budget cap in USD; stops new submissions at cap. |
52
+ | `inspiration_sort_order` | `str` | `'ascending'` | Inspiration ordering (`ascending`, `chronological`, `none`). |
53
+ | `evolve_prompts` | `bool` | `False` | Enable system-prompt evolution. |
54
+ | `prompt_patch_types` | `List[str]` | `['diff', 'full']` | Patch formats for prompt evolution. |
55
+ | `prompt_patch_type_probs` | `List[float]` | `[0.7, 0.3]` | Sampling probabilities for prompt patch formats. |
56
+ | `prompt_evolution_interval` | `Optional[int]` | `None` | Prompt-evolution interval in generations. |
57
+ | `prompt_archive_size` | `int` | `10` | Prompt archive size. |
58
+ | `prompt_llm_models` | `Optional[List[str]]` | `None` | Prompt-evolution model pool (falls back to `llm_models`). |
59
+ | `prompt_llm_kwargs` | `dict` | `{}` | kwargs for prompt-evolution LLM calls. |
60
+ | `prompt_ucb_exploration_constant` | `float` | `1.0` | UCB exploration constant for prompt sampler. |
61
+ | `prompt_epsilon` | `float` | `0.1` | Epsilon-greedy exploration for prompt sampler. |
62
+ | `prompt_evo_top_k_programs` | `int` | `3` | Number of top programs used during prompt evolution. |
63
+ | `prompt_percentile_recompute_interval` | `int` | `20` | Generations between prompt percentile recomputations. |
64
+
65
+ ### DatabaseConfig (`shinka.database.DatabaseConfig`)
66
+
67
+ | Parameter | Type | Default | Description |
68
+ |-----------|------|---------|-------------|
69
+ | `db_path` | `Optional[str]` | `None` | SQLite DB path. |
70
+ | `num_islands` | `int` | `4` | Number of islands. |
71
+ | `archive_size` | `int` | `100` | Global archive size cap. |
72
+ | `elite_selection_ratio` | `float` | `0.3` | Fraction of elite inspirations. |
73
+ | `num_archive_inspirations` | `int` | `5` | Number of archive inspirations sampled. |
74
+ | `num_top_k_inspirations` | `int` | `2` | Number of top-k inspirations sampled. |
75
+ | `migration_interval` | `int` | `10` | Generations between migration events. |
76
+ | `migration_rate` | `float` | `0.1` | Fraction of programs migrated at migration events. |
77
+ | `island_elitism` | `bool` | `True` | Preserve best programs on islands. |
78
+ | `enforce_island_separation` | `bool` | `True` | Restrict inspiration sampling to source island. |
79
+ | `island_selection_strategy` | `str` | `'uniform'` | Island sampler: `uniform`, `equal`, `proportional`, `weighted`. |
80
+ | `enable_dynamic_islands` | `bool` | `False` | Enable stagnation-triggered island spawning. |
81
+ | `stagnation_threshold` | `int` | `100` | No-improvement generations before spawn. |
82
+ | `island_spawn_strategy` | `str` | `'initial'` | Spawn seed: `initial`, `best`, `archive_random`. |
83
+ | `island_spawn_subtree_size` | `int` | `1` | Number of copied programs when spawning. |
84
+ | `parent_selection_strategy` | `str` | `'power_law'` | Parent selector: `weighted`, `power_law`, `beam_search`. |
85
+ | `exploitation_alpha` | `float` | `1.0` | Power-law strength for parent selection. |
86
+ | `exploitation_ratio` | `float` | `0.2` | Probability of selecting from archive. |
87
+ | `parent_selection_lambda` | `float` | `10.0` | Sigmoid sharpness for weighted parent selection. |
88
+ | `num_beams` | `int` | `5` | Beam count for beam-search parent selection. |
89
+ | `archive_selection_strategy` | `str` | `'fitness'` | Archive replacement strategy: `fitness` or `crowding`. |
90
+ | `archive_criteria` | `Dict[str, float]` | `{'combined_score': 1.0}` | Weighted criteria for fitness archive scoring. |
91
+
92
+ ### Job Configs (`shinka.launch.*JobConfig`)
93
+
94
+ `JobConfig` base fields:
95
+
96
+ | Parameter | Type | Default | Description |
97
+ |-----------|------|---------|-------------|
98
+ | `eval_program_path` | `Optional[str]` | `'evaluate.py'` | Evaluation script path. |
99
+ | `extra_cmd_args` | `Dict[str, Any]` | `{}` | Extra CLI args forwarded to eval script. |
100
+
101
+ `LocalJobConfig` adds:
102
+
103
+ | Parameter | Type | Default | Description |
104
+ |-----------|------|---------|-------------|
105
+ | `time` | `Optional[str]` | `None` | Optional timeout (`HH:MM:SS`). |
106
+ | `conda_env` | `Optional[str]` | `None` | Optional conda env for local execution. |
107
+
108
+ `SlurmDockerJobConfig` adds:
109
+
110
+ | Parameter | Type | Default | Description |
111
+ |-----------|------|---------|-------------|
112
+ | `image` | `str` | `'ubuntu:latest'` | Docker image. |
113
+ | `image_tar_path` | `Optional[str]` | `None` | Optional image tar for upload/load. |
114
+ | `docker_flags` | `str` | `''` | Extra docker flags. |
115
+ | `partition` | `str` | `'gpu'` | SLURM partition. |
116
+ | `time` | `str` | `'01:00:00'` | SLURM time limit. |
117
+ | `cpus` | `int` | `1` | CPU request. |
118
+ | `gpus` | `int` | `1` | GPU request. |
119
+ | `mem` | `Optional[str]` | `'8G'` | Memory request. |
120
+
121
+ `SlurmCondaJobConfig` adds:
122
+
123
+ | Parameter | Type | Default | Description |
124
+ |-----------|------|---------|-------------|
125
+ | `conda_env` | `str` | `''` | Conda environment name. |
126
+ | `modules` | `Optional[List[str]]` | `None` | Modules to load (normalized to `[]` at runtime). |
127
+ | `partition` | `str` | `'gpu'` | SLURM partition. |
128
+ | `time` | `str` | `'01:00:00'` | SLURM time limit. |
129
+ | `cpus` | `int` | `1` | CPU request. |
130
+ | `gpus` | `int` | `1` | GPU request. |
131
+ | `mem` | `Optional[str]` | `'8G'` | Memory request. |
132
+
133
+ ## Hydra Presets In `configs/`
134
+
135
+ ### Evolution Presets
136
+
137
+ All `configs/evolution/*.yaml` override `EvolutionConfig` defaults only for listed keys. Unlisted keys inherit dataclass defaults.
138
+
139
+ #### `configs/evolution/small_budget.yaml`
140
+
141
+ ```yaml
142
+ max_evaluation_jobs: 1
143
+
144
+ evo_config:
145
+ patch_types: ["diff", "full"]
146
+ patch_type_probs: [0.5, 0.5]
147
+ num_generations: 20
148
+ max_proposal_jobs: 1
149
+ max_db_workers: 4
150
+ max_patch_attempts: 10
151
+ llm_models: ["gpt-4.1"]
152
+ llm_dynamic_selection: null
153
+ embedding_model: "text-embedding-3-small"
154
+ results_dir: ${output_dir}
155
+ ```
156
+
157
+ #### `configs/evolution/medium_budget.yaml`
158
+
159
+ ```yaml
160
+ max_evaluation_jobs: 10
161
+
162
+ evo_config:
163
+ patch_types: ["diff", "full", "cross"]
164
+ patch_type_probs: [0.6, 0.3, 0.1]
165
+ num_generations: 100
166
+ max_proposal_jobs: 1
167
+ max_db_workers: 4
168
+ max_patch_resamples: 3
169
+ max_patch_attempts: 3
170
+ llm_models:
171
+ - "gemini-2.5-pro"
172
+ - "gemini-2.5-flash"
173
+ - "gpt-4.1-mini"
174
+ - "gpt-4.1-nano"
175
+ - "us.anthropic.claude-sonnet-4-20250514-v1:0"
176
+ - "o4-mini"
177
+ llm_dynamic_selection: ucb
178
+ llm_kwargs:
179
+ temperatures: [0.0, 0.5, 1.0]
180
+ max_tokens: 16384
181
+ meta_rec_interval: 10
182
+ meta_llm_models: ["gpt-4.1"]
183
+ meta_llm_kwargs:
184
+ temperatures: [0.0]
185
+ embedding_model: "text-embedding-3-small"
186
+ results_dir: ${output_dir}
187
+ ```
188
+
189
+ #### `configs/evolution/large_budget.yaml`
190
+
191
+ ```yaml
192
+ max_evaluation_jobs: 6
193
+
194
+ evo_config:
195
+ patch_types: ["diff", "full", "cross"]
196
+ patch_type_probs: [0.4, 0.4, 0.2]
197
+ num_generations: 300
198
+ max_proposal_jobs: 1
199
+ max_db_workers: 4
200
+ max_patch_resamples: 3
201
+ max_patch_attempts: 3
202
+ llm_models:
203
+ - "gpt-4.1"
204
+ - "gpt-4.1-mini"
205
+ - "gpt-4.1-nano"
206
+ - "us.anthropic.claude-sonnet-4-20250514-v1:0"
207
+ - "o4-mini"
208
+ llm_dynamic_selection: ucb
209
+ llm_kwargs:
210
+ temperatures: [0.0, 0.5, 1.0]
211
+ max_tokens: 16384
212
+ meta_rec_interval: 10
213
+ meta_llm_models: ["gpt-4.1"]
214
+ meta_llm_kwargs:
215
+ temperatures: [0.0]
216
+ embedding_model: "text-embedding-3-small"
217
+ results_dir: ${output_dir}
218
+ ```
219
+
220
+ ### Database Presets
221
+
222
+ All `configs/database/*.yaml` override `DatabaseConfig` defaults only for listed keys.
223
+
224
+ #### `configs/database/island_small.yaml`
225
+
226
+ ```yaml
227
+ db_config:
228
+ db_path: "evolution_db.sqlite"
229
+ num_islands: 2
230
+ archive_size: 20
231
+ exploitation_ratio: 0.2
232
+ elite_selection_ratio: 0.3
233
+ num_archive_inspirations: 4
234
+ num_top_k_inspirations: 2
235
+ migration_interval: 10
236
+ migration_rate: 0.1
237
+ island_elitism: true
238
+ ```
239
+
240
+ #### `configs/database/island_medium.yaml`
241
+
242
+ ```yaml
243
+ db_config:
244
+ db_path: "evolution_db.sqlite"
245
+ num_islands: 2
246
+ archive_size: 40
247
+ exploitation_ratio: 0.2
248
+ elite_selection_ratio: 0.3
249
+ num_archive_inspirations: 4
250
+ num_top_k_inspirations: 2
251
+ migration_interval: 10
252
+ migration_rate: 0.0
253
+ island_elitism: true
254
+ parent_selection_strategy: "weighted"
255
+ parent_selection_lambda: 10.0
256
+ ```
257
+
258
+ #### `configs/database/island_large.yaml`
259
+
260
+ ```yaml
261
+ db_config:
262
+ db_path: "evolution_db.sqlite"
263
+ num_islands: 5
264
+ archive_size: 40
265
+ elite_selection_ratio: 0.3
266
+ num_archive_inspirations: 4
267
+ num_top_k_inspirations: 2
268
+ migration_interval: 10
269
+ migration_rate: 0.1
270
+ island_elitism: true
271
+ parent_selection_strategy: "weighted"
272
+ exploitation_alpha: 1.0
273
+ exploitation_ratio: 0.2
274
+ parent_selection_lambda: 10.0
275
+ ```
276
+
277
+ ### Cluster Presets
278
+
279
+ - `configs/cluster/local.yaml`
280
+ - `job_config: LocalJobConfig`
281
+ - `job_config.eval_program_path: ${distributed_job_config.eval_program_path}`
282
+ - `evo_config.job_type: "local"`
283
+ - `configs/cluster/remote.yaml`
284
+ - `job_config: ${distributed_job_config}`
285
+ - `configs/cluster/gcp.yaml`
286
+ - inherits `remote`
287
+ - overrides `distributed_job_config.partition: "a3,aisci"`
288
+
289
+ ### Task Presets (Current)
290
+
291
+ Only these task files currently exist:
292
+
293
+ - `configs/task/circle_packing.yaml`
294
+ - `configs/task/novelty_generator.yaml`
295
+
296
+ Both define task-specific `evaluate_function`, `distributed_job_config`, and `evo_config` task prompt/init path.
297
+
298
+ ## Current Hydra Composition Defaults
299
+
300
+ `configs/config.yaml` defaults chain:
301
+
302
+ ```yaml
303
+ defaults:
304
+ - _self_
305
+ - database@_global_: island_small
306
+ - evolution@_global_: small_budget
307
+ - task@_global_: circle_packing
308
+ - cluster@_global_: local
309
+ - variant@_global_: circle_packing_example
310
+ ```
311
+
312
+ Then `variant=circle_packing_example` overrides to:
313
+
314
+ - `database=island_large`
315
+ - `evolution=large_budget`
316
+ - `task=circle_packing`
317
+ - `cluster=local`
318
+
319
+ So default `shinka_launch` behavior is effectively the `circle_packing_example` preset stack unless you override.
320
+
321
+ ## `shinka_run` Config File Schema
322
+
323
+ `shinka_run --config-fname <yaml>` accepts:
324
+
325
+ - Namespaces: `evo`, `db`, `job` (aliases: `evo_config`, `db_config`, `job_config`)
326
+ - Runner keys: `max_evaluation_jobs`, `max_proposal_jobs`, `max_db_workers`, `verbose`, `debug`
327
+
328
+ Precedence for `shinka_run`:
329
+
330
+ 1. defaults from CLI builder
331
+ 2. config YAML (`--config-fname`)
332
+ 3. `--set` overrides
333
+ 4. authoritative flags:
334
+ - `--results_dir` always sets `evo.results_dir`
335
+ - `--num_generations` always sets `evo.num_generations`
336
+
337
+ ## Current Config Directory Structure
338
+
339
+ ```text
340
+ configs/
341
+ ├── config.yaml
342
+ ├── cluster/
343
+ │ ├── gcp.yaml
344
+ │ ├── local.yaml
345
+ │ └── remote.yaml
346
+ ├── database/
347
+ │ ├── island_large.yaml
348
+ │ ├── island_medium.yaml
349
+ │ └── island_small.yaml
350
+ ├── evolution/
351
+ │ ├── large_budget.yaml
352
+ │ ├── medium_budget.yaml
353
+ │ └── small_budget.yaml
354
+ ├── task/
355
+ │ ├── circle_packing.yaml
356
+ │ └── novelty_generator.yaml
357
+ └── variant/
358
+ ├── circle_packing_example.yaml
359
+ ├── default.yaml
360
+ └── novelty_generator_example.yaml
361
+ ```
362
+
363
+ ## Quick Valid Overrides
364
+
365
+ Hydra launch:
366
+
367
+ ```bash
368
+ shinka_launch \
369
+ task=novelty_generator \
370
+ database=island_medium \
371
+ evolution=medium_budget \
372
+ cluster=local \
373
+ evo_config.num_generations=50 \
374
+ evo_config.max_api_costs=25.0
375
+ ```
376
+
377
+ `shinka_run`:
378
+
379
+ ```bash
380
+ shinka_run \
381
+ --task-dir examples/circle_packing \
382
+ --results_dir results/circle_agent \
383
+ --num_generations 40 \
384
+ --max-evaluation-jobs 6 \
385
+ --set evo.llm_models='["gpt-5-mini","gpt-5-nano"]' \
386
+ --set evo.llm_dynamic_selection=ucb \
387
+ --set db.num_islands=3
388
+ ```