hamverbot committed on
Commit
ef9e35a
·
verified ·
1 Parent(s): 74299ca

Upload results/benchmark_200K_a10g_2026-05-05.json

Browse files
results/benchmark_200K_a10g_2026-05-05.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "max_rows": 200000,
4
+ "budget": 10000.0,
5
+ "T": 10000,
6
+ "vpc": 50.0,
7
+ "k": 0.8,
8
+ "n_runs": 5,
9
+ "seed": 42,
10
+ "ctr_test_auc": 0.6947,
11
+ "algorithms": ["DualOGD", "TwoSidedDual", "ValueShading", "RLB", "Linear", "Threshold"],
12
+ "run_date": "2026-05-05",
13
+ "hardware": "a10g-small",
14
+ "data_size": "200K rows (Criteo_x4)",
15
+ "auction_type": "first-price",
16
+ "market_price": "log-normal conditioned on pCTR features"
17
+ },
18
+ "aggregated": {
19
+ "TwoSidedDual": {
20
+ "clicks_mean": 284.6,
21
+ "clicks_std": 8.3,
22
+ "cpc_mean": 33.41,
23
+ "cpc_std": 0.86,
24
+ "budget_used_mean": 0.950,
25
+ "budget_used_std": 0.005,
26
+ "win_rate_mean": 0.076,
27
+ "win_rate_std": 0.002
28
+ },
29
+ "ValueShading": {
30
+ "clicks_mean": 257.8,
31
+ "clicks_std": 7.4,
32
+ "cpc_mean": 38.82,
33
+ "cpc_std": 1.14,
34
+ "budget_used_mean": 1.0,
35
+ "budget_used_std": 0.0,
36
+ "win_rate_mean": 0.082,
37
+ "win_rate_std": 0.002
38
+ },
39
+ "DualOGD": {
40
+ "clicks_mean": 248.0,
41
+ "clicks_std": 9.4,
42
+ "cpc_mean": 31.18,
43
+ "cpc_std": 1.13,
44
+ "budget_used_mean": 0.773,
45
+ "budget_used_std": 0.027,
46
+ "win_rate_mean": 0.066,
47
+ "win_rate_std": 0.002
48
+ },
49
+ "RLB": {
50
+ "clicks_mean": 135.8,
51
+ "clicks_std": 13.3,
52
+ "cpc_mean": 74.34,
53
+ "cpc_std": 7.16,
54
+ "budget_used_mean": 1.0,
55
+ "budget_used_std": 0.0,
56
+ "win_rate_mean": 0.042,
57
+ "win_rate_std": 0.004
58
+ },
59
+ "Threshold": {
60
+ "clicks_mean": 71.0,
61
+ "clicks_std": 4.1,
62
+ "cpc_mean": 70.36,
63
+ "cpc_std": 3.96,
64
+ "budget_used_mean": 0.0,
65
+ "budget_used_std": 0.0,
66
+ "win_rate_mean": 0.017,
67
+ "win_rate_std": 0.001
68
+ },
69
+ "Linear": {
70
+ "clicks_mean": 63.6,
71
+ "clicks_std": 6.0,
72
+ "cpc_mean": 79.20,
73
+ "cpc_std": 6.17,
74
+ "budget_used_mean": 0.0,
75
+ "budget_used_std": 0.0,
76
+ "win_rate_mean": 0.020,
77
+ "win_rate_std": 0.003
78
+ }
79
+ },
80
+ "notes": {
81
+ "data_note": "Linear and Threshold report budget_used_mean = 0.0 because they were configured with an infinite internal budget, so their spend was not tracked against the 10000 budget. They actually spent ~5000 each (about 50% of the budget). The set_budget call is being fixed in a follow-up.",
82
+ "ctr_model": "LogisticRegression (max_iter=500, C=0.1), AUC=0.6947",
83
+ "two_sided_dual_advantage": "TwoSidedDual beats DualOGD by spending 95% vs 77% of the budget, which yields about 15% more clicks (284.6 vs 248.0 on average). The floor multiplier ν prevents the dual from becoming too conservative.",
84
+ "rl_performance": "RLB underperforms because it needs more data to learn its Q-table. With only 10K auctions per run, the tabular approach hasn't covered the state space.",
85
+ "citation": "All algorithms documented in RESEARCH_RESOURCES.md. Primary paper: Wang et al. 2023, arXiv:2304.13477"
86
+ }
87
+ }