SuperComputer committed on
Commit
4fc6159
·
verified ·
1 Parent(s): 886af36

Delete UCI_N6

Browse files
UCI_N6/jmlr_tables.tex DELETED
@@ -1,469 +0,0 @@
1
- % JMLR-ready tables for two-column papers
2
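- % Required packages (no siunitx); the tables below additionally need \usepackage{amsmath} (\text), \usepackage{amssymb} (\blacktriangle, \blacktriangledown) and \usepackage{xcolor} (\textcolor):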
3
- % \usepackage{booktabs}
4
- % \usepackage{threeparttable}
5
- % \usepackage{threeparttablex} % for TableNotes + longtable
6
- % \usepackage{longtable}
7
- % Optional for landscape: \usepackage{pdflscape}
8
-
9
- \begin{table*}[t]
10
- \centering
11
- \begin{threeparttable}
12
- \caption{Model dimension ranges (min--max across all datasets and folds). Input/Output dimensions follow dataset label spaces.}
13
- \label{tab:model-ranges}
14
- \begin{tabular}{l r r r r r r}
15
- \toprule
16
- Algorithm & Width & Depth & Parameters & Padding & Input dim & Output dim \\
17
- \midrule
18
- AOL & 32--512 & 6--6 & 502--1245037 & 10--524 & 3--262 & 2--100 \\
19
- Orthogonal & 32--512 & 6--6 & 507--1245042 & 10--524 & 3--262 & 2--100 \\
20
- Sandwich & 32--512 & 6--6 & 1057--2620542 & 10--524 & 3--262 & 2--100 \\
21
- SLL & 32--512 & 6--6 & 2326--1622697 & 10--524 & 3--262 & 2--100 \\
22
- LDLT-L & 32--512 & 6--6 & 5480--1454611 & 10--524 & 3--262 & 2--100 \\
23
- LDLT-R & 32--512 & 6--6 & 5577--1588756 & 10--524 & 3--262 & 2--100 \\
24
- \bottomrule
25
- \end{tabular}
26
- \end{threeparttable}
27
- \end{table*}
28
-
29
- \begin{table*}[t]
30
- \centering
31
- \begin{threeparttable}
32
- \caption{Sorted mean$\pm$std across $N$ datasets for each algorithm.}
33
- \label{tab:metric_summary}
34
- \begin{tabular}{l r lllll}
35
- \toprule
36
- & & & \multicolumn{4}{c}{Certified Accuracy} \\
37
- \cmidrule(lr){4-7}
38
- Algorithm & $N$ & Accuracy & 36/255 & 72/255 & 108/255 & 255/255 \\
39
- \midrule
40
- AOL & 121 & 0.6049\,\tiny$\pm$0.2396 & 0.2918\,\tiny$\pm$0.3122 & 0.2161\,\tiny$\pm$0.2949 & 0.1740\,\tiny$\pm$0.2722 & 0.0837\,\tiny$\pm$0.1782 \\
41
- Orthogonal & 121 & 0.7036\,\tiny$\pm$0.1911 & 0.6071\,\tiny$\pm$0.2396 & 0.5068\,\tiny$\pm$0.2668 & 0.4233\,\tiny$\pm$0.2781 & 0.1983\,\tiny$\pm$0.2361 \\
42
- Sandwich & 121 & 0.7163\,\tiny$\pm$0.1879 & \textbf{0.6284\,\tiny$\pm$0.2414} & \textbf{0.5525\,\tiny$\pm$0.2633} & \textbf{0.4743\,\tiny$\pm$0.2773} & \textbf{0.2476\,\tiny$\pm$0.2509} \\
43
- SLL & 121 & 0.7011\,\tiny$\pm$0.1939 & 0.5866\,\tiny$\pm$0.2459 & 0.4810\,\tiny$\pm$0.2741 & 0.3932\,\tiny$\pm$0.2829 & 0.1882\,\tiny$\pm$0.2325 \\
44
- \midrule
45
- LDLT-L & 121 & \textbf{0.7245\,\tiny$\pm$0.1908} & 0.4665\,\tiny$\pm$0.3341 & 0.3863\,\tiny$\pm$0.3235 & 0.3226\,\tiny$\pm$0.3084 & 0.1562\,\tiny$\pm$0.2269 \\
46
- LDLT-R & 121 & 0.6970\,\tiny$\pm$0.2021 & 0.6114\,\tiny$\pm$0.2325 & 0.5253\,\tiny$\pm$0.2552 & 0.4516\,\tiny$\pm$0.2645 & 0.2138\,\tiny$\pm$0.2275 \\
47
- \bottomrule
48
- \end{tabular}
49
- \end{threeparttable}
50
- \end{table*}
51
-
52
- \begin{table}[t]
53
- \centering
54
- \begin{threeparttable}
55
- {\small
56
- \caption{Overall comparison on Mean Accuracy: average rank (lower is better) with Iman--Davenport $F=34.41$ ($\mathrm{df}_1=5$, $\mathrm{df}_2=600$), $p=1.11\times10^{-16}$; Nemenyi CD$=0.969$. Counts are significant wins/losses after Holm within-metric at $\alpha=0.05$.}
57
- \label{tab:overall:mean_test_acc}
58
- \setlength{\tabcolsep}{4pt}
59
- \begin{tabular}{@{}l r r r r r r@{}}
60
- \toprule
61
- Algorithm & \shortstack{Avg \\ rank} $\downarrow$ & \shortstack{sig \\ wins} & \shortstack{sig \\ losses} & \shortstack{net \\ wins} & \shortstack{win \\ share} & mean $r$ \\
62
- \midrule
63
- LDLT-L & 2.525 & 4 & 0 & 4 & 0.800 & 0.533 \\
64
- Sandwich & 2.773 & 4 & 0 & 4 & 0.800 & 0.421 \\
65
- Orthogonal & 3.479 & 1 & 2 & -1 & 0.200 & 0.654 \\
66
- SLL & 3.545 & 1 & 2 & -1 & 0.200 & 0.621 \\
67
- LDLT-R & 3.628 & 1 & 2 & -1 & 0.200 & 0.560 \\
68
- AOL & 5.050 & 0 & 5 & -5 & 0.000 & 0.000 \\
69
- \bottomrule
70
- \end{tabular}
71
- }
72
- \end{threeparttable}
73
- \end{table}
74
-
75
- \begin{table}[t]
76
- \centering
77
- \begin{threeparttable}
78
- {\small
79
- \caption{Overall comparison on Mean Certified Accuracy (36/255): average rank (lower is better) with Iman--Davenport $F=71.33$ ($\mathrm{df}_1=5$, $\mathrm{df}_2=600$), $p=1.11\times10^{-16}$; Nemenyi CD$=0.969$. Counts are significant wins/losses after Holm within-metric at $\alpha=0.05$.}
80
- \label{tab:overall:mean_cert_acc_36}
81
- \setlength{\tabcolsep}{4pt}
82
- \begin{tabular}{@{}l r r r r r r@{}}
83
- \toprule
84
- Algorithm & \shortstack{Avg \\ rank} $\downarrow$ & \shortstack{sig \\ wins} & \shortstack{sig \\ losses} & \shortstack{net \\ wins} & \shortstack{win \\ share} & mean $r$ \\
85
- \midrule
86
- Sandwich & 2.380 & 4 & 0 & 4 & 0.800 & 0.574 \\
87
- LDLT-R & 2.694 & 3 & 0 & 3 & 0.600 & 0.580 \\
88
- SLL & 3.128 & 2 & 2 & 0 & 0.400 & 0.696 \\
89
- Orthogonal & 3.252 & 2 & 1 & 1 & 0.400 & 0.671 \\
90
- LDLT-L & 4.004 & 1 & 4 & -3 & 0.200 & 0.645 \\
91
- AOL & 5.541 & 0 & 5 & -5 & 0.000 & 0.000 \\
92
- \bottomrule
93
- \end{tabular}
94
- }
95
- \end{threeparttable}
96
- \end{table}
97
-
98
- \begin{table}[t]
99
- \centering
100
- \begin{threeparttable}
101
- {\small
102
- \caption{Overall comparison on Mean Certified Accuracy (72/255): average rank (lower is better) with Iman--Davenport $F=111.54$ ($\mathrm{df}_1=5$, $\mathrm{df}_2=600$), $p=1.11\times10^{-16}$; Nemenyi CD$=0.969$. Counts are significant wins/losses after Holm within-metric at $\alpha=0.05$.}
103
- \label{tab:overall:mean_cert_acc_72}
104
- \setlength{\tabcolsep}{4pt}
105
- \begin{tabular}{@{}l r r r r r r@{}}
106
- \toprule
107
- Algorithm & \shortstack{Avg \\ rank} $\downarrow$ & \shortstack{sig \\ wins} & \shortstack{sig \\ losses} & \shortstack{net \\ wins} & \shortstack{win \\ share} & mean $r$ \\
108
- \midrule
109
- Sandwich & 2.041 & 5 & 0 & 5 & 1.000 & 0.599 \\
110
- LDLT-R & 2.562 & 4 & 1 & 3 & 0.800 & 0.540 \\
111
- SLL & 3.260 & 2 & 2 & 0 & 0.400 & 0.729 \\
112
- Orthogonal & 3.273 & 2 & 2 & 0 & 0.400 & 0.707 \\
113
- LDLT-L & 4.153 & 1 & 4 & -3 & 0.200 & 0.776 \\
114
- AOL & 5.711 & 0 & 5 & -5 & 0.000 & 0.000 \\
115
- \bottomrule
116
- \end{tabular}
117
- }
118
- \end{threeparttable}
119
- \end{table}
120
-
121
- \begin{table}[t]
122
- \centering
123
- \begin{threeparttable}
124
- {\small
125
- \caption{Overall comparison on Mean Certified Accuracy (108/255): average rank (lower is better) with Iman--Davenport $F=129.26$ ($\mathrm{df}_1=5$, $\mathrm{df}_2=600$), $p=1.11\times10^{-16}$; Nemenyi CD$=0.969$. Counts are significant wins/losses after Holm within-metric at $\alpha=0.05$.}
126
- \label{tab:overall:mean_cert_acc_108}
127
- \setlength{\tabcolsep}{4pt}
128
- \begin{tabular}{@{}l r r r r r r@{}}
129
- \toprule
130
- Algorithm & \shortstack{Avg \\ rank} $\downarrow$ & \shortstack{sig \\ wins} & \shortstack{sig \\ losses} & \shortstack{net \\ wins} & \shortstack{win \\ share} & mean $r$ \\
131
- \midrule
132
- Sandwich & 2.083 & 4 & 0 & 4 & 0.800 & 0.704 \\
133
- LDLT-R & 2.277 & 4 & 0 & 4 & 0.800 & 0.605 \\
134
- Orthogonal & 3.248 & 3 & 2 & 1 & 0.600 & 0.546 \\
135
- SLL & 3.430 & 2 & 3 & -1 & 0.400 & 0.702 \\
136
- LDLT-L & 4.236 & 1 & 4 & -3 & 0.200 & 0.828 \\
137
- AOL & 5.727 & 0 & 5 & -5 & 0.000 & 0.000 \\
138
- \bottomrule
139
- \end{tabular}
140
- }
141
- \end{threeparttable}
142
- \end{table}
143
-
144
- \begin{table}[t]
145
- \centering
146
- \begin{threeparttable}
147
- {\small
148
- \caption{Overall comparison on Mean Certified Accuracy (255/255): average rank (lower is better) with Iman--Davenport $F=91.19$ ($\mathrm{df}_1=5$, $\mathrm{df}_2=600$), $p=1.11\times10^{-16}$; Nemenyi CD$=0.969$. Counts are significant wins/losses after Holm within-metric at $\alpha=0.05$.}
149
- \label{tab:overall:mean_cert_acc_255}
150
- \setlength{\tabcolsep}{4pt}
151
- \begin{tabular}{@{}l r r r r r r@{}}
152
- \toprule
153
- Algorithm & \shortstack{Avg \\ rank} $\downarrow$ & \shortstack{sig \\ wins} & \shortstack{sig \\ losses} & \shortstack{net \\ wins} & \shortstack{win \\ share} & mean $r$ \\
154
- \midrule
155
- Sandwich & 2.037 & 5 & 0 & 5 & 1.000 & 0.652 \\
156
- LDLT-R & 2.508 & 4 & 1 & 3 & 0.800 & 0.597 \\
157
- SLL & 3.322 & 2 & 2 & 0 & 0.400 & 0.679 \\
158
- Orthogonal & 3.459 & 2 & 2 & 0 & 0.400 & 0.653 \\
159
- LDLT-L & 4.194 & 1 & 4 & -3 & 0.200 & 0.850 \\
160
- AOL & 5.479 & 0 & 5 & -5 & 0.000 & 0.000 \\
161
- \bottomrule
162
- \end{tabular}
163
- }
164
- \end{threeparttable}
165
- \end{table}
166
-
167
- \begin{table}[t]
168
- \centering
169
- \begin{threeparttable}
170
- {
171
- \caption{Pairwise Wilcoxon outcomes for Mean Accuracy (Holm within-metric at $\alpha=0.05$): row vs. column (\textcolor{green}{$\blacktriangle$} win, \textcolor{red}{$\blacktriangledown$} loss, $\cdot$ none).}
172
- \label{tab:signif:mean_test_acc}
173
- \setlength{\tabcolsep}{3pt}
174
- \begin{tabular}{@{}l c c c c c c @{}}
175
- \toprule
176
- & AOL & LDLT-L & LDLT-R & Orthogonal & Sandwich & SLL \\
177
- \midrule
178
- AOL & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
179
- LDLT-L & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
180
- LDLT-R & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
181
- Orthogonal & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
182
- Sandwich & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
183
- SLL & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
184
- \bottomrule
185
- \end{tabular}
186
- }
187
- \end{threeparttable}
188
- \end{table}
189
-
190
- \begin{table}[t]
191
- \centering
192
- \begin{threeparttable}
193
- {
194
- \caption{Pairwise Wilcoxon outcomes for Mean Certified Accuracy (36/255) (Holm within-metric at $\alpha=0.05$): row vs. column (\textcolor{green}{$\blacktriangle$} win, \textcolor{red}{$\blacktriangledown$} loss, $\cdot$ none).}
195
- \label{tab:signif:mean_cert_acc_36}
196
- \setlength{\tabcolsep}{3pt}
197
- \begin{tabular}{@{}l c c c c c c @{}}
198
- \toprule
199
- & AOL & LDLT-L & LDLT-R & Orthogonal & Sandwich & SLL \\
200
- \midrule
201
- AOL & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
202
- LDLT-L & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
203
- LDLT-R & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & $\cdot$ & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
204
- Orthogonal & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
205
- Sandwich & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
206
- SLL & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
207
- \bottomrule
208
- \end{tabular}
209
- }
210
- \end{threeparttable}
211
- \end{table}
212
-
213
- \begin{table}[t]
214
- \centering
215
- \begin{threeparttable}
216
- {
217
- \caption{Pairwise Wilcoxon outcomes for Mean Certified Accuracy (72/255) (Holm within-metric at $\alpha=0.05$): row vs. column (\textcolor{green}{$\blacktriangle$} win, \textcolor{red}{$\blacktriangledown$} loss, $\cdot$ none).}
218
- \label{tab:signif:mean_cert_acc_72}
219
- \setlength{\tabcolsep}{3pt}
220
- \begin{tabular}{@{}l c c c c c c @{}}
221
- \toprule
222
- & AOL & LDLT-L & LDLT-R & Orthogonal & Sandwich & SLL \\
223
- \midrule
224
- AOL & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
225
- LDLT-L & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
226
- LDLT-R & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{green}{$\blacktriangle$} \\
227
- Orthogonal & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
228
- Sandwich & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
229
- SLL & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
230
- \bottomrule
231
- \end{tabular}
232
- }
233
- \end{threeparttable}
234
- \end{table}
235
-
236
- \begin{table}[t]
237
- \centering
238
- \begin{threeparttable}
239
- {
240
- \caption{Pairwise Wilcoxon outcomes for Mean Certified Accuracy (108/255) (Holm within-metric at $\alpha=0.05$): row vs. column (\textcolor{green}{$\blacktriangle$} win, \textcolor{red}{$\blacktriangledown$} loss, $\cdot$ none).}
241
- \label{tab:signif:mean_cert_acc_108}
242
- \setlength{\tabcolsep}{3pt}
243
- \begin{tabular}{@{}l c c c c c c @{}}
244
- \toprule
245
- & AOL & LDLT-L & LDLT-R & Orthogonal & Sandwich & SLL \\
246
- \midrule
247
- AOL & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
248
- LDLT-L & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
249
- LDLT-R & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
250
- Orthogonal & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{green}{$\blacktriangle$} \\
251
- Sandwich & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
252
- SLL & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
253
- \bottomrule
254
- \end{tabular}
255
- }
256
- \end{threeparttable}
257
- \end{table}
258
-
259
- \begin{table}[t]
260
- \centering
261
- \begin{threeparttable}
262
- {
263
- \caption{Pairwise Wilcoxon outcomes for Mean Certified Accuracy (255/255) (Holm within-metric at $\alpha=0.05$): row vs. column (\textcolor{green}{$\blacktriangle$} win, \textcolor{red}{$\blacktriangledown$} loss, $\cdot$ none).}
264
- \label{tab:signif:mean_cert_acc_255}
265
- \setlength{\tabcolsep}{3pt}
266
- \begin{tabular}{@{}l c c c c c c @{}}
267
- \toprule
268
- & AOL & LDLT-L & LDLT-R & Orthogonal & Sandwich & SLL \\
269
- \midrule
270
- AOL & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
271
- LDLT-L & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{red}{$\blacktriangledown$} \\
272
- LDLT-R & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & \textcolor{green}{$\blacktriangle$} \\
273
- Orthogonal & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
274
- Sandwich & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & $\cdot$ & \textcolor{green}{$\blacktriangle$} \\
275
- SLL & \textcolor{green}{$\blacktriangle$} & \textcolor{green}{$\blacktriangle$} & \textcolor{red}{$\blacktriangledown$} & $\cdot$ & \textcolor{red}{$\blacktriangledown$} & $\cdot$ \\
276
- \bottomrule
277
- \end{tabular}
278
- }
279
- \end{threeparttable}
280
- \end{table}
281
-
282
-
283
- \begin{table*}[t]
284
- \centering
285
- \begin{threeparttable}
286
- \caption[Mean Accuracy]{Wilcoxon signed-rank tests (two-sided) for Mean Accuracy; $p$-values with Holm FWER corrections within-metric and global.}
287
- \label{tab:wilcoxon:mean_test_acc}
288
- \begingroup
289
- \setlength{\tabcolsep}{4pt}
290
- \begin{tabular}{ll r r r r r r r r r r r}
291
- \toprule
292
- \multicolumn{2}{c}{Algorithms} & \multicolumn{6}{c}{Run Statistics} & \multicolumn{5}{c}{Wilcoxon pairwise Statistics} \\\cmidrule(lr){1-2} \cmidrule(lr){3-8} \cmidrule(lr){9-13}
293
- Alg A & Alg B & $n$ & wins$_A$ & wins$_B$ & ties & WinRate A & \shortstack{Median \\ $\Delta$ ($A-B$)} & $W$ & $p$ & $p_{\text{Holm,within}}$ & $p_{\text{Holm,global}}$ & $r$ \\
294
- \midrule
295
- AOL & LDLT-L & 121 & 16 & 105 & 0 & 0.1322 & -0.0632 & 409 & $2.1e-17^{***}$ & $3.2e-16^{***}$ & $0^{***}$ & 0.7715 \\
296
- AOL & Sandwich & 121 & 23 & 98 & 0 & 0.1901 & -0.0656 & 698 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.7036 \\
297
- AOL & Orthogonal & 121 & 25 & 96 & 0 & 0.2066 & -0.0525 & 908 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.6542 \\
298
- AOL & SLL & 121 & 24 & 97 & 0 & 0.1983 & -0.0445 & 1047 & $0^{***}$ & $1.0e-10^{***}$ & $4.0e-10^{***}$ & 0.6215 \\
299
- AOL & LDLT-R & 121 & 27 & 94 & 0 & 0.2231 & -0.0357 & 1307 & $7.0e-10^{***}$ & $7.8e-09^{***}$ & $2.4e-08^{***}$ & 0.5604 \\
300
- LDLT-L & SLL & 121 & 86 & 35 & 0 & 0.7107 & 0.0103 & 1511 & $1.7e-08^{***}$ & $1.7e-07^{***}$ & $5.0e-07^{***}$ & 0.5124 \\
301
- LDLT-L & LDLT-R & 121 & 87 & 34 & 0 & 0.7190 & 0.0125 & 1779 & $7.7e-07^{***}$ & $6.9e-06^{***}$ & $2.1e-05^{***}$ & 0.4494 \\
302
- LDLT-L & Orthogonal & 121 & 78 & 43 & 0 & 0.6446 & 0.0104 & 2002 & $1.3e-05^{***}$ & $1.0e-04^{***}$ & $2.8e-04^{***}$ & 0.3969 \\
303
- Orthogonal & Sandwich & 121 & 39 & 81 & 1 & 0.3264 & -0.0079 & 2171 & $1.3e-04^{***}$ & $9.4e-04^{***}$ & $2.5e-03^{**}$ & 0.3487 \\
304
- Sandwich & SLL & 121 & 80 & 41 & 0 & 0.6612 & 0.0063 & 2279 & $2.6e-04^{***}$ & $1.6e-03^{**}$ & $4.5e-03^{**}$ & 0.3318 \\
305
- LDLT-R & Sandwich & 121 & 47 & 74 & 0 & 0.3884 & -0.0101 & 2411 & $9.4e-04^{***}$ & $4.7e-03^{**}$ & $1.4e-02^{*}$ & 0.3008 \\
306
- LDLT-L & Sandwich & 121 & 64 & 56 & 1 & 0.5331 & 0.0008 & 3145 & $2.0e-01$ & $8.2e-01$ & $1.0e+00$ & 0.1158 \\
307
- LDLT-R & Orthogonal & 121 & 54 & 67 & 0 & 0.4463 & -0.0024 & 3206 & $2.1e-01$ & $8.2e-01$ & $1.0e+00$ & 0.1138 \\
308
- Orthogonal & SLL & 121 & 60 & 61 & 0 & 0.4959 & -0.0000 & 3382 & $4.3e-01$ & $8.5e-01$ & $1.0e+00$ & 0.0724 \\
309
- LDLT-R & SLL & 121 & 58 & 63 & 0 & 0.4793 & -0.0010 & 3627 & $8.7e-01$ & $8.7e-01$ & $1.0e+00$ & 0.0148 \\
310
- \bottomrule
311
- \end{tabular}
312
- \begin{tablenotes}
313
- \item Stars mark significance ($^*\,p\!\le\!0.05$, $^{**}\,p\!\le\!0.01$, $^{***}\,p\!\le\!0.001$).
314
- \end{tablenotes}
315
- \endgroup
316
- \end{threeparttable}
317
- \end{table*}
318
-
319
-
320
-
321
- \begin{table*}[t]
322
- \centering
323
- \begin{threeparttable}
324
- \caption[Mean Certified Accuracy (36/255)]{Wilcoxon signed-rank tests (two-sided) for Mean Certified Accuracy (36/255); $p$-values with Holm FWER corrections within-metric and global.}
325
- \label{tab:wilcoxon:mean_cert_acc_36}
326
- \begingroup
327
- \setlength{\tabcolsep}{4pt}
328
- \begin{tabular}{ll r r r r r r r r r r r}
329
- \toprule
330
- \multicolumn{2}{c}{Algorithms} & \multicolumn{6}{c}{Run Statistics} & \multicolumn{5}{c}{Wilcoxon pairwise Statistics} \\\cmidrule(lr){1-2} \cmidrule(lr){3-8} \cmidrule(lr){9-13}
331
- Alg A & Alg B & $n$ & wins$_A$ & wins$_B$ & ties & WinRate A & \shortstack{Median \\ $\Delta$ ($A-B$)} & $W$ & $p$ & $p_{\text{Holm,within}}$ & $p_{\text{Holm,global}}$ & $r$ \\
332
- \midrule
333
- AOL & LDLT-R & 121 & 6 & 115 & 0 & 0.0496 & -0.2928 & 93 & $1.4e-20^{***}$ & $2.0e-19^{***}$ & $9.5e-19^{***}$ & 0.8458 \\
334
- AOL & SLL & 121 & 7 & 114 & 0 & 0.0579 & -0.2868 & 156 & $6.2e-20^{***}$ & $8.7e-19^{***}$ & $4.1e-18^{***}$ & 0.8310 \\
335
- AOL & Sandwich & 121 & 9 & 112 & 0 & 0.0744 & -0.3563 & 174 & $9.5e-20^{***}$ & $1.2e-18^{***}$ & $6.2e-18^{***}$ & 0.8268 \\
336
- AOL & Orthogonal & 121 & 9 & 112 & 0 & 0.0744 & -0.2985 & 184 & $1.2e-19^{***}$ & $1.4e-18^{***}$ & $7.6e-18^{***}$ & 0.8245 \\
337
- AOL & LDLT-L & 121 & 21 & 93 & 7 & 0.2025 & -0.1185 & 843 & $0^{***}$ & $1.0e-10^{***}$ & $3.0e-10^{***}$ & 0.6445 \\
338
- LDLT-L & Sandwich & 121 & 31 & 90 & 0 & 0.2562 & -0.0772 & 975 & $0^{***}$ & $0^{***}$ & $1.0e-10^{***}$ & 0.6384 \\
339
- LDLT-L & LDLT-R & 121 & 36 & 85 & 0 & 0.2975 & -0.0605 & 1205 & $1.0e-10^{***}$ & $1.2e-09^{***}$ & $5.1e-09^{***}$ & 0.5843 \\
340
- LDLT-L & SLL & 121 & 36 & 85 & 0 & 0.2975 & -0.0489 & 1305 & $7.0e-10^{***}$ & $5.5e-09^{***}$ & $2.4e-08^{***}$ & 0.5608 \\
341
- LDLT-L & Orthogonal & 121 & 42 & 79 & 0 & 0.3471 & -0.0531 & 1489 & $1.3e-08^{***}$ & $8.7e-08^{***}$ & $3.7e-07^{***}$ & 0.5176 \\
342
- Sandwich & SLL & 121 & 84 & 35 & 2 & 0.7025 & 0.0174 & 1683 & $5.7e-07^{***}$ & $3.4e-06^{***}$ & $1.6e-05^{***}$ & 0.4586 \\
343
- Orthogonal & Sandwich & 121 & 38 & 82 & 1 & 0.3182 & -0.0130 & 2067 & $4.3e-05^{***}$ & $2.1e-04^{***}$ & $9.0e-04^{***}$ & 0.3735 \\
344
- LDLT-R & SLL & 121 & 71 & 49 & 1 & 0.5909 & 0.0081 & 2331 & $6.7e-04^{***}$ & $2.7e-03^{**}$ & $1.1e-02^{*}$ & 0.3104 \\
345
- LDLT-R & Sandwich & 121 & 52 & 68 & 1 & 0.4339 & -0.0088 & 2802 & $3.0e-02^{*}$ & $9.1e-02$ & $3.0e-01$ & 0.1978 \\
346
- Orthogonal & SLL & 121 & 58 & 63 & 0 & 0.4793 & -0.0016 & 3123 & $1.4e-01$ & $2.8e-01$ & $1.0e+00$ & 0.1333 \\
347
- LDLT-R & Orthogonal & 121 & 76 & 45 & 0 & 0.6281 & 0.0060 & 3225 & $2.3e-01$ & $2.8e-01$ & $1.0e+00$ & 0.1093 \\
348
- \bottomrule
349
- \end{tabular}
350
- \begin{tablenotes}
351
- \item Stars mark significance ($^*\,p\!\le\!0.05$, $^{**}\,p\!\le\!0.01$, $^{***}\,p\!\le\!0.001$).
352
- \end{tablenotes}
353
- \endgroup
354
- \end{threeparttable}
355
- \end{table*}
356
-
357
-
358
-
359
- \begin{table*}[t]
360
- \centering
361
- \begin{threeparttable}
362
- \caption[Mean Certified Accuracy (72/255)]{Wilcoxon signed-rank tests (two-sided) for Mean Certified Accuracy (72/255); $p$-values with Holm FWER corrections within-metric and global.}
363
- \label{tab:wilcoxon:mean_cert_acc_72}
364
- \begingroup
365
- \setlength{\tabcolsep}{4pt}
366
- \begin{tabular}{ll r r r r r r r r r r r}
367
- \toprule
368
- \multicolumn{2}{c}{Algorithms} & \multicolumn{6}{c}{Run Statistics} & \multicolumn{5}{c}{Wilcoxon pairwise Statistics} \\\cmidrule(lr){1-2} \cmidrule(lr){3-8} \cmidrule(lr){9-13}
369
- Alg A & Alg B & $n$ & wins$_A$ & wins$_B$ & ties & WinRate A & \shortstack{Median \\ $\Delta$ ($A-B$)} & $W$ & $p$ & $p_{\text{Holm,within}}$ & $p_{\text{Holm,global}}$ & $r$ \\
370
- \midrule
371
- AOL & SLL & 121 & 1 & 118 & 2 & 0.0165 & -0.2343 & 2 & $3.1e-21^{***}$ & $4.3e-20^{***}$ & $2.3e-19^{***}$ & 0.8672 \\
372
- AOL & LDLT-R & 121 & 1 & 120 & 0 & 0.0083 & -0.2920 & 21 & $2.3e-21^{***}$ & $3.5e-20^{***}$ & $1.7e-19^{***}$ & 0.8628 \\
373
- AOL & Orthogonal & 121 & 6 & 115 & 0 & 0.0496 & -0.2589 & 86 & $1.2e-20^{***}$ & $1.5e-19^{***}$ & $8.3e-19^{***}$ & 0.8473 \\
374
- AOL & Sandwich & 121 & 7 & 114 & 0 & 0.0579 & -0.3205 & 90 & $1.3e-20^{***}$ & $1.5e-19^{***}$ & $8.9e-19^{***}$ & 0.8465 \\
375
- AOL & LDLT-L & 121 & 10 & 93 & 18 & 0.1570 & -0.1040 & 284 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.7759 \\
376
- LDLT-L & Sandwich & 121 & 21 & 99 & 1 & 0.1777 & -0.0916 & 608 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.7223 \\
377
- LDLT-L & LDLT-R & 121 & 30 & 91 & 0 & 0.2479 & -0.0704 & 922 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.6509 \\
378
- Orthogonal & Sandwich & 121 & 19 & 99 & 3 & 0.1694 & -0.0301 & 1122 & $1.0e-10^{***}$ & $8.0e-10^{***}$ & $5.4e-09^{***}$ & 0.5905 \\
379
- LDLT-L & SLL & 121 & 34 & 86 & 1 & 0.2851 & -0.0486 & 1161 & $1.0e-10^{***}$ & $8.0e-10^{***}$ & $4.1e-09^{***}$ & 0.5901 \\
380
- Sandwich & SLL & 121 & 93 & 28 & 0 & 0.7686 & 0.0368 & 1188 & $1.0e-10^{***}$ & $8.0e-10^{***}$ & $4.0e-09^{***}$ & 0.5883 \\
381
- LDLT-L & Orthogonal & 121 & 35 & 85 & 1 & 0.2934 & -0.0477 & 1257 & $5.0e-10^{***}$ & $2.6e-09^{***}$ & $1.9e-08^{***}$ & 0.5672 \\
382
- LDLT-R & SLL & 121 & 82 & 39 & 0 & 0.6777 & 0.0152 & 1961 & $7.7e-06^{***}$ & $3.1e-05^{***}$ & $1.8e-04^{***}$ & 0.4066 \\
383
- LDLT-R & Sandwich & 121 & 49 & 72 & 0 & 0.4050 & -0.0105 & 2635 & $6.4e-03^{**}$ & $1.9e-02^{*}$ & $8.3e-02$ & 0.2481 \\
384
- LDLT-R & Orthogonal & 121 & 74 & 47 & 0 & 0.6116 & 0.0114 & 2675 & $8.7e-03^{**}$ & $1.9e-02^{*}$ & $1.0e-01$ & 0.2387 \\
385
- Orthogonal & SLL & 121 & 62 & 59 & 0 & 0.5124 & 0.0025 & 2936 & $5.1e-02$ & $5.1e-02$ & $4.6e-01$ & 0.1774 \\
386
- \bottomrule
387
- \end{tabular}
388
- \begin{tablenotes}
389
- \item Stars mark significance ($^*\,p\!\le\!0.05$, $^{**}\,p\!\le\!0.01$, $^{***}\,p\!\le\!0.001$).
390
- \end{tablenotes}
391
- \endgroup
392
- \end{threeparttable}
393
- \end{table*}
394
-
395
-
396
-
397
- \begin{table*}[t]
398
- \centering
399
- \begin{threeparttable}
400
- \caption[Mean Certified Accuracy (108/255)]{Wilcoxon signed-rank tests (two-sided) for Mean Certified Accuracy (108/255); $p$-values with Holm FWER corrections within-metric and global.}
401
- \label{tab:wilcoxon:mean_cert_acc_108}
402
- \begingroup
403
- \setlength{\tabcolsep}{4pt}
404
- \begin{tabular}{ll r r r r r r r r r r r}
405
- \toprule
406
- \multicolumn{2}{c}{Algorithms} & \multicolumn{6}{c}{Run Statistics} & \multicolumn{5}{c}{Wilcoxon pairwise Statistics} \\\cmidrule(lr){1-2} \cmidrule(lr){3-8} \cmidrule(lr){9-13}
407
- Alg A & Alg B & $n$ & wins$_A$ & wins$_B$ & ties & WinRate A & \shortstack{Median \\ $\Delta$ ($A-B$)} & $W$ & $p$ & $p_{\text{Holm,within}}$ & $p_{\text{Holm,global}}$ & $r$ \\
408
- \midrule
409
- AOL & SLL & 121 & 1 & 113 & 7 & 0.0372 & -0.1619 & 31 & $4.4e-20^{***}$ & $5.3e-19^{***}$ & $2.9e-18^{***}$ & 0.8596 \\
410
- AOL & LDLT-R & 121 & 1 & 120 & 0 & 0.0083 & -0.2368 & 43 & $4.0e-21^{***}$ & $6.0e-20^{***}$ & $2.9e-19^{***}$ & 0.8576 \\
411
- AOL & Orthogonal & 121 & 3 & 114 & 4 & 0.0413 & -0.1930 & 53 & $2.4e-20^{***}$ & $3.2e-19^{***}$ & $1.6e-18^{***}$ & 0.8544 \\
412
- AOL & Sandwich & 121 & 4 & 115 & 2 & 0.0413 & -0.2776 & 82 & $2.3e-20^{***}$ & $3.2e-19^{***}$ & $1.6e-18^{***}$ & 0.8478 \\
413
- AOL & LDLT-L & 121 & 5 & 91 & 25 & 0.1446 & -0.0716 & 108 & $5.0e-16^{***}$ & $0^{***}$ & $0^{***}$ & 0.8278 \\
414
- LDLT-L & Sandwich & 121 & 16 & 103 & 2 & 0.1405 & -0.0699 & 532 & $8.0e-16^{***}$ & $0^{***}$ & $0^{***}$ & 0.7384 \\
415
- LDLT-L & LDLT-R & 121 & 25 & 95 & 1 & 0.2107 & -0.0751 & 711 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.6977 \\
416
- Orthogonal & Sandwich & 121 & 20 & 97 & 4 & 0.1818 & -0.0322 & 1002 & $0^{***}$ & $2.0e-10^{***}$ & $1.1e-09^{***}$ & 0.6159 \\
417
- Sandwich & SLL & 121 & 92 & 27 & 2 & 0.7686 & 0.0456 & 1046 & $0^{***}$ & $2.0e-10^{***}$ & $9.0e-10^{***}$ & 0.6134 \\
418
- LDLT-L & Orthogonal & 121 & 31 & 86 & 4 & 0.2727 & -0.0380 & 1169 & $5.0e-10^{***}$ & $3.2e-09^{***}$ & $1.9e-08^{***}$ & 0.5738 \\
419
- LDLT-L & SLL & 121 & 31 & 83 & 7 & 0.2851 & -0.0308 & 1220 & $6.0e-09^{***}$ & $3.0e-08^{***}$ & $1.9e-07^{***}$ & 0.5447 \\
420
- LDLT-R & SLL & 121 & 92 & 29 & 0 & 0.7603 & 0.0294 & 1470 & $9.3e-09^{***}$ & $3.7e-08^{***}$ & $2.9e-07^{***}$ & 0.5220 \\
421
- LDLT-R & Orthogonal & 121 & 84 & 37 & 0 & 0.6942 & 0.0207 & 2239 & $1.7e-04^{***}$ & $5.2e-04^{***}$ & $3.1e-03^{**}$ & 0.3412 \\
422
- Orthogonal & SLL & 121 & 69 & 50 & 2 & 0.5785 & 0.0077 & 2708 & $2.2e-02^{*}$ & $4.5e-02^{*}$ & $2.4e-01$ & 0.2095 \\
423
- LDLT-R & Sandwich & 121 & 59 & 62 & 0 & 0.4876 & -0.0023 & 3035 & $9.0e-02$ & $9.0e-02$ & $7.2e-01$ & 0.1540 \\
424
- \bottomrule
425
- \end{tabular}
426
- \begin{tablenotes}
427
- \item Stars mark significance ($^*\,p\!\le\!0.05$, $^{**}\,p\!\le\!0.01$, $^{***}\,p\!\le\!0.001$).
428
- \end{tablenotes}
429
- \endgroup
430
- \end{threeparttable}
431
- \end{table*}
432
-
433
-
434
-
435
- \begin{table*}[t]
436
- \centering
437
- \begin{threeparttable}
438
- \caption[Mean Certified Accuracy (255/255)]{Wilcoxon signed-rank tests (two-sided) for Mean Certified Accuracy (255/255); $p$-values with Holm FWER corrections within-metric and global.}
439
- \label{tab:wilcoxon:mean_cert_acc_255}
440
- \begingroup
441
- \setlength{\tabcolsep}{4pt}
442
- \begin{tabular}{ll r r r r r r r r r r r}
443
- \toprule
444
- \multicolumn{2}{c}{Algorithms} & \multicolumn{6}{c}{Run Statistics} & \multicolumn{5}{c}{Wilcoxon pairwise Statistics} \\\cmidrule(lr){1-2} \cmidrule(lr){3-8} \cmidrule(lr){9-13}
445
- Alg A & Alg B & $n$ & wins$_A$ & wins$_B$ & ties & WinRate A & \shortstack{Median \\ $\Delta$ ($A-B$)} & $W$ & $p$ & $p_{\text{Holm,within}}$ & $p_{\text{Holm,global}}$ & $r$ \\
446
- \midrule
447
- AOL & LDLT-R & 121 & 0 & 107 & 14 & 0.0579 & -0.0753 & 0 & $2.8e-19^{***}$ & $4.2e-18^{***}$ & $1.7e-17^{***}$ & 0.8679 \\
448
- AOL & SLL & 121 & 1 & 99 & 21 & 0.0950 & -0.0435 & 4 & $4.5e-18^{***}$ & $5.8e-17^{***}$ & $2.7e-16^{***}$ & 0.8666 \\
449
- AOL & LDLT-L & 121 & 3 & 78 & 40 & 0.1901 & -0.0180 & 36 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.8496 \\
450
- AOL & Orthogonal & 121 & 2 & 97 & 22 & 0.1074 & -0.0518 & 61 & $3.6e-17^{***}$ & $4.4e-16^{***}$ & $0^{***}$ & 0.8466 \\
451
- AOL & Sandwich & 121 & 3 & 107 & 11 & 0.0702 & -0.0914 & 67 & $5.5e-19^{***}$ & $7.7e-18^{***}$ & $3.4e-17^{***}$ & 0.8488 \\
452
- Orthogonal & Sandwich & 121 & 12 & 97 & 12 & 0.1488 & -0.0259 & 367 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.7616 \\
453
- LDLT-L & Sandwich & 121 & 12 & 98 & 11 & 0.1446 & -0.0363 & 385 & $0^{***}$ & $0^{***}$ & $0^{***}$ & 0.7584 \\
454
- LDLT-L & LDLT-R & 121 & 22 & 86 & 13 & 0.2355 & -0.0305 & 669 & $0^{***}$ & $0^{***}$ & $1.0e-10^{***}$ & 0.6706 \\
455
- Sandwich & SLL & 121 & 83 & 28 & 10 & 0.7273 & 0.0187 & 1014 & $7.0e-10^{***}$ & $5.1e-09^{***}$ & $2.4e-08^{***}$ & 0.5846 \\
456
- LDLT-L & SLL & 121 & 24 & 77 & 20 & 0.2810 & -0.0083 & 1120 & $8.3e-07^{***}$ & $5.0e-06^{***}$ & $2.1e-05^{***}$ & 0.4905 \\
457
- LDLT-L & Orthogonal & 121 & 30 & 70 & 21 & 0.3347 & -0.0077 & 1191 & $4.5e-06^{***}$ & $1.8e-05^{***}$ & $1.1e-04^{***}$ & 0.4585 \\
458
- LDLT-R & SLL & 121 & 80 & 29 & 12 & 0.7107 & 0.0156 & 1368 & $8.5e-07^{***}$ & $5.0e-06^{***}$ & $2.1e-05^{***}$ & 0.4716 \\
459
- LDLT-R & Orthogonal & 121 & 76 & 34 & 11 & 0.6736 & 0.0123 & 1722 & $7.3e-05^{***}$ & $2.2e-04^{***}$ & $1.5e-03^{**}$ & 0.3780 \\
460
- LDLT-R & Sandwich & 121 & 44 & 68 & 9 & 0.4008 & -0.0070 & 2043 & $1.1e-03^{**}$ & $2.3e-03^{**}$ & $1.6e-02^{*}$ & 0.3074 \\
461
- Orthogonal & SLL & 121 & 53 & 51 & 17 & 0.5083 & 0.0000 & 2606 & $6.9e-01$ & $6.9e-01$ & $1.0e+00$ & 0.0393 \\
462
- \bottomrule
463
- \end{tabular}
464
- \begin{tablenotes}
465
- \item Stars mark significance ($^*\,p\!\le\!0.05$, $^{**}\,p\!\le\!0.01$, $^{***}\,p\!\le\!0.001$).
466
- \end{tablenotes}
467
- \endgroup
468
- \end{threeparttable}
469
- \end{table*}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
UCI_N6/wilcoxon_pairwise_all.csv DELETED
@@ -1,76 +0,0 @@
1
- metric,alg_a,alg_b,n_common,n_nonzero,wins_a,wins_b,ties,win_rate_a_over_b,mean_diff_a_minus_b,median_diff_a_minus_b,W_stat,p_two_sided,z_equiv,effect_size_r,p_holm_global,p_holm_within_metric
2
- mean_cert_acc_108,aol,ldlt-resnet,121,121,1,120,0,0.008264462809917356,-0.27756937579256197,-0.23680263569999999,43.0,3.971911186286591e-21,-9.433351346128775,0.857577395102616,2.899495165989211e-19,5.957866779429887e-20
3
- mean_cert_acc_108,aol,sandwich,121,119,4,115,2,0.04132231404958678,-0.3003505936595042,-0.2775541879,82.0,2.2832440515482022e-20,-9.248190838712615,0.8477802642077927,1.5754383955682595e-18,3.196541672167483e-19
4
- mean_cert_acc_108,aol,ortho,121,117,3,114,4,0.04132231404958678,-0.24928130693223144,-0.1929709499,53.0,2.4216488584946992e-20,-9.241897120975947,0.8544136910831229,1.6467212237763955e-18,3.196541672167483e-19
5
- mean_cert_acc_108,aol,sdp,121,114,1,113,7,0.0371900826446281,-0.21922742412561985,-0.1618937068,31.0,4.402939864544531e-20,-9.17772518836056,0.859572719401443,2.949969709244836e-18,5.2835278374534375e-19
6
- mean_cert_acc_108,aol,ldlt,121,96,5,91,25,0.1446280991735537,-0.14859004186611569,-0.0715934783,108.0,5.031122992210323e-16,-8.110742844187337,0.8277992251328726,2.918051335481987e-14,5.534235291431355e-15
7
- mean_cert_acc_108,ldlt,sandwich,121,119,16,103,2,0.14049586776859505,-0.15176055179338843,-0.06991830010000001,532.0,7.955949293607779e-16,-8.054875891781956,0.7383892623745293,4.534891097356434e-14,7.955949293607778e-15
8
- mean_cert_acc_108,ldlt,ldlt-resnet,121,120,25,95,1,0.21074380165289255,-0.12897933392644628,-0.0750768315,711.0,2.119279523234749e-14,-7.643176019800993,0.6977233195045927,1.0596397616173746e-12,1.9073515709112743e-13
9
- mean_cert_acc_108,sandwich,sdp,121,119,92,27,2,0.768595041322314,0.08112316953388428,0.0456349217,1046.0,2.203741621261607e-11,6.691845041287824,0.6134404291694237,9.47608897142491e-10,1.7629932970092855e-10
10
- mean_cert_acc_108,ortho,sandwich,121,117,20,97,4,0.18181818181818182,-0.05106928672727272,-0.0322356224,1001.5,2.6982241694705535e-11,-6.662165233713968,0.6159173937376728,1.1332541511776325e-09,1.8887569186293875e-10
11
- mean_cert_acc_108,ldlt,ortho,121,117,31,86,4,0.2727272727272727,-0.10069126506611571,-0.037954930200000014,1169.0,5.414495427550374e-10,-6.20659482933111,0.5737998949534099,1.9492183539181346e-08,3.248697256530224e-09
12
- mean_cert_acc_108,ldlt,sdp,121,114,31,83,7,0.28512396694214875,-0.07063738225950414,-0.0307513103,1220.0,6.028970842431367e-09,-5.815952160338162,0.5447138274210623,1.9292706695780375e-07,3.0144854212156835e-08
13
- mean_cert_acc_108,ldlt-resnet,sdp,121,121,92,29,0,0.7603305785123967,0.05834195166694215,0.02943234149999996,1470.0,9.3419869330056e-09,5.742262470887794,0.5220238609897995,2.896015949231736e-07,3.73679477320224e-08
14
- mean_cert_acc_108,ldlt-resnet,ortho,121,121,84,37,0,0.6942148760330579,0.028288068860330577,0.020693764000000003,2239.0,0.00017461692075476892,3.7531634438099943,0.3411966767099995,0.0031431045735858406,0.0005238507622643068
15
- mean_cert_acc_108,ortho,sdp,121,119,69,50,2,0.5785123966942148,0.030053882806611577,0.007734805400000044,2707.5,0.022262328422160898,2.2858620807716203,0.20954463338571241,0.24488561264376987,0.044524656844321796
16
- mean_cert_acc_108,ldlt-resnet,sandwich,121,121,59,62,0,0.48760330578512395,-0.022781217866942153,-0.00227646530000003,3035.0,0.09022246362523051,-1.6942253809302392,0.15402048917547628,0.7217797090018441,0.09022246362523051
17
- mean_cert_acc_255,aol,ldlt-resnet,121,107,0,107,14,0.05785123966942149,-0.13012088649173556,-0.07534636179999998,0.0,2.76996750319979e-19,-8.977496787439458,0.8678873724707945,1.7450795270158677e-17,4.154951254799685e-18
18
- mean_cert_acc_255,aol,sandwich,121,110,3,107,11,0.07024793388429752,-0.16391362448842975,-0.0914320648,67.0,5.47642526250318e-19,-8.902172131588635,0.8487888090494454,3.3953836627519717e-17,7.666995367504452e-18
19
- mean_cert_acc_255,aol,sdp,121,100,1,99,21,0.09504132231404959,-0.10447574966033059,-0.043450783900000005,4.0,4.463734493179987e-18,-8.666304171669932,0.8666304171669932,2.722878040839792e-16,5.802854841133983e-17
20
- mean_cert_acc_255,aol,ortho,121,99,2,97,22,0.10743801652892562,-0.11459076743471074,-0.05179674919999999,61.0,3.6442146275228883e-17,-8.423814379367323,0.846625199998847,2.1500866302385042e-15,4.373057553027466e-16
21
- mean_cert_acc_255,ldlt,sandwich,121,110,12,98,11,0.1446280991735537,-0.09138781576115705,-0.03625880179999996,385.0,1.8087600254343317e-15,-7.953800025107839,0.7583650766280977,1.0129056142432257e-13,1.989636027977765e-14
22
- mean_cert_acc_255,ortho,sandwich,121,109,12,97,12,0.1487603305785124,-0.04932285705371901,-0.0258620679,367.0,1.8479421669945226e-15,-7.951145858516235,0.7615816500914148,1.0163681918469874e-13,1.989636027977765e-14
23
- mean_cert_acc_255,aol,ldlt,121,81,3,78,40,0.19008264462809918,-0.07252580872727273,-0.0180000011,36.0,2.0692760364951348e-14,-7.646248493300708,0.8495831659223009,1.0553307786125188e-12,1.8623484328456214e-13
24
- mean_cert_acc_255,ldlt,ldlt-resnet,121,108,22,86,13,0.23553719008264462,-0.05759507776446282,-0.03051157290000006,669.0,3.2009930486043397e-12,-6.968604880955203,0.6705543173159377,1.4724568023579962e-10,2.5607944388834718e-11
25
- mean_cert_acc_255,sandwich,sdp,121,111,83,28,10,0.7272727272727273,0.059437874828099174,0.01872655469999998,1014.0,7.290838840470702e-10,6.15964458474609,0.5846475908605333,2.4135012522529935e-08,5.103587188329491e-09
26
- mean_cert_acc_255,ldlt,sdp,121,101,24,77,20,0.2809917355371901,-0.031949940933057856,-0.0083265586,1120.0,8.264525789853738e-07,-4.9290149335535665,0.4904553169986217,2.148776705361972e-05,4.958715473912243e-06
27
- mean_cert_acc_255,ldlt-resnet,sdp,121,109,80,29,12,0.7107438016528925,0.025645136831404965,0.015625,1368.5,8.506882131315028e-07,4.923364421074805,0.47157278542280645,2.148776705361972e-05,4.958715473912243e-06
28
- mean_cert_acc_255,ldlt,ortho,121,100,30,70,21,0.3347107438016529,-0.04206495870743802,-0.007675599399999997,1191.0,4.539730104155671e-06,-4.585006178949154,0.4585006178949154,0.0001089535224997361,1.8158920416622684e-05
29
- mean_cert_acc_255,ldlt-resnet,ortho,121,110,76,34,11,0.6735537190082644,0.015530119057024795,0.0123031566,1722.5,7.340425290385093e-05,3.9649730077287084,0.37804534302378984,0.0014680850580770185,0.0002202127587115528
30
- mean_cert_acc_255,ldlt-resnet,sandwich,121,112,44,68,9,0.40082644628099173,-0.033792737996694217,-0.006987711899999999,2043.0,0.001142117810154983,-3.2529545065204624,0.3073753089449993,0.015989649342169763,0.002284235620309966
31
- mean_cert_acc_255,ortho,sdp,121,104,53,51,17,0.5082644628099173,0.010115017774380166,0.0,2606.0,0.6887980651072516,0.4004867168703731,0.03927095354339887,1.0,0.6887980651072516
32
- mean_cert_acc_36,aol,ldlt-resnet,121,121,6,115,0,0.049586776859504134,-0.3195985256429752,-0.2928244397,93.0,1.3524220958164444e-20,-9.304013275123774,0.8458193886476159,9.46695467071511e-19,2.0286331437246665e-19
33
- mean_cert_acc_36,aol,sdp,121,121,7,114,0,0.05785123966942149,-0.29487104347272725,-0.2867985517,156.0,6.184471896701117e-20,-9.141057246118269,0.8310052041925698,4.0817514518227375e-18,8.6582606553815635e-19
34
- mean_cert_acc_36,aol,sandwich,121,121,9,112,0,0.0743801652892562,-0.3366267439752066,-0.3562658951,174.0,9.50240904957694e-20,-9.094498380688126,0.8267725800625569,6.176565882225011e-18,1.2353131764450022e-18
35
- mean_cert_acc_36,aol,ortho,121,121,9,112,0,0.0743801652892562,-0.3153361513099173,-0.2984701917,183.5,1.1909411544163069e-19,-9.069929438814437,0.8245390398922215,7.622023388264364e-18,1.4291293852995683e-18
36
- mean_cert_acc_36,ldlt,sandwich,121,121,31,90,0,0.256198347107438,-0.16186784709090912,-0.07720418269999996,975.0,2.177320182273665e-12,-7.022628869046719,0.6384208062769745,1.0233404856686225e-10,2.3950522005010313e-11
37
- mean_cert_acc_36,aol,ldlt,121,114,21,93,7,0.2024793388429752,-0.17475889688429755,-0.1185294129,843.0,5.906625670244248e-12,-6.8818836320461765,0.6445474566730744,2.6579815516099117e-10,5.906625670244248e-11
38
- mean_cert_acc_36,ldlt,ldlt-resnet,121,121,36,85,0,0.2975206611570248,-0.14483962875867767,-0.06054405490000003,1205.0,1.2954054230165275e-10,-6.427710032994878,0.5843372757268072,5.0520811497644576e-09,1.1658648807148747e-09
39
- mean_cert_acc_36,ldlt,sdp,121,121,36,85,0,0.2975206611570248,-0.12011214658842975,-0.048914968999999975,1305.0,6.870164641102856e-10,-6.169049669494079,0.5608226972267345,2.4045576243859998e-08,5.496131712882285e-09
40
- mean_cert_acc_36,ldlt,ortho,121,121,42,79,0,0.34710743801652894,-0.14057725442561983,-0.05313736200000008,1489.0,1.2474259752304599e-08,-5.693114600652607,0.5175558727866006,3.74227792569138e-07,8.731981826613219e-08
41
- mean_cert_acc_36,sandwich,sdp,121,119,84,35,2,0.7024793388429752,0.04175570050247934,0.017416268600000007,1683.0,5.655006341785244e-07,5.002641438632645,0.45859138879655953,1.5834017756998682e-05,3.393003805071146e-06
42
- mean_cert_acc_36,ortho,sandwich,121,120,38,82,1,0.3181818181818182,-0.02129059266528926,-0.013042002900000016,2067.0,4.277000715808299e-05,-4.091982968151117,0.3735452294305518,0.0008981701503197428,0.00021385003579041495
43
- mean_cert_acc_36,ldlt-resnet,sdp,121,120,71,49,1,0.5909090909090909,0.024727482170247932,0.008074492300000014,2331.0,0.0006723776229561895,3.4006015258523057,0.31043102746596585,0.010758041967299032,0.002689510491824758
44
- mean_cert_acc_36,ldlt-resnet,sandwich,121,120,52,68,1,0.43388429752066116,-0.017028218332231407,-0.008829414899999999,2802.0,0.030226148960912597,-2.167114179932833,0.1978295535064203,0.30226148960912597,0.0906784468827378
45
- mean_cert_acc_36,ortho,sdp,121,121,58,63,0,0.4793388429752066,0.020465107837190088,-0.0016042887999999644,3123.0,0.1424837402278625,1.466604261049536,0.13332766009541236,0.9973861815950376,0.284967480455725
46
- mean_cert_acc_36,ldlt-resnet,ortho,121,121,76,45,0,0.628099173553719,0.004262374333057851,0.005987092899999991,3225.0,0.2290650698616885,1.2027706902787196,0.10934279002533814,1.0,0.284967480455725
47
- mean_cert_acc_72,aol,ldlt-resnet,121,121,1,120,0,0.008264462809917356,-0.3092410627958678,-0.2919858993,21.0,2.3047485551154483e-21,-9.490252705264558,0.8627502459331416,1.7285614163365862e-19,3.4571228326731724e-20
48
- mean_cert_acc_72,aol,sdp,121,119,1,118,2,0.01652892561983471,-0.2649477861495868,-0.23431782420000002,2.0,3.06956082705418e-21,-9.460335718166954,0.8672275534225952,2.2714750120200932e-19,4.297385157875852e-20
49
- mean_cert_acc_72,aol,ortho,121,121,6,115,0,0.049586776859504134,-0.29073976685619835,-0.2589285746,86.5,1.15433610944114e-20,-9.320830096326201,0.8473481905751092,8.3112199879762075e-19,1.500636942273482e-19
50
- mean_cert_acc_72,aol,sandwich,121,121,7,114,0,0.05785123966942149,-0.33645687826198345,-0.32054936889999996,90.0,1.2571587049174732e-20,-9.311773086028797,0.8465248260026179,8.92582680491406e-19,1.5085904459009678e-19
51
- mean_cert_acc_72,ldlt,sandwich,121,120,21,99,1,0.17768595041322313,-0.1661887828859504,-0.09155106540000002,608.0,2.5143510494245837e-15,-7.9129129844919035,0.7223468228636237,1.3577495666892752e-13,2.765786154367042e-14
52
- mean_cert_acc_72,aol,ldlt,121,103,10,93,18,0.15702479338842976,-0.1702680953760331,-0.1040100288,284.0,3.4232603031044563e-15,-7.874420413270477,0.7758896981769975,1.8143279606453618e-13,3.423260303104456e-14
53
- mean_cert_acc_72,ldlt,ldlt-resnet,121,121,30,91,0,0.24793388429752067,-0.13897296741983473,-0.0703703724,922.0,8.084268131100275e-13,-7.159718861702142,0.650883532882013,3.8804487029281324e-11,7.2758413179902475e-12
54
- mean_cert_acc_72,sandwich,sdp,121,121,93,28,0,0.768595041322314,0.07150909211239669,0.03682704640000001,1188.0,9.69177377400736e-11,6.471682294790014,0.5883347540718195,3.973627247343017e-09,7.753419019205888e-10
55
- mean_cert_acc_72,ldlt,sdp,121,120,34,86,1,0.28512396694214875,-0.09467969077355372,-0.04857719500000002,1161.0,1.0151448930030595e-10,-6.464678372403863,0.5901416952635631,4.060579572012238e-09,7.753419019205888e-10
56
- mean_cert_acc_72,ortho,sandwich,121,118,19,99,3,0.16942148760330578,-0.04571711140578512,-0.030128806799999963,1121.5,1.4156553253762546e-10,-6.414198632497565,0.5904748455235395,5.3794902364297675e-09,8.493931952257528e-10
57
- mean_cert_acc_72,ldlt,ortho,121,120,35,85,1,0.29338842975206614,-0.12047167148016531,-0.04772219800000005,1257.0,5.189417983892635e-10,-6.213266938840658,0.5671910763673499,1.920084654040275e-08,2.5947089919463174e-09
58
- mean_cert_acc_72,ldlt-resnet,sdp,121,121,82,39,0,0.6776859504132231,0.044293276646280993,0.01521807910000006,1961.0,7.7404645028487e-06,4.472239555029277,0.4065672322753888,0.0001780306835655201,3.09618580113948e-05
59
- mean_cert_acc_72,ldlt-resnet,sandwich,121,121,49,72,0,0.4049586776859504,-0.0272158154661157,-0.010539218799999972,2635.0,0.006355236164498874,-2.728866834933439,0.24807880317576717,0.08261807013848536,0.019065708493496623
60
- mean_cert_acc_72,ldlt-resnet,ortho,121,121,74,47,0,0.6115702479338843,0.018501295939669423,0.011391893000000097,2675.0,0.008654654351165116,2.6254026895331193,0.23867297177573812,0.10385585221398139,0.019065708493496623
61
- mean_cert_acc_72,ortho,sdp,121,121,62,59,0,0.512396694214876,0.02579198070661157,0.0024819821999999547,2935.5,0.05098651424705218,1.9515932586868652,0.17741756897153318,0.45887862822346964,0.05098651424705218
62
- mean_test_acc,aol,ldlt,121,121,16,105,0,0.1322314049586777,-0.11958352569586778,-0.06315010610000005,409.0,2.1268544183487774e-17,-8.486646526461245,0.7715133205873859,1.2761126510092665e-15,3.1902816275231663e-16
63
- mean_test_acc,aol,sandwich,121,121,23,98,0,0.19008264462809918,-0.11132838667768594,-0.06561486950000006,698.0,1.0010877948097709e-14,-7.739118075943935,0.703556188722176,5.205656533010808e-13,1.4015229127336793e-13
64
- mean_test_acc,aol,ortho,121,121,25,96,0,0.2066115702479339,-0.09866207272396695,-0.05250947429999997,908.0,6.203585473534351e-13,-7.195931312592255,0.6541755738720232,3.039756882031832e-11,8.064661115594657e-12
65
- mean_test_acc,aol,sdp,121,121,24,97,0,0.19834710743801653,-0.09612382466942149,-0.044510062900000036,1047.0,8.121173320702691e-12,-6.836393407326143,0.6214903097569221,3.573316261109184e-10,9.74540798484323e-11
66
- mean_test_acc,aol,ldlt-resnet,121,121,27,94,0,0.2231404958677686,-0.09206069528677686,-0.03569083780000004,1307.0,7.098533094861746e-10,-6.163876462224062,0.5603524056567329,2.4135012522529935e-08,7.80838640434792e-09
67
- mean_test_acc,ldlt,sdp,121,121,86,35,0,0.7107438016528925,0.023459701026446286,0.010278406500000004,1511.0,1.7383406684454798e-08,5.636209320682431,0.5123826655165846,5.041187938491891e-07,1.7383406684454799e-07
68
- mean_test_acc,ldlt,ldlt-resnet,121,121,87,34,0,0.71900826446281,0.02752283040909091,0.01248115980000003,1779.0,7.69296628945801e-07,4.942999546500287,0.44936359513638974,2.077100898153663e-05,6.923669660512209e-06
69
- mean_test_acc,ldlt,ortho,121,121,78,43,0,0.6446280991735537,0.020921452971900834,0.010356299000000013,2002.0,1.264342847332638e-05,4.366186935893503,0.39692608508122756,0.00027815542641318036,0.00010114742778661104
70
- mean_test_acc,ortho,sandwich,121,120,39,81,1,0.32644628099173556,-0.01266631395371901,-0.007894689300000035,2171.0,0.00013365710865816523,-3.8196205817909794,0.3486820589596543,0.0025394850645051393,0.0009355997606071565
71
- mean_test_acc,sandwich,sdp,121,121,80,41,0,0.6611570247933884,0.015204562008264456,0.006305895000000006,2279.0,0.00026254907665535047,3.6496977289962866,0.33179070263602606,0.004463334303140958,0.001575294459932103
72
- mean_test_acc,ldlt-resnet,sandwich,121,121,47,74,0,0.3884297520661157,-0.019267691390909086,-0.01009704999999994,2411.0,0.0009387558222226796,-3.308266049175231,0.3007514590159301,0.014081337333340193,0.004693779111113398
73
- mean_test_acc,ldlt,sandwich,121,120,64,56,1,0.5330578512396694,0.008255139018181824,0.0008221223000000055,3145.0,0.2044973939148288,1.2688420787642989,0.11582890474182553,1.0,0.8179895756593152
74
- mean_test_acc,ldlt-resnet,ortho,121,121,54,67,0,0.4462809917355372,-0.006601377437190076,-0.0023532391000000152,3206.0,0.2106004158723539,-1.2519161593438723,0.11381055994035204,1.0,0.8179895756593152
75
- mean_test_acc,ortho,sdp,121,121,60,61,0,0.49586776859504134,0.002538248054545448,-6.572600000032125e-06,3382.0,0.42564043452591804,0.7966739195824637,0.07242490178022397,1.0,0.8512808690518361
76
- mean_test_acc,ldlt-resnet,sdp,121,121,58,63,0,0.4793388429752066,-0.0040631293826446264,-0.000977559300000097,3627.0,0.8705530555843003,-0.16295602900550393,0.014814184455045813,1.0,0.8705530555843003
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
UCI_N6/wilcoxon_pairwise_all.json DELETED
@@ -1,1427 +0,0 @@
1
- [
2
- {
3
- "metric":"mean_cert_acc_108",
4
- "alg_a":"aol",
5
- "alg_b":"ldlt-resnet",
6
- "n_common":121,
7
- "n_nonzero":121,
8
- "wins_a":1,
9
- "wins_b":120,
10
- "ties":0,
11
- "win_rate_a_over_b":0.0082644628,
12
- "mean_diff_a_minus_b":-0.2775693758,
13
- "median_diff_a_minus_b":-0.2368026357,
14
- "W_stat":43.0,
15
- "p_two_sided":3.971911186e-21,
16
- "z_equiv":-9.4333513461,
17
- "effect_size_r":0.8575773951,
18
- "p_holm_global":2.899495166e-19,
19
- "p_holm_within_metric":5.957866779e-20
20
- },
21
- {
22
- "metric":"mean_cert_acc_108",
23
- "alg_a":"aol",
24
- "alg_b":"sandwich",
25
- "n_common":121,
26
- "n_nonzero":119,
27
- "wins_a":4,
28
- "wins_b":115,
29
- "ties":2,
30
- "win_rate_a_over_b":0.041322314,
31
- "mean_diff_a_minus_b":-0.3003505937,
32
- "median_diff_a_minus_b":-0.2775541879,
33
- "W_stat":82.0,
34
- "p_two_sided":2.283244052e-20,
35
- "z_equiv":-9.2481908387,
36
- "effect_size_r":0.8477802642,
37
- "p_holm_global":1.575438396e-18,
38
- "p_holm_within_metric":3.196541672e-19
39
- },
40
- {
41
- "metric":"mean_cert_acc_108",
42
- "alg_a":"aol",
43
- "alg_b":"ortho",
44
- "n_common":121,
45
- "n_nonzero":117,
46
- "wins_a":3,
47
- "wins_b":114,
48
- "ties":4,
49
- "win_rate_a_over_b":0.041322314,
50
- "mean_diff_a_minus_b":-0.2492813069,
51
- "median_diff_a_minus_b":-0.1929709499,
52
- "W_stat":53.0,
53
- "p_two_sided":2.421648858e-20,
54
- "z_equiv":-9.241897121,
55
- "effect_size_r":0.8544136911,
56
- "p_holm_global":1.646721224e-18,
57
- "p_holm_within_metric":3.196541672e-19
58
- },
59
- {
60
- "metric":"mean_cert_acc_108",
61
- "alg_a":"aol",
62
- "alg_b":"sdp",
63
- "n_common":121,
64
- "n_nonzero":114,
65
- "wins_a":1,
66
- "wins_b":113,
67
- "ties":7,
68
- "win_rate_a_over_b":0.0371900826,
69
- "mean_diff_a_minus_b":-0.2192274241,
70
- "median_diff_a_minus_b":-0.1618937068,
71
- "W_stat":31.0,
72
- "p_two_sided":4.402939865e-20,
73
- "z_equiv":-9.1777251884,
74
- "effect_size_r":0.8595727194,
75
- "p_holm_global":2.949969709e-18,
76
- "p_holm_within_metric":5.283527837e-19
77
- },
78
- {
79
- "metric":"mean_cert_acc_108",
80
- "alg_a":"aol",
81
- "alg_b":"ldlt",
82
- "n_common":121,
83
- "n_nonzero":96,
84
- "wins_a":5,
85
- "wins_b":91,
86
- "ties":25,
87
- "win_rate_a_over_b":0.1446280992,
88
- "mean_diff_a_minus_b":-0.1485900419,
89
- "median_diff_a_minus_b":-0.0715934783,
90
- "W_stat":108.0,
91
- "p_two_sided":5.031122992e-16,
92
- "z_equiv":-8.1107428442,
93
- "effect_size_r":0.8277992251,
94
- "p_holm_global":0.0,
95
- "p_holm_within_metric":0.0
96
- },
97
- {
98
- "metric":"mean_cert_acc_108",
99
- "alg_a":"ldlt",
100
- "alg_b":"sandwich",
101
- "n_common":121,
102
- "n_nonzero":119,
103
- "wins_a":16,
104
- "wins_b":103,
105
- "ties":2,
106
- "win_rate_a_over_b":0.1404958678,
107
- "mean_diff_a_minus_b":-0.1517605518,
108
- "median_diff_a_minus_b":-0.0699183001,
109
- "W_stat":532.0,
110
- "p_two_sided":7.955949294e-16,
111
- "z_equiv":-8.0548758918,
112
- "effect_size_r":0.7383892624,
113
- "p_holm_global":0.0,
114
- "p_holm_within_metric":0.0
115
- },
116
- {
117
- "metric":"mean_cert_acc_108",
118
- "alg_a":"ldlt",
119
- "alg_b":"ldlt-resnet",
120
- "n_common":121,
121
- "n_nonzero":120,
122
- "wins_a":25,
123
- "wins_b":95,
124
- "ties":1,
125
- "win_rate_a_over_b":0.2107438017,
126
- "mean_diff_a_minus_b":-0.1289793339,
127
- "median_diff_a_minus_b":-0.0750768315,
128
- "W_stat":711.0,
129
- "p_two_sided":0.0,
130
- "z_equiv":-7.6431760198,
131
- "effect_size_r":0.6977233195,
132
- "p_holm_global":0.0,
133
- "p_holm_within_metric":0.0
134
- },
135
- {
136
- "metric":"mean_cert_acc_108",
137
- "alg_a":"sandwich",
138
- "alg_b":"sdp",
139
- "n_common":121,
140
- "n_nonzero":119,
141
- "wins_a":92,
142
- "wins_b":27,
143
- "ties":2,
144
- "win_rate_a_over_b":0.7685950413,
145
- "mean_diff_a_minus_b":0.0811231695,
146
- "median_diff_a_minus_b":0.0456349217,
147
- "W_stat":1046.0,
148
- "p_two_sided":0.0,
149
- "z_equiv":6.6918450413,
150
- "effect_size_r":0.6134404292,
151
- "p_holm_global":0.0000000009,
152
- "p_holm_within_metric":0.0000000002
153
- },
154
- {
155
- "metric":"mean_cert_acc_108",
156
- "alg_a":"ortho",
157
- "alg_b":"sandwich",
158
- "n_common":121,
159
- "n_nonzero":117,
160
- "wins_a":20,
161
- "wins_b":97,
162
- "ties":4,
163
- "win_rate_a_over_b":0.1818181818,
164
- "mean_diff_a_minus_b":-0.0510692867,
165
- "median_diff_a_minus_b":-0.0322356224,
166
- "W_stat":1001.5,
167
- "p_two_sided":0.0,
168
- "z_equiv":-6.6621652337,
169
- "effect_size_r":0.6159173937,
170
- "p_holm_global":0.0000000011,
171
- "p_holm_within_metric":0.0000000002
172
- },
173
- {
174
- "metric":"mean_cert_acc_108",
175
- "alg_a":"ldlt",
176
- "alg_b":"ortho",
177
- "n_common":121,
178
- "n_nonzero":117,
179
- "wins_a":31,
180
- "wins_b":86,
181
- "ties":4,
182
- "win_rate_a_over_b":0.2727272727,
183
- "mean_diff_a_minus_b":-0.1006912651,
184
- "median_diff_a_minus_b":-0.0379549302,
185
- "W_stat":1169.0,
186
- "p_two_sided":0.0000000005,
187
- "z_equiv":-6.2065948293,
188
- "effect_size_r":0.573799895,
189
- "p_holm_global":0.0000000195,
190
- "p_holm_within_metric":0.0000000032
191
- },
192
- {
193
- "metric":"mean_cert_acc_108",
194
- "alg_a":"ldlt",
195
- "alg_b":"sdp",
196
- "n_common":121,
197
- "n_nonzero":114,
198
- "wins_a":31,
199
- "wins_b":83,
200
- "ties":7,
201
- "win_rate_a_over_b":0.2851239669,
202
- "mean_diff_a_minus_b":-0.0706373823,
203
- "median_diff_a_minus_b":-0.0307513103,
204
- "W_stat":1220.0,
205
- "p_two_sided":0.000000006,
206
- "z_equiv":-5.8159521603,
207
- "effect_size_r":0.5447138274,
208
- "p_holm_global":0.0000001929,
209
- "p_holm_within_metric":0.0000000301
210
- },
211
- {
212
- "metric":"mean_cert_acc_108",
213
- "alg_a":"ldlt-resnet",
214
- "alg_b":"sdp",
215
- "n_common":121,
216
- "n_nonzero":121,
217
- "wins_a":92,
218
- "wins_b":29,
219
- "ties":0,
220
- "win_rate_a_over_b":0.7603305785,
221
- "mean_diff_a_minus_b":0.0583419517,
222
- "median_diff_a_minus_b":0.0294323415,
223
- "W_stat":1470.0,
224
- "p_two_sided":0.0000000093,
225
- "z_equiv":5.7422624709,
226
- "effect_size_r":0.522023861,
227
- "p_holm_global":0.0000002896,
228
- "p_holm_within_metric":0.0000000374
229
- },
230
- {
231
- "metric":"mean_cert_acc_108",
232
- "alg_a":"ldlt-resnet",
233
- "alg_b":"ortho",
234
- "n_common":121,
235
- "n_nonzero":121,
236
- "wins_a":84,
237
- "wins_b":37,
238
- "ties":0,
239
- "win_rate_a_over_b":0.694214876,
240
- "mean_diff_a_minus_b":0.0282880689,
241
- "median_diff_a_minus_b":0.020693764,
242
- "W_stat":2239.0,
243
- "p_two_sided":0.0001746169,
244
- "z_equiv":3.7531634438,
245
- "effect_size_r":0.3411966767,
246
- "p_holm_global":0.0031431046,
247
- "p_holm_within_metric":0.0005238508
248
- },
249
- {
250
- "metric":"mean_cert_acc_108",
251
- "alg_a":"ortho",
252
- "alg_b":"sdp",
253
- "n_common":121,
254
- "n_nonzero":119,
255
- "wins_a":69,
256
- "wins_b":50,
257
- "ties":2,
258
- "win_rate_a_over_b":0.5785123967,
259
- "mean_diff_a_minus_b":0.0300538828,
260
- "median_diff_a_minus_b":0.0077348054,
261
- "W_stat":2707.5,
262
- "p_two_sided":0.0222623284,
263
- "z_equiv":2.2858620808,
264
- "effect_size_r":0.2095446334,
265
- "p_holm_global":0.2448856126,
266
- "p_holm_within_metric":0.0445246568
267
- },
268
- {
269
- "metric":"mean_cert_acc_108",
270
- "alg_a":"ldlt-resnet",
271
- "alg_b":"sandwich",
272
- "n_common":121,
273
- "n_nonzero":121,
274
- "wins_a":59,
275
- "wins_b":62,
276
- "ties":0,
277
- "win_rate_a_over_b":0.4876033058,
278
- "mean_diff_a_minus_b":-0.0227812179,
279
- "median_diff_a_minus_b":-0.0022764653,
280
- "W_stat":3035.0,
281
- "p_two_sided":0.0902224636,
282
- "z_equiv":-1.6942253809,
283
- "effect_size_r":0.1540204892,
284
- "p_holm_global":0.721779709,
285
- "p_holm_within_metric":0.0902224636
286
- },
287
- {
288
- "metric":"mean_cert_acc_255",
289
- "alg_a":"aol",
290
- "alg_b":"ldlt-resnet",
291
- "n_common":121,
292
- "n_nonzero":107,
293
- "wins_a":0,
294
- "wins_b":107,
295
- "ties":14,
296
- "win_rate_a_over_b":0.0578512397,
297
- "mean_diff_a_minus_b":-0.1301208865,
298
- "median_diff_a_minus_b":-0.0753463618,
299
- "W_stat":0.0,
300
- "p_two_sided":2.769967503e-19,
301
- "z_equiv":-8.9774967874,
302
- "effect_size_r":0.8678873725,
303
- "p_holm_global":1.745079527e-17,
304
- "p_holm_within_metric":4.154951255e-18
305
- },
306
- {
307
- "metric":"mean_cert_acc_255",
308
- "alg_a":"aol",
309
- "alg_b":"sandwich",
310
- "n_common":121,
311
- "n_nonzero":110,
312
- "wins_a":3,
313
- "wins_b":107,
314
- "ties":11,
315
- "win_rate_a_over_b":0.0702479339,
316
- "mean_diff_a_minus_b":-0.1639136245,
317
- "median_diff_a_minus_b":-0.0914320648,
318
- "W_stat":67.0,
319
- "p_two_sided":5.476425263e-19,
320
- "z_equiv":-8.9021721316,
321
- "effect_size_r":0.848788809,
322
- "p_holm_global":3.395383663e-17,
323
- "p_holm_within_metric":7.666995368e-18
324
- },
325
- {
326
- "metric":"mean_cert_acc_255",
327
- "alg_a":"aol",
328
- "alg_b":"sdp",
329
- "n_common":121,
330
- "n_nonzero":100,
331
- "wins_a":1,
332
- "wins_b":99,
333
- "ties":21,
334
- "win_rate_a_over_b":0.0950413223,
335
- "mean_diff_a_minus_b":-0.1044757497,
336
- "median_diff_a_minus_b":-0.0434507839,
337
- "W_stat":4.0,
338
- "p_two_sided":4.463734493e-18,
339
- "z_equiv":-8.6663041717,
340
- "effect_size_r":0.8666304172,
341
- "p_holm_global":2.722878041e-16,
342
- "p_holm_within_metric":5.802854841e-17
343
- },
344
- {
345
- "metric":"mean_cert_acc_255",
346
- "alg_a":"aol",
347
- "alg_b":"ortho",
348
- "n_common":121,
349
- "n_nonzero":99,
350
- "wins_a":2,
351
- "wins_b":97,
352
- "ties":22,
353
- "win_rate_a_over_b":0.1074380165,
354
- "mean_diff_a_minus_b":-0.1145907674,
355
- "median_diff_a_minus_b":-0.0517967492,
356
- "W_stat":61.0,
357
- "p_two_sided":3.644214628e-17,
358
- "z_equiv":-8.4238143794,
359
- "effect_size_r":0.8466252,
360
- "p_holm_global":0.0,
361
- "p_holm_within_metric":4.373057553e-16
362
- },
363
- {
364
- "metric":"mean_cert_acc_255",
365
- "alg_a":"ldlt",
366
- "alg_b":"sandwich",
367
- "n_common":121,
368
- "n_nonzero":110,
369
- "wins_a":12,
370
- "wins_b":98,
371
- "ties":11,
372
- "win_rate_a_over_b":0.1446280992,
373
- "mean_diff_a_minus_b":-0.0913878158,
374
- "median_diff_a_minus_b":-0.0362588018,
375
- "W_stat":385.0,
376
- "p_two_sided":0.0,
377
- "z_equiv":-7.9538000251,
378
- "effect_size_r":0.7583650766,
379
- "p_holm_global":0.0,
380
- "p_holm_within_metric":0.0
381
- },
382
- {
383
- "metric":"mean_cert_acc_255",
384
- "alg_a":"ortho",
385
- "alg_b":"sandwich",
386
- "n_common":121,
387
- "n_nonzero":109,
388
- "wins_a":12,
389
- "wins_b":97,
390
- "ties":12,
391
- "win_rate_a_over_b":0.1487603306,
392
- "mean_diff_a_minus_b":-0.0493228571,
393
- "median_diff_a_minus_b":-0.0258620679,
394
- "W_stat":367.0,
395
- "p_two_sided":0.0,
396
- "z_equiv":-7.9511458585,
397
- "effect_size_r":0.7615816501,
398
- "p_holm_global":0.0,
399
- "p_holm_within_metric":0.0
400
- },
401
- {
402
- "metric":"mean_cert_acc_255",
403
- "alg_a":"aol",
404
- "alg_b":"ldlt",
405
- "n_common":121,
406
- "n_nonzero":81,
407
- "wins_a":3,
408
- "wins_b":78,
409
- "ties":40,
410
- "win_rate_a_over_b":0.1900826446,
411
- "mean_diff_a_minus_b":-0.0725258087,
412
- "median_diff_a_minus_b":-0.0180000011,
413
- "W_stat":36.0,
414
- "p_two_sided":0.0,
415
- "z_equiv":-7.6462484933,
416
- "effect_size_r":0.8495831659,
417
- "p_holm_global":0.0,
418
- "p_holm_within_metric":0.0
419
- },
420
- {
421
- "metric":"mean_cert_acc_255",
422
- "alg_a":"ldlt",
423
- "alg_b":"ldlt-resnet",
424
- "n_common":121,
425
- "n_nonzero":108,
426
- "wins_a":22,
427
- "wins_b":86,
428
- "ties":13,
429
- "win_rate_a_over_b":0.2355371901,
430
- "mean_diff_a_minus_b":-0.0575950778,
431
- "median_diff_a_minus_b":-0.0305115729,
432
- "W_stat":669.0,
433
- "p_two_sided":0.0,
434
- "z_equiv":-6.968604881,
435
- "effect_size_r":0.6705543173,
436
- "p_holm_global":0.0000000001,
437
- "p_holm_within_metric":0.0
438
- },
439
- {
440
- "metric":"mean_cert_acc_255",
441
- "alg_a":"sandwich",
442
- "alg_b":"sdp",
443
- "n_common":121,
444
- "n_nonzero":111,
445
- "wins_a":83,
446
- "wins_b":28,
447
- "ties":10,
448
- "win_rate_a_over_b":0.7272727273,
449
- "mean_diff_a_minus_b":0.0594378748,
450
- "median_diff_a_minus_b":0.0187265547,
451
- "W_stat":1014.0,
452
- "p_two_sided":0.0000000007,
453
- "z_equiv":6.1596445847,
454
- "effect_size_r":0.5846475909,
455
- "p_holm_global":0.0000000241,
456
- "p_holm_within_metric":0.0000000051
457
- },
458
- {
459
- "metric":"mean_cert_acc_255",
460
- "alg_a":"ldlt",
461
- "alg_b":"sdp",
462
- "n_common":121,
463
- "n_nonzero":101,
464
- "wins_a":24,
465
- "wins_b":77,
466
- "ties":20,
467
- "win_rate_a_over_b":0.2809917355,
468
- "mean_diff_a_minus_b":-0.0319499409,
469
- "median_diff_a_minus_b":-0.0083265586,
470
- "W_stat":1120.0,
471
- "p_two_sided":0.0000008265,
472
- "z_equiv":-4.9290149336,
473
- "effect_size_r":0.490455317,
474
- "p_holm_global":0.0000214878,
475
- "p_holm_within_metric":0.0000049587
476
- },
477
- {
478
- "metric":"mean_cert_acc_255",
479
- "alg_a":"ldlt-resnet",
480
- "alg_b":"sdp",
481
- "n_common":121,
482
- "n_nonzero":109,
483
- "wins_a":80,
484
- "wins_b":29,
485
- "ties":12,
486
- "win_rate_a_over_b":0.7107438017,
487
- "mean_diff_a_minus_b":0.0256451368,
488
- "median_diff_a_minus_b":0.015625,
489
- "W_stat":1368.5,
490
- "p_two_sided":0.0000008507,
491
- "z_equiv":4.9233644211,
492
- "effect_size_r":0.4715727854,
493
- "p_holm_global":0.0000214878,
494
- "p_holm_within_metric":0.0000049587
495
- },
496
- {
497
- "metric":"mean_cert_acc_255",
498
- "alg_a":"ldlt",
499
- "alg_b":"ortho",
500
- "n_common":121,
501
- "n_nonzero":100,
502
- "wins_a":30,
503
- "wins_b":70,
504
- "ties":21,
505
- "win_rate_a_over_b":0.3347107438,
506
- "mean_diff_a_minus_b":-0.0420649587,
507
- "median_diff_a_minus_b":-0.0076755994,
508
- "W_stat":1191.0,
509
- "p_two_sided":0.0000045397,
510
- "z_equiv":-4.5850061789,
511
- "effect_size_r":0.4585006179,
512
- "p_holm_global":0.0001089535,
513
- "p_holm_within_metric":0.0000181589
514
- },
515
- {
516
- "metric":"mean_cert_acc_255",
517
- "alg_a":"ldlt-resnet",
518
- "alg_b":"ortho",
519
- "n_common":121,
520
- "n_nonzero":110,
521
- "wins_a":76,
522
- "wins_b":34,
523
- "ties":11,
524
- "win_rate_a_over_b":0.673553719,
525
- "mean_diff_a_minus_b":0.0155301191,
526
- "median_diff_a_minus_b":0.0123031566,
527
- "W_stat":1722.5,
528
- "p_two_sided":0.0000734043,
529
- "z_equiv":3.9649730077,
530
- "effect_size_r":0.378045343,
531
- "p_holm_global":0.0014680851,
532
- "p_holm_within_metric":0.0002202128
533
- },
534
- {
535
- "metric":"mean_cert_acc_255",
536
- "alg_a":"ldlt-resnet",
537
- "alg_b":"sandwich",
538
- "n_common":121,
539
- "n_nonzero":112,
540
- "wins_a":44,
541
- "wins_b":68,
542
- "ties":9,
543
- "win_rate_a_over_b":0.4008264463,
544
- "mean_diff_a_minus_b":-0.033792738,
545
- "median_diff_a_minus_b":-0.0069877119,
546
- "W_stat":2043.0,
547
- "p_two_sided":0.0011421178,
548
- "z_equiv":-3.2529545065,
549
- "effect_size_r":0.3073753089,
550
- "p_holm_global":0.0159896493,
551
- "p_holm_within_metric":0.0022842356
552
- },
553
- {
554
- "metric":"mean_cert_acc_255",
555
- "alg_a":"ortho",
556
- "alg_b":"sdp",
557
- "n_common":121,
558
- "n_nonzero":104,
559
- "wins_a":53,
560
- "wins_b":51,
561
- "ties":17,
562
- "win_rate_a_over_b":0.5082644628,
563
- "mean_diff_a_minus_b":0.0101150178,
564
- "median_diff_a_minus_b":0.0,
565
- "W_stat":2606.0,
566
- "p_two_sided":0.6887980651,
567
- "z_equiv":0.4004867169,
568
- "effect_size_r":0.0392709535,
569
- "p_holm_global":1.0,
570
- "p_holm_within_metric":0.6887980651
571
- },
572
- {
573
- "metric":"mean_cert_acc_36",
574
- "alg_a":"aol",
575
- "alg_b":"ldlt-resnet",
576
- "n_common":121,
577
- "n_nonzero":121,
578
- "wins_a":6,
579
- "wins_b":115,
580
- "ties":0,
581
- "win_rate_a_over_b":0.0495867769,
582
- "mean_diff_a_minus_b":-0.3195985256,
583
- "median_diff_a_minus_b":-0.2928244397,
584
- "W_stat":93.0,
585
- "p_two_sided":1.352422096e-20,
586
- "z_equiv":-9.3040132751,
587
- "effect_size_r":0.8458193886,
588
- "p_holm_global":9.466954671e-19,
589
- "p_holm_within_metric":2.028633144e-19
590
- },
591
- {
592
- "metric":"mean_cert_acc_36",
593
- "alg_a":"aol",
594
- "alg_b":"sdp",
595
- "n_common":121,
596
- "n_nonzero":121,
597
- "wins_a":7,
598
- "wins_b":114,
599
- "ties":0,
600
- "win_rate_a_over_b":0.0578512397,
601
- "mean_diff_a_minus_b":-0.2948710435,
602
- "median_diff_a_minus_b":-0.2867985517,
603
- "W_stat":156.0,
604
- "p_two_sided":6.184471897e-20,
605
- "z_equiv":-9.1410572461,
606
- "effect_size_r":0.8310052042,
607
- "p_holm_global":4.081751452e-18,
608
- "p_holm_within_metric":8.658260655e-19
609
- },
610
- {
611
- "metric":"mean_cert_acc_36",
612
- "alg_a":"aol",
613
- "alg_b":"sandwich",
614
- "n_common":121,
615
- "n_nonzero":121,
616
- "wins_a":9,
617
- "wins_b":112,
618
- "ties":0,
619
- "win_rate_a_over_b":0.0743801653,
620
- "mean_diff_a_minus_b":-0.336626744,
621
- "median_diff_a_minus_b":-0.3562658951,
622
- "W_stat":174.0,
623
- "p_two_sided":9.50240905e-20,
624
- "z_equiv":-9.0944983807,
625
- "effect_size_r":0.8267725801,
626
- "p_holm_global":6.176565882e-18,
627
- "p_holm_within_metric":1.235313176e-18
628
- },
629
- {
630
- "metric":"mean_cert_acc_36",
631
- "alg_a":"aol",
632
- "alg_b":"ortho",
633
- "n_common":121,
634
- "n_nonzero":121,
635
- "wins_a":9,
636
- "wins_b":112,
637
- "ties":0,
638
- "win_rate_a_over_b":0.0743801653,
639
- "mean_diff_a_minus_b":-0.3153361513,
640
- "median_diff_a_minus_b":-0.2984701917,
641
- "W_stat":183.5,
642
- "p_two_sided":1.190941154e-19,
643
- "z_equiv":-9.0699294388,
644
- "effect_size_r":0.8245390399,
645
- "p_holm_global":7.622023388e-18,
646
- "p_holm_within_metric":1.429129385e-18
647
- },
648
- {
649
- "metric":"mean_cert_acc_36",
650
- "alg_a":"ldlt",
651
- "alg_b":"sandwich",
652
- "n_common":121,
653
- "n_nonzero":121,
654
- "wins_a":31,
655
- "wins_b":90,
656
- "ties":0,
657
- "win_rate_a_over_b":0.2561983471,
658
- "mean_diff_a_minus_b":-0.1618678471,
659
- "median_diff_a_minus_b":-0.0772041827,
660
- "W_stat":975.0,
661
- "p_two_sided":0.0,
662
- "z_equiv":-7.022628869,
663
- "effect_size_r":0.6384208063,
664
- "p_holm_global":0.0000000001,
665
- "p_holm_within_metric":0.0
666
- },
667
- {
668
- "metric":"mean_cert_acc_36",
669
- "alg_a":"aol",
670
- "alg_b":"ldlt",
671
- "n_common":121,
672
- "n_nonzero":114,
673
- "wins_a":21,
674
- "wins_b":93,
675
- "ties":7,
676
- "win_rate_a_over_b":0.2024793388,
677
- "mean_diff_a_minus_b":-0.1747588969,
678
- "median_diff_a_minus_b":-0.1185294129,
679
- "W_stat":843.0,
680
- "p_two_sided":0.0,
681
- "z_equiv":-6.881883632,
682
- "effect_size_r":0.6445474567,
683
- "p_holm_global":0.0000000003,
684
- "p_holm_within_metric":0.0000000001
685
- },
686
- {
687
- "metric":"mean_cert_acc_36",
688
- "alg_a":"ldlt",
689
- "alg_b":"ldlt-resnet",
690
- "n_common":121,
691
- "n_nonzero":121,
692
- "wins_a":36,
693
- "wins_b":85,
694
- "ties":0,
695
- "win_rate_a_over_b":0.2975206612,
696
- "mean_diff_a_minus_b":-0.1448396288,
697
- "median_diff_a_minus_b":-0.0605440549,
698
- "W_stat":1205.0,
699
- "p_two_sided":0.0000000001,
700
- "z_equiv":-6.427710033,
701
- "effect_size_r":0.5843372757,
702
- "p_holm_global":0.0000000051,
703
- "p_holm_within_metric":0.0000000012
704
- },
705
- {
706
- "metric":"mean_cert_acc_36",
707
- "alg_a":"ldlt",
708
- "alg_b":"sdp",
709
- "n_common":121,
710
- "n_nonzero":121,
711
- "wins_a":36,
712
- "wins_b":85,
713
- "ties":0,
714
- "win_rate_a_over_b":0.2975206612,
715
- "mean_diff_a_minus_b":-0.1201121466,
716
- "median_diff_a_minus_b":-0.048914969,
717
- "W_stat":1305.0,
718
- "p_two_sided":0.0000000007,
719
- "z_equiv":-6.1690496695,
720
- "effect_size_r":0.5608226972,
721
- "p_holm_global":0.000000024,
722
- "p_holm_within_metric":0.0000000055
723
- },
724
- {
725
- "metric":"mean_cert_acc_36",
726
- "alg_a":"ldlt",
727
- "alg_b":"ortho",
728
- "n_common":121,
729
- "n_nonzero":121,
730
- "wins_a":42,
731
- "wins_b":79,
732
- "ties":0,
733
- "win_rate_a_over_b":0.347107438,
734
- "mean_diff_a_minus_b":-0.1405772544,
735
- "median_diff_a_minus_b":-0.053137362,
736
- "W_stat":1489.0,
737
- "p_two_sided":0.0000000125,
738
- "z_equiv":-5.6931146007,
739
- "effect_size_r":0.5175558728,
740
- "p_holm_global":0.0000003742,
741
- "p_holm_within_metric":0.0000000873
742
- },
743
- {
744
- "metric":"mean_cert_acc_36",
745
- "alg_a":"sandwich",
746
- "alg_b":"sdp",
747
- "n_common":121,
748
- "n_nonzero":119,
749
- "wins_a":84,
750
- "wins_b":35,
751
- "ties":2,
752
- "win_rate_a_over_b":0.7024793388,
753
- "mean_diff_a_minus_b":0.0417557005,
754
- "median_diff_a_minus_b":0.0174162686,
755
- "W_stat":1683.0,
756
- "p_two_sided":0.0000005655,
757
- "z_equiv":5.0026414386,
758
- "effect_size_r":0.4585913888,
759
- "p_holm_global":0.000015834,
760
- "p_holm_within_metric":0.000003393
761
- },
762
- {
763
- "metric":"mean_cert_acc_36",
764
- "alg_a":"ortho",
765
- "alg_b":"sandwich",
766
- "n_common":121,
767
- "n_nonzero":120,
768
- "wins_a":38,
769
- "wins_b":82,
770
- "ties":1,
771
- "win_rate_a_over_b":0.3181818182,
772
- "mean_diff_a_minus_b":-0.0212905927,
773
- "median_diff_a_minus_b":-0.0130420029,
774
- "W_stat":2067.0,
775
- "p_two_sided":0.00004277,
776
- "z_equiv":-4.0919829682,
777
- "effect_size_r":0.3735452294,
778
- "p_holm_global":0.0008981702,
779
- "p_holm_within_metric":0.00021385
780
- },
781
- {
782
- "metric":"mean_cert_acc_36",
783
- "alg_a":"ldlt-resnet",
784
- "alg_b":"sdp",
785
- "n_common":121,
786
- "n_nonzero":120,
787
- "wins_a":71,
788
- "wins_b":49,
789
- "ties":1,
790
- "win_rate_a_over_b":0.5909090909,
791
- "mean_diff_a_minus_b":0.0247274822,
792
- "median_diff_a_minus_b":0.0080744923,
793
- "W_stat":2331.0,
794
- "p_two_sided":0.0006723776,
795
- "z_equiv":3.4006015259,
796
- "effect_size_r":0.3104310275,
797
- "p_holm_global":0.010758042,
798
- "p_holm_within_metric":0.0026895105
799
- },
800
- {
801
- "metric":"mean_cert_acc_36",
802
- "alg_a":"ldlt-resnet",
803
- "alg_b":"sandwich",
804
- "n_common":121,
805
- "n_nonzero":120,
806
- "wins_a":52,
807
- "wins_b":68,
808
- "ties":1,
809
- "win_rate_a_over_b":0.4338842975,
810
- "mean_diff_a_minus_b":-0.0170282183,
811
- "median_diff_a_minus_b":-0.0088294149,
812
- "W_stat":2802.0,
813
- "p_two_sided":0.030226149,
814
- "z_equiv":-2.1671141799,
815
- "effect_size_r":0.1978295535,
816
- "p_holm_global":0.3022614896,
817
- "p_holm_within_metric":0.0906784469
818
- },
819
- {
820
- "metric":"mean_cert_acc_36",
821
- "alg_a":"ortho",
822
- "alg_b":"sdp",
823
- "n_common":121,
824
- "n_nonzero":121,
825
- "wins_a":58,
826
- "wins_b":63,
827
- "ties":0,
828
- "win_rate_a_over_b":0.479338843,
829
- "mean_diff_a_minus_b":0.0204651078,
830
- "median_diff_a_minus_b":-0.0016042888,
831
- "W_stat":3123.0,
832
- "p_two_sided":0.1424837402,
833
- "z_equiv":1.466604261,
834
- "effect_size_r":0.1333276601,
835
- "p_holm_global":0.9973861816,
836
- "p_holm_within_metric":0.2849674805
837
- },
838
- {
839
- "metric":"mean_cert_acc_36",
840
- "alg_a":"ldlt-resnet",
841
- "alg_b":"ortho",
842
- "n_common":121,
843
- "n_nonzero":121,
844
- "wins_a":76,
845
- "wins_b":45,
846
- "ties":0,
847
- "win_rate_a_over_b":0.6280991736,
848
- "mean_diff_a_minus_b":0.0042623743,
849
- "median_diff_a_minus_b":0.0059870929,
850
- "W_stat":3225.0,
851
- "p_two_sided":0.2290650699,
852
- "z_equiv":1.2027706903,
853
- "effect_size_r":0.10934279,
854
- "p_holm_global":1.0,
855
- "p_holm_within_metric":0.2849674805
856
- },
857
- {
858
- "metric":"mean_cert_acc_72",
859
- "alg_a":"aol",
860
- "alg_b":"ldlt-resnet",
861
- "n_common":121,
862
- "n_nonzero":121,
863
- "wins_a":1,
864
- "wins_b":120,
865
- "ties":0,
866
- "win_rate_a_over_b":0.0082644628,
867
- "mean_diff_a_minus_b":-0.3092410628,
868
- "median_diff_a_minus_b":-0.2919858993,
869
- "W_stat":21.0,
870
- "p_two_sided":2.304748555e-21,
871
- "z_equiv":-9.4902527053,
872
- "effect_size_r":0.8627502459,
873
- "p_holm_global":1.728561416e-19,
874
- "p_holm_within_metric":3.457122833e-20
875
- },
876
- {
877
- "metric":"mean_cert_acc_72",
878
- "alg_a":"aol",
879
- "alg_b":"sdp",
880
- "n_common":121,
881
- "n_nonzero":119,
882
- "wins_a":1,
883
- "wins_b":118,
884
- "ties":2,
885
- "win_rate_a_over_b":0.0165289256,
886
- "mean_diff_a_minus_b":-0.2649477861,
887
- "median_diff_a_minus_b":-0.2343178242,
888
- "W_stat":2.0,
889
- "p_two_sided":3.069560827e-21,
890
- "z_equiv":-9.4603357182,
891
- "effect_size_r":0.8672275534,
892
- "p_holm_global":2.271475012e-19,
893
- "p_holm_within_metric":4.297385158e-20
894
- },
895
- {
896
- "metric":"mean_cert_acc_72",
897
- "alg_a":"aol",
898
- "alg_b":"ortho",
899
- "n_common":121,
900
- "n_nonzero":121,
901
- "wins_a":6,
902
- "wins_b":115,
903
- "ties":0,
904
- "win_rate_a_over_b":0.0495867769,
905
- "mean_diff_a_minus_b":-0.2907397669,
906
- "median_diff_a_minus_b":-0.2589285746,
907
- "W_stat":86.5,
908
- "p_two_sided":1.154336109e-20,
909
- "z_equiv":-9.3208300963,
910
- "effect_size_r":0.8473481906,
911
- "p_holm_global":8.311219988e-19,
912
- "p_holm_within_metric":1.500636942e-19
913
- },
914
- {
915
- "metric":"mean_cert_acc_72",
916
- "alg_a":"aol",
917
- "alg_b":"sandwich",
918
- "n_common":121,
919
- "n_nonzero":121,
920
- "wins_a":7,
921
- "wins_b":114,
922
- "ties":0,
923
- "win_rate_a_over_b":0.0578512397,
924
- "mean_diff_a_minus_b":-0.3364568783,
925
- "median_diff_a_minus_b":-0.3205493689,
926
- "W_stat":90.0,
927
- "p_two_sided":1.257158705e-20,
928
- "z_equiv":-9.311773086,
929
- "effect_size_r":0.846524826,
930
- "p_holm_global":8.925826805e-19,
931
- "p_holm_within_metric":1.508590446e-19
932
- },
933
- {
934
- "metric":"mean_cert_acc_72",
935
- "alg_a":"ldlt",
936
- "alg_b":"sandwich",
937
- "n_common":121,
938
- "n_nonzero":120,
939
- "wins_a":21,
940
- "wins_b":99,
941
- "ties":1,
942
- "win_rate_a_over_b":0.1776859504,
943
- "mean_diff_a_minus_b":-0.1661887829,
944
- "median_diff_a_minus_b":-0.0915510654,
945
- "W_stat":608.0,
946
- "p_two_sided":0.0,
947
- "z_equiv":-7.9129129845,
948
- "effect_size_r":0.7223468229,
949
- "p_holm_global":0.0,
950
- "p_holm_within_metric":0.0
951
- },
952
- {
953
- "metric":"mean_cert_acc_72",
954
- "alg_a":"aol",
955
- "alg_b":"ldlt",
956
- "n_common":121,
957
- "n_nonzero":103,
958
- "wins_a":10,
959
- "wins_b":93,
960
- "ties":18,
961
- "win_rate_a_over_b":0.1570247934,
962
- "mean_diff_a_minus_b":-0.1702680954,
963
- "median_diff_a_minus_b":-0.1040100288,
964
- "W_stat":284.0,
965
- "p_two_sided":0.0,
966
- "z_equiv":-7.8744204133,
967
- "effect_size_r":0.7758896982,
968
- "p_holm_global":0.0,
969
- "p_holm_within_metric":0.0
970
- },
971
- {
972
- "metric":"mean_cert_acc_72",
973
- "alg_a":"ldlt",
974
- "alg_b":"ldlt-resnet",
975
- "n_common":121,
976
- "n_nonzero":121,
977
- "wins_a":30,
978
- "wins_b":91,
979
- "ties":0,
980
- "win_rate_a_over_b":0.2479338843,
981
- "mean_diff_a_minus_b":-0.1389729674,
982
- "median_diff_a_minus_b":-0.0703703724,
983
- "W_stat":922.0,
984
- "p_two_sided":0.0,
985
- "z_equiv":-7.1597188617,
986
- "effect_size_r":0.6508835329,
987
- "p_holm_global":0.0,
988
- "p_holm_within_metric":0.0
989
- },
990
- {
991
- "metric":"mean_cert_acc_72",
992
- "alg_a":"sandwich",
993
- "alg_b":"sdp",
994
- "n_common":121,
995
- "n_nonzero":121,
996
- "wins_a":93,
997
- "wins_b":28,
998
- "ties":0,
999
- "win_rate_a_over_b":0.7685950413,
1000
- "mean_diff_a_minus_b":0.0715090921,
1001
- "median_diff_a_minus_b":0.0368270464,
1002
- "W_stat":1188.0,
1003
- "p_two_sided":0.0000000001,
1004
- "z_equiv":6.4716822948,
1005
- "effect_size_r":0.5883347541,
1006
- "p_holm_global":0.000000004,
1007
- "p_holm_within_metric":0.0000000008
1008
- },
1009
- {
1010
- "metric":"mean_cert_acc_72",
1011
- "alg_a":"ldlt",
1012
- "alg_b":"sdp",
1013
- "n_common":121,
1014
- "n_nonzero":120,
1015
- "wins_a":34,
1016
- "wins_b":86,
1017
- "ties":1,
1018
- "win_rate_a_over_b":0.2851239669,
1019
- "mean_diff_a_minus_b":-0.0946796908,
1020
- "median_diff_a_minus_b":-0.048577195,
1021
- "W_stat":1161.0,
1022
- "p_two_sided":0.0000000001,
1023
- "z_equiv":-6.4646783724,
1024
- "effect_size_r":0.5901416953,
1025
- "p_holm_global":0.0000000041,
1026
- "p_holm_within_metric":0.0000000008
1027
- },
1028
- {
1029
- "metric":"mean_cert_acc_72",
1030
- "alg_a":"ortho",
1031
- "alg_b":"sandwich",
1032
- "n_common":121,
1033
- "n_nonzero":118,
1034
- "wins_a":19,
1035
- "wins_b":99,
1036
- "ties":3,
1037
- "win_rate_a_over_b":0.1694214876,
1038
- "mean_diff_a_minus_b":-0.0457171114,
1039
- "median_diff_a_minus_b":-0.0301288068,
1040
- "W_stat":1121.5,
1041
- "p_two_sided":0.0000000001,
1042
- "z_equiv":-6.4141986325,
1043
- "effect_size_r":0.5904748455,
1044
- "p_holm_global":0.0000000054,
1045
- "p_holm_within_metric":0.0000000008
1046
- },
1047
- {
1048
- "metric":"mean_cert_acc_72",
1049
- "alg_a":"ldlt",
1050
- "alg_b":"ortho",
1051
- "n_common":121,
1052
- "n_nonzero":120,
1053
- "wins_a":35,
1054
- "wins_b":85,
1055
- "ties":1,
1056
- "win_rate_a_over_b":0.2933884298,
1057
- "mean_diff_a_minus_b":-0.1204716715,
1058
- "median_diff_a_minus_b":-0.047722198,
1059
- "W_stat":1257.0,
1060
- "p_two_sided":0.0000000005,
1061
- "z_equiv":-6.2132669388,
1062
- "effect_size_r":0.5671910764,
1063
- "p_holm_global":0.0000000192,
1064
- "p_holm_within_metric":0.0000000026
1065
- },
1066
- {
1067
- "metric":"mean_cert_acc_72",
1068
- "alg_a":"ldlt-resnet",
1069
- "alg_b":"sdp",
1070
- "n_common":121,
1071
- "n_nonzero":121,
1072
- "wins_a":82,
1073
- "wins_b":39,
1074
- "ties":0,
1075
- "win_rate_a_over_b":0.6776859504,
1076
- "mean_diff_a_minus_b":0.0442932766,
1077
- "median_diff_a_minus_b":0.0152180791,
1078
- "W_stat":1961.0,
1079
- "p_two_sided":0.0000077405,
1080
- "z_equiv":4.472239555,
1081
- "effect_size_r":0.4065672323,
1082
- "p_holm_global":0.0001780307,
1083
- "p_holm_within_metric":0.0000309619
1084
- },
1085
- {
1086
- "metric":"mean_cert_acc_72",
1087
- "alg_a":"ldlt-resnet",
1088
- "alg_b":"sandwich",
1089
- "n_common":121,
1090
- "n_nonzero":121,
1091
- "wins_a":49,
1092
- "wins_b":72,
1093
- "ties":0,
1094
- "win_rate_a_over_b":0.4049586777,
1095
- "mean_diff_a_minus_b":-0.0272158155,
1096
- "median_diff_a_minus_b":-0.0105392188,
1097
- "W_stat":2635.0,
1098
- "p_two_sided":0.0063552362,
1099
- "z_equiv":-2.7288668349,
1100
- "effect_size_r":0.2480788032,
1101
- "p_holm_global":0.0826180701,
1102
- "p_holm_within_metric":0.0190657085
1103
- },
1104
- {
1105
- "metric":"mean_cert_acc_72",
1106
- "alg_a":"ldlt-resnet",
1107
- "alg_b":"ortho",
1108
- "n_common":121,
1109
- "n_nonzero":121,
1110
- "wins_a":74,
1111
- "wins_b":47,
1112
- "ties":0,
1113
- "win_rate_a_over_b":0.6115702479,
1114
- "mean_diff_a_minus_b":0.0185012959,
1115
- "median_diff_a_minus_b":0.011391893,
1116
- "W_stat":2675.0,
1117
- "p_two_sided":0.0086546544,
1118
- "z_equiv":2.6254026895,
1119
- "effect_size_r":0.2386729718,
1120
- "p_holm_global":0.1038558522,
1121
- "p_holm_within_metric":0.0190657085
1122
- },
1123
- {
1124
- "metric":"mean_cert_acc_72",
1125
- "alg_a":"ortho",
1126
- "alg_b":"sdp",
1127
- "n_common":121,
1128
- "n_nonzero":121,
1129
- "wins_a":62,
1130
- "wins_b":59,
1131
- "ties":0,
1132
- "win_rate_a_over_b":0.5123966942,
1133
- "mean_diff_a_minus_b":0.0257919807,
1134
- "median_diff_a_minus_b":0.0024819822,
1135
- "W_stat":2935.5,
1136
- "p_two_sided":0.0509865142,
1137
- "z_equiv":1.9515932587,
1138
- "effect_size_r":0.177417569,
1139
- "p_holm_global":0.4588786282,
1140
- "p_holm_within_metric":0.0509865142
1141
- },
1142
- {
1143
- "metric":"mean_test_acc",
1144
- "alg_a":"aol",
1145
- "alg_b":"ldlt",
1146
- "n_common":121,
1147
- "n_nonzero":121,
1148
- "wins_a":16,
1149
- "wins_b":105,
1150
- "ties":0,
1151
- "win_rate_a_over_b":0.132231405,
1152
- "mean_diff_a_minus_b":-0.1195835257,
1153
- "median_diff_a_minus_b":-0.0631501061,
1154
- "W_stat":409.0,
1155
- "p_two_sided":2.126854418e-17,
1156
- "z_equiv":-8.4866465265,
1157
- "effect_size_r":0.7715133206,
1158
- "p_holm_global":0.0,
1159
- "p_holm_within_metric":3.190281628e-16
1160
- },
1161
- {
1162
- "metric":"mean_test_acc",
1163
- "alg_a":"aol",
1164
- "alg_b":"sandwich",
1165
- "n_common":121,
1166
- "n_nonzero":121,
1167
- "wins_a":23,
1168
- "wins_b":98,
1169
- "ties":0,
1170
- "win_rate_a_over_b":0.1900826446,
1171
- "mean_diff_a_minus_b":-0.1113283867,
1172
- "median_diff_a_minus_b":-0.0656148695,
1173
- "W_stat":698.0,
1174
- "p_two_sided":0.0,
1175
- "z_equiv":-7.7391180759,
1176
- "effect_size_r":0.7035561887,
1177
- "p_holm_global":0.0,
1178
- "p_holm_within_metric":0.0
1179
- },
1180
- {
1181
- "metric":"mean_test_acc",
1182
- "alg_a":"aol",
1183
- "alg_b":"ortho",
1184
- "n_common":121,
1185
- "n_nonzero":121,
1186
- "wins_a":25,
1187
- "wins_b":96,
1188
- "ties":0,
1189
- "win_rate_a_over_b":0.2066115702,
1190
- "mean_diff_a_minus_b":-0.0986620727,
1191
- "median_diff_a_minus_b":-0.0525094743,
1192
- "W_stat":908.0,
1193
- "p_two_sided":0.0,
1194
- "z_equiv":-7.1959313126,
1195
- "effect_size_r":0.6541755739,
1196
- "p_holm_global":0.0,
1197
- "p_holm_within_metric":0.0
1198
- },
1199
- {
1200
- "metric":"mean_test_acc",
1201
- "alg_a":"aol",
1202
- "alg_b":"sdp",
1203
- "n_common":121,
1204
- "n_nonzero":121,
1205
- "wins_a":24,
1206
- "wins_b":97,
1207
- "ties":0,
1208
- "win_rate_a_over_b":0.1983471074,
1209
- "mean_diff_a_minus_b":-0.0961238247,
1210
- "median_diff_a_minus_b":-0.0445100629,
1211
- "W_stat":1047.0,
1212
- "p_two_sided":0.0,
1213
- "z_equiv":-6.8363934073,
1214
- "effect_size_r":0.6214903098,
1215
- "p_holm_global":0.0000000004,
1216
- "p_holm_within_metric":0.0000000001
1217
- },
1218
- {
1219
- "metric":"mean_test_acc",
1220
- "alg_a":"aol",
1221
- "alg_b":"ldlt-resnet",
1222
- "n_common":121,
1223
- "n_nonzero":121,
1224
- "wins_a":27,
1225
- "wins_b":94,
1226
- "ties":0,
1227
- "win_rate_a_over_b":0.2231404959,
1228
- "mean_diff_a_minus_b":-0.0920606953,
1229
- "median_diff_a_minus_b":-0.0356908378,
1230
- "W_stat":1307.0,
1231
- "p_two_sided":0.0000000007,
1232
- "z_equiv":-6.1638764622,
1233
- "effect_size_r":0.5603524057,
1234
- "p_holm_global":0.0000000241,
1235
- "p_holm_within_metric":0.0000000078
1236
- },
1237
- {
1238
- "metric":"mean_test_acc",
1239
- "alg_a":"ldlt",
1240
- "alg_b":"sdp",
1241
- "n_common":121,
1242
- "n_nonzero":121,
1243
- "wins_a":86,
1244
- "wins_b":35,
1245
- "ties":0,
1246
- "win_rate_a_over_b":0.7107438017,
1247
- "mean_diff_a_minus_b":0.023459701,
1248
- "median_diff_a_minus_b":0.0102784065,
1249
- "W_stat":1511.0,
1250
- "p_two_sided":0.0000000174,
1251
- "z_equiv":5.6362093207,
1252
- "effect_size_r":0.5123826655,
1253
- "p_holm_global":0.0000005041,
1254
- "p_holm_within_metric":0.0000001738
1255
- },
1256
- {
1257
- "metric":"mean_test_acc",
1258
- "alg_a":"ldlt",
1259
- "alg_b":"ldlt-resnet",
1260
- "n_common":121,
1261
- "n_nonzero":121,
1262
- "wins_a":87,
1263
- "wins_b":34,
1264
- "ties":0,
1265
- "win_rate_a_over_b":0.7190082645,
1266
- "mean_diff_a_minus_b":0.0275228304,
1267
- "median_diff_a_minus_b":0.0124811598,
1268
- "W_stat":1779.0,
1269
- "p_two_sided":0.0000007693,
1270
- "z_equiv":4.9429995465,
1271
- "effect_size_r":0.4493635951,
1272
- "p_holm_global":0.000020771,
1273
- "p_holm_within_metric":0.0000069237
1274
- },
1275
- {
1276
- "metric":"mean_test_acc",
1277
- "alg_a":"ldlt",
1278
- "alg_b":"ortho",
1279
- "n_common":121,
1280
- "n_nonzero":121,
1281
- "wins_a":78,
1282
- "wins_b":43,
1283
- "ties":0,
1284
- "win_rate_a_over_b":0.6446280992,
1285
- "mean_diff_a_minus_b":0.020921453,
1286
- "median_diff_a_minus_b":0.010356299,
1287
- "W_stat":2002.0,
1288
- "p_two_sided":0.0000126434,
1289
- "z_equiv":4.3661869359,
1290
- "effect_size_r":0.3969260851,
1291
- "p_holm_global":0.0002781554,
1292
- "p_holm_within_metric":0.0001011474
1293
- },
1294
- {
1295
- "metric":"mean_test_acc",
1296
- "alg_a":"ortho",
1297
- "alg_b":"sandwich",
1298
- "n_common":121,
1299
- "n_nonzero":120,
1300
- "wins_a":39,
1301
- "wins_b":81,
1302
- "ties":1,
1303
- "win_rate_a_over_b":0.326446281,
1304
- "mean_diff_a_minus_b":-0.012666314,
1305
- "median_diff_a_minus_b":-0.0078946893,
1306
- "W_stat":2171.0,
1307
- "p_two_sided":0.0001336571,
1308
- "z_equiv":-3.8196205818,
1309
- "effect_size_r":0.348682059,
1310
- "p_holm_global":0.0025394851,
1311
- "p_holm_within_metric":0.0009355998
1312
- },
1313
- {
1314
- "metric":"mean_test_acc",
1315
- "alg_a":"sandwich",
1316
- "alg_b":"sdp",
1317
- "n_common":121,
1318
- "n_nonzero":121,
1319
- "wins_a":80,
1320
- "wins_b":41,
1321
- "ties":0,
1322
- "win_rate_a_over_b":0.6611570248,
1323
- "mean_diff_a_minus_b":0.015204562,
1324
- "median_diff_a_minus_b":0.006305895,
1325
- "W_stat":2279.0,
1326
- "p_two_sided":0.0002625491,
1327
- "z_equiv":3.649697729,
1328
- "effect_size_r":0.3317907026,
1329
- "p_holm_global":0.0044633343,
1330
- "p_holm_within_metric":0.0015752945
1331
- },
1332
- {
1333
- "metric":"mean_test_acc",
1334
- "alg_a":"ldlt-resnet",
1335
- "alg_b":"sandwich",
1336
- "n_common":121,
1337
- "n_nonzero":121,
1338
- "wins_a":47,
1339
- "wins_b":74,
1340
- "ties":0,
1341
- "win_rate_a_over_b":0.3884297521,
1342
- "mean_diff_a_minus_b":-0.0192676914,
1343
- "median_diff_a_minus_b":-0.01009705,
1344
- "W_stat":2411.0,
1345
- "p_two_sided":0.0009387558,
1346
- "z_equiv":-3.3082660492,
1347
- "effect_size_r":0.300751459,
1348
- "p_holm_global":0.0140813373,
1349
- "p_holm_within_metric":0.0046937791
1350
- },
1351
- {
1352
- "metric":"mean_test_acc",
1353
- "alg_a":"ldlt",
1354
- "alg_b":"sandwich",
1355
- "n_common":121,
1356
- "n_nonzero":120,
1357
- "wins_a":64,
1358
- "wins_b":56,
1359
- "ties":1,
1360
- "win_rate_a_over_b":0.5330578512,
1361
- "mean_diff_a_minus_b":0.008255139,
1362
- "median_diff_a_minus_b":0.0008221223,
1363
- "W_stat":3145.0,
1364
- "p_two_sided":0.2044973939,
1365
- "z_equiv":1.2688420788,
1366
- "effect_size_r":0.1158289047,
1367
- "p_holm_global":1.0,
1368
- "p_holm_within_metric":0.8179895757
1369
- },
1370
- {
1371
- "metric":"mean_test_acc",
1372
- "alg_a":"ldlt-resnet",
1373
- "alg_b":"ortho",
1374
- "n_common":121,
1375
- "n_nonzero":121,
1376
- "wins_a":54,
1377
- "wins_b":67,
1378
- "ties":0,
1379
- "win_rate_a_over_b":0.4462809917,
1380
- "mean_diff_a_minus_b":-0.0066013774,
1381
- "median_diff_a_minus_b":-0.0023532391,
1382
- "W_stat":3206.0,
1383
- "p_two_sided":0.2106004159,
1384
- "z_equiv":-1.2519161593,
1385
- "effect_size_r":0.1138105599,
1386
- "p_holm_global":1.0,
1387
- "p_holm_within_metric":0.8179895757
1388
- },
1389
- {
1390
- "metric":"mean_test_acc",
1391
- "alg_a":"ortho",
1392
- "alg_b":"sdp",
1393
- "n_common":121,
1394
- "n_nonzero":121,
1395
- "wins_a":60,
1396
- "wins_b":61,
1397
- "ties":0,
1398
- "win_rate_a_over_b":0.4958677686,
1399
- "mean_diff_a_minus_b":0.0025382481,
1400
- "median_diff_a_minus_b":-0.0000065726,
1401
- "W_stat":3382.0,
1402
- "p_two_sided":0.4256404345,
1403
- "z_equiv":0.7966739196,
1404
- "effect_size_r":0.0724249018,
1405
- "p_holm_global":1.0,
1406
- "p_holm_within_metric":0.8512808691
1407
- },
1408
- {
1409
- "metric":"mean_test_acc",
1410
- "alg_a":"ldlt-resnet",
1411
- "alg_b":"sdp",
1412
- "n_common":121,
1413
- "n_nonzero":121,
1414
- "wins_a":58,
1415
- "wins_b":63,
1416
- "ties":0,
1417
- "win_rate_a_over_b":0.479338843,
1418
- "mean_diff_a_minus_b":-0.0040631294,
1419
- "median_diff_a_minus_b":-0.0009775593,
1420
- "W_stat":3627.0,
1421
- "p_two_sided":0.8705530556,
1422
- "z_equiv":-0.162956029,
1423
- "effect_size_r":0.0148141845,
1424
- "p_holm_global":1.0,
1425
- "p_holm_within_metric":0.8705530556
1426
- }
1427
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
UCI_N6/wilcoxon_prep_all.json DELETED
The diff for this file is too large to render. See raw diff