Calculate perplexity and kld scores
Browse files- scores/watt-tool-8B-IQ3_M.log +37 -0
- scores/watt-tool-8B-IQ3_S.log +37 -0
- scores/watt-tool-8B-IQ4_NL.log +37 -0
- scores/watt-tool-8B-Q3_K_L.log +37 -0
- scores/watt-tool-8B-Q3_K_M.log +37 -0
- scores/watt-tool-8B-Q3_K_S.log +37 -0
- scores/watt-tool-8B-Q4_K_M.log +37 -0
- scores/watt-tool-8B-Q4_K_S.log +37 -0
- scores/watt-tool-8B-Q5_K_M.log +37 -0
- scores/watt-tool-8B-Q5_K_S.log +37 -0
- scores/watt-tool-8B-Q6_K.log +37 -0
- scores/watt-tool-8B-Q8_0.log +37 -0
scores/watt-tool-8B-IQ3_M.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 8.037667 ± 0.050228
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.76%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.064696 ± 0.001005
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.066835 ± 0.001072
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.503543 ± 0.007995
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.066422 ± 0.000322
|
11 |
+
Maximum KLD: 6.343029
|
12 |
+
99.9% KLD: 1.563787
|
13 |
+
99.0% KLD: 0.518421
|
14 |
+
99.0% KLD: 0.518421
|
15 |
+
Median KLD: 0.041650
|
16 |
+
10.0% KLD: 0.004530
|
17 |
+
5.0% KLD: 0.001681
|
18 |
+
1.0% KLD: 0.000255
|
19 |
+
Minimum KLD: 0.000001
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -2.699 ± 0.019 %
|
23 |
+
Maximum Δp: 69.336%
|
24 |
+
99.9% Δp: 26.823%
|
25 |
+
99.0% Δp: 11.876%
|
26 |
+
95.0% Δp: 4.305%
|
27 |
+
90.0% Δp: 1.889%
|
28 |
+
75.0% Δp: 0.060%
|
29 |
+
Median Δp: -0.671%
|
30 |
+
25.0% Δp: -4.411%
|
31 |
+
10.0% Δp: -10.028%
|
32 |
+
5.0% Δp: -14.549%
|
33 |
+
1.0% Δp: -28.523%
|
34 |
+
0.1% Δp: -62.974%
|
35 |
+
Minimum Δp: -96.416%
|
36 |
+
RMS Δp : 7.606 ± 0.046 %
|
37 |
+
Same top p: 88.690 ± 0.084 %
|
scores/watt-tool-8B-IQ3_S.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 8.131662 ± 0.051148
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.65%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.076323 ± 0.001049
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.079311 ± 0.001132
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.597538 ± 0.008682
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.071229 ± 0.000357
|
11 |
+
Maximum KLD: 8.778745
|
12 |
+
99.9% KLD: 1.718901
|
13 |
+
99.0% KLD: 0.556955
|
14 |
+
99.0% KLD: 0.556955
|
15 |
+
Median KLD: 0.043398
|
16 |
+
10.0% KLD: 0.004498
|
17 |
+
5.0% KLD: 0.001689
|
18 |
+
1.0% KLD: 0.000269
|
19 |
+
Minimum KLD: 0.000002
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -2.728 ± 0.019 %
|
23 |
+
Maximum Δp: 77.285%
|
24 |
+
99.9% Δp: 27.656%
|
25 |
+
99.0% Δp: 12.070%
|
26 |
+
95.0% Δp: 4.429%
|
27 |
+
90.0% Δp: 1.963%
|
28 |
+
75.0% Δp: 0.065%
|
29 |
+
Median Δp: -0.636%
|
30 |
+
25.0% Δp: -4.334%
|
31 |
+
10.0% Δp: -10.131%
|
32 |
+
5.0% Δp: -14.930%
|
33 |
+
1.0% Δp: -30.238%
|
34 |
+
0.1% Δp: -67.515%
|
35 |
+
Minimum Δp: -97.118%
|
36 |
+
RMS Δp : 7.864 ± 0.049 %
|
37 |
+
Same top p: 88.252 ± 0.085 %
|
scores/watt-tool-8B-IQ4_NL.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.730788 ± 0.049731
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.61%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.025768 ± 0.000567
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.026103 ± 0.000582
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.196664 ± 0.004583
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.019576 ± 0.000122
|
11 |
+
Maximum KLD: 7.128579
|
12 |
+
99.9% KLD: 0.530688
|
13 |
+
99.0% KLD: 0.163194
|
14 |
+
99.0% KLD: 0.163194
|
15 |
+
Median KLD: 0.011057
|
16 |
+
10.0% KLD: 0.000745
|
17 |
+
5.0% KLD: 0.000248
|
18 |
+
1.0% KLD: 0.000038
|
19 |
+
Minimum KLD: -0.000096
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.494 ± 0.010 %
|
23 |
+
Maximum Δp: 60.888%
|
24 |
+
99.9% Δp: 19.533%
|
25 |
+
99.0% Δp: 8.890%
|
26 |
+
95.0% Δp: 4.150%
|
27 |
+
90.0% Δp: 2.365%
|
28 |
+
75.0% Δp: 0.435%
|
29 |
+
Median Δp: -0.028%
|
30 |
+
25.0% Δp: -1.070%
|
31 |
+
10.0% Δp: -3.708%
|
32 |
+
5.0% Δp: -6.092%
|
33 |
+
1.0% Δp: -14.010%
|
34 |
+
0.1% Δp: -33.372%
|
35 |
+
Minimum Δp: -79.450%
|
36 |
+
RMS Δp : 3.873 ± 0.030 %
|
37 |
+
Same top p: 93.423 ± 0.065 %
|
scores/watt-tool-8B-Q3_K_L.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.955300 ± 0.051312
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.11%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.054396 ± 0.000860
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.055902 ± 0.000908
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.421176 ± 0.007333
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.043200 ± 0.000262
|
11 |
+
Maximum KLD: 8.829779
|
12 |
+
99.9% KLD: 1.281964
|
13 |
+
99.0% KLD: 0.376543
|
14 |
+
99.0% KLD: 0.376543
|
15 |
+
Median KLD: 0.023787
|
16 |
+
10.0% KLD: 0.001665
|
17 |
+
5.0% KLD: 0.000574
|
18 |
+
1.0% KLD: 0.000087
|
19 |
+
Minimum KLD: -0.000360
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.119 ± 0.016 %
|
23 |
+
Maximum Δp: 59.041%
|
24 |
+
99.9% Δp: 24.874%
|
25 |
+
99.0% Δp: 12.145%
|
26 |
+
95.0% Δp: 5.639%
|
27 |
+
90.0% Δp: 3.126%
|
28 |
+
75.0% Δp: 0.437%
|
29 |
+
Median Δp: -0.107%
|
30 |
+
25.0% Δp: -1.966%
|
31 |
+
10.0% Δp: -6.121%
|
32 |
+
5.0% Δp: -9.877%
|
33 |
+
1.0% Δp: -23.039%
|
34 |
+
0.1% Δp: -56.479%
|
35 |
+
Minimum Δp: -96.588%
|
36 |
+
RMS Δp : 6.074 ± 0.047 %
|
37 |
+
Same top p: 90.347 ± 0.078 %
|
scores/watt-tool-8B-Q3_K_M.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 8.029877 ± 0.051646
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.94%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.063727 ± 0.000933
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.065801 ± 0.000994
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.495753 ± 0.008026
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.051126 ± 0.000296
|
11 |
+
Maximum KLD: 6.852483
|
12 |
+
99.9% KLD: 1.491291
|
13 |
+
99.0% KLD: 0.440246
|
14 |
+
99.0% KLD: 0.440246
|
15 |
+
Median KLD: 0.028683
|
16 |
+
10.0% KLD: 0.002148
|
17 |
+
5.0% KLD: 0.000733
|
18 |
+
1.0% KLD: 0.000110
|
19 |
+
Minimum KLD: -0.000059
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.501 ± 0.017 %
|
23 |
+
Maximum Δp: 70.128%
|
24 |
+
99.9% Δp: 25.463%
|
25 |
+
99.0% Δp: 12.370%
|
26 |
+
95.0% Δp: 5.593%
|
27 |
+
90.0% Δp: 2.967%
|
28 |
+
75.0% Δp: 0.321%
|
29 |
+
Median Δp: -0.188%
|
30 |
+
25.0% Δp: -2.480%
|
31 |
+
10.0% Δp: -7.159%
|
32 |
+
5.0% Δp: -11.373%
|
33 |
+
1.0% Δp: -25.561%
|
34 |
+
0.1% Δp: -59.737%
|
35 |
+
Minimum Δp: -96.409%
|
36 |
+
RMS Δp : 6.613 ± 0.047 %
|
37 |
+
Same top p: 89.569 ± 0.081 %
|
scores/watt-tool-8B-Q3_K_S.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 9.061287 ± 0.057476
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 96.76%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.184569 ± 0.001623
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.202700 ± 0.001952
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.527163 ± 0.016298
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.164457 ± 0.000676
|
11 |
+
Maximum KLD: 9.411176
|
12 |
+
99.9% KLD: 2.855778
|
13 |
+
99.0% KLD: 1.234465
|
14 |
+
99.0% KLD: 1.234465
|
15 |
+
Median KLD: 0.104662
|
16 |
+
10.0% KLD: 0.009321
|
17 |
+
5.0% KLD: 0.002701
|
18 |
+
1.0% KLD: 0.000294
|
19 |
+
Minimum KLD: 0.000001
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -5.112 ± 0.033 %
|
23 |
+
Maximum Δp: 77.844%
|
24 |
+
99.9% Δp: 38.199%
|
25 |
+
99.0% Δp: 18.699%
|
26 |
+
95.0% Δp: 6.729%
|
27 |
+
90.0% Δp: 2.723%
|
28 |
+
75.0% Δp: 0.066%
|
29 |
+
Median Δp: -1.070%
|
30 |
+
25.0% Δp: -7.642%
|
31 |
+
10.0% Δp: -18.119%
|
32 |
+
5.0% Δp: -27.580%
|
33 |
+
1.0% Δp: -59.212%
|
34 |
+
0.1% Δp: -82.003%
|
35 |
+
Minimum Δp: -99.539%
|
36 |
+
RMS Δp : 13.476 ± 0.061 %
|
37 |
+
Same top p: 81.696 ± 0.102 %
|
scores/watt-tool-8B-Q4_K_M.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.706824 ± 0.049523
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.70%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.022664 ± 0.000499
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.022922 ± 0.000511
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.172700 ± 0.004022
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.015407 ± 0.000095
|
11 |
+
Maximum KLD: 4.300019
|
12 |
+
99.9% KLD: 0.426162
|
13 |
+
99.0% KLD: 0.128870
|
14 |
+
99.0% KLD: 0.128870
|
15 |
+
Median KLD: 0.008645
|
16 |
+
10.0% KLD: 0.000540
|
17 |
+
5.0% KLD: 0.000171
|
18 |
+
1.0% KLD: 0.000021
|
19 |
+
Minimum KLD: -0.000211
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.401 ± 0.009 %
|
23 |
+
Maximum Δp: 64.955%
|
24 |
+
99.9% Δp: 17.277%
|
25 |
+
99.0% Δp: 8.033%
|
26 |
+
95.0% Δp: 3.725%
|
27 |
+
90.0% Δp: 2.131%
|
28 |
+
75.0% Δp: 0.412%
|
29 |
+
Median Δp: -0.019%
|
30 |
+
25.0% Δp: -0.939%
|
31 |
+
10.0% Δp: -3.216%
|
32 |
+
5.0% Δp: -5.254%
|
33 |
+
1.0% Δp: -11.972%
|
34 |
+
0.1% Δp: -31.348%
|
35 |
+
Minimum Δp: -73.628%
|
36 |
+
RMS Δp : 3.465 ± 0.030 %
|
37 |
+
Same top p: 94.228 ± 0.061 %
|
scores/watt-tool-8B-Q4_K_S.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.704878 ± 0.049402
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.66%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.022411 ± 0.000529
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.022664 ± 0.000541
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.170754 ± 0.004204
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.017657 ± 0.000103
|
11 |
+
Maximum KLD: 3.435874
|
12 |
+
99.9% KLD: 0.503178
|
13 |
+
99.0% KLD: 0.146417
|
14 |
+
99.0% KLD: 0.146417
|
15 |
+
Median KLD: 0.010035
|
16 |
+
10.0% KLD: 0.000649
|
17 |
+
5.0% KLD: 0.000208
|
18 |
+
1.0% KLD: 0.000029
|
19 |
+
Minimum KLD: -0.000210
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.512 ± 0.010 %
|
23 |
+
Maximum Δp: 62.473%
|
24 |
+
99.9% Δp: 19.128%
|
25 |
+
99.0% Δp: 8.391%
|
26 |
+
95.0% Δp: 3.809%
|
27 |
+
90.0% Δp: 2.168%
|
28 |
+
75.0% Δp: 0.373%
|
29 |
+
Median Δp: -0.032%
|
30 |
+
25.0% Δp: -1.110%
|
31 |
+
10.0% Δp: -3.609%
|
32 |
+
5.0% Δp: -5.771%
|
33 |
+
1.0% Δp: -12.795%
|
34 |
+
0.1% Δp: -33.912%
|
35 |
+
Minimum Δp: -77.756%
|
36 |
+
RMS Δp : 3.707 ± 0.031 %
|
37 |
+
Same top p: 93.767 ± 0.064 %
|
scores/watt-tool-8B-Q5_K_M.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.586838 ± 0.048693
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.91%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.006972 ± 0.000278
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.006997 ± 0.000280
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.052714 ± 0.002153
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.004726 ± 0.000029
|
11 |
+
Maximum KLD: 1.109797
|
12 |
+
99.9% KLD: 0.120477
|
13 |
+
99.0% KLD: 0.036395
|
14 |
+
99.0% KLD: 0.036395
|
15 |
+
Median KLD: 0.002865
|
16 |
+
10.0% KLD: 0.000173
|
17 |
+
5.0% KLD: 0.000053
|
18 |
+
1.0% KLD: 0.000006
|
19 |
+
Minimum KLD: -0.000141
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.024 ± 0.005 %
|
23 |
+
Maximum Δp: 40.801%
|
24 |
+
99.9% Δp: 10.374%
|
25 |
+
99.0% Δp: 4.971%
|
26 |
+
95.0% Δp: 2.605%
|
27 |
+
90.0% Δp: 1.614%
|
28 |
+
75.0% Δp: 0.410%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.359%
|
31 |
+
10.0% Δp: -1.547%
|
32 |
+
5.0% Δp: -2.679%
|
33 |
+
1.0% Δp: -6.014%
|
34 |
+
0.1% Δp: -15.549%
|
35 |
+
Minimum Δp: -67.857%
|
36 |
+
RMS Δp : 1.918 ± 0.020 %
|
37 |
+
Same top p: 96.547 ± 0.048 %
|
scores/watt-tool-8B-Q5_K_S.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.582235 ± 0.048626
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.90%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.006365 ± 0.000282
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.006386 ± 0.000284
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.048111 ± 0.002168
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.004976 ± 0.000032
|
11 |
+
Maximum KLD: 1.635007
|
12 |
+
99.9% KLD: 0.127847
|
13 |
+
99.0% KLD: 0.038030
|
14 |
+
99.0% KLD: 0.038030
|
15 |
+
Median KLD: 0.003028
|
16 |
+
10.0% KLD: 0.000180
|
17 |
+
5.0% KLD: 0.000057
|
18 |
+
1.0% KLD: 0.000006
|
19 |
+
Minimum KLD: -0.000153
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.061 ± 0.005 %
|
23 |
+
Maximum Δp: 42.055%
|
24 |
+
99.9% Δp: 10.474%
|
25 |
+
99.0% Δp: 5.063%
|
26 |
+
95.0% Δp: 2.612%
|
27 |
+
90.0% Δp: 1.612%
|
28 |
+
75.0% Δp: 0.375%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.410%
|
31 |
+
10.0% Δp: -1.671%
|
32 |
+
5.0% Δp: -2.791%
|
33 |
+
1.0% Δp: -6.049%
|
34 |
+
0.1% Δp: -16.358%
|
35 |
+
Minimum Δp: -71.814%
|
36 |
+
RMS Δp : 1.956 ± 0.020 %
|
37 |
+
Same top p: 96.417 ± 0.049 %
|
scores/watt-tool-8B-Q6_K.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.564700 ± 0.048534
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.96%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.004050 ± 0.000188
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.004058 ± 0.000189
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.030576 ± 0.001453
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.002095 ± 0.000015
|
11 |
+
Maximum KLD: 1.330637
|
12 |
+
99.9% KLD: 0.053552
|
13 |
+
99.0% KLD: 0.014807
|
14 |
+
99.0% KLD: 0.014807
|
15 |
+
Median KLD: 0.001325
|
16 |
+
10.0% KLD: 0.000069
|
17 |
+
5.0% KLD: 0.000017
|
18 |
+
1.0% KLD: -0.000001
|
19 |
+
Minimum KLD: -0.000220
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: 0.024 ± 0.003 %
|
23 |
+
Maximum Δp: 36.481%
|
24 |
+
99.9% Δp: 7.433%
|
25 |
+
99.0% Δp: 3.550%
|
26 |
+
95.0% Δp: 1.840%
|
27 |
+
90.0% Δp: 1.165%
|
28 |
+
75.0% Δp: 0.306%
|
29 |
+
Median Δp: 0.001%
|
30 |
+
25.0% Δp: -0.225%
|
31 |
+
10.0% Δp: -1.031%
|
32 |
+
5.0% Δp: -1.774%
|
33 |
+
1.0% Δp: -3.760%
|
34 |
+
0.1% Δp: -9.008%
|
35 |
+
Minimum Δp: -41.338%
|
36 |
+
RMS Δp : 1.262 ± 0.011 %
|
37 |
+
Same top p: 97.635 ± 0.040 %
|
scores/watt-tool-8B-Q8_0.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 7.539926 ± 0.048262
|
3 |
+
Mean PPL(base) : 7.534124 ± 0.048206
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.99%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.000770 ± 0.000076
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.000770 ± 0.000077
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.005802 ± 0.000579
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.000238 ± 0.000002
|
11 |
+
Maximum KLD: 0.151414
|
12 |
+
99.9% KLD: 0.005715
|
13 |
+
99.0% KLD: 0.001698
|
14 |
+
99.0% KLD: 0.001698
|
15 |
+
Median KLD: 0.000153
|
16 |
+
10.0% KLD: 0.000008
|
17 |
+
5.0% KLD: 0.000001
|
18 |
+
1.0% KLD: -0.000007
|
19 |
+
Minimum KLD: -0.000144
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.017 ± 0.001 %
|
23 |
+
Maximum Δp: 14.747%
|
24 |
+
99.9% Δp: 2.639%
|
25 |
+
99.0% Δp: 1.139%
|
26 |
+
95.0% Δp: 0.550%
|
27 |
+
90.0% Δp: 0.325%
|
28 |
+
75.0% Δp: 0.074%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.100%
|
31 |
+
10.0% Δp: -0.387%
|
32 |
+
5.0% Δp: -0.625%
|
33 |
+
1.0% Δp: -1.236%
|
34 |
+
0.1% Δp: -2.830%
|
35 |
+
Minimum Δp: -8.728%
|
36 |
+
RMS Δp : 0.415 ± 0.004 %
|
37 |
+
Same top p: 99.218 ± 0.023 %
|