GGroenendaal
committed on
Commit
•
350ba75
1
Parent(s):
64df40b
add plots for reading times
Browse files
- plots.py +61 -0
- results/read_time.png +0 -0
- results/read_time.tex +14 -0
- results/retrieval_time.png +0 -0
- results/retrieval_time.tex +14 -0
- test.py +0 -20
plots.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %%
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import scipy.stats as stats
|
5 |
+
|
6 |
+
data = pd.read_csv("results/timings.csv", index_col="Unnamed: 0")
|
7 |
+
data
|
8 |
+
# %%
|
9 |
+
data.columns
|
10 |
+
# %%
|
11 |
+
|
12 |
+
data_retrieve = data[["faiss_dpr.retrieve", "faiss_longformer.retrieve",
|
13 |
+
"es_dpr.retrieve", "es_longformer.retrieve"]]
|
14 |
+
|
15 |
+
# %%
|
16 |
+
plt.title("Retrieval time")
|
17 |
+
plt.ylabel("Time (s)")
|
18 |
+
plt.xlabel("Model")
|
19 |
+
plt.boxplot(data_retrieve, labels=[
|
20 |
+
"A1", "A2", "B1", "B2"])
|
21 |
+
plt.savefig("results/retrieval_time.png")
|
22 |
+
|
23 |
+
# %%
|
24 |
+
print(data_retrieve.describe())
|
25 |
+
|
26 |
+
with open("results/retrieval_time.tex", "w") as f:
|
27 |
+
f.write(data_retrieve.describe().to_latex())
|
28 |
+
|
29 |
+
# %%
|
30 |
+
|
31 |
+
# now the same for the reader
|
32 |
+
data_read = data[["faiss_dpr.read", "faiss_longformer.read",
|
33 |
+
"es_dpr.read", "es_longformer.read"]]
|
34 |
+
|
35 |
+
# Start a fresh figure: without this, running the file as a plain script
# draws the reading-time boxes on top of the retrieval-time plot above,
# because pyplot keeps reusing the current figure.
plt.figure()
plt.title("Reading time")
|
36 |
+
plt.ylabel("Time (s)")
|
37 |
+
plt.xlabel("Model")
|
38 |
+
plt.boxplot(data_read, labels=["A1", "A2", "B1", "B2"])
|
39 |
+
plt.savefig("results/read_time.png")
|
40 |
+
|
41 |
+
# %%
|
42 |
+
print(data_read.describe())
|
43 |
+
|
44 |
+
with open("results/read_time.tex", "w") as f:
|
45 |
+
f.write(data_read.describe().to_latex())
|
46 |
+
|
47 |
+
|
48 |
+
# Statistical tests for retrieval and reading time
|
49 |
+
|
50 |
+
# %%
|
51 |
+
stats.probplot(data_retrieve["es_longformer.retrieve"], dist="norm", plot=plt)
|
52 |
+
# %%
|
53 |
+
|
54 |
+
|
55 |
+
# %%
|
56 |
+
anova_retrieve = stats.f_oneway(*data_retrieve.T.values)
|
57 |
+
anova_read = stats.f_oneway(*data_read.T.values)
|
58 |
+
|
59 |
+
print(f"retrieve\n {anova_retrieve} \n\nread\n {anova_read}")
|
60 |
+
|
61 |
+
# %%
|
results/read_time.png
ADDED
results/read_time.tex
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
\begin{tabular}{lrrrr}
|
2 |
+
\toprule
|
3 |
+
{} & faiss\_dpr.read & faiss\_longformer.read & es\_dpr.read & es\_longformer.read \\
|
4 |
+
\midrule
|
5 |
+
count & 59.000000 & 59.000000 & 59.000000 & 59.000000 \\
|
6 |
+
mean & 1.222466 & 5.486930 & 1.866525 & 5.191112 \\
|
7 |
+
std & 0.923501 & 0.966157 & 1.005673 & 0.465743 \\
|
8 |
+
min & 0.341175 & 4.487846 & 0.314589 & 4.463429 \\
|
9 |
+
25\% & 0.695762 & 4.767350 & 1.141979 & 4.858446 \\
|
10 |
+
50\% & 0.919248 & 5.454382 & 1.650235 & 5.202449 \\
|
11 |
+
75\% & 1.394425 & 5.699257 & 2.516944 & 5.362522 \\
|
12 |
+
max & 5.365102 & 10.146074 & 4.782422 & 6.431236 \\
|
13 |
+
\bottomrule
|
14 |
+
\end{tabular}
|
results/retrieval_time.png
ADDED
results/retrieval_time.tex
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
\begin{tabular}{lrrrr}
|
2 |
+
\toprule
|
3 |
+
{} & faiss\_dpr.retrieve & faiss\_longformer.retrieve & es\_dpr.retrieve & es\_longformer.retrieve \\
|
4 |
+
\midrule
|
5 |
+
count & 59.000000 & 59.000000 & 59.000000 & 59.000000 \\
|
6 |
+
mean & 0.056994 & 0.854546 & 0.013451 & 0.013016 \\
|
7 |
+
std & 0.038737 & 0.165768 & 0.003771 & 0.002781 \\
|
8 |
+
min & 0.035896 & 0.729217 & 0.008990 & 0.009167 \\
|
9 |
+
25\% & 0.043558 & 0.775807 & 0.010590 & 0.011279 \\
|
10 |
+
50\% & 0.046970 & 0.795175 & 0.011699 & 0.012060 \\
|
11 |
+
75\% & 0.056887 & 0.838984 & 0.016232 & 0.013151 \\
|
12 |
+
max & 0.303843 & 1.465686 & 0.026489 & 0.020290 \\
|
13 |
+
\bottomrule
|
14 |
+
\end{tabular}
|
test.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
# %%
|
2 |
-
from datasets import load_dataset
|
3 |
-
from src.retrievers.faiss_retriever import FaissRetriever
|
4 |
-
|
5 |
-
|
6 |
-
data = load_dataset("GroNLP/ik-nlp-22_slp", "paragraphs")
|
7 |
-
|
8 |
-
# # %%
|
9 |
-
# x = data["test"][:3]
|
10 |
-
|
11 |
-
# # %%
|
12 |
-
# for y in x:
|
13 |
-
|
14 |
-
# print(y)
|
15 |
-
# # %%
|
16 |
-
# x.num_rows
|
17 |
-
|
18 |
-
# # %%
|
19 |
-
retriever = FaissRetriever(data)
|
20 |
-
scores, result = retriever.retrieve("hello world")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|