Petr Tsvetkov committed on
Commit
a01d3ba
•
1 Parent(s): 073db2c

Pretty-print all the correlations in the visualization app

Browse files
Files changed (3) hide show
  1. analysis.ipynb +1567 -0
  2. analysis_util.py +50 -0
  3. change_visualizer.py +5 -1
analysis.ipynb ADDED
@@ -0,0 +1,1567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2024-05-01T13:07:35.991719Z",
10
+ "start_time": "2024-05-01T13:07:16.672667Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "import pandas as pd\n",
15
+ "\n",
16
+ "import config"
17
+ ],
18
+ "outputs": [
19
+ {
20
+ "name": "stderr",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "D:\\petrtsv\\work\\jetbrains\\commit-rewriting-processing\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
24
+ " from .autonotebook import tqdm as notebook_tqdm\n",
25
+ "[nltk_data] Downloading package wordnet to C:\\Users\\Petr\n",
26
+ "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
27
+ "[nltk_data] Package wordnet is already up-to-date!\n",
28
+ "[nltk_data] Downloading package punkt to C:\\Users\\Petr\n",
29
+ "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
30
+ "[nltk_data] Package punkt is already up-to-date!\n",
31
+ "[nltk_data] Downloading package omw-1.4 to C:\\Users\\Petr\n",
32
+ "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
33
+ "[nltk_data] Package omw-1.4 is already up-to-date!\n"
34
+ ]
35
+ }
36
+ ],
37
+ "execution_count": 20
38
+ },
39
+ {
40
+ "metadata": {
41
+ "ExecuteTime": {
42
+ "end_time": "2024-05-01T12:57:08.596650Z",
43
+ "start_time": "2024-05-01T12:57:08.435650Z"
44
+ }
45
+ },
46
+ "cell_type": "code",
47
+ "source": [
48
+ "df = pd.read_csv(config.SYNTHETIC_DATASET_ARTIFACT, index_col=0)\n",
49
+ "\n",
50
+ "df.head()"
51
+ ],
52
+ "id": "2ac8757a17e62293",
53
+ "outputs": [
54
+ {
55
+ "data": {
56
+ "text/plain": [
57
+ " hash \\\n",
58
+ "0 9a581830e4fa02eed501b4e1f546a2e2ea358e13 \n",
59
+ "1 37067a53c4b3b99982ef8e1f431ba0c9302b66e8 \n",
60
+ "2 82e350064cb8d1622c7cde275567ae594483fe62 \n",
61
+ "3 cf98f5e3705603ae21bef9b0a577bcd001a8c92e \n",
62
+ "4 c17a80f47b772d759aeb0878aa767a768a6fdd0c \n",
63
+ "\n",
64
+ " repo \\\n",
65
+ "0 bitcoinunlimited/bitcoinunlimited \n",
66
+ "1 mesonbuild/meson \n",
67
+ "2 mycroftai/mycroft-core \n",
68
+ "3 mesonbuild/meson \n",
69
+ "4 mesonbuild/meson \n",
70
+ "\n",
71
+ " commit_msg_start \\\n",
72
+ "0 Add extensive test option to parallel RPC test... \n",
73
+ "1 Refactor argument parsing and command executio... \n",
74
+ "2 Add helper functions for disk space management... \n",
75
+ "3 Update path resolution for non-Windows systems... \n",
76
+ "4 Add support for VS2017 architecture detection\\... \n",
77
+ "\n",
78
+ " commit_msg_end \\\n",
79
+ "0 Add new block attack patterns\\n\\n- Added test ... \n",
80
+ "1 Introduce unified argument parsing in meson\\n\\... \n",
81
+ "2 Refactor file_utils.py\\n\\n- Add helper functio... \n",
82
+ "3 Enable loading crossfiles for all platforms ex... \n",
83
+ "4 Add support for VS2017 architecture detection.... \n",
84
+ "\n",
85
+ " session \\\n",
86
+ "0 032e60d7-621a-46b6-972f-7590cfaf6458 \n",
87
+ "1 5d7f1209-4ed9-4620-87ca-975f029c7f6f \n",
88
+ "2 93b1c57c-e56c-4d75-89a6-ae1158b4fa74 \n",
89
+ "3 5d7f1209-4ed9-4620-87ca-975f029c7f6f \n",
90
+ "4 16e57250-21ff-4cdd-ae0d-760cabcc6160 \n",
91
+ "\n",
92
+ " commit_msg_history \\\n",
93
+ "0 [{\"t\": \"-\", \"p\": 4, \"c\": \"e\", \"ts\": \"2024-04-0... \n",
94
+ "1 [] \n",
95
+ "2 [{\"t\": \"+\", \"p\": 0, \"c\": \"R\", \"ts\": \"2024-04-0... \n",
96
+ "3 [] \n",
97
+ "4 [{\"t\": \"-\", \"p\": 45, \"c\": \"\\n\", \"ts\": \"2024-04... \n",
98
+ "\n",
99
+ " loaded_ts submitted_ts edit_time_hist \\\n",
100
+ "0 2024-04-04T19:48:31.180017 2024-04-04T19:50:32.925989 59468.0 \n",
101
+ "1 2024-04-15T16:50:17.208813 2024-04-15T15:29:02.014310 0.0 \n",
102
+ "2 2024-04-04T19:52:38.276314 2024-04-04T19:57:02.449096 133655.0 \n",
103
+ "3 2024-04-15T17:42:14.482856 2024-04-15T15:29:02.014310 0.0 \n",
104
+ "4 2024-04-15T15:47:31.022477 2024-04-15T15:53:08.796895 163218.0 \n",
105
+ "\n",
106
+ " edit_time ... rel_edittime_ind_rouge2_pearson \\\n",
107
+ "0 121745.0 ... 0.281944 \n",
108
+ "1 NaN ... 0.281944 \n",
109
+ "2 264172.0 ... 0.281944 \n",
110
+ "3 NaN ... 0.281944 \n",
111
+ "4 337774.0 ... 0.281944 \n",
112
+ "\n",
113
+ " rel_edittime_ind_rouge2_spearman rel_edittime_ind_rougeL_pearson \\\n",
114
+ "0 0.218822 0.091196 \n",
115
+ "1 0.218822 0.091196 \n",
116
+ "2 0.218822 0.091196 \n",
117
+ "3 0.218822 0.091196 \n",
118
+ "4 0.218822 0.091196 \n",
119
+ "\n",
120
+ " rel_edittime_ind_rougeL_spearman rel_edittime_ind_bertscore_pearson \\\n",
121
+ "0 0.071344 0.158807 \n",
122
+ "1 0.071344 0.158807 \n",
123
+ "2 0.071344 0.158807 \n",
124
+ "3 0.071344 0.158807 \n",
125
+ "4 0.071344 0.158807 \n",
126
+ "\n",
127
+ " rel_edittime_ind_bertscore_spearman rel_edittime_ind_chrF_pearson \\\n",
128
+ "0 0.140481 0.184202 \n",
129
+ "1 0.140481 0.184202 \n",
130
+ "2 0.140481 0.184202 \n",
131
+ "3 0.140481 0.184202 \n",
132
+ "4 0.140481 0.184202 \n",
133
+ "\n",
134
+ " rel_edittime_ind_chrF_spearman rel_edittime_ind_ter_pearson \\\n",
135
+ "0 0.079802 0.062616 \n",
136
+ "1 0.079802 0.062616 \n",
137
+ "2 0.079802 0.062616 \n",
138
+ "3 0.079802 0.062616 \n",
139
+ "4 0.079802 0.062616 \n",
140
+ "\n",
141
+ " rel_edittime_ind_ter_spearman \n",
142
+ "0 0.305601 \n",
143
+ "1 0.305601 \n",
144
+ "2 0.305601 \n",
145
+ "3 0.305601 \n",
146
+ "4 0.305601 \n",
147
+ "\n",
148
+ "[5 rows x 71 columns]"
149
+ ],
150
+ "text/html": [
151
+ "<div>\n",
152
+ "<style scoped>\n",
153
+ " .dataframe tbody tr th:only-of-type {\n",
154
+ " vertical-align: middle;\n",
155
+ " }\n",
156
+ "\n",
157
+ " .dataframe tbody tr th {\n",
158
+ " vertical-align: top;\n",
159
+ " }\n",
160
+ "\n",
161
+ " .dataframe thead th {\n",
162
+ " text-align: right;\n",
163
+ " }\n",
164
+ "</style>\n",
165
+ "<table border=\"1\" class=\"dataframe\">\n",
166
+ " <thead>\n",
167
+ " <tr style=\"text-align: right;\">\n",
168
+ " <th></th>\n",
169
+ " <th>hash</th>\n",
170
+ " <th>repo</th>\n",
171
+ " <th>commit_msg_start</th>\n",
172
+ " <th>commit_msg_end</th>\n",
173
+ " <th>session</th>\n",
174
+ " <th>commit_msg_history</th>\n",
175
+ " <th>loaded_ts</th>\n",
176
+ " <th>submitted_ts</th>\n",
177
+ " <th>edit_time_hist</th>\n",
178
+ " <th>edit_time</th>\n",
179
+ " <th>...</th>\n",
180
+ " <th>rel_edittime_ind_rouge2_pearson</th>\n",
181
+ " <th>rel_edittime_ind_rouge2_spearman</th>\n",
182
+ " <th>rel_edittime_ind_rougeL_pearson</th>\n",
183
+ " <th>rel_edittime_ind_rougeL_spearman</th>\n",
184
+ " <th>rel_edittime_ind_bertscore_pearson</th>\n",
185
+ " <th>rel_edittime_ind_bertscore_spearman</th>\n",
186
+ " <th>rel_edittime_ind_chrF_pearson</th>\n",
187
+ " <th>rel_edittime_ind_chrF_spearman</th>\n",
188
+ " <th>rel_edittime_ind_ter_pearson</th>\n",
189
+ " <th>rel_edittime_ind_ter_spearman</th>\n",
190
+ " </tr>\n",
191
+ " </thead>\n",
192
+ " <tbody>\n",
193
+ " <tr>\n",
194
+ " <th>0</th>\n",
195
+ " <td>9a581830e4fa02eed501b4e1f546a2e2ea358e13</td>\n",
196
+ " <td>bitcoinunlimited/bitcoinunlimited</td>\n",
197
+ " <td>Add extensive test option to parallel RPC test...</td>\n",
198
+ " <td>Add new block attack patterns\\n\\n- Added test ...</td>\n",
199
+ " <td>032e60d7-621a-46b6-972f-7590cfaf6458</td>\n",
200
+ " <td>[{\"t\": \"-\", \"p\": 4, \"c\": \"e\", \"ts\": \"2024-04-0...</td>\n",
201
+ " <td>2024-04-04T19:48:31.180017</td>\n",
202
+ " <td>2024-04-04T19:50:32.925989</td>\n",
203
+ " <td>59468.0</td>\n",
204
+ " <td>121745.0</td>\n",
205
+ " <td>...</td>\n",
206
+ " <td>0.281944</td>\n",
207
+ " <td>0.218822</td>\n",
208
+ " <td>0.091196</td>\n",
209
+ " <td>0.071344</td>\n",
210
+ " <td>0.158807</td>\n",
211
+ " <td>0.140481</td>\n",
212
+ " <td>0.184202</td>\n",
213
+ " <td>0.079802</td>\n",
214
+ " <td>0.062616</td>\n",
215
+ " <td>0.305601</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>1</th>\n",
219
+ " <td>37067a53c4b3b99982ef8e1f431ba0c9302b66e8</td>\n",
220
+ " <td>mesonbuild/meson</td>\n",
221
+ " <td>Refactor argument parsing and command executio...</td>\n",
222
+ " <td>Introduce unified argument parsing in meson\\n\\...</td>\n",
223
+ " <td>5d7f1209-4ed9-4620-87ca-975f029c7f6f</td>\n",
224
+ " <td>[]</td>\n",
225
+ " <td>2024-04-15T16:50:17.208813</td>\n",
226
+ " <td>2024-04-15T15:29:02.014310</td>\n",
227
+ " <td>0.0</td>\n",
228
+ " <td>NaN</td>\n",
229
+ " <td>...</td>\n",
230
+ " <td>0.281944</td>\n",
231
+ " <td>0.218822</td>\n",
232
+ " <td>0.091196</td>\n",
233
+ " <td>0.071344</td>\n",
234
+ " <td>0.158807</td>\n",
235
+ " <td>0.140481</td>\n",
236
+ " <td>0.184202</td>\n",
237
+ " <td>0.079802</td>\n",
238
+ " <td>0.062616</td>\n",
239
+ " <td>0.305601</td>\n",
240
+ " </tr>\n",
241
+ " <tr>\n",
242
+ " <th>2</th>\n",
243
+ " <td>82e350064cb8d1622c7cde275567ae594483fe62</td>\n",
244
+ " <td>mycroftai/mycroft-core</td>\n",
245
+ " <td>Add helper functions for disk space management...</td>\n",
246
+ " <td>Refactor file_utils.py\\n\\n- Add helper functio...</td>\n",
247
+ " <td>93b1c57c-e56c-4d75-89a6-ae1158b4fa74</td>\n",
248
+ " <td>[{\"t\": \"+\", \"p\": 0, \"c\": \"R\", \"ts\": \"2024-04-0...</td>\n",
249
+ " <td>2024-04-04T19:52:38.276314</td>\n",
250
+ " <td>2024-04-04T19:57:02.449096</td>\n",
251
+ " <td>133655.0</td>\n",
252
+ " <td>264172.0</td>\n",
253
+ " <td>...</td>\n",
254
+ " <td>0.281944</td>\n",
255
+ " <td>0.218822</td>\n",
256
+ " <td>0.091196</td>\n",
257
+ " <td>0.071344</td>\n",
258
+ " <td>0.158807</td>\n",
259
+ " <td>0.140481</td>\n",
260
+ " <td>0.184202</td>\n",
261
+ " <td>0.079802</td>\n",
262
+ " <td>0.062616</td>\n",
263
+ " <td>0.305601</td>\n",
264
+ " </tr>\n",
265
+ " <tr>\n",
266
+ " <th>3</th>\n",
267
+ " <td>cf98f5e3705603ae21bef9b0a577bcd001a8c92e</td>\n",
268
+ " <td>mesonbuild/meson</td>\n",
269
+ " <td>Update path resolution for non-Windows systems...</td>\n",
270
+ " <td>Enable loading crossfiles for all platforms ex...</td>\n",
271
+ " <td>5d7f1209-4ed9-4620-87ca-975f029c7f6f</td>\n",
272
+ " <td>[]</td>\n",
273
+ " <td>2024-04-15T17:42:14.482856</td>\n",
274
+ " <td>2024-04-15T15:29:02.014310</td>\n",
275
+ " <td>0.0</td>\n",
276
+ " <td>NaN</td>\n",
277
+ " <td>...</td>\n",
278
+ " <td>0.281944</td>\n",
279
+ " <td>0.218822</td>\n",
280
+ " <td>0.091196</td>\n",
281
+ " <td>0.071344</td>\n",
282
+ " <td>0.158807</td>\n",
283
+ " <td>0.140481</td>\n",
284
+ " <td>0.184202</td>\n",
285
+ " <td>0.079802</td>\n",
286
+ " <td>0.062616</td>\n",
287
+ " <td>0.305601</td>\n",
288
+ " </tr>\n",
289
+ " <tr>\n",
290
+ " <th>4</th>\n",
291
+ " <td>c17a80f47b772d759aeb0878aa767a768a6fdd0c</td>\n",
292
+ " <td>mesonbuild/meson</td>\n",
293
+ " <td>Add support for VS2017 architecture detection\\...</td>\n",
294
+ " <td>Add support for VS2017 architecture detection....</td>\n",
295
+ " <td>16e57250-21ff-4cdd-ae0d-760cabcc6160</td>\n",
296
+ " <td>[{\"t\": \"-\", \"p\": 45, \"c\": \"\\n\", \"ts\": \"2024-04...</td>\n",
297
+ " <td>2024-04-15T15:47:31.022477</td>\n",
298
+ " <td>2024-04-15T15:53:08.796895</td>\n",
299
+ " <td>163218.0</td>\n",
300
+ " <td>337774.0</td>\n",
301
+ " <td>...</td>\n",
302
+ " <td>0.281944</td>\n",
303
+ " <td>0.218822</td>\n",
304
+ " <td>0.091196</td>\n",
305
+ " <td>0.071344</td>\n",
306
+ " <td>0.158807</td>\n",
307
+ " <td>0.140481</td>\n",
308
+ " <td>0.184202</td>\n",
309
+ " <td>0.079802</td>\n",
310
+ " <td>0.062616</td>\n",
311
+ " <td>0.305601</td>\n",
312
+ " </tr>\n",
313
+ " </tbody>\n",
314
+ "</table>\n",
315
+ "<p>5 rows × 71 columns</p>\n",
316
+ "</div>"
317
+ ]
318
+ },
319
+ "execution_count": 6,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "execution_count": 6
325
+ },
326
+ {
327
+ "metadata": {
328
+ "ExecuteTime": {
329
+ "end_time": "2024-05-01T13:02:40.761645Z",
330
+ "start_time": "2024-05-01T13:02:40.740647Z"
331
+ }
332
+ },
333
+ "cell_type": "code",
334
+ "source": [
335
+ "rel_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_related\")]\n",
336
+ "rel_metrics"
337
+ ],
338
+ "id": "d19c12dd10b25c75",
339
+ "outputs": [
340
+ {
341
+ "data": {
342
+ "text/plain": [
343
+ "['editdist', 'edittime']"
344
+ ]
345
+ },
346
+ "execution_count": 15,
347
+ "metadata": {},
348
+ "output_type": "execute_result"
349
+ }
350
+ ],
351
+ "execution_count": 15
352
+ },
353
+ {
354
+ "metadata": {
355
+ "ExecuteTime": {
356
+ "end_time": "2024-05-01T13:02:44.072037Z",
357
+ "start_time": "2024-05-01T13:02:44.055039Z"
358
+ }
359
+ },
360
+ "cell_type": "code",
361
+ "source": [
362
+ "ind_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_independent\")]\n",
363
+ "ind_metrics"
364
+ ],
365
+ "id": "79d644cd780b28a1",
366
+ "outputs": [
367
+ {
368
+ "data": {
369
+ "text/plain": [
370
+ "['gptscore-ref-1-req',\n",
371
+ " 'gptscore-noref-1-req',\n",
372
+ " 'editdist',\n",
373
+ " 'bleu',\n",
374
+ " 'meteor',\n",
375
+ " 'rouge1',\n",
376
+ " 'rouge2',\n",
377
+ " 'rougeL',\n",
378
+ " 'bertscore',\n",
379
+ " 'chrF',\n",
380
+ " 'ter']"
381
+ ]
382
+ },
383
+ "execution_count": 16,
384
+ "metadata": {},
385
+ "output_type": "execute_result"
386
+ }
387
+ ],
388
+ "execution_count": 16
389
+ },
390
+ {
391
+ "metadata": {
392
+ "ExecuteTime": {
393
+ "end_time": "2024-05-01T13:03:52.623346Z",
394
+ "start_time": "2024-05-01T13:03:52.577076Z"
395
+ }
396
+ },
397
+ "cell_type": "code",
398
+ "source": [
399
+ "AGGREGATION = {\"hash\": [\"count\"]}\n",
400
+ "\n",
401
+ "for metric in rel_metrics:\n",
402
+ " AGGREGATION[f\"{metric}_related\"] = [\"mean\"]\n",
403
+ "\n",
404
+ "for metric in ind_metrics:\n",
405
+ " AGGREGATION[f\"{metric}_independent\"] = [\"mean\"]\n",
406
+ "\n",
407
+ "df.groupby(by=[\"end_to_start\", \"start_to_end\"]).agg(AGGREGATION)"
408
+ ],
409
+ "id": "fdc5ae636bffbc8b",
410
+ "outputs": [
411
+ {
412
+ "data": {
413
+ "text/plain": [
414
+ " hash editdist_related edittime_related \\\n",
415
+ " count mean mean \n",
416
+ "end_to_start start_to_end \n",
417
+ "False False 43 355.441860 364099.0625 \n",
418
+ " True 129 406.627907 NaN \n",
419
+ "True False 129 433.899225 NaN \n",
420
+ " True 387 444.509044 NaN \n",
421
+ "\n",
422
+ " gptscore-ref-1-req_independent \\\n",
423
+ " mean \n",
424
+ "end_to_start start_to_end \n",
425
+ "False False 7.255814 \n",
426
+ " True 7.217054 \n",
427
+ "True False 7.356589 \n",
428
+ " True 7.312661 \n",
429
+ "\n",
430
+ " gptscore-noref-1-req_independent \\\n",
431
+ " mean \n",
432
+ "end_to_start start_to_end \n",
433
+ "False False 8.116279 \n",
434
+ " True 8.178295 \n",
435
+ "True False 8.302326 \n",
436
+ " True 8.276486 \n",
437
+ "\n",
438
+ " editdist_independent bleu_independent \\\n",
439
+ " mean mean \n",
440
+ "end_to_start start_to_end \n",
441
+ "False False 491.069767 0.012805 \n",
442
+ " True 491.069767 0.012805 \n",
443
+ "True False 534.015504 0.009542 \n",
444
+ " True 534.015504 0.009542 \n",
445
+ "\n",
446
+ " meteor_independent rouge1_independent \\\n",
447
+ " mean mean \n",
448
+ "end_to_start start_to_end \n",
449
+ "False False 0.224961 0.202063 \n",
450
+ " True 0.224961 0.202063 \n",
451
+ "True False 0.221893 0.205151 \n",
452
+ " True 0.221893 0.205151 \n",
453
+ "\n",
454
+ " rouge2_independent rougeL_independent \\\n",
455
+ " mean mean \n",
456
+ "end_to_start start_to_end \n",
457
+ "False False 0.040718 0.136427 \n",
458
+ " True 0.040718 0.136427 \n",
459
+ "True False 0.039033 0.134114 \n",
460
+ " True 0.039033 0.134114 \n",
461
+ "\n",
462
+ " bertscore_independent chrF_independent \\\n",
463
+ " mean mean \n",
464
+ "end_to_start start_to_end \n",
465
+ "False False 0.780266 32.067005 \n",
466
+ " True 0.780266 32.067005 \n",
467
+ "True False 0.777162 31.753065 \n",
468
+ " True 0.777162 31.753065 \n",
469
+ "\n",
470
+ " ter_independent \n",
471
+ " mean \n",
472
+ "end_to_start start_to_end \n",
473
+ "False False 312.732989 \n",
474
+ " True 312.732989 \n",
475
+ "True False 317.717517 \n",
476
+ " True 317.717517 "
477
+ ],
478
+ "text/html": [
479
+ "<div>\n",
480
+ "<style scoped>\n",
481
+ " .dataframe tbody tr th:only-of-type {\n",
482
+ " vertical-align: middle;\n",
483
+ " }\n",
484
+ "\n",
485
+ " .dataframe tbody tr th {\n",
486
+ " vertical-align: top;\n",
487
+ " }\n",
488
+ "\n",
489
+ " .dataframe thead tr th {\n",
490
+ " text-align: left;\n",
491
+ " }\n",
492
+ "\n",
493
+ " .dataframe thead tr:last-of-type th {\n",
494
+ " text-align: right;\n",
495
+ " }\n",
496
+ "</style>\n",
497
+ "<table border=\"1\" class=\"dataframe\">\n",
498
+ " <thead>\n",
499
+ " <tr>\n",
500
+ " <th></th>\n",
501
+ " <th></th>\n",
502
+ " <th>hash</th>\n",
503
+ " <th>editdist_related</th>\n",
504
+ " <th>edittime_related</th>\n",
505
+ " <th>gptscore-ref-1-req_independent</th>\n",
506
+ " <th>gptscore-noref-1-req_independent</th>\n",
507
+ " <th>editdist_independent</th>\n",
508
+ " <th>bleu_independent</th>\n",
509
+ " <th>meteor_independent</th>\n",
510
+ " <th>rouge1_independent</th>\n",
511
+ " <th>rouge2_independent</th>\n",
512
+ " <th>rougeL_independent</th>\n",
513
+ " <th>bertscore_independent</th>\n",
514
+ " <th>chrF_independent</th>\n",
515
+ " <th>ter_independent</th>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <th></th>\n",
519
+ " <th></th>\n",
520
+ " <th>count</th>\n",
521
+ " <th>mean</th>\n",
522
+ " <th>mean</th>\n",
523
+ " <th>mean</th>\n",
524
+ " <th>mean</th>\n",
525
+ " <th>mean</th>\n",
526
+ " <th>mean</th>\n",
527
+ " <th>mean</th>\n",
528
+ " <th>mean</th>\n",
529
+ " <th>mean</th>\n",
530
+ " <th>mean</th>\n",
531
+ " <th>mean</th>\n",
532
+ " <th>mean</th>\n",
533
+ " <th>mean</th>\n",
534
+ " </tr>\n",
535
+ " <tr>\n",
536
+ " <th>end_to_start</th>\n",
537
+ " <th>start_to_end</th>\n",
538
+ " <th></th>\n",
539
+ " <th></th>\n",
540
+ " <th></th>\n",
541
+ " <th></th>\n",
542
+ " <th></th>\n",
543
+ " <th></th>\n",
544
+ " <th></th>\n",
545
+ " <th></th>\n",
546
+ " <th></th>\n",
547
+ " <th></th>\n",
548
+ " <th></th>\n",
549
+ " <th></th>\n",
550
+ " <th></th>\n",
551
+ " <th></th>\n",
552
+ " </tr>\n",
553
+ " </thead>\n",
554
+ " <tbody>\n",
555
+ " <tr>\n",
556
+ " <th rowspan=\"2\" valign=\"top\">False</th>\n",
557
+ " <th>False</th>\n",
558
+ " <td>43</td>\n",
559
+ " <td>355.441860</td>\n",
560
+ " <td>364099.0625</td>\n",
561
+ " <td>7.255814</td>\n",
562
+ " <td>8.116279</td>\n",
563
+ " <td>491.069767</td>\n",
564
+ " <td>0.012805</td>\n",
565
+ " <td>0.224961</td>\n",
566
+ " <td>0.202063</td>\n",
567
+ " <td>0.040718</td>\n",
568
+ " <td>0.136427</td>\n",
569
+ " <td>0.780266</td>\n",
570
+ " <td>32.067005</td>\n",
571
+ " <td>312.732989</td>\n",
572
+ " </tr>\n",
573
+ " <tr>\n",
574
+ " <th>True</th>\n",
575
+ " <td>129</td>\n",
576
+ " <td>406.627907</td>\n",
577
+ " <td>NaN</td>\n",
578
+ " <td>7.217054</td>\n",
579
+ " <td>8.178295</td>\n",
580
+ " <td>491.069767</td>\n",
581
+ " <td>0.012805</td>\n",
582
+ " <td>0.224961</td>\n",
583
+ " <td>0.202063</td>\n",
584
+ " <td>0.040718</td>\n",
585
+ " <td>0.136427</td>\n",
586
+ " <td>0.780266</td>\n",
587
+ " <td>32.067005</td>\n",
588
+ " <td>312.732989</td>\n",
589
+ " </tr>\n",
590
+ " <tr>\n",
591
+ " <th rowspan=\"2\" valign=\"top\">True</th>\n",
592
+ " <th>False</th>\n",
593
+ " <td>129</td>\n",
594
+ " <td>433.899225</td>\n",
595
+ " <td>NaN</td>\n",
596
+ " <td>7.356589</td>\n",
597
+ " <td>8.302326</td>\n",
598
+ " <td>534.015504</td>\n",
599
+ " <td>0.009542</td>\n",
600
+ " <td>0.221893</td>\n",
601
+ " <td>0.205151</td>\n",
602
+ " <td>0.039033</td>\n",
603
+ " <td>0.134114</td>\n",
604
+ " <td>0.777162</td>\n",
605
+ " <td>31.753065</td>\n",
606
+ " <td>317.717517</td>\n",
607
+ " </tr>\n",
608
+ " <tr>\n",
609
+ " <th>True</th>\n",
610
+ " <td>387</td>\n",
611
+ " <td>444.509044</td>\n",
612
+ " <td>NaN</td>\n",
613
+ " <td>7.312661</td>\n",
614
+ " <td>8.276486</td>\n",
615
+ " <td>534.015504</td>\n",
616
+ " <td>0.009542</td>\n",
617
+ " <td>0.221893</td>\n",
618
+ " <td>0.205151</td>\n",
619
+ " <td>0.039033</td>\n",
620
+ " <td>0.134114</td>\n",
621
+ " <td>0.777162</td>\n",
622
+ " <td>31.753065</td>\n",
623
+ " <td>317.717517</td>\n",
624
+ " </tr>\n",
625
+ " </tbody>\n",
626
+ "</table>\n",
627
+ "</div>"
628
+ ]
629
+ },
630
+ "execution_count": 19,
631
+ "metadata": {},
632
+ "output_type": "execute_result"
633
+ }
634
+ ],
635
+ "execution_count": 19
636
+ },
637
+ {
638
+ "metadata": {
639
+ "ExecuteTime": {
640
+ "end_time": "2024-05-01T13:42:57.052768Z",
641
+ "start_time": "2024-05-01T13:42:56.812556Z"
642
+ }
643
+ },
644
+ "cell_type": "code",
645
+ "source": "",
646
+ "id": "3429b60eab154b79",
647
+ "outputs": [
648
+ {
649
+ "data": {
650
+ "text/plain": [
651
+ " all golden \\\n",
652
+ " spearman pearson spearman pearson \n",
653
+ "relative independent \n",
654
+ "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n",
655
+ " bleu 0.260118 0.185995 0.269028 0.259690 \n",
656
+ " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n",
657
+ " editdist 0.909934 0.910641 0.710772 0.662808 \n",
658
+ " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n",
659
+ " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n",
660
+ " meteor 0.336016 0.371949 0.068034 0.173237 \n",
661
+ " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n",
662
+ " rouge2 0.414256 0.340732 0.276139 0.332087 \n",
663
+ " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n",
664
+ " ter 0.618095 0.385515 0.575614 0.501385 \n",
665
+ "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n",
666
+ " bleu 0.302380 0.326167 0.302380 0.326167 \n",
667
+ " chrF 0.079802 0.184202 0.079802 0.184202 \n",
668
+ " editdist 0.252645 0.411131 0.252645 0.411131 \n",
669
+ " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n",
670
+ " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n",
671
+ " meteor 0.253380 0.403564 0.253380 0.403564 \n",
672
+ " rouge1 0.155926 0.136971 0.155926 0.136971 \n",
673
+ " rouge2 0.218822 0.281944 0.218822 0.281944 \n",
674
+ " rougeL 0.071344 0.091196 0.071344 0.091196 \n",
675
+ " ter 0.305601 0.062616 0.305601 0.062616 \n",
676
+ "\n",
677
+ " +s2e +e2s \\\n",
678
+ " spearman pearson spearman pearson \n",
679
+ "relative independent \n",
680
+ "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n",
681
+ " bleu 0.512841 0.502827 0.109831 0.068138 \n",
682
+ " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n",
683
+ " editdist 0.950494 0.935064 0.861930 0.878118 \n",
684
+ " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n",
685
+ " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n",
686
+ " meteor 0.203616 0.425775 0.372598 0.360051 \n",
687
+ " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n",
688
+ " rouge2 0.523559 0.537560 0.323911 0.282872 \n",
689
+ " rougeL -0.022288 -0.004664 0.012409 0.016372 \n",
690
+ " ter 0.774086 0.462554 0.529338 0.388592 \n",
691
+ "edittime bertscore NaN NaN NaN NaN \n",
692
+ " bleu NaN NaN NaN NaN \n",
693
+ " chrF NaN NaN NaN NaN \n",
694
+ " editdist NaN NaN NaN NaN \n",
695
+ " gptscore-noref-1-req NaN NaN NaN NaN \n",
696
+ " gptscore-ref-1-req NaN NaN NaN NaN \n",
697
+ " meteor NaN NaN NaN NaN \n",
698
+ " rouge1 NaN NaN NaN NaN \n",
699
+ " rouge2 NaN NaN NaN NaN \n",
700
+ " rougeL NaN NaN NaN NaN \n",
701
+ " ter NaN NaN NaN NaN \n",
702
+ "\n",
703
+ " +e2s+s2e \n",
704
+ " spearman pearson \n",
705
+ "relative independent \n",
706
+ "editdist bertscore -0.135421 -0.091748 \n",
707
+ " bleu 0.229712 0.145062 \n",
708
+ " chrF -0.156914 -0.093376 \n",
709
+ " editdist 0.939318 0.962305 \n",
710
+ " gptscore-noref-1-req 0.012102 0.066882 \n",
711
+ " gptscore-ref-1-req 0.013012 0.033618 \n",
712
+ " meteor 0.392262 0.401802 \n",
713
+ " rouge1 -0.054034 -0.030799 \n",
714
+ " rouge2 0.433859 0.324538 \n",
715
+ " rougeL 0.021983 -0.010644 \n",
716
+ " ter 0.591684 0.354459 \n",
717
+ "edittime bertscore NaN NaN \n",
718
+ " bleu NaN NaN \n",
719
+ " chrF NaN NaN \n",
720
+ " editdist NaN NaN \n",
721
+ " gptscore-noref-1-req NaN NaN \n",
722
+ " gptscore-ref-1-req NaN NaN \n",
723
+ " meteor NaN NaN \n",
724
+ " rouge1 NaN NaN \n",
725
+ " rouge2 NaN NaN \n",
726
+ " rougeL NaN NaN \n",
727
+ " ter NaN NaN "
728
+ ],
729
+ "text/html": [
730
+ "<div>\n",
731
+ "<style scoped>\n",
732
+ " .dataframe tbody tr th:only-of-type {\n",
733
+ " vertical-align: middle;\n",
734
+ " }\n",
735
+ "\n",
736
+ " .dataframe tbody tr th {\n",
737
+ " vertical-align: top;\n",
738
+ " }\n",
739
+ "\n",
740
+ " .dataframe thead tr th {\n",
741
+ " text-align: left;\n",
742
+ " }\n",
743
+ "\n",
744
+ " .dataframe thead tr:last-of-type th {\n",
745
+ " text-align: right;\n",
746
+ " }\n",
747
+ "</style>\n",
748
+ "<table border=\"1\" class=\"dataframe\">\n",
749
+ " <thead>\n",
750
+ " <tr>\n",
751
+ " <th></th>\n",
752
+ " <th></th>\n",
753
+ " <th colspan=\"2\" halign=\"left\">all</th>\n",
754
+ " <th colspan=\"2\" halign=\"left\">golden</th>\n",
755
+ " <th colspan=\"2\" halign=\"left\">+s2e</th>\n",
756
+ " <th colspan=\"2\" halign=\"left\">+e2s</th>\n",
757
+ " <th colspan=\"2\" halign=\"left\">+e2s+s2e</th>\n",
758
+ " </tr>\n",
759
+ " <tr>\n",
760
+ " <th></th>\n",
761
+ " <th></th>\n",
762
+ " <th>spearman</th>\n",
763
+ " <th>pearson</th>\n",
764
+ " <th>spearman</th>\n",
765
+ " <th>pearson</th>\n",
766
+ " <th>spearman</th>\n",
767
+ " <th>pearson</th>\n",
768
+ " <th>spearman</th>\n",
769
+ " <th>pearson</th>\n",
770
+ " <th>spearman</th>\n",
771
+ " <th>pearson</th>\n",
772
+ " </tr>\n",
773
+ " <tr>\n",
774
+ " <th>relative</th>\n",
775
+ " <th>independent</th>\n",
776
+ " <th></th>\n",
777
+ " <th></th>\n",
778
+ " <th></th>\n",
779
+ " <th></th>\n",
780
+ " <th></th>\n",
781
+ " <th></th>\n",
782
+ " <th></th>\n",
783
+ " <th></th>\n",
784
+ " <th></th>\n",
785
+ " <th></th>\n",
786
+ " </tr>\n",
787
+ " </thead>\n",
788
+ " <tbody>\n",
789
+ " <tr>\n",
790
+ " <th rowspan=\"11\" valign=\"top\">editdist</th>\n",
791
+ " <th>bertscore</th>\n",
792
+ " <td>-0.184962</td>\n",
793
+ " <td>-0.129057</td>\n",
794
+ " <td>-0.316215</td>\n",
795
+ " <td>-0.254700</td>\n",
796
+ " <td>-0.308494</td>\n",
797
+ " <td>-0.113525</td>\n",
798
+ " <td>-0.181393</td>\n",
799
+ " <td>-0.165924</td>\n",
800
+ " <td>-0.135421</td>\n",
801
+ " <td>-0.091748</td>\n",
802
+ " </tr>\n",
803
+ " <tr>\n",
804
+ " <th>bleu</th>\n",
805
+ " <td>0.260118</td>\n",
806
+ " <td>0.185995</td>\n",
807
+ " <td>0.269028</td>\n",
808
+ " <td>0.259690</td>\n",
809
+ " <td>0.512841</td>\n",
810
+ " <td>0.502827</td>\n",
811
+ " <td>0.109831</td>\n",
812
+ " <td>0.068138</td>\n",
813
+ " <td>0.229712</td>\n",
814
+ " <td>0.145062</td>\n",
815
+ " </tr>\n",
816
+ " <tr>\n",
817
+ " <th>chrF</th>\n",
818
+ " <td>-0.199200</td>\n",
819
+ " <td>-0.129029</td>\n",
820
+ " <td>-0.343201</td>\n",
821
+ " <td>-0.300656</td>\n",
822
+ " <td>-0.238124</td>\n",
823
+ " <td>-0.064922</td>\n",
824
+ " <td>-0.233123</td>\n",
825
+ " <td>-0.201726</td>\n",
826
+ " <td>-0.156914</td>\n",
827
+ " <td>-0.093376</td>\n",
828
+ " </tr>\n",
829
+ " <tr>\n",
830
+ " <th>editdist</th>\n",
831
+ " <td>0.909934</td>\n",
832
+ " <td>0.910641</td>\n",
833
+ " <td>0.710772</td>\n",
834
+ " <td>0.662808</td>\n",
835
+ " <td>0.950494</td>\n",
836
+ " <td>0.935064</td>\n",
837
+ " <td>0.861930</td>\n",
838
+ " <td>0.878118</td>\n",
839
+ " <td>0.939318</td>\n",
840
+ " <td>0.962305</td>\n",
841
+ " </tr>\n",
842
+ " <tr>\n",
843
+ " <th>gptscore-noref-1-req</th>\n",
844
+ " <td>0.032048</td>\n",
845
+ " <td>0.055364</td>\n",
846
+ " <td>0.155510</td>\n",
847
+ " <td>0.048588</td>\n",
848
+ " <td>0.067857</td>\n",
849
+ " <td>0.047215</td>\n",
850
+ " <td>-0.029048</td>\n",
851
+ " <td>-0.013128</td>\n",
852
+ " <td>0.012102</td>\n",
853
+ " <td>0.066882</td>\n",
854
+ " </tr>\n",
855
+ " <tr>\n",
856
+ " <th>gptscore-ref-1-req</th>\n",
857
+ " <td>0.024550</td>\n",
858
+ " <td>0.035295</td>\n",
859
+ " <td>-0.009830</td>\n",
860
+ " <td>-0.062574</td>\n",
861
+ " <td>-0.015178</td>\n",
862
+ " <td>-0.036001</td>\n",
863
+ " <td>0.071345</td>\n",
864
+ " <td>0.087584</td>\n",
865
+ " <td>0.013012</td>\n",
866
+ " <td>0.033618</td>\n",
867
+ " </tr>\n",
868
+ " <tr>\n",
869
+ " <th>meteor</th>\n",
870
+ " <td>0.336016</td>\n",
871
+ " <td>0.371949</td>\n",
872
+ " <td>0.068034</td>\n",
873
+ " <td>0.173237</td>\n",
874
+ " <td>0.203616</td>\n",
875
+ " <td>0.425775</td>\n",
876
+ " <td>0.372598</td>\n",
877
+ " <td>0.360051</td>\n",
878
+ " <td>0.392262</td>\n",
879
+ " <td>0.401802</td>\n",
880
+ " </tr>\n",
881
+ " <tr>\n",
882
+ " <th>rouge1</th>\n",
883
+ " <td>-0.077574</td>\n",
884
+ " <td>-0.043738</td>\n",
885
+ " <td>-0.187349</td>\n",
886
+ " <td>-0.163230</td>\n",
887
+ " <td>-0.139874</td>\n",
888
+ " <td>-0.065543</td>\n",
889
+ " <td>-0.082093</td>\n",
890
+ " <td>-0.035603</td>\n",
891
+ " <td>-0.054034</td>\n",
892
+ " <td>-0.030799</td>\n",
893
+ " </tr>\n",
894
+ " <tr>\n",
895
+ " <th>rouge2</th>\n",
896
+ " <td>0.414256</td>\n",
897
+ " <td>0.340732</td>\n",
898
+ " <td>0.276139</td>\n",
899
+ " <td>0.332087</td>\n",
900
+ " <td>0.523559</td>\n",
901
+ " <td>0.537560</td>\n",
902
+ " <td>0.323911</td>\n",
903
+ " <td>0.282872</td>\n",
904
+ " <td>0.433859</td>\n",
905
+ " <td>0.324538</td>\n",
906
+ " </tr>\n",
907
+ " <tr>\n",
908
+ " <th>rougeL</th>\n",
909
+ " <td>0.006513</td>\n",
910
+ " <td>-0.008078</td>\n",
911
+ " <td>-0.041502</td>\n",
912
+ " <td>-0.034867</td>\n",
913
+ " <td>-0.022288</td>\n",
914
+ " <td>-0.004664</td>\n",
915
+ " <td>0.012409</td>\n",
916
+ " <td>0.016372</td>\n",
917
+ " <td>0.021983</td>\n",
918
+ " <td>-0.010644</td>\n",
919
+ " </tr>\n",
920
+ " <tr>\n",
921
+ " <th>ter</th>\n",
922
+ " <td>0.618095</td>\n",
923
+ " <td>0.385515</td>\n",
924
+ " <td>0.575614</td>\n",
925
+ " <td>0.501385</td>\n",
926
+ " <td>0.774086</td>\n",
927
+ " <td>0.462554</td>\n",
928
+ " <td>0.529338</td>\n",
929
+ " <td>0.388592</td>\n",
930
+ " <td>0.591684</td>\n",
931
+ " <td>0.354459</td>\n",
932
+ " </tr>\n",
933
+ " <tr>\n",
934
+ " <th rowspan=\"11\" valign=\"top\">edittime</th>\n",
935
+ " <th>bertscore</th>\n",
936
+ " <td>0.140481</td>\n",
937
+ " <td>0.158807</td>\n",
938
+ " <td>0.140481</td>\n",
939
+ " <td>0.158807</td>\n",
940
+ " <td>NaN</td>\n",
941
+ " <td>NaN</td>\n",
942
+ " <td>NaN</td>\n",
943
+ " <td>NaN</td>\n",
944
+ " <td>NaN</td>\n",
945
+ " <td>NaN</td>\n",
946
+ " </tr>\n",
947
+ " <tr>\n",
948
+ " <th>bleu</th>\n",
949
+ " <td>0.302380</td>\n",
950
+ " <td>0.326167</td>\n",
951
+ " <td>0.302380</td>\n",
952
+ " <td>0.326167</td>\n",
953
+ " <td>NaN</td>\n",
954
+ " <td>NaN</td>\n",
955
+ " <td>NaN</td>\n",
956
+ " <td>NaN</td>\n",
957
+ " <td>NaN</td>\n",
958
+ " <td>NaN</td>\n",
959
+ " </tr>\n",
960
+ " <tr>\n",
961
+ " <th>chrF</th>\n",
962
+ " <td>0.079802</td>\n",
963
+ " <td>0.184202</td>\n",
964
+ " <td>0.079802</td>\n",
965
+ " <td>0.184202</td>\n",
966
+ " <td>NaN</td>\n",
967
+ " <td>NaN</td>\n",
968
+ " <td>NaN</td>\n",
969
+ " <td>NaN</td>\n",
970
+ " <td>NaN</td>\n",
971
+ " <td>NaN</td>\n",
972
+ " </tr>\n",
973
+ " <tr>\n",
974
+ " <th>editdist</th>\n",
975
+ " <td>0.252645</td>\n",
976
+ " <td>0.411131</td>\n",
977
+ " <td>0.252645</td>\n",
978
+ " <td>0.411131</td>\n",
979
+ " <td>NaN</td>\n",
980
+ " <td>NaN</td>\n",
981
+ " <td>NaN</td>\n",
982
+ " <td>NaN</td>\n",
983
+ " <td>NaN</td>\n",
984
+ " <td>NaN</td>\n",
985
+ " </tr>\n",
986
+ " <tr>\n",
987
+ " <th>gptscore-noref-1-req</th>\n",
988
+ " <td>0.206465</td>\n",
989
+ " <td>0.026235</td>\n",
990
+ " <td>0.206465</td>\n",
991
+ " <td>0.026235</td>\n",
992
+ " <td>NaN</td>\n",
993
+ " <td>NaN</td>\n",
994
+ " <td>NaN</td>\n",
995
+ " <td>NaN</td>\n",
996
+ " <td>NaN</td>\n",
997
+ " <td>NaN</td>\n",
998
+ " </tr>\n",
999
+ " <tr>\n",
1000
+ " <th>gptscore-ref-1-req</th>\n",
1001
+ " <td>0.130419</td>\n",
1002
+ " <td>-0.055218</td>\n",
1003
+ " <td>0.130419</td>\n",
1004
+ " <td>-0.055218</td>\n",
1005
+ " <td>NaN</td>\n",
1006
+ " <td>NaN</td>\n",
1007
+ " <td>NaN</td>\n",
1008
+ " <td>NaN</td>\n",
1009
+ " <td>NaN</td>\n",
1010
+ " <td>NaN</td>\n",
1011
+ " </tr>\n",
1012
+ " <tr>\n",
1013
+ " <th>meteor</th>\n",
1014
+ " <td>0.253380</td>\n",
1015
+ " <td>0.403564</td>\n",
1016
+ " <td>0.253380</td>\n",
1017
+ " <td>0.403564</td>\n",
1018
+ " <td>NaN</td>\n",
1019
+ " <td>NaN</td>\n",
1020
+ " <td>NaN</td>\n",
1021
+ " <td>NaN</td>\n",
1022
+ " <td>NaN</td>\n",
1023
+ " <td>NaN</td>\n",
1024
+ " </tr>\n",
1025
+ " <tr>\n",
1026
+ " <th>rouge1</th>\n",
1027
+ " <td>0.155926</td>\n",
1028
+ " <td>0.136971</td>\n",
1029
+ " <td>0.155926</td>\n",
1030
+ " <td>0.136971</td>\n",
1031
+ " <td>NaN</td>\n",
1032
+ " <td>NaN</td>\n",
1033
+ " <td>NaN</td>\n",
1034
+ " <td>NaN</td>\n",
1035
+ " <td>NaN</td>\n",
1036
+ " <td>NaN</td>\n",
1037
+ " </tr>\n",
1038
+ " <tr>\n",
1039
+ " <th>rouge2</th>\n",
1040
+ " <td>0.218822</td>\n",
1041
+ " <td>0.281944</td>\n",
1042
+ " <td>0.218822</td>\n",
1043
+ " <td>0.281944</td>\n",
1044
+ " <td>NaN</td>\n",
1045
+ " <td>NaN</td>\n",
1046
+ " <td>NaN</td>\n",
1047
+ " <td>NaN</td>\n",
1048
+ " <td>NaN</td>\n",
1049
+ " <td>NaN</td>\n",
1050
+ " </tr>\n",
1051
+ " <tr>\n",
1052
+ " <th>rougeL</th>\n",
1053
+ " <td>0.071344</td>\n",
1054
+ " <td>0.091196</td>\n",
1055
+ " <td>0.071344</td>\n",
1056
+ " <td>0.091196</td>\n",
1057
+ " <td>NaN</td>\n",
1058
+ " <td>NaN</td>\n",
1059
+ " <td>NaN</td>\n",
1060
+ " <td>NaN</td>\n",
1061
+ " <td>NaN</td>\n",
1062
+ " <td>NaN</td>\n",
1063
+ " </tr>\n",
1064
+ " <tr>\n",
1065
+ " <th>ter</th>\n",
1066
+ " <td>0.305601</td>\n",
1067
+ " <td>0.062616</td>\n",
1068
+ " <td>0.305601</td>\n",
1069
+ " <td>0.062616</td>\n",
1070
+ " <td>NaN</td>\n",
1071
+ " <td>NaN</td>\n",
1072
+ " <td>NaN</td>\n",
1073
+ " <td>NaN</td>\n",
1074
+ " <td>NaN</td>\n",
1075
+ " <td>NaN</td>\n",
1076
+ " </tr>\n",
1077
+ " </tbody>\n",
1078
+ "</table>\n",
1079
+ "</div>"
1080
+ ]
1081
+ },
1082
+ "execution_count": 47,
1083
+ "metadata": {},
1084
+ "output_type": "execute_result"
1085
+ }
1086
+ ],
1087
+ "execution_count": 47
1088
+ },
1089
+ {
1090
+ "metadata": {
1091
+ "ExecuteTime": {
1092
+ "end_time": "2024-05-01T13:49:09.514129Z",
1093
+ "start_time": "2024-05-01T13:49:09.295101Z"
1094
+ }
1095
+ },
1096
+ "cell_type": "code",
1097
+ "source": [
1098
+ "from analysis_util import get_ref_only_correlations_for_groups\n",
1099
+ "\n",
1100
+ "get_ref_only_correlations_for_groups(df)"
1101
+ ],
1102
+ "id": "a3531f28722fa5bc",
1103
+ "outputs": [
1104
+ {
1105
+ "data": {
1106
+ "text/plain": [
1107
+ " all golden \\\n",
1108
+ " spearman pearson spearman pearson \n",
1109
+ "relative independent \n",
1110
+ "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n",
1111
+ " bleu 0.260118 0.185995 0.269028 0.259690 \n",
1112
+ " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n",
1113
+ " editdist 0.909934 0.910641 0.710772 0.662808 \n",
1114
+ " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n",
1115
+ " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n",
1116
+ " meteor 0.336016 0.371949 0.068034 0.173237 \n",
1117
+ " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n",
1118
+ " rouge2 0.414256 0.340732 0.276139 0.332087 \n",
1119
+ " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n",
1120
+ " ter 0.618095 0.385515 0.575614 0.501385 \n",
1121
+ "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n",
1122
+ " bleu 0.302380 0.326167 0.302380 0.326167 \n",
1123
+ " chrF 0.079802 0.184202 0.079802 0.184202 \n",
1124
+ " editdist 0.252645 0.411131 0.252645 0.411131 \n",
1125
+ " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n",
1126
+ " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n",
1127
+ " meteor 0.253380 0.403564 0.253380 0.403564 \n",
1128
+ " rouge1 0.155926 0.136971 0.155926 0.136971 \n",
1129
+ " rouge2 0.218822 0.281944 0.218822 0.281944 \n",
1130
+ " rougeL 0.071344 0.091196 0.071344 0.091196 \n",
1131
+ " ter 0.305601 0.062616 0.305601 0.062616 \n",
1132
+ "\n",
1133
+ " +s2e +e2s \\\n",
1134
+ " spearman pearson spearman pearson \n",
1135
+ "relative independent \n",
1136
+ "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n",
1137
+ " bleu 0.512841 0.502827 0.109831 0.068138 \n",
1138
+ " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n",
1139
+ " editdist 0.950494 0.935064 0.861930 0.878118 \n",
1140
+ " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n",
1141
+ " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n",
1142
+ " meteor 0.203616 0.425775 0.372598 0.360051 \n",
1143
+ " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n",
1144
+ " rouge2 0.523559 0.537560 0.323911 0.282872 \n",
1145
+ " rougeL -0.022288 -0.004664 0.012409 0.016372 \n",
1146
+ " ter 0.774086 0.462554 0.529338 0.388592 \n",
1147
+ "edittime bertscore NaN NaN NaN NaN \n",
1148
+ " bleu NaN NaN NaN NaN \n",
1149
+ " chrF NaN NaN NaN NaN \n",
1150
+ " editdist NaN NaN NaN NaN \n",
1151
+ " gptscore-noref-1-req NaN NaN NaN NaN \n",
1152
+ " gptscore-ref-1-req NaN NaN NaN NaN \n",
1153
+ " meteor NaN NaN NaN NaN \n",
1154
+ " rouge1 NaN NaN NaN NaN \n",
1155
+ " rouge2 NaN NaN NaN NaN \n",
1156
+ " rougeL NaN NaN NaN NaN \n",
1157
+ " ter NaN NaN NaN NaN \n",
1158
+ "\n",
1159
+ " +e2s+s2e \n",
1160
+ " spearman pearson \n",
1161
+ "relative independent \n",
1162
+ "editdist bertscore -0.135421 -0.091748 \n",
1163
+ " bleu 0.229712 0.145062 \n",
1164
+ " chrF -0.156914 -0.093376 \n",
1165
+ " editdist 0.939318 0.962305 \n",
1166
+ " gptscore-noref-1-req 0.012102 0.066882 \n",
1167
+ " gptscore-ref-1-req 0.013012 0.033618 \n",
1168
+ " meteor 0.392262 0.401802 \n",
1169
+ " rouge1 -0.054034 -0.030799 \n",
1170
+ " rouge2 0.433859 0.324538 \n",
1171
+ " rougeL 0.021983 -0.010644 \n",
1172
+ " ter 0.591684 0.354459 \n",
1173
+ "edittime bertscore NaN NaN \n",
1174
+ " bleu NaN NaN \n",
1175
+ " chrF NaN NaN \n",
1176
+ " editdist NaN NaN \n",
1177
+ " gptscore-noref-1-req NaN NaN \n",
1178
+ " gptscore-ref-1-req NaN NaN \n",
1179
+ " meteor NaN NaN \n",
1180
+ " rouge1 NaN NaN \n",
1181
+ " rouge2 NaN NaN \n",
1182
+ " rougeL NaN NaN \n",
1183
+ " ter NaN NaN "
1184
+ ],
1185
+ "text/html": [
1186
+ "<div>\n",
1187
+ "<style scoped>\n",
1188
+ " .dataframe tbody tr th:only-of-type {\n",
1189
+ " vertical-align: middle;\n",
1190
+ " }\n",
1191
+ "\n",
1192
+ " .dataframe tbody tr th {\n",
1193
+ " vertical-align: top;\n",
1194
+ " }\n",
1195
+ "\n",
1196
+ " .dataframe thead tr th {\n",
1197
+ " text-align: left;\n",
1198
+ " }\n",
1199
+ "\n",
1200
+ " .dataframe thead tr:last-of-type th {\n",
1201
+ " text-align: right;\n",
1202
+ " }\n",
1203
+ "</style>\n",
1204
+ "<table border=\"1\" class=\"dataframe\">\n",
1205
+ " <thead>\n",
1206
+ " <tr>\n",
1207
+ " <th></th>\n",
1208
+ " <th></th>\n",
1209
+ " <th colspan=\"2\" halign=\"left\">all</th>\n",
1210
+ " <th colspan=\"2\" halign=\"left\">golden</th>\n",
1211
+ " <th colspan=\"2\" halign=\"left\">+s2e</th>\n",
1212
+ " <th colspan=\"2\" halign=\"left\">+e2s</th>\n",
1213
+ " <th colspan=\"2\" halign=\"left\">+e2s+s2e</th>\n",
1214
+ " </tr>\n",
1215
+ " <tr>\n",
1216
+ " <th></th>\n",
1217
+ " <th></th>\n",
1218
+ " <th>spearman</th>\n",
1219
+ " <th>pearson</th>\n",
1220
+ " <th>spearman</th>\n",
1221
+ " <th>pearson</th>\n",
1222
+ " <th>spearman</th>\n",
1223
+ " <th>pearson</th>\n",
1224
+ " <th>spearman</th>\n",
1225
+ " <th>pearson</th>\n",
1226
+ " <th>spearman</th>\n",
1227
+ " <th>pearson</th>\n",
1228
+ " </tr>\n",
1229
+ " <tr>\n",
1230
+ " <th>relative</th>\n",
1231
+ " <th>independent</th>\n",
1232
+ " <th></th>\n",
1233
+ " <th></th>\n",
1234
+ " <th></th>\n",
1235
+ " <th></th>\n",
1236
+ " <th></th>\n",
1237
+ " <th></th>\n",
1238
+ " <th></th>\n",
1239
+ " <th></th>\n",
1240
+ " <th></th>\n",
1241
+ " <th></th>\n",
1242
+ " </tr>\n",
1243
+ " </thead>\n",
1244
+ " <tbody>\n",
1245
+ " <tr>\n",
1246
+ " <th rowspan=\"11\" valign=\"top\">editdist</th>\n",
1247
+ " <th>bertscore</th>\n",
1248
+ " <td>-0.184962</td>\n",
1249
+ " <td>-0.129057</td>\n",
1250
+ " <td>-0.316215</td>\n",
1251
+ " <td>-0.254700</td>\n",
1252
+ " <td>-0.308494</td>\n",
1253
+ " <td>-0.113525</td>\n",
1254
+ " <td>-0.181393</td>\n",
1255
+ " <td>-0.165924</td>\n",
1256
+ " <td>-0.135421</td>\n",
1257
+ " <td>-0.091748</td>\n",
1258
+ " </tr>\n",
1259
+ " <tr>\n",
1260
+ " <th>bleu</th>\n",
1261
+ " <td>0.260118</td>\n",
1262
+ " <td>0.185995</td>\n",
1263
+ " <td>0.269028</td>\n",
1264
+ " <td>0.259690</td>\n",
1265
+ " <td>0.512841</td>\n",
1266
+ " <td>0.502827</td>\n",
1267
+ " <td>0.109831</td>\n",
1268
+ " <td>0.068138</td>\n",
1269
+ " <td>0.229712</td>\n",
1270
+ " <td>0.145062</td>\n",
1271
+ " </tr>\n",
1272
+ " <tr>\n",
1273
+ " <th>chrF</th>\n",
1274
+ " <td>-0.199200</td>\n",
1275
+ " <td>-0.129029</td>\n",
1276
+ " <td>-0.343201</td>\n",
1277
+ " <td>-0.300656</td>\n",
1278
+ " <td>-0.238124</td>\n",
1279
+ " <td>-0.064922</td>\n",
1280
+ " <td>-0.233123</td>\n",
1281
+ " <td>-0.201726</td>\n",
1282
+ " <td>-0.156914</td>\n",
1283
+ " <td>-0.093376</td>\n",
1284
+ " </tr>\n",
1285
+ " <tr>\n",
1286
+ " <th>editdist</th>\n",
1287
+ " <td>0.909934</td>\n",
1288
+ " <td>0.910641</td>\n",
1289
+ " <td>0.710772</td>\n",
1290
+ " <td>0.662808</td>\n",
1291
+ " <td>0.950494</td>\n",
1292
+ " <td>0.935064</td>\n",
1293
+ " <td>0.861930</td>\n",
1294
+ " <td>0.878118</td>\n",
1295
+ " <td>0.939318</td>\n",
1296
+ " <td>0.962305</td>\n",
1297
+ " </tr>\n",
1298
+ " <tr>\n",
1299
+ " <th>gptscore-noref-1-req</th>\n",
1300
+ " <td>0.032048</td>\n",
1301
+ " <td>0.055364</td>\n",
1302
+ " <td>0.155510</td>\n",
1303
+ " <td>0.048588</td>\n",
1304
+ " <td>0.067857</td>\n",
1305
+ " <td>0.047215</td>\n",
1306
+ " <td>-0.029048</td>\n",
1307
+ " <td>-0.013128</td>\n",
1308
+ " <td>0.012102</td>\n",
1309
+ " <td>0.066882</td>\n",
1310
+ " </tr>\n",
1311
+ " <tr>\n",
1312
+ " <th>gptscore-ref-1-req</th>\n",
1313
+ " <td>0.024550</td>\n",
1314
+ " <td>0.035295</td>\n",
1315
+ " <td>-0.009830</td>\n",
1316
+ " <td>-0.062574</td>\n",
1317
+ " <td>-0.015178</td>\n",
1318
+ " <td>-0.036001</td>\n",
1319
+ " <td>0.071345</td>\n",
1320
+ " <td>0.087584</td>\n",
1321
+ " <td>0.013012</td>\n",
1322
+ " <td>0.033618</td>\n",
1323
+ " </tr>\n",
1324
+ " <tr>\n",
1325
+ " <th>meteor</th>\n",
1326
+ " <td>0.336016</td>\n",
1327
+ " <td>0.371949</td>\n",
1328
+ " <td>0.068034</td>\n",
1329
+ " <td>0.173237</td>\n",
1330
+ " <td>0.203616</td>\n",
1331
+ " <td>0.425775</td>\n",
1332
+ " <td>0.372598</td>\n",
1333
+ " <td>0.360051</td>\n",
1334
+ " <td>0.392262</td>\n",
1335
+ " <td>0.401802</td>\n",
1336
+ " </tr>\n",
1337
+ " <tr>\n",
1338
+ " <th>rouge1</th>\n",
1339
+ " <td>-0.077574</td>\n",
1340
+ " <td>-0.043738</td>\n",
1341
+ " <td>-0.187349</td>\n",
1342
+ " <td>-0.163230</td>\n",
1343
+ " <td>-0.139874</td>\n",
1344
+ " <td>-0.065543</td>\n",
1345
+ " <td>-0.082093</td>\n",
1346
+ " <td>-0.035603</td>\n",
1347
+ " <td>-0.054034</td>\n",
1348
+ " <td>-0.030799</td>\n",
1349
+ " </tr>\n",
1350
+ " <tr>\n",
1351
+ " <th>rouge2</th>\n",
1352
+ " <td>0.414256</td>\n",
1353
+ " <td>0.340732</td>\n",
1354
+ " <td>0.276139</td>\n",
1355
+ " <td>0.332087</td>\n",
1356
+ " <td>0.523559</td>\n",
1357
+ " <td>0.537560</td>\n",
1358
+ " <td>0.323911</td>\n",
1359
+ " <td>0.282872</td>\n",
1360
+ " <td>0.433859</td>\n",
1361
+ " <td>0.324538</td>\n",
1362
+ " </tr>\n",
1363
+ " <tr>\n",
1364
+ " <th>rougeL</th>\n",
1365
+ " <td>0.006513</td>\n",
1366
+ " <td>-0.008078</td>\n",
1367
+ " <td>-0.041502</td>\n",
1368
+ " <td>-0.034867</td>\n",
1369
+ " <td>-0.022288</td>\n",
1370
+ " <td>-0.004664</td>\n",
1371
+ " <td>0.012409</td>\n",
1372
+ " <td>0.016372</td>\n",
1373
+ " <td>0.021983</td>\n",
1374
+ " <td>-0.010644</td>\n",
1375
+ " </tr>\n",
1376
+ " <tr>\n",
1377
+ " <th>ter</th>\n",
1378
+ " <td>0.618095</td>\n",
1379
+ " <td>0.385515</td>\n",
1380
+ " <td>0.575614</td>\n",
1381
+ " <td>0.501385</td>\n",
1382
+ " <td>0.774086</td>\n",
1383
+ " <td>0.462554</td>\n",
1384
+ " <td>0.529338</td>\n",
1385
+ " <td>0.388592</td>\n",
1386
+ " <td>0.591684</td>\n",
1387
+ " <td>0.354459</td>\n",
1388
+ " </tr>\n",
1389
+ " <tr>\n",
1390
+ " <th rowspan=\"11\" valign=\"top\">edittime</th>\n",
1391
+ " <th>bertscore</th>\n",
1392
+ " <td>0.140481</td>\n",
1393
+ " <td>0.158807</td>\n",
1394
+ " <td>0.140481</td>\n",
1395
+ " <td>0.158807</td>\n",
1396
+ " <td>NaN</td>\n",
1397
+ " <td>NaN</td>\n",
1398
+ " <td>NaN</td>\n",
1399
+ " <td>NaN</td>\n",
1400
+ " <td>NaN</td>\n",
1401
+ " <td>NaN</td>\n",
1402
+ " </tr>\n",
1403
+ " <tr>\n",
1404
+ " <th>bleu</th>\n",
1405
+ " <td>0.302380</td>\n",
1406
+ " <td>0.326167</td>\n",
1407
+ " <td>0.302380</td>\n",
1408
+ " <td>0.326167</td>\n",
1409
+ " <td>NaN</td>\n",
1410
+ " <td>NaN</td>\n",
1411
+ " <td>NaN</td>\n",
1412
+ " <td>NaN</td>\n",
1413
+ " <td>NaN</td>\n",
1414
+ " <td>NaN</td>\n",
1415
+ " </tr>\n",
1416
+ " <tr>\n",
1417
+ " <th>chrF</th>\n",
1418
+ " <td>0.079802</td>\n",
1419
+ " <td>0.184202</td>\n",
1420
+ " <td>0.079802</td>\n",
1421
+ " <td>0.184202</td>\n",
1422
+ " <td>NaN</td>\n",
1423
+ " <td>NaN</td>\n",
1424
+ " <td>NaN</td>\n",
1425
+ " <td>NaN</td>\n",
1426
+ " <td>NaN</td>\n",
1427
+ " <td>NaN</td>\n",
1428
+ " </tr>\n",
1429
+ " <tr>\n",
1430
+ " <th>editdist</th>\n",
1431
+ " <td>0.252645</td>\n",
1432
+ " <td>0.411131</td>\n",
1433
+ " <td>0.252645</td>\n",
1434
+ " <td>0.411131</td>\n",
1435
+ " <td>NaN</td>\n",
1436
+ " <td>NaN</td>\n",
1437
+ " <td>NaN</td>\n",
1438
+ " <td>NaN</td>\n",
1439
+ " <td>NaN</td>\n",
1440
+ " <td>NaN</td>\n",
1441
+ " </tr>\n",
1442
+ " <tr>\n",
1443
+ " <th>gptscore-noref-1-req</th>\n",
1444
+ " <td>0.206465</td>\n",
1445
+ " <td>0.026235</td>\n",
1446
+ " <td>0.206465</td>\n",
1447
+ " <td>0.026235</td>\n",
1448
+ " <td>NaN</td>\n",
1449
+ " <td>NaN</td>\n",
1450
+ " <td>NaN</td>\n",
1451
+ " <td>NaN</td>\n",
1452
+ " <td>NaN</td>\n",
1453
+ " <td>NaN</td>\n",
1454
+ " </tr>\n",
1455
+ " <tr>\n",
1456
+ " <th>gptscore-ref-1-req</th>\n",
1457
+ " <td>0.130419</td>\n",
1458
+ " <td>-0.055218</td>\n",
1459
+ " <td>0.130419</td>\n",
1460
+ " <td>-0.055218</td>\n",
1461
+ " <td>NaN</td>\n",
1462
+ " <td>NaN</td>\n",
1463
+ " <td>NaN</td>\n",
1464
+ " <td>NaN</td>\n",
1465
+ " <td>NaN</td>\n",
1466
+ " <td>NaN</td>\n",
1467
+ " </tr>\n",
1468
+ " <tr>\n",
1469
+ " <th>meteor</th>\n",
1470
+ " <td>0.253380</td>\n",
1471
+ " <td>0.403564</td>\n",
1472
+ " <td>0.253380</td>\n",
1473
+ " <td>0.403564</td>\n",
1474
+ " <td>NaN</td>\n",
1475
+ " <td>NaN</td>\n",
1476
+ " <td>NaN</td>\n",
1477
+ " <td>NaN</td>\n",
1478
+ " <td>NaN</td>\n",
1479
+ " <td>NaN</td>\n",
1480
+ " </tr>\n",
1481
+ " <tr>\n",
1482
+ " <th>rouge1</th>\n",
1483
+ " <td>0.155926</td>\n",
1484
+ " <td>0.136971</td>\n",
1485
+ " <td>0.155926</td>\n",
1486
+ " <td>0.136971</td>\n",
1487
+ " <td>NaN</td>\n",
1488
+ " <td>NaN</td>\n",
1489
+ " <td>NaN</td>\n",
1490
+ " <td>NaN</td>\n",
1491
+ " <td>NaN</td>\n",
1492
+ " <td>NaN</td>\n",
1493
+ " </tr>\n",
1494
+ " <tr>\n",
1495
+ " <th>rouge2</th>\n",
1496
+ " <td>0.218822</td>\n",
1497
+ " <td>0.281944</td>\n",
1498
+ " <td>0.218822</td>\n",
1499
+ " <td>0.281944</td>\n",
1500
+ " <td>NaN</td>\n",
1501
+ " <td>NaN</td>\n",
1502
+ " <td>NaN</td>\n",
1503
+ " <td>NaN</td>\n",
1504
+ " <td>NaN</td>\n",
1505
+ " <td>NaN</td>\n",
1506
+ " </tr>\n",
1507
+ " <tr>\n",
1508
+ " <th>rougeL</th>\n",
1509
+ " <td>0.071344</td>\n",
1510
+ " <td>0.091196</td>\n",
1511
+ " <td>0.071344</td>\n",
1512
+ " <td>0.091196</td>\n",
1513
+ " <td>NaN</td>\n",
1514
+ " <td>NaN</td>\n",
1515
+ " <td>NaN</td>\n",
1516
+ " <td>NaN</td>\n",
1517
+ " <td>NaN</td>\n",
1518
+ " <td>NaN</td>\n",
1519
+ " </tr>\n",
1520
+ " <tr>\n",
1521
+ " <th>ter</th>\n",
1522
+ " <td>0.305601</td>\n",
1523
+ " <td>0.062616</td>\n",
1524
+ " <td>0.305601</td>\n",
1525
+ " <td>0.062616</td>\n",
1526
+ " <td>NaN</td>\n",
1527
+ " <td>NaN</td>\n",
1528
+ " <td>NaN</td>\n",
1529
+ " <td>NaN</td>\n",
1530
+ " <td>NaN</td>\n",
1531
+ " <td>NaN</td>\n",
1532
+ " </tr>\n",
1533
+ " </tbody>\n",
1534
+ "</table>\n",
1535
+ "</div>"
1536
+ ]
1537
+ },
1538
+ "execution_count": 50,
1539
+ "metadata": {},
1540
+ "output_type": "execute_result"
1541
+ }
1542
+ ],
1543
+ "execution_count": 50
1544
+ }
1545
+ ],
1546
+ "metadata": {
1547
+ "kernelspec": {
1548
+ "display_name": "Python 3",
1549
+ "language": "python",
1550
+ "name": "python3"
1551
+ },
1552
+ "language_info": {
1553
+ "codemirror_mode": {
1554
+ "name": "ipython",
1555
+ "version": 2
1556
+ },
1557
+ "file_extension": ".py",
1558
+ "mimetype": "text/x-python",
1559
+ "name": "python",
1560
+ "nbconvert_exporter": "python",
1561
+ "pygments_lexer": "ipython2",
1562
+ "version": "2.7.6"
1563
+ }
1564
+ },
1565
+ "nbformat": 4,
1566
+ "nbformat_minor": 5
1567
+ }
analysis_util.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ from generation_steps.metrics_analysis import correlations_for_group
4
+
5
+
6
def split_metrics_string(s):
    """Extract the two metric names from an underscore-delimited key.

    The key is split on "_" and the tokens at positions 1 and 3 are
    returned, so a key shaped like ``"<tag>_<first>_<tag>_<second>"``
    yields ``(first, second)``. Any tokens after the fourth are ignored.

    Args:
        s: Underscore-delimited key containing at least four tokens.

    Returns:
        The tuple ``(tokens[1], tokens[3])``.

    Raises:
        IndexError: If ``s`` has fewer than four "_"-separated tokens.
    """
    tokens = s.split("_")
    if len(tokens) < 4:
        # Same exception type the bare indexing would raise, but the message
        # names the offending key instead of "list index out of range".
        raise IndexError(
            f"expected at least 4 '_'-separated tokens in metrics key {s!r}, "
            f"got {len(tokens)}"
        )
    return tokens[1], tokens[3]
9
+
10
+
11
def get_ref_only_correlations_df(df):
    """Arrange the correlations computed for ``df`` into a two-level table.

    ``correlations_for_group(df)`` is consulted for labels of the form
    ``"<key>_spearman"`` and ``"<key>_pearson"``; each distinct ``<key>``
    becomes one row with a ``spearman`` and a ``pearson`` column. Rows are
    sorted by ``<key>`` and then re-indexed by the pair of names that
    ``split_metrics_string`` extracts from it.

    Args:
        df: Data passed unchanged to ``correlations_for_group``.
            NOTE(review): presumably a pandas DataFrame of per-sample metric
            values -- confirm against ``generation_steps.metrics_analysis``.

    Returns:
        A DataFrame with columns ``spearman`` and ``pearson`` and a
        MultiIndex whose levels are named ``relative`` and ``independent``.
    """
    raw = correlations_for_group(df)

    # Drop the trailing "_<suffix>" token of every label; the set collapses
    # the spearman/pearson variants of the same key into one entry.
    pair_keys = {label.rpartition("_")[0] for label in raw.index}

    records = [
        {
            "metrics": key,
            "spearman": raw[f"{key}_spearman"],
            "pearson": raw[f"{key}_pearson"],
        }
        for key in pair_keys
    ]

    # Sort on the flat string key first, then swap in the two-level index.
    table = pd.DataFrame.from_records(data=records, index="metrics").sort_index()
    pair_tuples = table.index.map(split_metrics_string).tolist()
    table.index = pd.MultiIndex.from_tuples(
        pair_tuples, names=["relative", "independent"]
    )

    return table
30
+
31
+
32
def get_ref_only_correlations_for_groups(df):
    """Compute the correlation table for ``df`` and for each flag subgroup.

    The rows of ``df`` are partitioned by the four combinations of the
    ``end_to_start`` and ``start_to_end`` columns. A correlation table is
    built with ``get_ref_only_correlations_df`` for the full frame (label
    ``"all"``) and for each partition, labelled ``"golden"`` (both flags
    False), ``"+s2e"``, ``"+e2s"`` and ``"+e2s+s2e"``.

    Args:
        df: DataFrame providing ``end_to_start`` and ``start_to_end``
            columns that are compared against ``False`` / ``True``.

    Returns:
        The per-group tables concatenated side by side, with the group label
        as the outer column level.
    """
    per_group = {"all": get_ref_only_correlations_df(df)}

    flag_combinations = [
        (e2s, s2e) for e2s in (False, True) for s2e in (False, True)
    ]
    for e2s, s2e in flag_combinations:
        # An empty label means neither flag is set, i.e. the "golden" subset.
        label = (("+e2s" if e2s else "") + ("+s2e" if s2e else "")) or "golden"

        mask = (df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
        per_group[label] = get_ref_only_correlations_df(df[mask])

    return pd.concat(per_group, axis=1)
change_visualizer.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
 
 
3
  import generate_annotated_diffs
4
  import statistics
5
 
@@ -87,7 +88,7 @@ if __name__ == '__main__':
87
 
88
  slider_synthetic.change(update_dataset_view_synthetic, inputs=slider_synthetic,
89
  outputs=view_synthetic)
90
- with gr.Tab("Compare"):
91
  def layout_for_statistics(statistics_group_name):
92
  gr.Markdown(f"### {statistics_group_name}")
93
  stats = STATISTICS[statistics_group_name]
@@ -106,6 +107,9 @@ if __name__ == '__main__':
106
  with gr.Column(scale=1):
107
  layout_for_statistics("synthetic")
108
 
 
 
 
109
  application.load(update_dataset_view_manual, inputs=slider_manual,
110
  outputs=view_manual)
111
 
 
1
  import gradio as gr
2
 
3
+ import analysis_util
4
  import generate_annotated_diffs
5
  import statistics
6
 
 
88
 
89
  slider_synthetic.change(update_dataset_view_synthetic, inputs=slider_synthetic,
90
  outputs=view_synthetic)
91
+ with gr.Tab("Analysis"):
92
  def layout_for_statistics(statistics_group_name):
93
  gr.Markdown(f"### {statistics_group_name}")
94
  stats = STATISTICS[statistics_group_name]
 
107
  with gr.Column(scale=1):
108
  layout_for_statistics("synthetic")
109
 
110
+ gr.Markdown(f"### Reference-only correlations")
111
+ gr.Markdown(value=analysis_util.get_ref_only_correlations_for_groups(df_synthetic).to_markdown())
112
+
113
  application.load(update_dataset_view_manual, inputs=slider_manual,
114
  outputs=view_manual)
115