Petr Tsvetkov commited on
Commit
39950c9
β€’
1 Parent(s): ca11b66

Fix the visualization

Browse files
analysis.ipynb CHANGED
@@ -5,9 +5,12 @@
5
  "id": "initial_id",
6
  "metadata": {
7
  "collapsed": true,
 
 
 
8
  "ExecuteTime": {
9
- "end_time": "2024-05-01T13:07:35.991719Z",
10
- "start_time": "2024-05-01T13:07:16.672667Z"
11
  }
12
  },
13
  "source": [
@@ -15,41 +18,23 @@
15
  "\n",
16
  "import config"
17
  ],
18
- "outputs": [
19
- {
20
- "name": "stderr",
21
- "output_type": "stream",
22
- "text": [
23
- "D:\\petrtsv\\work\\jetbrains\\commit-rewriting-processing\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
24
- " from .autonotebook import tqdm as notebook_tqdm\n",
25
- "[nltk_data] Downloading package wordnet to C:\\Users\\Petr\n",
26
- "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
27
- "[nltk_data] Package wordnet is already up-to-date!\n",
28
- "[nltk_data] Downloading package punkt to C:\\Users\\Petr\n",
29
- "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
30
- "[nltk_data] Package punkt is already up-to-date!\n",
31
- "[nltk_data] Downloading package omw-1.4 to C:\\Users\\Petr\n",
32
- "[nltk_data] Tsvetkov\\AppData\\Roaming\\nltk_data...\n",
33
- "[nltk_data] Package omw-1.4 is already up-to-date!\n"
34
- ]
35
- }
36
- ],
37
- "execution_count": 20
38
  },
39
  {
 
 
40
  "metadata": {
41
  "ExecuteTime": {
42
- "end_time": "2024-05-01T12:57:08.596650Z",
43
- "start_time": "2024-05-01T12:57:08.435650Z"
44
  }
45
  },
46
- "cell_type": "code",
47
  "source": [
48
  "df = pd.read_csv(config.SYNTHETIC_DATASET_ARTIFACT, index_col=0)\n",
49
  "\n",
50
  "df.head()"
51
  ],
52
- "id": "2ac8757a17e62293",
53
  "outputs": [
54
  {
55
  "data": {
@@ -316,6 +301,30 @@
316
  "</div>"
317
  ]
318
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  "execution_count": 6,
320
  "metadata": {},
321
  "output_type": "execute_result"
@@ -324,18 +333,15 @@
324
  "execution_count": 6
325
  },
326
  {
 
 
 
327
  "metadata": {
328
  "ExecuteTime": {
329
  "end_time": "2024-05-01T13:02:40.761645Z",
330
  "start_time": "2024-05-01T13:02:40.740647Z"
331
  }
332
  },
333
- "cell_type": "code",
334
- "source": [
335
- "rel_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_related\")]\n",
336
- "rel_metrics"
337
- ],
338
- "id": "d19c12dd10b25c75",
339
  "outputs": [
340
  {
341
  "data": {
@@ -348,21 +354,21 @@
348
  "output_type": "execute_result"
349
  }
350
  ],
351
- "execution_count": 15
 
 
 
352
  },
353
  {
 
 
 
354
  "metadata": {
355
  "ExecuteTime": {
356
  "end_time": "2024-05-01T13:02:44.072037Z",
357
  "start_time": "2024-05-01T13:02:44.055039Z"
358
  }
359
  },
360
- "cell_type": "code",
361
- "source": [
362
- "ind_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_independent\")]\n",
363
- "ind_metrics"
364
- ],
365
- "id": "79d644cd780b28a1",
366
  "outputs": [
367
  {
368
  "data": {
@@ -385,96 +391,24 @@
385
  "output_type": "execute_result"
386
  }
387
  ],
388
- "execution_count": 16
 
 
 
389
  },
390
  {
 
 
 
391
  "metadata": {
392
  "ExecuteTime": {
393
  "end_time": "2024-05-01T13:03:52.623346Z",
394
  "start_time": "2024-05-01T13:03:52.577076Z"
395
  }
396
  },
397
- "cell_type": "code",
398
- "source": [
399
- "AGGREGATION = {\"hash\": [\"count\"]}\n",
400
- "\n",
401
- "for metric in rel_metrics:\n",
402
- " AGGREGATION[f\"{metric}_related\"] = [\"mean\"]\n",
403
- "\n",
404
- "for metric in ind_metrics:\n",
405
- " AGGREGATION[f\"{metric}_independent\"] = [\"mean\"]\n",
406
- "\n",
407
- "df.groupby(by=[\"end_to_start\", \"start_to_end\"]).agg(AGGREGATION)"
408
- ],
409
- "id": "fdc5ae636bffbc8b",
410
  "outputs": [
411
  {
412
  "data": {
413
- "text/plain": [
414
- " hash editdist_related edittime_related \\\n",
415
- " count mean mean \n",
416
- "end_to_start start_to_end \n",
417
- "False False 43 355.441860 364099.0625 \n",
418
- " True 129 406.627907 NaN \n",
419
- "True False 129 433.899225 NaN \n",
420
- " True 387 444.509044 NaN \n",
421
- "\n",
422
- " gptscore-ref-1-req_independent \\\n",
423
- " mean \n",
424
- "end_to_start start_to_end \n",
425
- "False False 7.255814 \n",
426
- " True 7.217054 \n",
427
- "True False 7.356589 \n",
428
- " True 7.312661 \n",
429
- "\n",
430
- " gptscore-noref-1-req_independent \\\n",
431
- " mean \n",
432
- "end_to_start start_to_end \n",
433
- "False False 8.116279 \n",
434
- " True 8.178295 \n",
435
- "True False 8.302326 \n",
436
- " True 8.276486 \n",
437
- "\n",
438
- " editdist_independent bleu_independent \\\n",
439
- " mean mean \n",
440
- "end_to_start start_to_end \n",
441
- "False False 491.069767 0.012805 \n",
442
- " True 491.069767 0.012805 \n",
443
- "True False 534.015504 0.009542 \n",
444
- " True 534.015504 0.009542 \n",
445
- "\n",
446
- " meteor_independent rouge1_independent \\\n",
447
- " mean mean \n",
448
- "end_to_start start_to_end \n",
449
- "False False 0.224961 0.202063 \n",
450
- " True 0.224961 0.202063 \n",
451
- "True False 0.221893 0.205151 \n",
452
- " True 0.221893 0.205151 \n",
453
- "\n",
454
- " rouge2_independent rougeL_independent \\\n",
455
- " mean mean \n",
456
- "end_to_start start_to_end \n",
457
- "False False 0.040718 0.136427 \n",
458
- " True 0.040718 0.136427 \n",
459
- "True False 0.039033 0.134114 \n",
460
- " True 0.039033 0.134114 \n",
461
- "\n",
462
- " bertscore_independent chrF_independent \\\n",
463
- " mean mean \n",
464
- "end_to_start start_to_end \n",
465
- "False False 0.780266 32.067005 \n",
466
- " True 0.780266 32.067005 \n",
467
- "True False 0.777162 31.753065 \n",
468
- " True 0.777162 31.753065 \n",
469
- "\n",
470
- " ter_independent \n",
471
- " mean \n",
472
- "end_to_start start_to_end \n",
473
- "False False 312.732989 \n",
474
- " True 312.732989 \n",
475
- "True False 317.717517 \n",
476
- " True 317.717517 "
477
- ],
478
  "text/html": [
479
  "<div>\n",
480
  "<style scoped>\n",
@@ -625,6 +559,71 @@
625
  " </tbody>\n",
626
  "</table>\n",
627
  "</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  ]
629
  },
630
  "execution_count": 19,
@@ -632,100 +631,31 @@
632
  "output_type": "execute_result"
633
  }
634
  ],
635
- "execution_count": 19
 
 
 
 
 
 
 
 
 
 
636
  },
637
  {
 
 
 
638
  "metadata": {
639
  "ExecuteTime": {
640
  "end_time": "2024-05-01T13:42:57.052768Z",
641
  "start_time": "2024-05-01T13:42:56.812556Z"
642
  }
643
  },
644
- "cell_type": "code",
645
- "source": "",
646
- "id": "3429b60eab154b79",
647
  "outputs": [
648
  {
649
  "data": {
650
- "text/plain": [
651
- " all golden \\\n",
652
- " spearman pearson spearman pearson \n",
653
- "relative independent \n",
654
- "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n",
655
- " bleu 0.260118 0.185995 0.269028 0.259690 \n",
656
- " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n",
657
- " editdist 0.909934 0.910641 0.710772 0.662808 \n",
658
- " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n",
659
- " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n",
660
- " meteor 0.336016 0.371949 0.068034 0.173237 \n",
661
- " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n",
662
- " rouge2 0.414256 0.340732 0.276139 0.332087 \n",
663
- " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n",
664
- " ter 0.618095 0.385515 0.575614 0.501385 \n",
665
- "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n",
666
- " bleu 0.302380 0.326167 0.302380 0.326167 \n",
667
- " chrF 0.079802 0.184202 0.079802 0.184202 \n",
668
- " editdist 0.252645 0.411131 0.252645 0.411131 \n",
669
- " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n",
670
- " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n",
671
- " meteor 0.253380 0.403564 0.253380 0.403564 \n",
672
- " rouge1 0.155926 0.136971 0.155926 0.136971 \n",
673
- " rouge2 0.218822 0.281944 0.218822 0.281944 \n",
674
- " rougeL 0.071344 0.091196 0.071344 0.091196 \n",
675
- " ter 0.305601 0.062616 0.305601 0.062616 \n",
676
- "\n",
677
- " +s2e +e2s \\\n",
678
- " spearman pearson spearman pearson \n",
679
- "relative independent \n",
680
- "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n",
681
- " bleu 0.512841 0.502827 0.109831 0.068138 \n",
682
- " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n",
683
- " editdist 0.950494 0.935064 0.861930 0.878118 \n",
684
- " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n",
685
- " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n",
686
- " meteor 0.203616 0.425775 0.372598 0.360051 \n",
687
- " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n",
688
- " rouge2 0.523559 0.537560 0.323911 0.282872 \n",
689
- " rougeL -0.022288 -0.004664 0.012409 0.016372 \n",
690
- " ter 0.774086 0.462554 0.529338 0.388592 \n",
691
- "edittime bertscore NaN NaN NaN NaN \n",
692
- " bleu NaN NaN NaN NaN \n",
693
- " chrF NaN NaN NaN NaN \n",
694
- " editdist NaN NaN NaN NaN \n",
695
- " gptscore-noref-1-req NaN NaN NaN NaN \n",
696
- " gptscore-ref-1-req NaN NaN NaN NaN \n",
697
- " meteor NaN NaN NaN NaN \n",
698
- " rouge1 NaN NaN NaN NaN \n",
699
- " rouge2 NaN NaN NaN NaN \n",
700
- " rougeL NaN NaN NaN NaN \n",
701
- " ter NaN NaN NaN NaN \n",
702
- "\n",
703
- " +e2s+s2e \n",
704
- " spearman pearson \n",
705
- "relative independent \n",
706
- "editdist bertscore -0.135421 -0.091748 \n",
707
- " bleu 0.229712 0.145062 \n",
708
- " chrF -0.156914 -0.093376 \n",
709
- " editdist 0.939318 0.962305 \n",
710
- " gptscore-noref-1-req 0.012102 0.066882 \n",
711
- " gptscore-ref-1-req 0.013012 0.033618 \n",
712
- " meteor 0.392262 0.401802 \n",
713
- " rouge1 -0.054034 -0.030799 \n",
714
- " rouge2 0.433859 0.324538 \n",
715
- " rougeL 0.021983 -0.010644 \n",
716
- " ter 0.591684 0.354459 \n",
717
- "edittime bertscore NaN NaN \n",
718
- " bleu NaN NaN \n",
719
- " chrF NaN NaN \n",
720
- " editdist NaN NaN \n",
721
- " gptscore-noref-1-req NaN NaN \n",
722
- " gptscore-ref-1-req NaN NaN \n",
723
- " meteor NaN NaN \n",
724
- " rouge1 NaN NaN \n",
725
- " rouge2 NaN NaN \n",
726
- " rougeL NaN NaN \n",
727
- " ter NaN NaN "
728
- ],
729
  "text/html": [
730
  "<div>\n",
731
  "<style scoped>\n",
@@ -1077,32 +1007,7 @@
1077
  " </tbody>\n",
1078
  "</table>\n",
1079
  "</div>"
1080
- ]
1081
- },
1082
- "execution_count": 47,
1083
- "metadata": {},
1084
- "output_type": "execute_result"
1085
- }
1086
- ],
1087
- "execution_count": 47
1088
- },
1089
- {
1090
- "metadata": {
1091
- "ExecuteTime": {
1092
- "end_time": "2024-05-01T13:49:09.514129Z",
1093
- "start_time": "2024-05-01T13:49:09.295101Z"
1094
- }
1095
- },
1096
- "cell_type": "code",
1097
- "source": [
1098
- "from analysis_util import get_ref_only_correlations_for_groups\n",
1099
- "\n",
1100
- "get_ref_only_correlations_for_groups(df)"
1101
- ],
1102
- "id": "a3531f28722fa5bc",
1103
- "outputs": [
1104
- {
1105
- "data": {
1106
  "text/plain": [
1107
  " all golden \\\n",
1108
  " spearman pearson spearman pearson \n",
@@ -1181,7 +1086,28 @@
1181
  " rouge2 NaN NaN \n",
1182
  " rougeL NaN NaN \n",
1183
  " ter NaN NaN "
1184
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1185
  "text/html": [
1186
  "<div>\n",
1187
  "<style scoped>\n",
@@ -1533,6 +1459,85 @@
1533
  " </tbody>\n",
1534
  "</table>\n",
1535
  "</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1536
  ]
1537
  },
1538
  "execution_count": 50,
@@ -1540,26 +1545,78 @@
1540
  "output_type": "execute_result"
1541
  }
1542
  ],
1543
- "execution_count": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1544
  }
1545
  ],
1546
  "metadata": {
1547
  "kernelspec": {
1548
- "display_name": "Python 3",
1549
  "language": "python",
1550
  "name": "python3"
1551
  },
1552
  "language_info": {
1553
  "codemirror_mode": {
1554
  "name": "ipython",
1555
- "version": 2
1556
  },
1557
  "file_extension": ".py",
1558
  "mimetype": "text/x-python",
1559
  "name": "python",
1560
  "nbconvert_exporter": "python",
1561
- "pygments_lexer": "ipython2",
1562
- "version": "2.7.6"
1563
  }
1564
  },
1565
  "nbformat": 4,
 
5
  "id": "initial_id",
6
  "metadata": {
7
  "collapsed": true,
8
+ "jupyter": {
9
+ "outputs_hidden": true
10
+ },
11
  "ExecuteTime": {
12
+ "end_time": "2024-05-01T15:23:17.507403Z",
13
+ "start_time": "2024-05-01T15:23:17.497406Z"
14
  }
15
  },
16
  "source": [
 
18
  "\n",
19
  "import config"
20
  ],
21
+ "outputs": [],
22
+ "execution_count": 7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  },
24
  {
25
+ "cell_type": "code",
26
+ "id": "2ac8757a17e62293",
27
  "metadata": {
28
  "ExecuteTime": {
29
+ "end_time": "2024-05-01T15:23:19.365525Z",
30
+ "start_time": "2024-05-01T15:23:19.120308Z"
31
  }
32
  },
 
33
  "source": [
34
  "df = pd.read_csv(config.SYNTHETIC_DATASET_ARTIFACT, index_col=0)\n",
35
  "\n",
36
  "df.head()"
37
  ],
 
38
  "outputs": [
39
  {
40
  "data": {
 
301
  "</div>"
302
  ]
303
  },
304
+ "execution_count": 8,
305
+ "metadata": {},
306
+ "output_type": "execute_result"
307
+ }
308
+ ],
309
+ "execution_count": 8
310
+ },
311
+ {
312
+ "metadata": {
313
+ "ExecuteTime": {
314
+ "end_time": "2024-05-01T15:11:08.418257Z",
315
+ "start_time": "2024-05-01T15:11:08.408943Z"
316
+ }
317
+ },
318
+ "cell_type": "code",
319
+ "source": "len(set(df['session'].to_list()))",
320
+ "id": "4bcbc0f1d3d6d248",
321
+ "outputs": [
322
+ {
323
+ "data": {
324
+ "text/plain": [
325
+ "9"
326
+ ]
327
+ },
328
  "execution_count": 6,
329
  "metadata": {},
330
  "output_type": "execute_result"
 
333
  "execution_count": 6
334
  },
335
  {
336
+ "cell_type": "code",
337
+ "execution_count": 15,
338
+ "id": "d19c12dd10b25c75",
339
  "metadata": {
340
  "ExecuteTime": {
341
  "end_time": "2024-05-01T13:02:40.761645Z",
342
  "start_time": "2024-05-01T13:02:40.740647Z"
343
  }
344
  },
 
 
 
 
 
 
345
  "outputs": [
346
  {
347
  "data": {
 
354
  "output_type": "execute_result"
355
  }
356
  ],
357
+ "source": [
358
+ "rel_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_related\")]\n",
359
+ "rel_metrics"
360
+ ]
361
  },
362
  {
363
+ "cell_type": "code",
364
+ "execution_count": 16,
365
+ "id": "79d644cd780b28a1",
366
  "metadata": {
367
  "ExecuteTime": {
368
  "end_time": "2024-05-01T13:02:44.072037Z",
369
  "start_time": "2024-05-01T13:02:44.055039Z"
370
  }
371
  },
 
 
 
 
 
 
372
  "outputs": [
373
  {
374
  "data": {
 
391
  "output_type": "execute_result"
392
  }
393
  ],
394
+ "source": [
395
+ "ind_metrics = [col.split(\"_\")[0] for col in df.columns if col.endswith(\"_independent\")]\n",
396
+ "ind_metrics"
397
+ ]
398
  },
399
  {
400
+ "cell_type": "code",
401
+ "execution_count": 19,
402
+ "id": "fdc5ae636bffbc8b",
403
  "metadata": {
404
  "ExecuteTime": {
405
  "end_time": "2024-05-01T13:03:52.623346Z",
406
  "start_time": "2024-05-01T13:03:52.577076Z"
407
  }
408
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  "outputs": [
410
  {
411
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  "text/html": [
413
  "<div>\n",
414
  "<style scoped>\n",
 
559
  " </tbody>\n",
560
  "</table>\n",
561
  "</div>"
562
+ ],
563
+ "text/plain": [
564
+ " hash editdist_related edittime_related \\\n",
565
+ " count mean mean \n",
566
+ "end_to_start start_to_end \n",
567
+ "False False 43 355.441860 364099.0625 \n",
568
+ " True 129 406.627907 NaN \n",
569
+ "True False 129 433.899225 NaN \n",
570
+ " True 387 444.509044 NaN \n",
571
+ "\n",
572
+ " gptscore-ref-1-req_independent \\\n",
573
+ " mean \n",
574
+ "end_to_start start_to_end \n",
575
+ "False False 7.255814 \n",
576
+ " True 7.217054 \n",
577
+ "True False 7.356589 \n",
578
+ " True 7.312661 \n",
579
+ "\n",
580
+ " gptscore-noref-1-req_independent \\\n",
581
+ " mean \n",
582
+ "end_to_start start_to_end \n",
583
+ "False False 8.116279 \n",
584
+ " True 8.178295 \n",
585
+ "True False 8.302326 \n",
586
+ " True 8.276486 \n",
587
+ "\n",
588
+ " editdist_independent bleu_independent \\\n",
589
+ " mean mean \n",
590
+ "end_to_start start_to_end \n",
591
+ "False False 491.069767 0.012805 \n",
592
+ " True 491.069767 0.012805 \n",
593
+ "True False 534.015504 0.009542 \n",
594
+ " True 534.015504 0.009542 \n",
595
+ "\n",
596
+ " meteor_independent rouge1_independent \\\n",
597
+ " mean mean \n",
598
+ "end_to_start start_to_end \n",
599
+ "False False 0.224961 0.202063 \n",
600
+ " True 0.224961 0.202063 \n",
601
+ "True False 0.221893 0.205151 \n",
602
+ " True 0.221893 0.205151 \n",
603
+ "\n",
604
+ " rouge2_independent rougeL_independent \\\n",
605
+ " mean mean \n",
606
+ "end_to_start start_to_end \n",
607
+ "False False 0.040718 0.136427 \n",
608
+ " True 0.040718 0.136427 \n",
609
+ "True False 0.039033 0.134114 \n",
610
+ " True 0.039033 0.134114 \n",
611
+ "\n",
612
+ " bertscore_independent chrF_independent \\\n",
613
+ " mean mean \n",
614
+ "end_to_start start_to_end \n",
615
+ "False False 0.780266 32.067005 \n",
616
+ " True 0.780266 32.067005 \n",
617
+ "True False 0.777162 31.753065 \n",
618
+ " True 0.777162 31.753065 \n",
619
+ "\n",
620
+ " ter_independent \n",
621
+ " mean \n",
622
+ "end_to_start start_to_end \n",
623
+ "False False 312.732989 \n",
624
+ " True 312.732989 \n",
625
+ "True False 317.717517 \n",
626
+ " True 317.717517 "
627
  ]
628
  },
629
  "execution_count": 19,
 
631
  "output_type": "execute_result"
632
  }
633
  ],
634
+ "source": [
635
+ "AGGREGATION = {\"hash\": [\"count\"]}\n",
636
+ "\n",
637
+ "for metric in rel_metrics:\n",
638
+ " AGGREGATION[f\"{metric}_related\"] = [\"mean\"]\n",
639
+ "\n",
640
+ "for metric in ind_metrics:\n",
641
+ " AGGREGATION[f\"{metric}_independent\"] = [\"mean\"]\n",
642
+ "\n",
643
+ "df.groupby(by=[\"end_to_start\", \"start_to_end\"]).agg(AGGREGATION)"
644
+ ]
645
  },
646
  {
647
+ "cell_type": "code",
648
+ "execution_count": 47,
649
+ "id": "3429b60eab154b79",
650
  "metadata": {
651
  "ExecuteTime": {
652
  "end_time": "2024-05-01T13:42:57.052768Z",
653
  "start_time": "2024-05-01T13:42:56.812556Z"
654
  }
655
  },
 
 
 
656
  "outputs": [
657
  {
658
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
659
  "text/html": [
660
  "<div>\n",
661
  "<style scoped>\n",
 
1007
  " </tbody>\n",
1008
  "</table>\n",
1009
  "</div>"
1010
+ ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1011
  "text/plain": [
1012
  " all golden \\\n",
1013
  " spearman pearson spearman pearson \n",
 
1086
  " rouge2 NaN NaN \n",
1087
  " rougeL NaN NaN \n",
1088
  " ter NaN NaN "
1089
+ ]
1090
+ },
1091
+ "execution_count": 47,
1092
+ "metadata": {},
1093
+ "output_type": "execute_result"
1094
+ }
1095
+ ],
1096
+ "source": []
1097
+ },
1098
+ {
1099
+ "cell_type": "code",
1100
+ "execution_count": 50,
1101
+ "id": "a3531f28722fa5bc",
1102
+ "metadata": {
1103
+ "ExecuteTime": {
1104
+ "end_time": "2024-05-01T13:49:09.514129Z",
1105
+ "start_time": "2024-05-01T13:49:09.295101Z"
1106
+ }
1107
+ },
1108
+ "outputs": [
1109
+ {
1110
+ "data": {
1111
  "text/html": [
1112
  "<div>\n",
1113
  "<style scoped>\n",
 
1459
  " </tbody>\n",
1460
  "</table>\n",
1461
  "</div>"
1462
+ ],
1463
+ "text/plain": [
1464
+ " all golden \\\n",
1465
+ " spearman pearson spearman pearson \n",
1466
+ "relative independent \n",
1467
+ "editdist bertscore -0.184962 -0.129057 -0.316215 -0.254700 \n",
1468
+ " bleu 0.260118 0.185995 0.269028 0.259690 \n",
1469
+ " chrF -0.199200 -0.129029 -0.343201 -0.300656 \n",
1470
+ " editdist 0.909934 0.910641 0.710772 0.662808 \n",
1471
+ " gptscore-noref-1-req 0.032048 0.055364 0.155510 0.048588 \n",
1472
+ " gptscore-ref-1-req 0.024550 0.035295 -0.009830 -0.062574 \n",
1473
+ " meteor 0.336016 0.371949 0.068034 0.173237 \n",
1474
+ " rouge1 -0.077574 -0.043738 -0.187349 -0.163230 \n",
1475
+ " rouge2 0.414256 0.340732 0.276139 0.332087 \n",
1476
+ " rougeL 0.006513 -0.008078 -0.041502 -0.034867 \n",
1477
+ " ter 0.618095 0.385515 0.575614 0.501385 \n",
1478
+ "edittime bertscore 0.140481 0.158807 0.140481 0.158807 \n",
1479
+ " bleu 0.302380 0.326167 0.302380 0.326167 \n",
1480
+ " chrF 0.079802 0.184202 0.079802 0.184202 \n",
1481
+ " editdist 0.252645 0.411131 0.252645 0.411131 \n",
1482
+ " gptscore-noref-1-req 0.206465 0.026235 0.206465 0.026235 \n",
1483
+ " gptscore-ref-1-req 0.130419 -0.055218 0.130419 -0.055218 \n",
1484
+ " meteor 0.253380 0.403564 0.253380 0.403564 \n",
1485
+ " rouge1 0.155926 0.136971 0.155926 0.136971 \n",
1486
+ " rouge2 0.218822 0.281944 0.218822 0.281944 \n",
1487
+ " rougeL 0.071344 0.091196 0.071344 0.091196 \n",
1488
+ " ter 0.305601 0.062616 0.305601 0.062616 \n",
1489
+ "\n",
1490
+ " +s2e +e2s \\\n",
1491
+ " spearman pearson spearman pearson \n",
1492
+ "relative independent \n",
1493
+ "editdist bertscore -0.308494 -0.113525 -0.181393 -0.165924 \n",
1494
+ " bleu 0.512841 0.502827 0.109831 0.068138 \n",
1495
+ " chrF -0.238124 -0.064922 -0.233123 -0.201726 \n",
1496
+ " editdist 0.950494 0.935064 0.861930 0.878118 \n",
1497
+ " gptscore-noref-1-req 0.067857 0.047215 -0.029048 -0.013128 \n",
1498
+ " gptscore-ref-1-req -0.015178 -0.036001 0.071345 0.087584 \n",
1499
+ " meteor 0.203616 0.425775 0.372598 0.360051 \n",
1500
+ " rouge1 -0.139874 -0.065543 -0.082093 -0.035603 \n",
1501
+ " rouge2 0.523559 0.537560 0.323911 0.282872 \n",
1502
+ " rougeL -0.022288 -0.004664 0.012409 0.016372 \n",
1503
+ " ter 0.774086 0.462554 0.529338 0.388592 \n",
1504
+ "edittime bertscore NaN NaN NaN NaN \n",
1505
+ " bleu NaN NaN NaN NaN \n",
1506
+ " chrF NaN NaN NaN NaN \n",
1507
+ " editdist NaN NaN NaN NaN \n",
1508
+ " gptscore-noref-1-req NaN NaN NaN NaN \n",
1509
+ " gptscore-ref-1-req NaN NaN NaN NaN \n",
1510
+ " meteor NaN NaN NaN NaN \n",
1511
+ " rouge1 NaN NaN NaN NaN \n",
1512
+ " rouge2 NaN NaN NaN NaN \n",
1513
+ " rougeL NaN NaN NaN NaN \n",
1514
+ " ter NaN NaN NaN NaN \n",
1515
+ "\n",
1516
+ " +e2s+s2e \n",
1517
+ " spearman pearson \n",
1518
+ "relative independent \n",
1519
+ "editdist bertscore -0.135421 -0.091748 \n",
1520
+ " bleu 0.229712 0.145062 \n",
1521
+ " chrF -0.156914 -0.093376 \n",
1522
+ " editdist 0.939318 0.962305 \n",
1523
+ " gptscore-noref-1-req 0.012102 0.066882 \n",
1524
+ " gptscore-ref-1-req 0.013012 0.033618 \n",
1525
+ " meteor 0.392262 0.401802 \n",
1526
+ " rouge1 -0.054034 -0.030799 \n",
1527
+ " rouge2 0.433859 0.324538 \n",
1528
+ " rougeL 0.021983 -0.010644 \n",
1529
+ " ter 0.591684 0.354459 \n",
1530
+ "edittime bertscore NaN NaN \n",
1531
+ " bleu NaN NaN \n",
1532
+ " chrF NaN NaN \n",
1533
+ " editdist NaN NaN \n",
1534
+ " gptscore-noref-1-req NaN NaN \n",
1535
+ " gptscore-ref-1-req NaN NaN \n",
1536
+ " meteor NaN NaN \n",
1537
+ " rouge1 NaN NaN \n",
1538
+ " rouge2 NaN NaN \n",
1539
+ " rougeL NaN NaN \n",
1540
+ " ter NaN NaN "
1541
  ]
1542
  },
1543
  "execution_count": 50,
 
1545
  "output_type": "execute_result"
1546
  }
1547
  ],
1548
+ "source": [
1549
+ "from analysis_util import get_correlations_for_groups\n",
1550
+ "\n",
1551
+ "get_correlations_for_groups(df, right_side=\"ind\")"
1552
+ ]
1553
+ },
1554
+ {
1555
+ "cell_type": "code",
1556
+ "execution_count": null,
1557
+ "id": "d5dc33a4251baf9a",
1558
+ "metadata": {},
1559
+ "outputs": [],
1560
+ "source": [
1561
+ "get_correlations_for_groups(df, right_side=\"aggr\")"
1562
+ ]
1563
+ },
1564
+ {
1565
+ "metadata": {
1566
+ "ExecuteTime": {
1567
+ "end_time": "2024-05-01T15:25:18.226195Z",
1568
+ "start_time": "2024-05-01T15:25:17.464762Z"
1569
+ }
1570
+ },
1571
+ "cell_type": "code",
1572
+ "source": [
1573
+ "from matplotlib import pyplot as plt\n",
1574
+ "\n",
1575
+ "plt.scatter(x=df['edittime_related'], y=df['editdist_related'])"
1576
+ ],
1577
+ "id": "5df60ac60034b274",
1578
+ "outputs": [
1579
+ {
1580
+ "data": {
1581
+ "text/plain": [
1582
+ "<matplotlib.collections.PathCollection at 0x17c179da970>"
1583
+ ]
1584
+ },
1585
+ "execution_count": 11,
1586
+ "metadata": {},
1587
+ "output_type": "execute_result"
1588
+ },
1589
+ {
1590
+ "data": {
1591
+ "text/plain": [
1592
+ "<Figure size 640x480 with 1 Axes>"
1593
+ ],
1594
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGvCAYAAABxUC54AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuyklEQVR4nO3df3TU1Z3/8dckkAxiMjTYZCYVNaUVGYP8UIMRbbUGSaU55UjbpV/wsPUHbgqu+LOwu5pNtUZcf3T9UbCuBW20tG4PuqEapVCh1UhsIltCWLSYBZRMYk2ZIG4CzHy+f8SZMuTXzGQyczPzfJwz58Bn7kzen8+ZM/M6937uvTbLsiwBAAAYJC3RBQAAAJyMgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMM6oRBcQDb/fr4MHDyorK0s2my3R5QAAgDBYlqXDhw8rPz9faWkD95GMyIBy8OBBTZgwIdFlAACAKBw4cECnn376gG1GZEDJysqS1HOC2dnZCa4GAACEo7OzUxMmTAj+jg9kRAaUwLBOdnY2AQUAgBEmnNszuEkWAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADDOiFyoDYnl81uqb+lQ++Eu5WbZVVSQo/Q09kQCAMQOAQURqW1qVWVNs1q9XcFjLoddFWVulRa6ElgZACCZMMSDsNU2taq8ujEknEiSx9ul8upG1Ta1JqgyAECyIaAgLD6/pcqaZll9PBc4VlnTLJ+/rxYAAESGgIKw1Ld09Oo5OZElqdXbpfqWjvgVBQBIWgQUhKX9cP/hJJp2AAAMhICCsORm2WPaDgCAgRBQEJaighy5HHb1N5nYpp7ZPEUFOfEsCwCQpAgoCEt6mk0VZW5J6hVSAv+vKHOzHgoAICYIKAhbaaFLqxfNkNMROozjdNi1etEM1kEBAMQMC7UhIqWFLs12O1lJFgAwrAgoiFh6mk3FE8cnugwAQBJjiAcAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYJ6KA4vP5dNddd6mgoEBjxozRxIkTdc8998iyrGAby7J09913y+VyacyYMSopKdF7770X8j4dHR1auHChsrOzNW7cOF133XX65JNPYnNGAABgxIsooKxatUqrV6/W448/rt27d2vVqlV64IEH9NhjjwXbPPDAA3r00Ue1Zs0abd++XWPHjtWcOXPU1dUVbLNw4ULt2rVLmzZt0saNG7Vt2zYtWbIkdmcFAABGNJt1YvfHIL7xjW8oLy9PTz/9dPDY/PnzNWbMGFVXV8uyLOXn5+u2227T7bffLknyer3Ky8vTunXrtGDBAu3evVtut1tvv/22LrjgAklSbW2trrrqKn3wwQfKz88ftI7Ozk45HA55vV5lZ2dHes5G8Pkt1bd0qP1wl3Kz7CoqyFF6mi3RZQEAMGwi+f2OqAfl4osv1ubNm/Xuu+9Kkv77v/9bf/jDH/T1r39dktTS0iKPx6OSkpLgaxwOh2bOnKm6ujpJUl1dncaNGxcMJ5JUUlKitLQ0bd++vc+/293drc7OzpDHSFbb1KpLVm3Rd596Szev36HvPvWWLlm1RbVNrYkuDQAAI0QUUFasWKEFCxbonHPO0ejRozV9+nQtX75cCxculCR5PB5JUl5eXsjr8vLygs95PB7l5uaGPD9q1Cjl5OQE25ysqqpKDocj+JgwYUIkZRultqlV5dWNavV2hRz3eLtUXt1ISAEAQBEGlF/96ld67rnn9Pzzz6uxsVHPPPOMHnzwQT3zzDPDVZ8kaeXKlfJ6vcHHgQMHhvXvDRef31JlTbP6GlMLHKusaZbPH/aoGwAASWlUJI3vuOOOYC+KJE2ZMkX7
9u1TVVWVFi9eLKfTKUlqa2uTy+UKvq6trU3Tpk2TJDmdTrW3t4e87/Hjx9XR0RF8/ckyMzOVmZkZSalGqm/p6NVzciJLUqu3S/UtHSqeOD5+hQEAYJiIelA+/fRTpaWFviQ9PV1+v1+SVFBQIKfTqc2bNwef7+zs1Pbt21VcXCxJKi4u1qFDh9TQ0BBss2XLFvn9fs2cOTPqExkJ2g/3H06iaQcAQLKKqAelrKxMP/rRj3TGGWfo3HPP1TvvvKOHH35Y1157rSTJZrNp+fLluvfee/XlL39ZBQUFuuuuu5Sfn6958+ZJkiZPnqzS0lLdcMMNWrNmjY4dO6Zly5ZpwYIFYc3gGclys+wxbQcAQLKKKKA89thjuuuuu/T9739f7e3tys/P14033qi777472ObOO+/UkSNHtGTJEh06dEiXXHKJamtrZbf/7Uf3ueee07Jly3TFFVcoLS1N8+fP16OPPhq7szJUUUGOXA67PN6uPu9DsUlyOnqmHAMAkMoiWgfFFCN5HZTALB5JISElsALK6kUzVFro6vU6AABGumFbBwVDV1ro0upFM+R0hA7jOB12wgkAAJ+JaIgHsVFa6NJst5OVZAEA6AcBJUHS02xMJQYAoB8M8QAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxRiW6AMSXz2+pvqVD7Ye7lJtlV1FBjtLTbIkuCwCAEASUFFLb1KrKmma1eruCx1wOuyrK3CotdCWwMgAAQjHEkyJqm1pVXt0YEk4kyePtUnl1o2qbWhNUGQAAvRFQUoDPb6mypllWH88FjlXWNMvn76sFAADxR0BJAfUtHb16Tk5kSWr1dqm+pSN+RQEAMAACSgpoP9x/OImmHQAAw42AkgJys+wxbQcAwHAjoKSAooIcuRx29TeZ2Kae2TxFBTnxLAsAgH4RUFJAeppNFWVuSeoVUgL/ryhzsx4KAMAYBJQUUVro0upFM+R0hA7jOB12rV40g3VQAABGYaG2FFJa6NJst5OVZAEAxiOgpJj0NJuKJ45PdBkAAAyIIR4AAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBxWkg2Tz2+xRDwAAHFCQAlDbVOrKmua1ertCh5zOeyqKHOzyR4AAMOAIZ5B1Da1qry6MSScSJLH26Xy6kbVNrUmqDIAAJIXAWUAPr+lyppmWX08FzhWWdMsn7+vFgAAIFoElAHUt3T06jk5kSWp1dul+paO+BUFAEAKIKAMoP1w/+EkmnYAACA8BJQB5GbZY9oOAACEh4AygKKCHLkcdvU3mdimntk8RQU58SwLAICkR0AZQHqaTRVlbknqFVIC/68oc7MeCgAAMUZAGURpoUurF82Q0xE6jON02LV60QzWQQEAYBiwUFsYSgtdmu12spIsAABxQkAJU3qaTcUTxye6DAAAUgJDPAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYJ+KA8uGHH2rRokUaP368xowZoylTpuiPf/xj8HnLsnT33XfL5XJpzJgxKikp0XvvvRfyHh0dHVq4cKGys7M1btw4XXfddfrkk0+GfjYAACApRBRQ/vrXv2rWrFkaPXq0XnnlFTU3N+uhhx7S5z73uWCbBx54QI8++qjWrFmj7du3a+zYsZozZ466urqCbRYuXKhdu3Zp06ZN2rhxo7Zt26YlS5bE7qwAAMCIZrMsywq38YoVK/TGG2/o97//fZ/PW5al/Px83Xbbbbr99tslSV6vV3l5eVq3bp0WLFig3bt3y+126+2339YFF1wgSaqtrdVVV12lDz74QPn5+YPW0dnZ
KYfDIa/Xq+zs7HDLBwAACRTJ73dEPSj/9V//pQsuuEDf/va3lZubq+nTp+upp54KPt/S0iKPx6OSkpLgMYfDoZkzZ6qurk6SVFdXp3HjxgXDiSSVlJQoLS1N27dv7/Pvdnd3q7OzM+QxEvj8lur2fqyXdnyour0fy+cPOwsCAJDSRkXS+P3339fq1at166236p/+6Z/09ttv6x//8R+VkZGhxYsXy+PxSJLy8vJCXpeXlxd8zuPxKDc3N7SIUaOUk5MTbHOyqqoqVVZWRlJqwtU2taqyplmt3r8NbbkcdlWUuVVa6EpgZQAAmC+iHhS/368ZM2bovvvu0/Tp07VkyRLdcMMNWrNmzXDVJ0lauXKlvF5v8HHgwIFh/XtDVdvUqvLqxpBwIkkeb5fKqxtV29SaoMoAABgZIgooLpdLbrc75NjkyZO1f/9+SZLT6ZQktbW1hbRpa2sLPud0OtXe3h7y/PHjx9XR0RFsc7LMzExlZ2eHPEzl81uqrGlWX4M5gWOVNc0M9wAAMICIAsqsWbO0Z8+ekGPvvvuuzjzzTElSQUGBnE6nNm/eHHy+s7NT27dvV3FxsSSpuLhYhw4dUkNDQ7DNli1b5Pf7NXPmzKhPxBT1LR29ek5OZElq9XapvqUjfkUBADDCRHQPyi233KKLL75Y9913n77zne+ovr5eP/3pT/XTn/5UkmSz2bR8+XLde++9+vKXv6yCggLdddddys/P17x58yT19LiUlpYGh4aOHTumZcuWacGCBWHN4DFd++H+w0k07QAASEURBZQLL7xQGzZs0MqVK/XDH/5QBQUF+vGPf6yFCxcG29x55506cuSIlixZokOHDumSSy5RbW2t7HZ7sM1zzz2nZcuW6YorrlBaWprmz5+vRx99NHZnlUC5WfbBG0XQDgCAVBTROiimMHkdFJ/f0iWrtsjj7erzPhSbJKfDrj/84GtKT7PFuzwAABJm2NZBweDS02yqKOu5kfjk+BH4f0WZm3ACAMAACCjDoLTQpdWLZsjpCB3GcTrsWr1oBuugAAAwiIjuQUH4Sgtdmu12qr6lQ+2Hu5SbZVdRQQ49JwAAhIGAMozS02wqnjg+0WUAADDiMMQDAACMQ0ABAADGIaAAAADjEFAAAIBxuEnWAD6/xWwfAABOQEBJsNqmVlXWNIdsMOhy2FVR5ma9FABAymKIJ4Fqm1pVXt3Ya/djj7dL5dWNqm1qTVBlAAAkFgElQXx+S5U1zX3u1xM4VlnTLJ9/xG2VBADAkBFQEqS+paNXz8mJLEmt3i7Vt3TErygAAAxBQEmQ9sP9h5No2gEAkEwIKAmSm2UfvFEE7QAASCYElAQpKsiRy2FXf5OJbeqZzVNUkBPPsgAAMAIBJUHS02yqKHNLUq+QEvh/RZmb9VAAACmJgJJApYUurV40Q05H6DCO02HX6kUzWAcFAJCyWKgtwUoLXZrtdrKSLAAAJyCgGCA9zabiieMTXQYAAMZgiAcAABiHgAIAAIxDQAEAAMbhHpQ++PwWN60CAJBABJST1Da1qrKmOWSfHJfDrooyN9N+AQCIE4Z4TlDb1Kry6sZem/h5vF0qr25UbVNrgioDACC1EFA+4/NbqqxpltXHc4FjlTXN8vn7agEAAGKJgPKZ+paOXj0nJ7IktXq7VN/SEb+iAABIUQSUz7Qf7j+cRNMOAABEj4Dymdws++CNImgHAACixyyezxQV5MjlsMvj7erzPhSbejbxKyrIiXdpA2JKNAAgGRFQPpOeZlNFmVvl1Y2ySSEhJfBzX1HmNurHnynRAIBkxRDPCUoLXVq9aIacjtBhHKfDrtWLZhj1o8+UaABAMqMH5SSlhS7NdjuNHjYZbEq0TT1Tome7nUbVDQBAuAgofUhPs6l44vhEl9GvSKZEm3weAAD0h4ASAVNuSGVKNAAg2RFQwmTSDalMiQYAJDtukg2DaTekBqZE99d3Y1NPeDJtSjQAAOEioAzCxD16AlOiJfUKKSdOiZak
ur0f66UdH6pu78fsIwQAGDEY4hmEqTekBqZEnzzs5Pxs2EmSLlm1xYghKQAAIkVAGYTJN6T2NyV6U7NH5dWNvXp9AkNSpq3pAgDAyQgogzDhhtSBZg+dPCWaNVIAAMmAgDKIRO/RE+nsIVOHpAAAiAQ3yQ4i3BtSh6M3IprZQyYPSQEAEC4CShgSsUdPtLOHTBiSAgBgqBjiCVO89+iJdqgm0UNSAADEAgElAvHcoyfaoZrAkFR5daNsUkhIGe4hKQAAYoUhnhjw+a2YL4g2lKGaRAxJAQAQS/SgDNFw7dEz1KGaeA9JAQAQS/SgDMFw7tETi9lDgSGpb077goonjiecAABGDAJKlOKxRw9DNQCAVMUQT5TitSAaQzUAgFREQIlSPBdEi+fsIQAATMAQT5RYEA0AgOFDQIlSYJZNfwMtNvXM5mFBNAAAIkdAiVIi9+gBACDZEVCGgFk2AAAMD26SHSJm2QAAEHsElBhglg0AALHFEA8AADAOAQUAABiHgAIAAIzDPSgG8fktbrYFAEAEFGPUNrWqsqY5ZH8fl8OuijI305UBACmHIZ4h8vkt1e39WC/t+FB1ez+Oavfi2qZWlVc39tp80OPtUnl1o2qbWmNVLgAAIwI9KEMQi14Pn99SZU2z+oo1lnpWpa2sadZst5PhHgBAyhhSD8r9998vm82m5cuXB491dXVp6dKlGj9+vE499VTNnz9fbW1tIa/bv3+/5s6dq1NOOUW5ubm64447dPz48aGUEnex6vWob+no9R4nsiS1ertU39IxlHIBABhRog4ob7/9tp588kmdd955IcdvueUW1dTU6IUXXtDWrVt18OBBXX311cHnfT6f5s6dq6NHj+rNN9/UM888o3Xr1unuu++O/izibLBeD6mn16O/4Z4Th4Xe+PNfwvqb7Yf7DzEAACSbqIZ4PvnkEy1cuFBPPfWU7r333uBxr9erp59+Ws8//7y+9rWvSZLWrl2ryZMn66233tJFF12k1157Tc3Nzfrtb3+rvLw8TZs2Tffcc49+8IMf6F//9V+VkZERmzMbRpH0epy8wmxfw0LhyM2yD94IAIAkEVUPytKlSzV37lyVlJSEHG9oaNCxY8dCjp9zzjk644wzVFdXJ0mqq6vTlClTlJeXF2wzZ84cdXZ2ateuXX3+ve7ubnV2doY8Einc3oyT2/U3LDQQm3ruaykqyImkRAAARrSIe1DWr1+vxsZGvf32272e83g8ysjI0Lhx40KO5+XlyePxBNucGE4Czwee60tVVZUqKysjLXXYhNubcWK7gYaF+hO4JbaizM0NsgCAlBJRD8qBAwd0880367nnnpPdHr8hh5UrV8rr9QYfBw4ciNvf7ktRQY5cDrv6iwx99XoMNizUF6fDrtWLZrAOCgAg5UTUg9LQ0KD29nbNmDEjeMzn82nbtm16/PHH9eqrr+ro0aM6dOhQSC9KW1ubnE6nJMnpdKq+vj7kfQOzfAJtTpaZmanMzMxISh1W6Wk2VZS5VV7dKJsU0ivSX69HuMNCyy6fqC/nZbGSLAAgpUXUg3LFFVdo586d2rFjR/BxwQUXaOHChcF/jx49Wps3bw6+Zs+ePdq/f7+Ki4slScXFxdq5c6fa29uDbTZt2qTs7Gy53e4YndbwKy10afWiGXI6QnuS+uv1CHdYaNaXPq9vTvuCiieOJ5wAAFJWRD0oWVlZKiwsDDk2duxYjR8/Pnj8uuuu06233qqcnBxlZ2frpptuUnFxsS666CJJ0pVXXim3261rrrlGDzzwgDwej/7lX/5FS5cuNaqXJBylhS7NdjvD2j8nMCzk8Xb1eR+KTT3hhpthAQAYhpVkH3nkEaWlpWn+/Pnq7u7WnDlz9JOf/CT4fHp6ujZu3Kjy8nIVFxdr7NixWrx4sX74wx/GupS4SE+z9ZpK3F+7SIeFAABIVTbLsiLfPCbBOjs75XA45PV6lZ2dnehyImLipoDsogwAiIdIfr/ZiyfOIhkWigcT
AxMAAPSgpLDAwnEnfwACUYkpzgCAWIrk93tImwVi5BrqfkIAAAwnAkqKYhdlAIDJCCgpKtr9hAAAiAcCSoqKZj8hAADihYCSoqLZTwgAgHghoMSAz2+pbu/HemnHh6rb+/GIuLE0sHCcpF4hhYXjAACJxjooQzSS1xEJ7Cd0cv3OEVI/ACB5sQ7KECTLOiKsJAsAiAdWko2DwdYRsalnHZHZbqfxP/bh7icEAEC8cA9KlFhHBACA4UMPSpRYRyQ2GF4CAPSFgBIl1hEZupF8gzEAYHgxxBMl1hEZmsANxicPk3m8XSqvblRtU2uCKgMAmICAEqXhXEdkJK6rEgk2KgQADIYhniEYjnVEUmHYI5IbjJldBACpiYAyRKWFLs12O2Nyo2d/66oEhj1Gyroqg+EGYwDAYAgoMRCLdUSSaV2VwXCDMQBgMNyDYohUWleFG4wBAIMhoBgilYY92KgQADAYAoohUm3YI3CDsdMRej5Ohz1p7rUBAESPe1AMERj28Hi7+rwPxaaeH+9kGvaI5Q3GAIDkQkAxRGDYo7y6UTYpJKQk87AHGxUCAPrCEI9BGPYAAKAHPSiGYdgDAAACipEY9gAApDqGeAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGGZXoApKVz2+pvqVD7Ye7lJtlV1FBjtLTbIkuCwCAEYGAMgxqm1pVWdOsVm9X8JjLYVdFmVulha4EVgYAwMjAEE+M1Ta1qry6MSScSJLH26Xy6kbVNrUmqDIAAEYOAkoM+fyWKmuaZfXxXOBYZU2zfP6+WgAAgAACSgzVt3T06jk5kSWp1dul+paO+BUFAMAIRECJofbD/YeTaNoBAJCqCCgxlJtlj2k7AABSFbN4YqioIEcuh10eb1ef96HYJDkdPVOOB8IUZQBAqiOgxFB6mk0VZW6VVzfKJoWElEC8qChzDxg2mKIMAABDPDFXWujS6kUz5HSEDuM4HXatXjRjwJDBFGUAAHrQgzIMSgtdmu12RjRMM9gUZZt6pijPdjsZ7gEAJD0CyjBJT7OpeOL4sNtHMkU5kvcFAGAkYojHEExRBgDgbwgohmCKMgAAf0NAMURginJ/d5fY1DObZ7ApygAAJAMCiiECU5Ql9Qop4U5RBgAgWRBQDDKUKcoAACQTZvEYJpopygAAJBsCioEinaIMAECyYYgHAAAYhx6UGGBzPwAAYouAMkRs7gcAQOwxxDME4W7u5/Nbqtv7sV7a8aHq9n4sn7+vHXcAAEAAPShRCndzP79fuuc39LAAABCJiHpQqqqqdOGFFyorK0u5ubmaN2+e9uzZE9Kmq6tLS5cu1fjx43Xqqadq/vz5amtrC2mzf/9+zZ07V6eccopyc3N1xx136Pjx40M/mzgKd3O/7z8/eA8LAAAIFVFA2bp1q5YuXaq33npLmzZt0rFjx3TllVfqyJEjwTa33HKLampq9MILL2jr1q06ePCgrr766uDzPp9Pc+fO1dGjR/Xmm2/qmWee0bp163T33XfH7qziYCib9gV6XSprmhnuAQCgDzbLsqL+hfzoo4+Um5urrVu36itf+Yq8Xq8+//nP6/nnn9e3vvUtSdL//M//aPLkyaqrq9NFF12kV155Rd/4xjd08OBB5eXlSZLWrFmjH/zgB/roo4+UkZEx6N/t7OyUw+GQ1+tVdnZ2tOUPSd3ej/Xdp94a8vv84oaLWPMEAJASIvn9HtJNsl6vV5KUk9OzgV1DQ4OOHTumkpKSYJtzzjlHZ5xxhurq6iRJdXV1mjJlSjCcSNKcOXPU2dmpXbt29fl3uru71dnZGfJItME29wvXUHpiAABIVlEHFL/fr+XLl2vWrFkqLCyUJHk8HmVkZGjcuHEhbfPy8uTxeIJtTgwngecDz/WlqqpKDocj+JgwYUK0ZcdMOJv7hSM3
yz54IwAAUkzUAWXp0qVqamrS+vXrY1lPn1auXCmv1xt8HDhwYNj/ZjgG2tzvJ/9v+oA9LDb1zOYpKsgZ9joBABhpoppmvGzZMm3cuFHbtm3T6aefHjzudDp19OhRHTp0KKQXpa2tTU6nM9imvr4+5P0Cs3wCbU6WmZmpzMzMaEoddgNt7peWZlN5daNsUsh05EBoqShzs+IsAAB9iKgHxbIsLVu2TBs2bNCWLVtUUFAQ8vz555+v0aNHa/PmzcFje/bs0f79+1VcXCxJKi4u1s6dO9Xe3h5ss2nTJmVnZ8vtdg/lXBImsLnfN6d9QcUTxwdDx0A9LKsXzWAdFAAA+hHRLJ7vf//7ev755/XSSy9p0qRJweMOh0NjxoyRJJWXl+vll1/WunXrlJ2drZtuukmS9Oabb0rqmWY8bdo05efn64EHHpDH49E111yj66+/Xvfdd19YdZgwiycS7NUDAEBkv98RBRSbre8f1bVr1+rv//7vJfUs1HbbbbfpF7/4hbq7uzVnzhz95Cc/CRm+2bdvn8rLy/X6669r7NixWrx4se6//36NGhXeiNNICyj9IbgAAFLJsAUUUyRDQGGTQQBAqonbOiiITribDAIAkKoIKBGIxa7Eg20yKLEEPgAA7GYcplgNyYS7yWB9SwdL4AMAUhY9KGGI5ZBMuEvbswQ+ACCVEVAGEeshmXCXtmcJfABAKiOgDCKSIZlwDLbJIEvgAwBAQBlUrIdkwtlkkCXwAQCpjoAyiOEYkmEJfAAABsYsnkEEhmQ83q4+70OxqSdYRDokM9AmgwAApDoCyiACQzLDsStxYJNBAAAQiiGeMDAkAwBAfNGDEiaGZAAAiB8CSgQYkgEAID4Y4gEAAMahByVJ+PwWw08AgKRBQEkCsdrIEAAAUzDEM8LFciNDAABMQUAZwWK9kSEAAKYgoIxgsd7IEAAAUxBQRrBYb2QIAIApCCgj2HBsZAgAgAkIKCNYYCPD/iYT29QzmyfSjQwBAEg0AsoIFtjIUFKvkDLUjQwBAEgkAsoIx0aGAIBkxEJtSYCNDAEAyYaAkiTYyBAAkEwY4gEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGAcAgoAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAcQgoAADAOAQUAABgHAIKAAAwDgEFAAAYZ1SiCzDJ0eN+/bzuf7Wv41OdmXOKrik+Sxmjkj/D+fyW6ls61H64S7lZdhUV5Cg9zZbosmJSl6nnBgCmMuV7M6EB5YknntC//du/yePxaOrUqXrsscdUVFSUkFqqXm7WU79vkd/627EfvbxbN1xaoJVXuRNSUzzUNrWqsqZZrd6u4DGXw66KMrdKC10jui5Tzw0ATGXS92bCugd++ctf6tZbb1VFRYUaGxs1depUzZkzR+3t7XGvperlZj25LTScSJLfkp7c1qKql5vjXlM81Da1qry6MeSDKEkeb5fKqxtV29Q6Yusy9dwAwFSmfW8mLKA8/PDDuuGGG/S9731Pbrdba9as0SmnnKKf/exnca3j6HG/nvp9y4Btnvp9i44e98epovjw+S1V1jTL6uO5wLHKmmb5Tk5twywWdZl6bgBgKhO/NxMSUI4ePaqGhgaVlJT8rZC0NJWUlKiurq5X++7ubnV2doY8YuXndf/bq+fkZH6rp10yqW/p6JWST2RJavV2qb6lI35FKTZ1mXpuAGAqE783ExJQ/vKXv8jn8ykvLy/keF5enjweT6/2VVVVcjgcwceECRNiVsu+jk9j2m6kaD/c/wcxmnaxEou6TD03ADCV
id+bI2KKysqVK+X1eoOPAwcOxOy9z8w5JabtRorcLHtM28VKLOoy9dwAwFQmfm8mJKCcdtppSk9PV1tbW8jxtrY2OZ3OXu0zMzOVnZ0d8oiVa4rP0mCzp9JsPe2SSVFBjlwOu/o7dZt67twuKsiJZ1kxqcvUcwMAU5n4vZmQgJKRkaHzzz9fmzdvDh7z+/3avHmziouL41vLqDTdcGnBgG1uuLQg6dZDSU+zqaKsZ/r0yR/IwP8rytxxn/sei7pMPTcAMJWJ35sJ+9W99dZb9dRTT+mZZ57R7t27VV5eriNHjuh73/te3GtZeZVbN36loFdPSppNuvErybsOSmmhS6sXzZDTEdpl53TYtXrRjIStFRKLukw9NwAwlWnfmzbLshI21/Lxxx8PLtQ2bdo0Pfroo5o5c+agr+vs7JTD4ZDX643pcA8ryZq12ioryQJA/A3n92Ykv98JDSjRGq6AAgAAhk8kv9/J3z0AAABGHAIKAAAwDgEFAAAYh4ACAACMQ0ABAADGIaAAAADjEFAAAIBxCCgAAMA4BBQAAGCcUYkuIBqBxW87OzsTXAkAAAhX4Hc7nEXsR2RAOXz4sCRpwoQJCa4EAABE6vDhw3I4HAO2GZF78fj9fh08eFBZWVmy2Ya+gVFnZ6cmTJigAwcOsLfPSbg2feO69I9r0zeuS/+4Nv1LtmtjWZYOHz6s/Px8paUNfJfJiOxBSUtL0+mnnx7z983Ozk6KD8Bw4Nr0jevSP65N37gu/ePa9C+Zrs1gPScB3CQLAACMQ0ABAADGIaBIyszMVEVFhTIzMxNdinG4Nn3juvSPa9M3rkv/uDb9S+VrMyJvkgUAAMmNHhQAAGAcAgoAADAOAQUAABiHgAIAAIyTMgHliSee0FlnnSW73a6ZM2eqvr5+wPYvvPCCzjnnHNntdk2ZMkUvv/xynCqNv0iuzbp162Sz2UIedrs9jtXGx7Zt21RWVqb8/HzZbDa9+OKLg77m9ddf14wZM5SZmakvfelLWrdu3bDXGW+RXpfXX3+91+fFZrPJ4/HEp+A4qqqq0oUXXqisrCzl5uZq3rx52rNnz6CvS/bvmmiuS6p8z6xevVrnnXdecBG24uJivfLKKwO+Jtk/LydKiYDyy1/+UrfeeqsqKirU2NioqVOnas6cOWpvb++z/Ztvvqnvfve7uu666/TOO+9o3rx5mjdvnpqamuJc+fCL9NpIPSsatra2Bh/79u2LY8XxceTIEU2dOlVPPPFEWO1bWlo0d+5cXX755dqxY4eWL1+u66+/Xq+++uowVxpfkV6XgD179oR8ZnJzc4epwsTZunWrli5dqrfeekubNm3SsWPHdOWVV+rIkSP9viYVvmuiuS5SanzPnH766br//vvV0NCgP/7xj/ra176mb37zm9q1a1ef7VPh8xLCSgFFRUXW0qVLg//3+XxWfn6+VVVV1Wf773znO9bcuXNDjs2cOdO68cYbh7XORIj02qxdu9ZyOBxxqs4MkqwNGzYM2ObOO++0zj333JBjf/d3f2fNmTNnGCtLrHCuy+9+9ztLkvXXv/41LjWZpL293ZJkbd26td82qfRdExDOdUnF75mAz33uc9Z//Md/9Plcqn1ekr4H5ejRo2poaFBJSUnwWFpamkpKSlRXV9fna+rq6kLaS9KcOXP6bT9SRXNtJOmTTz7RmWeeqQkTJgyY9lNJqnxmojVt2jS5XC7Nnj1bb7zxRqLLiQuv1ytJysnJ6bdNKn5uwrkuUup9z/h8Pq1fv15HjhxRcXFxn21S7fOS9AHlL3/5i3w+n/Ly8kKO5+Xl9TsO7vF4Imo/UkVzbSZNmqSf/exneumll1RdXS2/36+LL75YH3zwQTxKNlZ/n5nOzk793//9X4KqSjyXy6U1a9bo17/+tX79619rwoQJuuyyy9TY2Jjo0oaV3+/X8uXLNWvWLBUWFvbbLlW+awLCvS6p9D2zc+dOnXrqqcrMzNQ//MM/aMOGDXK73X22TbXP
y4jczRiJU1xcHJLuL774Yk2ePFlPPvmk7rnnngRWBhNNmjRJkyZNCv7/4osv1t69e/XII4/o5z//eQIrG15Lly5VU1OT/vCHPyS6FKOEe11S6Xtm0qRJ2rFjh7xer/7zP/9Tixcv1tatW/sNKakk6XtQTjvtNKWnp6utrS3keFtbm5xOZ5+vcTqdEbUfqaK5NicbPXq0pk+frj//+c/DUeKI0d9nJjs7W2PGjElQVWYqKipK6s/LsmXLtHHjRv3ud7/T6aefPmDbVPmukSK7LidL5u+ZjIwMfelLX9L555+vqqoqTZ06Vf/+7//eZ9tU+rxIKRBQMjIydP7552vz5s3BY36/X5s3b+53nK+4uDikvSRt2rSp3/YjVTTX5mQ+n087d+6Uy+UarjJHhFT5zMTCjh07kvLzYlmWli1bpg0bNmjLli0qKCgY9DWp8LmJ5rqcLJW+Z/x+v7q7u/t8LhU+LyESfZduPKxfv97KzMy01q1bZzU3N1tLliyxxo0bZ3k8HsuyLOuaa66xVqxYEWz/xhtvWKNGjbIefPBBa/fu3VZFRYU1evRoa+fOnYk6hWET6bWprKy0Xn31VWvv3r1WQ0ODtWDBAstut1u7du1K1CkMi8OHD1vvvPOO9c4771iSrIcffth65513rH379lmWZVkrVqywrrnmmmD7999/3zrllFOsO+64w9q9e7f1xBNPWOnp6VZtbW2iTmFYRHpdHnnkEevFF1+03nvvPWvnzp3WzTffbKWlpVm//e1vE3UKw6a8vNxyOBzW66+/brW2tgYfn376abBNKn7XRHNdUuV7ZsWKFdbWrVutlpYW609/+pO1YsUKy2azWa+99pplWan5eTlRSgQUy7Ksxx57zDrjjDOsjIwMq6ioyHrrrbeCz331q1+1Fi9eHNL+V7/6lXX22WdbGRkZ1rnnnmv95je/iXPF8RPJtVm+fHmwbV5ennXVVVdZjY2NCah6eAWmx578CFyLxYsXW1/96ld7vWbatGlWRkaG9cUvftFau3Zt3OsebpFel1WrVlkTJ0607Ha7lZOTY1122WXWli1bElP8MOvrukgK+Ryk4ndNNNclVb5nrr32WuvMM8+0MjIyrM9//vPWFVdcEQwnlpWan5cT2SzLsuLXXwMAADC4pL8HBQAAjDwEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA4xBQAACAJGnbtm0qKytTfn6+bDabXnzxxYjfw7IsPfjggzr77LOVmZmpL3zhC/rRj34U8fuwmzEAAJAkHTlyRFOnTtW1116rq6++Oqr3uPnmm/Xaa6/pwQcf1JQpU9TR0aGOjo6I34eVZAEAQC82m00bNmzQvHnzgse6u7v1z//8z/rFL36hQ4cOqbCwUKtWrdJll10mSdq9e7fOO+88NTU1adKkSUP6+wzxAACAsCxbtkx1dXVav369/vSnP+nb3/62SktL9d5770mSampq9MUvflEbN25UQUGBzjrrLF1//fVR9aAQUAAAwKD279+vtWvX6oUXXtCll16qiRMn6vbbb9cll1yitWvXSpLef/997du3Ty+88IKeffZZrVu3Tg0NDfrWt74V8d/jHhQAADConTt3yufz6eyzzw453t3drfHjx0uS/H6/uru79eyzzwbbPf300zr//PO1Z8+eiIZ9CCgAAGBQn3zyidLT09XQ0KD09PSQ50499VRJksvl0qhRo0JCzOTJkyX19MAQUAAAQExNnz5dPp9P7e3tuvTSS/tsM2vWLB0/flx79+7VxIkTJUnvvvuuJOnMM8+M6O8xiwcAAEjq6SX585//LKknkDz88MO6/PLLlZOTozPOOEOLFi3SG2+8oYceekjTp0/XRx99pM2bN+u8887T3Llz5ff7deGFF+rUU0/Vj3/8Y/n9fi1dulTZ2dl67bXXIqqFgAIAACRJr7/+ui6//PJexxcvXqx169bp2LFjuvfee/Xss8/qww8/1GmnnaaLLrpI
lZWVmjJliiTp4MGDuummm/Taa69p7Nix+vrXv66HHnpIOTk5EdVCQAEAAMZhmjEAADAOAQUAABiHgAIAAIxDQAEAAMYhoAAAAOMQUAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxvn/RrOP3XuAKTEAAAAASUVORK5CYII="
1595
+ },
1596
+ "metadata": {},
1597
+ "output_type": "display_data"
1598
+ }
1599
+ ],
1600
+ "execution_count": 11
1601
  }
1602
  ],
1603
  "metadata": {
1604
  "kernelspec": {
1605
+ "display_name": "Python 3 (ipykernel)",
1606
  "language": "python",
1607
  "name": "python3"
1608
  },
1609
  "language_info": {
1610
  "codemirror_mode": {
1611
  "name": "ipython",
1612
+ "version": 3
1613
  },
1614
  "file_extension": ".py",
1615
  "mimetype": "text/x-python",
1616
  "name": "python",
1617
  "nbconvert_exporter": "python",
1618
+ "pygments_lexer": "ipython3",
1619
+ "version": "3.9.5"
1620
  }
1621
  },
1622
  "nbformat": 4,
analysis_util.py CHANGED
@@ -1,6 +1,31 @@
 
 
 
1
  import pandas as pd
2
 
3
- from generation_steps.metrics_analysis import correlations_for_group
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  def split_metrics_string(s):
@@ -8,10 +33,10 @@ def split_metrics_string(s):
8
  return tokens[1], tokens[3]
9
 
10
 
11
- def get_ref_only_correlations_df(df):
12
  correlations_raw = correlations_for_group(df)
13
 
14
- idx = list(set("_".join(col.split("_")[:-1]) for col in correlations_raw.index))
15
 
16
  data = []
17
  for metrics in idx:
@@ -29,8 +54,8 @@ def get_ref_only_correlations_df(df):
29
  return result
30
 
31
 
32
- def get_ref_only_correlations_for_groups(df):
33
- noref_correlations = {"all": get_ref_only_correlations_df(df)}
34
 
35
  for e2s in (False, True):
36
  for s2e in (False, True):
@@ -43,7 +68,7 @@ def get_ref_only_correlations_for_groups(df):
43
  suffix = "golden"
44
 
45
  subdf = df[(df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)]
46
- subdf_noref_corr = get_ref_only_correlations_df(subdf)
47
  noref_correlations[suffix] = subdf_noref_corr
48
 
49
  noref_correlations = pd.concat(noref_correlations, axis=1)
 
1
+ import functools
2
+ import operator
3
+
4
  import pandas as pd
5
 
6
+
7
def correlations_for_group(group):
    """Correlate every ``*_related`` column with the other metric columns.

    Metric names are discovered from the column names of ``group``: the text
    before the first underscore of each column ending in ``_related``,
    ``_independent`` or ``_aggr``.  Because only the first underscore-separated
    token is kept, metric names themselves must not contain underscores.

    :param group: DataFrame holding ``<metric>_related`` columns together with
        ``<metric>_independent`` and/or ``<metric>_aggr`` columns.
    :return: ``pd.Series`` indexed by
        ``rel_<rel>_ind_<ind>_<method>`` and ``rel_<rel>_aggr_<aggr>_<method>``
        where ``<method>`` is ``pearson`` or ``spearman``.
    """
    # NOTE: the attribute is ``columns``; the former ``colmns`` spelling raised
    # AttributeError on every call.
    columns = list(group.columns)
    rel_metrics = [col.split("_")[0] for col in columns if col.endswith("_related")]
    ind_metrics = [col.split("_")[0] for col in columns if col.endswith("_independent")]
    aggr_metrics = [col.split("_")[0] for col in columns if col.endswith("_aggr")]

    correlations = {}
    for rel_metric in rel_metrics:
        related = group[f"{rel_metric}_related"]

        for ind_metric in ind_metrics:
            independent = group[f"{ind_metric}_independent"]
            for method in ("pearson", "spearman"):
                correlations[f"rel_{rel_metric}_ind_{ind_metric}_{method}"] = related.corr(
                    independent, method=method)

        for aggr_metric in aggr_metrics:
            aggregated = group[f"{aggr_metric}_aggr"]
            for method in ("pearson", "spearman"):
                correlations[f"rel_{rel_metric}_aggr_{aggr_metric}_{method}"] = related.corr(
                    aggregated, method=method)

    return pd.Series(correlations)
29
 
30
 
31
  def split_metrics_string(s):
 
33
  return tokens[1], tokens[3]
34
 
35
 
36
+ def get_correlations_df(df, right_side):
37
  correlations_raw = correlations_for_group(df)
38
 
39
+ idx = list(set("_".join(col.split("_")[:-1]) for col in correlations_raw.index if right_side in col))
40
 
41
  data = []
42
  for metrics in idx:
 
54
  return result
55
 
56
 
57
+ def get_correlations_for_groups(df, right_side):
58
+ noref_correlations = {"all": get_correlations_df(df, right_side=right_side)}
59
 
60
  for e2s in (False, True):
61
  for s2e in (False, True):
 
68
  suffix = "golden"
69
 
70
  subdf = df[(df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)]
71
+ subdf_noref_corr = get_correlations_for_groups(subdf, right_side=right_side)
72
  noref_correlations[suffix] = subdf_noref_corr
73
 
74
  noref_correlations = pd.concat(noref_correlations, axis=1)
change_visualizer.py CHANGED
@@ -108,7 +108,10 @@ if __name__ == '__main__':
108
  layout_for_statistics("synthetic")
109
 
110
  gr.Markdown(f"### Reference-only correlations")
111
- gr.Markdown(value=analysis_util.get_ref_only_correlations_for_groups(df_synthetic).to_markdown())
 
 
 
112
 
113
  application.load(update_dataset_view_manual, inputs=slider_manual,
114
  outputs=view_manual)
 
108
  layout_for_statistics("synthetic")
109
 
110
  gr.Markdown(f"### Reference-only correlations")
111
+ gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="ind").to_markdown())
112
+
113
+ gr.Markdown(f"### Aggregated correlations")
114
+ gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="aggr").to_markdown())
115
 
116
  application.load(update_dataset_view_manual, inputs=slider_manual,
117
  outputs=view_manual)
generation_steps/metrics_analysis.py CHANGED
@@ -7,6 +7,7 @@ import pandas as pd
7
  from tqdm import tqdm
8
 
9
  import config
 
10
  from api_wrappers import hf_data_loader
11
  from custom_metrics import gpt_eval
12
 
@@ -110,6 +111,10 @@ IND_METRICS = {
110
  "ter": ter_fn,
111
  }
112
 
 
 
 
 
113
  REL_METRICS = {
114
  "editdist": edit_distance_fn,
115
  "edittime": edit_time_fn,
@@ -128,6 +133,22 @@ def compute_metrics(df):
128
  def apply_metric_fn_to_row(row, fn, col_pred, col_ref):
129
  return fn(row[col_pred], row[col_ref], edittime=row['edit_time'], diff=str(row['mods']))
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  for metric in REL_METRICS:
132
  print(f"Computing {metric} for the related pairs")
133
  metric_fn = REL_METRICS[metric]
@@ -158,24 +179,15 @@ def compute_metrics(df):
158
  df[f"rel_{rel_metric}_ind_{ind_metric}_spearman"] = (
159
  df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="spearman"))
160
 
161
- return df
 
 
162
 
 
 
 
 
163
 
164
- def correlations_for_group(group):
165
- correlations = []
166
- for rel_metric in REL_METRICS:
167
- # correlations.append({
168
- # f"{metric}_pearson": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="pearson"),
169
- # f"{metric}_spearman": group[f"{metric}_related"].corr(group[f"{metric}_independent"], method="spearman")
170
- # })
171
- for ind_metric in IND_METRICS:
172
- correlations.append({
173
- f"rel_{rel_metric}_ind_{ind_metric}_pearson": group[f"{rel_metric}_related"].corr(
174
- group[f"{ind_metric}_independent"], method="pearson"),
175
- f"rel_{rel_metric}_ind_{ind_metric}_spearman": group[f"{rel_metric}_related"].corr(
176
- group[f"{ind_metric}_independent"], method="spearman"),
177
- })
178
- return pd.Series(functools.reduce(operator.ior, correlations, {}))
179
 
180
 
181
  def compute_correlations(df: pd.DataFrame):
 
7
  from tqdm import tqdm
8
 
9
  import config
10
+ from analysis_util import correlations_for_group
11
  from api_wrappers import hf_data_loader
12
  from custom_metrics import gpt_eval
13
 
 
111
  "ter": ter_fn,
112
  }
113
 
114
+ AGGR_METRICS = IND_METRICS.copy()
115
+ del AGGR_METRICS["gptscore-ref-1-req"]
116
+ del AGGR_METRICS["gptscore-noref-1-req"]
117
+
118
  REL_METRICS = {
119
  "editdist": edit_distance_fn,
120
  "edittime": edit_time_fn,
 
133
  def apply_metric_fn_to_row(row, fn, col_pred, col_ref):
134
  return fn(row[col_pred], row[col_ref], edittime=row['edit_time'], diff=str(row['mods']))
135
 
136
+ for metric in AGGR_METRICS:
137
+ print(f"Computing {metric} for the aggregated independent pairs")
138
+ values = []
139
+ for i, row in tqdm(df.iterrows(), total=len(df)):
140
+ others = df[(df["hash"] == row["hash"]) & (df["repo"] == row["repo"]) & (
141
+ df["commit_msg_start"] != row["commit_msg_start"])]['commit_msg_end'].to_list()
142
+ others.append(row["reference"])
143
+ others = list(set(others))
144
+ metric_fn = AGGR_METRICS[metric]
145
+ values.append(
146
+ metric_fn(
147
+ row['commit_msg_start'], None, refs=others, edittime=row['edit_time'], diff=str(row['mods'])
148
+ )
149
+ )
150
+ df[f"{metric}_aggr"] = values
151
+
152
  for metric in REL_METRICS:
153
  print(f"Computing {metric} for the related pairs")
154
  metric_fn = REL_METRICS[metric]
 
179
  df[f"rel_{rel_metric}_ind_{ind_metric}_spearman"] = (
180
  df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="spearman"))
181
 
182
+ for aggr_metric in AGGR_METRICS:
183
+ df[f"rel_{rel_metric}_aggr_{aggr_metric}_pearson"] = (
184
+ df[f"{rel_metric}_related"].corr(df[f"{aggr_metric}_aggr"], method="pearson"))
185
 
186
+ df[f"rel_{rel_metric}_ind_{aggr_metric}_spearman"] = (
187
+ df[f"{rel_metric}_related"].corr(df[f"{aggr_metric}_aggr"], method="spearman"))
188
+
189
+ return df
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
 
193
  def compute_correlations(df: pd.DataFrame):
requirements.txt CHANGED
@@ -63,7 +63,6 @@ jsonpointer==2.4
63
  jsonschema==4.21.1
64
  jsonschema-specifications==2023.12.1
65
  kiwisolver==1.4.5
66
- Levenshtein==0.25.1
67
  lxml==5.2.1
68
  markdown-it-py==3.0.0
69
  MarkupSafe==2.1.5
 
63
  jsonschema==4.21.1
64
  jsonschema-specifications==2023.12.1
65
  kiwisolver==1.4.5
 
66
  lxml==5.2.1
67
  markdown-it-py==3.0.0
68
  MarkupSafe==2.1.5