File size: 916 Bytes
f26a894
 
 
 
a8a595d
 
 
 
 
 
 
 
 
 
 
 
f26a894
 
a8a595d
 
 
f26a894
 
 
c151bb0
 
 
a8a595d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import numpy as np
import pandas as pd


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` without raising on a zero denominator."""
    if denominator:
        return numerator / denominator
    # 0/0: nothing to change and nothing changed -> 0.0.
    # x/0: changes relative to an empty message are unbounded -> inf.
    return float("inf") if numerator else 0.0


def get_statistics(start_msg, end_msg, annotated_msg):
    """Compute relative edit statistics for one annotated diff.

    Args:
        start_msg: The message before editing; only its ``len()`` is used.
        end_msg: The message after editing; only its ``len()`` is used.
        annotated_msg: Iterable of ``(text, change_type)`` pairs. Pairs whose
            ``change_type`` is ``'-'`` count as deletions, ``'+'`` as
            insertions; any other marker is ignored.

    Returns:
        A dict with three ratios:
          * ``"deletions"``: deleted length / ``len(start_msg)``
          * ``"insertions"``: inserted length / ``len(end_msg)``
          * ``"changes"``: (deleted + inserted length) / ``len(end_msg)``

        When a denominator is zero the ratio is ``0.0`` if the numerator is
        also zero and ``float("inf")`` otherwise, instead of raising
        ``ZeroDivisionError``.
    """
    sum_deletions = 0
    sum_insertions = 0
    # Single pass so that ``annotated_msg`` may be a one-shot iterator.
    for text, change_type in annotated_msg:
        if change_type == '-':
            sum_deletions += len(text)
        elif change_type == '+':
            sum_insertions += len(text)

    sum_changes = sum_deletions + sum_insertions
    end_length = len(end_msg)
    start_length = len(start_msg)

    return {
        "deletions": _safe_ratio(sum_deletions, start_length),
        "insertions": _safe_ratio(sum_insertions, end_length),
        "changes": _safe_ratio(sum_changes, end_length),
    }


def get_statistics_for_df(df: pd.DataFrame):
    """Run ``get_statistics`` on every row of ``df`` and collect the results.

    Each row must provide the ``"commit_msg_start"``, ``"commit_msg_end"``
    and ``"annotated_diff"`` entries, which are passed to ``get_statistics``
    in that order.

    Returns:
        A dict mapping each statistic name to a NumPy array holding that
        statistic for every row, in row order.

    Raises:
        AssertionError: If ``df`` has no rows (when assertions are enabled).
    """
    per_row = []
    for _, record in df.iterrows():
        per_row.append(
            get_statistics(
                record["commit_msg_start"],
                record["commit_msg_end"],
                record["annotated_diff"],
            )
        )

    assert len(per_row) > 0

    # The first row's keys define which statistics are collected.
    collected = {}
    for key in per_row[0]:
        collected[key] = np.asarray([entry[key] for entry in per_row])
    return collected