Spaces:
Runtime error
Runtime error
import numpy as np | |
import pandas as pd | |
def get_statistics_for_df(df: pd.DataFrame, end_col, annotated_col):
    """Compute per-row edit sizes relative to the length of the end text.

    For every row, the cell in ``annotated_col`` is iterated as a sequence of
    ``(text, change_type)`` pairs. The lengths of all texts tagged ``'-'`` are
    summed as deletions and those tagged ``'+'`` as insertions; pairs with any
    other tag are ignored. Each sum is then divided by ``len()`` of the cell
    in ``end_col``.

    Args:
        df: Frame holding both columns.
        end_col: Name of the column whose cell length is the denominator.
        annotated_col: Name of the column holding the ``(text, change_type)``
            pairs.

    Returns:
        A dict with the keys ``"deletions"``, ``"insertions"`` and
        ``"changes"`` (deletions + insertions), each mapping to a float
        ``numpy`` array with one entry per row of ``df``, in row order.
        Rows whose end text is empty have no defined ratio and are reported
        as ``NaN`` instead of raising ``ZeroDivisionError``.
    """
    relative_deletions = []
    relative_insertions = []
    relative_changes = []

    # Only two columns are needed, so iterate them directly instead of paying
    # for a full Series per row via ``DataFrame.iterrows``.
    for end_text, annotated in zip(df[end_col], df[annotated_col]):
        sum_deletions = 0
        sum_insertions = 0
        # Single pass so one-shot iterables in the cell keep working.
        for text, change_type in annotated:
            if change_type == '-':
                sum_deletions += len(text)
            elif change_type == '+':
                sum_insertions += len(text)

        end_length = len(end_text)
        if end_length == 0:
            # Relative size is undefined for an empty end text; keep the row
            # (so outputs stay aligned with ``df``) but mark it as NaN.
            relative_deletions.append(np.nan)
            relative_insertions.append(np.nan)
            relative_changes.append(np.nan)
            continue

        relative_deletions.append(sum_deletions / end_length)
        relative_insertions.append(sum_insertions / end_length)
        relative_changes.append((sum_deletions + sum_insertions) / end_length)

    return {
        "deletions": np.asarray(relative_deletions, dtype=float),
        "insertions": np.asarray(relative_insertions, dtype=float),
        "changes": np.asarray(relative_changes, dtype=float),
    }
def get_statistics_for_manual_df(df):
    """Run ``get_statistics_for_df`` with the column names used for manual data.

    The end text is read from ``'commit_msg_end'`` and the annotated pairs
    from ``'annotated_diff'``; the result of ``get_statistics_for_df`` is
    returned unchanged.
    """
    column_names = {
        "end_col": "commit_msg_end",
        "annotated_col": "annotated_diff",
    }
    return get_statistics_for_df(df, **column_names)
def get_statistics_for_synthetic_df(df):
    """Run ``get_statistics_for_df`` with the column names used for synthetic data.

    The end text is read from ``'reference'`` and the annotated pairs from
    ``'annotated_diff'``; the result of ``get_statistics_for_df`` is returned
    unchanged.
    """
    column_names = {
        "end_col": "reference",
        "annotated_col": "annotated_diff",
    }
    return get_statistics_for_df(df, **column_names)