Spaces:
Runtime error
Runtime error
import diff_match_patch as dmp_module | |
import hf_data_loader | |
def get_annotated_diff(start_text, end_text):
    """Return a semantically cleaned diff of two texts as labelled fragments.

    The diff is computed with diff-match-patch. Each resulting
    ``(operation, text)`` pair is converted into a two-element list
    ``[text, label]`` where the label is ``'-'`` for operation ``-1``,
    ``'+'`` for operation ``1`` and ``None`` for operation ``0``.

    Args:
        start_text: The text the diff starts from.
        end_text: The text the diff leads to.

    Returns:
        A list of ``[text, label]`` lists, in the order produced by the diff.
    """
    differ = dmp_module.diff_match_patch()

    edits = differ.diff_main(start_text, end_text)
    # The return value is ignored on purpose: the cleanup rewrites ``edits``
    # in place, and the cleaned list is what gets converted below.
    differ.diff_cleanupSemantic(edits)

    # Translate diff-match-patch operation codes into display labels.
    label_for_op = {-1: '-', 0: None, 1: '+'}

    annotated = []
    for op, fragment in edits:
        annotated.append([fragment, label_for_op[op]])
    return annotated
def annotated_diff_for_row_manual_df(row):
    """Build the annotated diff for one row of the manual dataset.

    Args:
        row: A mapping-like row providing the ``'commit_msg_start'`` and
            ``'commit_msg_end'`` fields.

    Returns:
        The result of ``get_annotated_diff`` going from the start message
        to the end message.
    """
    return get_annotated_diff(row['commit_msg_start'], row['commit_msg_end'])
def annotated_diff_for_row_synthetic_df(row):
    """Build the annotated diff for one row of the synthetic dataset.

    Args:
        row: A mapping-like row providing the ``'initial_msg_pred'`` and
            ``'reference'`` fields.

    Returns:
        The result of ``get_annotated_diff`` going from the initial
        predicted message to the reference message.
    """
    return get_annotated_diff(row['initial_msg_pred'], row['reference'])
def manual_data_with_annotated_diffs():
    """Load the raw rewriting dataset and attach an annotated diff per row.

    The object returned by
    ``hf_data_loader.load_raw_rewriting_dataset_as_pandas`` (presumably a
    pandas DataFrame, judging by the loader name and the ``apply(axis=1)``
    usage) gains an ``'annotated_diff'`` column holding the output of
    ``annotated_diff_for_row_manual_df`` for each row.

    Returns:
        The loaded object itself, with the ``'annotated_diff'`` column added.
    """
    frame = hf_data_loader.load_raw_rewriting_dataset_as_pandas()
    # axis=1 hands each row (not each column) to the helper.
    frame['annotated_diff'] = frame.apply(
        annotated_diff_for_row_manual_df, axis=1
    )
    return frame
def synthetic_data_with_annotated_diffs():
    """Load the synthetic dataset and attach an annotated diff per row.

    The object returned by
    ``hf_data_loader.load_synthetic_dataset_as_pandas`` (presumably a
    pandas DataFrame, judging by the loader name and the ``apply(axis=1)``
    usage) gains an ``'annotated_diff'`` column holding the output of
    ``annotated_diff_for_row_synthetic_df`` for each row.

    Returns:
        The loaded object itself, with the ``'annotated_diff'`` column added.
    """
    frame = hf_data_loader.load_synthetic_dataset_as_pandas()
    # axis=1 hands each row (not each column) to the helper.
    frame['annotated_diff'] = frame.apply(
        annotated_diff_for_row_synthetic_df, axis=1
    )
    return frame