Spaces:
Runtime error
Runtime error
| import diff_match_patch as dmp_module | |
| from api_wrappers import hf_data_loader | |
def get_annotated_diff(start_text, end_text):
    """Diff two texts and label each segment as removed, kept or added.

    Args:
        start_text: The text before the change.
        end_text: The text after the change.

    Returns:
        A list of ``[segment, label]`` two-element lists, one per diff
        segment, where ``label`` is ``'-'`` for operation code -1,
        ``'+'`` for operation code 1 and ``None`` for operation code 0.
    """
    differ = dmp_module.diff_match_patch()

    # Translate diff_match_patch operation codes into annotation labels.
    op_labels = {-1: '-', 0: None, 1: '+'}

    segments = differ.diff_main(start_text, end_text)
    # The return value is not used; the cleanup is relied on to rewrite
    # `segments` in place.
    differ.diff_cleanupSemantic(segments)

    annotated = []
    for op, text in segments:
        annotated.append([text, op_labels[op]])
    return annotated
def annotated_diff_for_row(row):
    """Return the annotated diff between a row's two commit messages.

    Args:
        row: A mapping-like object indexable by ``'commit_msg_start'`` and
            ``'commit_msg_end'``.

    Returns:
        Whatever ``get_annotated_diff`` returns for those two values.
    """
    return get_annotated_diff(row['commit_msg_start'], row['commit_msg_end'])
def manual_data_with_annotated_diffs():
    """Load the raw rewriting data and attach an ``annotated_diff`` column.

    The frame comes from ``hf_data_loader.load_raw_rewriting_as_pandas()``
    (presumably a pandas DataFrame with ``commit_msg_start`` and
    ``commit_msg_end`` columns -- confirm against the loader).

    Returns:
        The loaded frame, with an added ``annotated_diff`` column holding
        the result of ``annotated_diff_for_row`` for each row.
    """
    frame = hf_data_loader.load_raw_rewriting_as_pandas()
    # axis=1 applies the function once per row.
    frame['annotated_diff'] = frame.apply(annotated_diff_for_row, axis=1)
    return frame
def synthetic_data_with_annotated_diffs():
    """Load the synthetic data and attach an ``annotated_diff`` column.

    The frame comes from ``hf_data_loader.load_synthetic_as_pandas()``
    (presumably a pandas DataFrame with ``commit_msg_start`` and
    ``commit_msg_end`` columns -- confirm against the loader).

    Returns:
        The loaded frame, with an added ``annotated_diff`` column holding
        the result of ``annotated_diff_for_row`` for each row.
    """
    frame = hf_data_loader.load_synthetic_as_pandas()
    # axis=1 applies the function once per row.
    frame['annotated_diff'] = frame.apply(annotated_diff_for_row, axis=1)
    return frame