import difflib as dl
import re

try:
    from src.parameters import color
except ImportError:
    # Fall back to the flat module layout when the ``src`` package is not
    # importable. Only import failures trigger the fallback; any other error
    # raised while importing is allowed to propagate.
    from parameters import color

# Placeholder token substituted for blank-line paragraph breaks so that they
# survive the space-based tokenisation and can be diffed like any other word.
_PARA_BREAK = "__para_break__"

# U+0336 COMBINING LONG STROKE OVERLAY; appended after a character it renders
# that character as struck through.
_STRIKE_OVERLAY = "\u0336"


def strike(text):
    """
    Adds a strikethrough to the given text

    Every character of ``text`` is followed by the combining long stroke
    overlay (U+0336).

    Parameters
    ----------
    text : str
        String to strikethrough

    Returns
    -------
    content : str
        Strikethrough text (empty when ``text`` is empty)
    """
    return "".join(char + _STRIKE_OVERLAY for char in text)


def strikethrough_diff(original_license_text, modified_license_text):
    """
    Compares the two strings word by word and strikes through all words that
    exist in the original text and not in the modified text

    Both texts are split on single spaces and compared with
    ``difflib.ndiff``. Words only present in the original are emitted struck
    through, words only present in the modified text are emitted as they are,
    and common words are emitted unchanged. Paragraph breaks (``"\\n\\n"``)
    are treated as standalone tokens and are always emitted as ``"\\n\\n"``,
    whichever side they come from.

    Parameters
    ----------
    original_license_text : str
        The text to compare it to. This is usually the official license text
    modified_license_text : str
        The text that is being compared with. This is usually the modified
        license text

    Returns
    -------
    content : str
        The merged words joined by single spaces, with the words that only
        occur in the original text struck through. Because the pieces are
        joined with spaces, paragraph breaks appear as ``" \\n\\n "``.
    """
    marker = f" {_PARA_BREAK} "
    original_tokens = (
        original_license_text.replace("\n\n", marker).strip().split(" ")
    )
    modified_tokens = (
        modified_license_text.replace("\n\n", marker).strip().split(" ")
    )

    pieces = []
    for diff in dl.ndiff(original_tokens, modified_tokens):
        # Every ndiff line is a two-character tag ("- ", "+ ", "  " or "? ")
        # followed by the payload. Slice the tag off instead of using
        # str.strip("- ") / str.strip("+ "): strip() removes a *set of
        # characters* from both ends and would eat hyphens or plus signs that
        # belong to the word itself (e.g. "C++" -> "C", "-foo-" -> "foo").
        tag, token = diff[:2], diff[2:]

        if tag == "? ":
            # Intraline hint produced by ndiff; not part of either text.
            continue

        if token == _PARA_BREAK:
            pieces.append("\n\n")
        elif tag == "- ":
            # Present only in the original text: strike it through.
            pieces.append(strike(token))
        else:
            # Added ("+ ") or unchanged ("  ") word: keep it verbatim.
            pieces.append(token)

    return " ".join(pieces)