import re # constants num_of_characters_to_keep = 2000 # regex html_tag_pattern = re.compile(r"<.*?>") multi_line_pattern = re.compile(r"\n+") multi_space_pattern = re.compile(r"( )") multi_br_tag_pattern = re.compile(re.compile(r'
\s*(
\s*)*')) # repl is short for replacement repl_linebreak = "\n" repl_empty_str = "" repl_br_tag = "
" repl_span_tag_multispace = ' '