"""Small helpers for text cleaning, CSV export, JSON serialization and hashing."""

import hashlib
import json
from datetime import datetime
from io import StringIO

import cleantext
import ftfy


def clean_text(input):
    """Return ``input`` after an ftfy pass followed by a cleantext pass.

    The text is first handed to ``ftfy.fix_text`` and the result is then
    passed to ``cleantext.clean`` with ``extra_spaces`` and ``lowercase``
    enabled and ``stemming``, ``stopwords``, ``numbers`` and ``punct``
    disabled.

    Args:
        input: The raw text to clean. The name shadows the ``input`` builtin
            but is kept so existing keyword callers keep working.

    Returns:
        Whatever ``cleantext.clean`` returns for the repaired text
        (presumably a ``str`` -- confirm against the installed cleantext
        version).
    """
    text = ftfy.fix_text(input)
    return cleantext.clean(
        text,
        extra_spaces=True,
        stemming=False,
        stopwords=False,
        lowercase=True,
        numbers=False,
        punct=False,
    )


def df_to_csv(df):
    """Render ``df`` as CSV text without the index column.

    Args:
        df: Any object exposing ``to_csv(buffer, index=False)``
            (assumed to be a pandas DataFrame -- confirm against callers).

    Returns:
        The CSV content that ``df.to_csv`` wrote into an in-memory buffer.
    """
    # getvalue() returns the whole buffer regardless of the stream position,
    # so no rewind is needed; the context manager releases the buffer.
    with StringIO() as buffer:
        df.to_csv(buffer, index=False)
        return buffer.getvalue()


def serialize_data(data) -> str:
    """Serialize ``data`` to a JSON string, rendering datetimes via ``str``.

    Args:
        data: A JSON-serializable structure that may also contain
            ``datetime`` instances.

    Returns:
        The JSON document as a string; every ``datetime`` appears as its
        ``str()`` form.

    Raises:
        TypeError: If ``data`` contains a value that is neither natively
            JSON-serializable nor a ``datetime``.
    """

    def converter(o):
        if isinstance(o, datetime):
            return str(o)
        # A ``default`` hook must raise for types it cannot handle. Returning
        # None here would silently write such values as JSON ``null``.
        raise TypeError(
            f"Object of type {type(o).__name__} is not JSON serializable"
        )

    return json.dumps(data, default=converter)


def hash(input) -> str:
    """Return the hexadecimal SHA-1 digest of the JSON encoding of ``input``.

    ``input`` is wrapped in a one-element list and dumped with
    ``sort_keys=True``, so dictionaries that differ only in key order yield
    the same digest.

    NOTE: this function shadows the ``hash`` builtin inside this module; the
    name is kept because callers import it under this name.

    Args:
        input: Any value accepted by ``json.dumps``.

    Returns:
        The 40-character hexadecimal SHA-1 digest.

    Raises:
        TypeError: If ``input`` is not JSON-serializable.
    """
    encoded = json.dumps([input], sort_keys=True).encode()
    return hashlib.sha1(encoded).hexdigest()