import re
from string import punctuation

# Matches tokens that begin (at a word boundary) with "http" or "www", plus
# any double-quote character. The \b anchors keep ordinary words that merely
# contain those letters (e.g. "awwww") from being truncated.
_URL_OR_QUOTE_RE = re.compile(r'\bhttp\S+|\bwww\S+|"')


def text_cleaning(text):
    """Normalize free-form text into a space-separated string of lowercase words.

    Steps, in order:

    1. Remove URL-like runs starting with ``http``/``https``/``www`` and all
       double-quote characters.
    2. Split on whitespace.
    3. Drop tokens that start with ``@`` (mentions) or ``#`` (hashtags).
    4. Strip leading/trailing ASCII punctuation from each token; tokens that
       consist solely of punctuation disappear.
    5. Drop tokens whose first remaining character is a digit.
    6. Lowercase everything and join with single spaces.

    Args:
        text: The input string to clean.

    Returns:
        The cleaned string; empty if no tokens survive.
    """
    without_urls = _URL_OR_QUOTE_RE.sub('', text)

    cleaned = []
    for token in without_urls.split():
        # Mentions/hashtags are detected before punctuation stripping, so only
        # tokens that literally begin with '@' or '#' are discarded.
        if token.startswith(('@', '#')):
            continue

        core = token.strip(punctuation)
        # An empty core means the token was pure punctuation; the emptiness
        # check also guards the core[0] access.
        if not core or core[0].isdigit():
            continue

        cleaned.append(core.lower())

    return " ".join(cleaned)