#!/usr/bin/env python
# coding: utf-8
"""Helper for reducing raw text to plain ASCII letters and single spaces."""

import re

# Literal (textual) escape sequences left behind when emojis or other
# non-ASCII characters were serialised as backslash escapes.  The widths are
# fixed (\xHH, \uHHHH, \UHHHHHHHH) so that a word glued directly to an escape
# is not swallowed together with it.
_ESCAPED_CHAR_RE = re.compile(
    r'\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}'
)
# A literal backslash followed by "n" (an escaped newline, not a real one).
_ESCAPED_NEWLINE_RE = re.compile(r'\\n')
# Anything starting with "http" (which also covers "https") or "www", up to
# the next whitespace character.
_URL_RE = re.compile(r'http\S+|www\S+')
# Every character that is neither an ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def clean_text(text):
    """Return *text* reduced to ASCII letters separated by single spaces.

    The steps are applied in this order:

    1. Remove literal ``\\xHH``, ``\\uHHHH`` and ``\\UHHHHHHHH`` escape
       sequences (e.g. escaped emojis).
    2. Replace a literal ``\\n`` (backslash + ``n``) with a space.
    3. Remove URLs (tokens starting with ``http`` or ``www``).
    4. Remove every character that is not an ASCII letter or whitespace;
       digits, punctuation and accented letters are dropped as well.
    5. Collapse whitespace runs into one space and strip both ends.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string; empty if nothing but removable content was given.
    """
    # Escapes go first: the non-letter filter below would otherwise strip the
    # backslash and digits but leave stray letters such as "x" or "u" behind.
    text = _ESCAPED_CHAR_RE.sub('', text)
    text = _ESCAPED_NEWLINE_RE.sub(' ', text)

    # URLs must be removed before punctuation is stripped, while they are
    # still a single whitespace-delimited token.
    text = _URL_RE.sub('', text)

    text = _NON_LETTER_RE.sub('', text)

    return _WHITESPACE_RUN_RE.sub(' ', text).strip()