Update preprocesamiento_articulos.py
Browse files
preprocesamiento_articulos.py
CHANGED
@@ -30,6 +30,10 @@ def remove_html_markup(s):
|
|
30 |
out = out + c
|
31 |
|
32 |
return out
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def eliminar_puntuacion(articulo):
|
35 |
deletetion_symbols = ['!','(',')',"'",'-','[',']','{','}',';',':','"','“','’','”',"'",'`','‘','``','\\' ,'/','|',',','|','<','>','.','..','...','?','@',"#",'$','^','&','*','_','~','+','%','=','¿','¡',"''"]
|
|
|
30 |
out = out + c
|
31 |
|
32 |
return out
|
33 |
+
|
34 |
+
def remove_URL(s):
|
35 |
+
"""Remove URLs from a sample string"""
|
36 |
+
return re.sub(r"http\S+", "", s)
|
37 |
|
38 |
def eliminar_puntuacion(articulo):
|
39 |
deletetion_symbols = ['!','(',')',"'",'-','[',']','{','}',';',':','"','“','’','”',"'",'`','‘','``','\\' ,'/','|',',','|','<','>','.','..','...','?','@',"#",'$','^','&','*','_','~','+','%','=','¿','¡',"''"]
|