Spaces:
Sleeping
Sleeping
LeireTedCas
committed on
Commit
•
1941e58
1
Parent(s):
a2504f3
Update app.py
Browse files
app.py
CHANGED
@@ -24,40 +24,19 @@ xlrd .xlsx .Element_has_iter =True #line:26
|
|
24 |
|
25 |
def remove_non_printable(value):
|
26 |
if isinstance(value, str):
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# Eliminar \xa0 antes o después de ciertos caracteres
|
41 |
-
#value = re.sub(r'\xa0(?=[.,;:!?¿)\d])|\xa0(?<=[.,;:!?¿)\d])', ' ', value)
|
42 |
-
# Eliminar \n específico después de un punto y antes \
|
43 |
-
#value = re.sub(r'\.\n(?=\\)', '. ', value)
|
44 |
-
# Eliminar \t específico después de un punto y antes \
|
45 |
-
#value = re.sub(r'\.\t(?=\\)', '. ', value)
|
46 |
-
# Eliminar \xa0 específico después de un punto y antes \
|
47 |
-
#value = re.sub(r'\.\xa0(?=\\)', '. ', value)
|
48 |
-
#return value
|
49 |
-
#return re.sub(r'[\n[\n\t\xa0]*\t[\n\t\xa0]*\xa0[\n\t\xa0]*]',' ',value)
|
50 |
-
#return re.sub(r'[^\x20-\x7E\n\t\xa0]|(?<=\w)\n|(?<=\w)\t', '', value)
|
51 |
-
#return re.sub(r'\n', 'WW', value)
|
52 |
-
#return re.sub(r'[\xa0\n\t\s]+|(?<=[.,;:!?)])\\n|\n(?<=\w)\\n|\n(?<=\w)|(?<=[.,;:!?)])\\t|(?<=\w)\\t|\t(?<=\w)\\t|\t(?<=\w)|(?<=[.,;:!?)])\\xa0|(?<=\w)\\xa0|\xa0(?<=\w)\\xa0|\xa0(?<=\w)', ' ', value)
|
53 |
-
#return re.sub(r'(?<=\w)[\n\t\xa0]+|[\n\t\xa0]+(?=\w)|^\s*[\n\t\xa0]+|[\n\t\xa0]+$|(?<=[.,:?)¿!])[\n\t\xa0]+|\b[\n\t\xa0]+\b', ' ', value)
|
54 |
-
#return re.sub(r'\s*[\n\t\xa0]+\s*', ' ', value)
|
55 |
-
#return re.sub(r'[\xa0\n\t\s]|(?<=\w)\\n|(?<=[.,;:!?¿])\\n|\n(?<=\w)\\n|\n(?<=\w)|\n(?<=[.,;:!?¿])|\t(?<=[.,;:!?¿])|(?<=[.,;:!?¿])\\t|(?<=\w)\\t|\t(?<=\w)\\t|\t(?<=\w)|(?<=[.,;:!?¿])\\xa0|(?<=\w)\\xa0|\xa0(?<=\w)\\xa0|\xa0(?<=\w)\xa0(?<=[.,;:!?¿])|', 'ww', value)
|
56 |
-
#return re.sub(r'\s*[\n\t\xa0]+\s*', ' ', value).strip()
|
57 |
-
#value = value.replace("\xa0", "").replace("\n", "").replace("\t", "")
|
58 |
-
#value = re.sub(r'[\xa0]*|(?<=\s)\\xa0(?=\s)|(?<=\s)\\xa0(?=\S)|(?<=\S)\\xa0(?=\s)|(?<=\S)\\xa0(?=\S)', ' ', value)
|
59 |
-
#value = re.sub(r'[\n]*|(?<=\s)\\n(?<=\s)|(?<=\s)\\n(?<=\S)|(?<=\S)\\n(?<=\s)|(?<=\S)\\n(?<=\S)', ' ', value)
|
60 |
-
#value = re.sub(r'[\t]*|(?<=\s)\\t(?<=\s)|(?<=\s)\\t(?<=\S)|(?<=\S)\\t(?<=\s)|(?<=\S)\\t(?<=\S)', ' ', value)
|
61 |
return value
|
62 |
return value
|
63 |
|
|
|
24 |
|
25 |
# Ordered (pattern, replacement) pairs used by remove_non_printable.
# Order matters: each rule runs on the output of the previous one.
#
# Inside these raw-string patterns, ``\n`` / ``\t`` / ``\xa0`` match the real
# control / no-break-space characters, while ``\\n`` / ``\\t`` / ``\\xa0`` /
# ``\\u200b`` match the literal text (a backslash followed by those letters).
_WHITESPACE_RULES = (
    # A real newline/tab/NBSP directly before a non-space, non-digit character,
    # or an escaped "\n"/"\t"/"\xa0" directly after such a character -> space.
    (r'\n(?=[^\s\d])|(?<=[^\s\d])\\n', ' '),
    (r'\t(?=[^\s\d])|(?<=[^\s\d])\\t', ' '),
    (r'\xa0(?=[^\s\d])|(?<=[^\s\d])\\xa0', ' '),
    # Escaped sequence sitting between "?" and "¿" -> single space.
    # NOTE(review): the three rules above already rewrite an escaped sequence
    # that follows "?", so these look unreachable; kept to preserve behaviour.
    (r'\?\\n¿', '? ¿'),
    (r'\?\\t¿', '? ¿'),
    (r'\?\\xa0¿', '? ¿'),
    # Escaped sequence right after a digit -> keep the digit, add a space.
    (r'(\d)\\t', r'\1 '),
    (r'(\d)\\n', r'\1 '),
    (r'(\d)\\xa0', r'\1 '),
    # Whitespace run followed by an escaped sequence -> single space.
    (r'(\s)+\\t', ' '),
    (r'(\s)+\\n', ' '),
    (r'(\s)+\\xa0', ' '),
    # "?" (plus optional whitespace) followed by the literal text "\u200b"
    # (and any extra trailing "b" characters) -> "? ".
    # NOTE(review): this does not match the actual U+200B character; confirm
    # whether the zero-width space itself was meant.
    (r'\?\s*\\u200b+', '? '),
)


def remove_non_printable(value):
    """Replace stray newline, tab and no-break-space markers with spaces.

    Both the real characters and their escaped textual forms (a backslash
    followed by ``n``, ``t`` or ``xa0``) are handled, according to the
    ordered rules in ``_WHITESPACE_RULES``.

    Args:
        value: Any object. Only ``str`` instances are processed.

    Returns:
        The cleaned string when ``value`` is a ``str``; otherwise ``value``
        itself, unchanged.
    """
    if not isinstance(value, str):
        return value
    for pattern, replacement in _WHITESPACE_RULES:
        value = re.sub(pattern, replacement, value)
    return value
|
42 |
|