LeireTedCas committed on
Commit
1941e58
1 Parent(s): a2504f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -34
app.py CHANGED
@@ -24,40 +24,19 @@ xlrd .xlsx .Element_has_iter =True #line:26
24
 
25
  def remove_non_printable(value):
26
  if isinstance(value, str):
27
- print("Leire")
28
- # Eliminar \n, \t, \xa0 por un espacio en blanco
29
- #value = re.sub(r'[\n\t\xa0]', ' ', value)
30
- # Eliminar \n entre palabras
31
- #value = re.sub(r'(?<=[a-zA-Z])\n(?=[a-zA-Z])', ' ', value)
32
- # Eliminar \t entre letras
33
- #value = re.sub(r'(?<=[a-zA-Z])\t(?=[a-zA-Z])', ' ', value)
34
- # Eliminar \xa0 entre letras
35
- #value = re.sub(r'(?<=[a-zA-Z])\xa0(?=[a-zA-Z])', ' ', value)
36
- # Eliminar \n antes o después de ciertos caracteres
37
- #value = re.sub(r'\n(?=[.,;:!?¿)\d])|\n(?<=[.,;:!?¿)\d])', ' ', value)
38
- # Eliminar \t antes o después de ciertos caracteres
39
- #value = re.sub(r'\t(?=[.,;:!?¿)\d])|\t(?<=[.,;:!?¿)\d])', ' ', value)
40
- # Eliminar \xa0 antes o después de ciertos caracteres
41
- #value = re.sub(r'\xa0(?=[.,;:!?¿)\d])|\xa0(?<=[.,;:!?¿)\d])', ' ', value)
42
- # Eliminar \n específico después de un punto y antes \
43
- #value = re.sub(r'\.\n(?=\\)', '. ', value)
44
- # Eliminar \t específico después de un punto y antes \
45
- #value = re.sub(r'\.\t(?=\\)', '. ', value)
46
- # Eliminar \xa0 específico después de un punto y antes \
47
- #value = re.sub(r'\.\xa0(?=\\)', '. ', value)
48
- #return value
49
- #return re.sub(r'[\n[\n\t\xa0]*\t[\n\t\xa0]*\xa0[\n\t\xa0]*]',' ',value)
50
- #return re.sub(r'[^\x20-\x7E\n\t\xa0]|(?<=\w)\n|(?<=\w)\t', '', value)
51
- #return re.sub(r'\n', 'WW', value)
52
- #return re.sub(r'[\xa0\n\t\s]+|(?<=[.,;:!?)])\\n|\n(?<=\w)\\n|\n(?<=\w)|(?<=[.,;:!?)])\\t|(?<=\w)\\t|\t(?<=\w)\\t|\t(?<=\w)|(?<=[.,;:!?)])\\xa0|(?<=\w)\\xa0|\xa0(?<=\w)\\xa0|\xa0(?<=\w)', ' ', value)
53
- #return re.sub(r'(?<=\w)[\n\t\xa0]+|[\n\t\xa0]+(?=\w)|^\s*[\n\t\xa0]+|[\n\t\xa0]+$|(?<=[.,:?)¿!])[\n\t\xa0]+|\b[\n\t\xa0]+\b', ' ', value)
54
- #return re.sub(r'\s*[\n\t\xa0]+\s*', ' ', value)
55
- #return re.sub(r'[\xa0\n\t\s]|(?<=\w)\\n|(?<=[.,;:!?¿])\\n|\n(?<=\w)\\n|\n(?<=\w)|\n(?<=[.,;:!?¿])|\t(?<=[.,;:!?¿])|(?<=[.,;:!?¿])\\t|(?<=\w)\\t|\t(?<=\w)\\t|\t(?<=\w)|(?<=[.,;:!?¿])\\xa0|(?<=\w)\\xa0|\xa0(?<=\w)\\xa0|\xa0(?<=\w)\xa0(?<=[.,;:!?¿])|', 'ww', value)
56
- #return re.sub(r'\s*[\n\t\xa0]+\s*', ' ', value).strip()
57
- #value = value.replace("\xa0", "").replace("\n", "").replace("\t", "")
58
- #value = re.sub(r'[\xa0]*|(?<=\s)\\xa0(?=\s)|(?<=\s)\\xa0(?=\S)|(?<=\S)\\xa0(?=\s)|(?<=\S)\\xa0(?=\S)', ' ', value)
59
- #value = re.sub(r'[\n]*|(?<=\s)\\n(?<=\s)|(?<=\s)\\n(?<=\S)|(?<=\S)\\n(?<=\s)|(?<=\S)\\n(?<=\S)', ' ', value)
60
- #value = re.sub(r'[\t]*|(?<=\s)\\t(?<=\s)|(?<=\s)\\t(?<=\S)|(?<=\S)\\t(?<=\s)|(?<=\S)\\t(?<=\S)', ' ', value)
61
  return value
62
  return value
63
 
 
24
 
25
  def remove_non_printable(value):
26
  if isinstance(value, str):
27
+ value = re.sub(r'\n(?=[^\s\d])|(?<=[^\s\d])\\n', ' ', value)
28
+ value = re.sub(r'\t(?=[^\s\d])|(?<=[^\s\d])\\t', ' ', value)
29
+ value = re.sub(r'\xa0(?=[^\s\d])|(?<=[^\s\d])\\xa0', ' ', value)
30
+ value = re.sub(r'\?\\n¿', '? ¿', value)
31
+ value = re.sub(r'\?\\t¿', '? ¿', value)
32
+ value = re.sub(r'\?\\xa0¿', '? ¿', value)
33
+ value = re.sub(r'(\d)\\t', r'\1 ', value)
34
+ value = re.sub(r'(\d)\\n', r'\1 ', value)
35
+ value = re.sub(r'(\d)\\xa0', r'\1 ', value)
36
+ value = re.sub(r'(\s)+\\t', ' ', value)
37
+ value = re.sub(r'(\s)+\\n', ' ', value)
38
+ value = re.sub(r'(\s)+\\xa0', ' ', value)
39
+ value = re.sub(r'\?\s*\\u200b+', '? ', value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  return value
41
  return value
42