Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -48,19 +48,29 @@ def parens_to_angles(s):
|
|
48 |
def split_num(num):
|
49 |
num = num.group()
|
50 |
if '.' in num:
|
|
|
51 |
a, b = num.split('.')
|
52 |
return ' point '.join([a, ' '.join(b)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
year = int(num[:4])
|
54 |
if year < 1100 or year % 1000 < 10:
|
55 |
return num
|
56 |
-
left, right = num[:2], num[2:4]
|
57 |
s = 's' if num.endswith('s') else ''
|
58 |
if 100 <= year % 1000 <= 999:
|
59 |
-
if right ==
|
60 |
return f'{left} hundred{s}'
|
61 |
-
elif
|
62 |
return f'{left} oh {right}{s}'
|
63 |
-
return f'{left} {right}{s}'
|
64 |
|
65 |
def normalize(text):
|
66 |
# TODO: Custom text normalization rules?
|
@@ -75,11 +85,9 @@ def normalize(text):
|
|
75 |
text = re.sub(r'[^\S \n]', ' ', text)
|
76 |
text = re.sub(r' +', ' ', text)
|
77 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
78 |
-
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b', split_num, text)
|
79 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
80 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
81 |
-
text = re.sub(r'(?<!:)\b(?:[1-9]|1[0-2]):00\b(?!:)', lambda m: m.group()[:-3] + " o'clock", text)
|
82 |
-
text = re.sub(r'(?<=\d):(?=\d)', ' ', text)
|
83 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
84 |
text = re.sub(r"(?<=[A-Z])'?s", lambda m: m.group().upper(), text)
|
85 |
text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
|
|
|
48 |
def split_num(num):
    """Rewrite a matched decimal, clock time, or four-digit year as spaced text.

    Written as a ``re.sub`` replacement callback: ``num`` must provide a
    ``.group()`` method returning the matched text.

    The matched text is classified by the characters it contains:

    * contains ``.`` -> decimal: the part before the dot is kept as is and the
      digits after it are separated by spaces, joined with ``' point '``
      (``'3.14'`` -> ``'3 point 1 4'``).
    * contains ``:`` -> time: ``H:00`` becomes ``"H o'clock"``, minutes below
      ten become ``'H oh M'``, anything else ``'H M'``.
    * otherwise -> year, optionally followed by ``s``: the first four
      characters are split into two two-digit halves (``'1999'`` -> ``'19 99'``,
      ``'1905'`` -> ``'19 oh 5'``, ``'1900s'`` -> ``'19 hundreds'``).  Values
      below 1100, or whose last three digits are below 10 (e.g. ``'2005'``),
      are returned unchanged.

    Returns:
        The replacement string for the match.
    """
    num = num.group()
    if '.' in num:
        # Decimal
        a, b = num.split('.')
        # ' '.join(b) puts a space between each fractional digit.
        return ' point '.join([a, ' '.join(b)])
    elif ':' in num:
        # Time
        h, m = [int(n) for n in num.split(':')]
        if m == 0:
            return f"{h} o'clock"
        elif m < 10:
            return f'{h} oh {m}'
        return f'{h} {m}'
    # Year
    year = int(num[:4])
    if year < 1100 or year % 1000 < 10:
        # Left untouched: not split into two halves (e.g. 1000, 2005).
        return num
    left, right = num[:2], int(num[2:4])
    s = 's' if num.endswith('s') else ''
    if 100 <= year % 1000 <= 999:
        if right == 0:
            return f'{left} hundred{s}'
        elif right < 10:
            return f'{left} oh {right}{s}'
    return f'{left} {right:02}{s}'
|
74 |
|
75 |
def normalize(text):
|
76 |
# TODO: Custom text normalization rules?
|
|
|
85 |
text = re.sub(r'[^\S \n]', ' ', text)
|
86 |
text = re.sub(r' +', ' ', text)
|
87 |
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
88 |
+
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
89 |
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
90 |
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
|
|
|
|
|
91 |
text = re.sub(r'(?<=\d)S', ' S', text)
|
92 |
text = re.sub(r"(?<=[A-Z])'?s", lambda m: m.group().upper(), text)
|
93 |
text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
|