hexgrad committed on
Commit
dd3c146
•
1 Parent(s): 59ea41a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -48,19 +48,29 @@ def parens_to_angles(s):
48
  def split_num(num):
49
  num = num.group()
50
  if '.' in num:
 
51
  a, b = num.split('.')
52
  return ' point '.join([a, ' '.join(b)])
 
 
 
 
 
 
 
 
 
53
  year = int(num[:4])
54
  if year < 1100 or year % 1000 < 10:
55
  return num
56
- left, right = num[:2], num[2:4],
57
  s = 's' if num.endswith('s') else ''
58
  if 100 <= year % 1000 <= 999:
59
- if right == '00':
60
  return f'{left} hundred{s}'
61
- elif int(right) < 10:
62
  return f'{left} oh {right}{s}'
63
- return f'{left} {right}{s}'
64
 
65
  def normalize(text):
66
  # TODO: Custom text normalization rules?
@@ -75,11 +85,9 @@ def normalize(text):
75
  text = re.sub(r'[^\S \n]', ' ', text)
76
  text = re.sub(r' +', ' ', text)
77
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
78
- text = re.sub(r'\d*\.\d+|\b\d{4}s?\b', split_num, text)
79
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
80
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
81
- text = re.sub(r'(?<!:)\b(?:[1-9]|1[0-2]):00\b(?!:)', lambda m: m.group()[:-3] + " o'clock", text)
82
- text = re.sub(r'(?<=\d):(?=\d)', ' ', text)
83
  text = re.sub(r'(?<=\d)S', ' S', text)
84
  text = re.sub(r"(?<=[A-Z])'?s", lambda m: m.group().upper(), text)
85
  text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
 
def split_num(num):
    """Spell out a matched decimal, clock time, or year for speech.

    Intended as the replacement callback of ``re.sub``: it receives a match
    object and returns the text to substitute for it.

    Args:
        num: A regex match object whose ``group()`` is one of
            - a decimal such as ``"3.14"`` (contains ``.``),
            - a clock time such as ``"7:05"`` (contains ``:``),
            - a four-digit year with an optional trailing ``s``
              such as ``"1999"`` or ``"1990s"``.

    Returns:
        The spoken-style rewrite, e.g. ``"3 point 1 4"``, ``"7 oh 5"``,
        ``"12 o'clock"``, ``"19 99"``, ``"19 oh 5"``, ``"19 hundreds"``.
        Years below 1100, and years whose last three digits are below 10
        (e.g. ``2000``, ``2005``), are returned unchanged.
    """
    token = num.group()

    if '.' in token:
        # Decimal: keep the integer part, read fractional digits one by one.
        whole, fraction = token.split('.')
        return ' point '.join([whole, ' '.join(fraction)])

    if ':' in token:
        # Time: converting to int drops the zero padding of the minutes.
        hours, minutes = (int(part) for part in token.split(':'))
        if minutes == 0:
            return f"{hours} o'clock"
        if minutes < 10:
            return f'{hours} oh {minutes}'
        return f'{hours} {minutes}'

    # Year: only the first four characters are digits; a trailing 's' may follow.
    year = int(token[:4])
    if year < 1100 or year % 1000 < 10:
        return token
    left, right = token[:2], int(token[2:4])
    s = 's' if token.endswith('s') else ''
    if 100 <= year % 1000 <= 999:
        if right == 0:
            return f'{left} hundred{s}'
        if right < 10:
            return f'{left} oh {right}{s}'
    # Everything else is read as two two-digit halves.
    return f'{left} {right:02}{s}'
74
 
75
  def normalize(text):
76
  # TODO: Custom text normalization rules?
 
85
  text = re.sub(r'[^\S \n]', ' ', text)
86
  text = re.sub(r' +', ' ', text)
87
  text = re.sub(r'(?<=\n) +(?=\n)', '', text)
88
+ text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
89
  text = re.sub(r'(?<=\d),(?=\d)', '', text)
90
  text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text) # TODO: could be minus
 
 
91
  text = re.sub(r'(?<=\d)S', ' S', text)
92
  text = re.sub(r"(?<=[A-Z])'?s", lambda m: m.group().upper(), text)
93
  text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)