"""Rewrite digit sequences in Catalan (``ca``) text using a Soros program.

At import time this module reads ``ca.sor`` from its own directory and hands
the contents to ``text.soros.compile``.  The resulting object is kept in
``num2text`` and is used by every substitution callback below through its
``run`` method.
"""
import io
import pathlib
import re

from text.soros import compile

# Directory that contains this module; ``ca.sor`` is looked up next to it.
filepath = pathlib.Path(__file__).parent.absolute()

# Executed on import: load the program text and compile it once.
with io.open(f"{filepath}/ca.sor", 'r', encoding="utf-8") as prg:
    num2text = compile(prg.read(), 'ca')

# A digit, then one or more digits/dots, then exactly three digits.
_separador_milers_re = re.compile(r'([0-9][0-9\.]+[0-9]{3})')
# Digits, a comma, digits.
_decimal_re = re.compile(r'([0-9]+\,[0-9]+)')
# Digits immediately followed by one or more of the listed suffixes and a
# word boundary.  The ms/mp/fs/fp name endings presumably stand for
# masculine/feminine singular/plural -- see the matching callbacks.
_ordinal_ms_re = re.compile(r'([0-9]+)(r|er|n|on|t|rt|è|e|ne|nè)+(\b)')
_ordinal_mp_re = re.compile(r'([0-9]+)(rs|ns|ts|ns)+(\b)')
_ordinal_fs_re = re.compile(r'([0-9]+)(a|ra|na|ta)+(\b)')
_ordinal_fp_re = re.compile(r'([0-9]+)(es)+(\b)')
# Any run of digits.
_cardinal_re = re.compile(r'[0-9]+')
# digits "/" digits, delimited by word boundaries.
_fraccions_re = re.compile(r'(\b)([0-9]+\/[0-9]+)(\b)')
# One or two digits, ":", two digits, delimited by word boundaries.
_hores_re = re.compile(r'(\b)([0-9]{1,2}):([0-9]{2})(\b)')


def _esborra_separador_milers(m):
    """Return the matched number with every ``.`` removed."""
    grouped_number = m.group(1)
    return grouped_number.replace('.', '')


def _num2text(m):
    """Return ``num2text.run`` applied to the whole matched text."""
    return num2text.run(m.group())


def _ordinal_ms(m):
    """Run the ``ordinal`` command on the matched digits.

    The suffix that followed the digits is dropped; the text captured by the
    trailing boundary group is appended to the result.
    """
    digits, _suffix, tail = m.groups()
    return num2text.run(f"ordinal {digits}") + tail


def _ordinal_mp(m):
    """Run the ``ordinal-masculine-plural`` command on the matched digits.

    The suffix that followed the digits is dropped; the text captured by the
    trailing boundary group is appended to the result.
    """
    digits, _suffix, tail = m.groups()
    return num2text.run(f"ordinal-masculine-plural {digits}") + tail


def _ordinal_fs(m):
    """Run the ``ordinal-feminine`` command on the matched digits.

    The suffix that followed the digits is dropped; the text captured by the
    trailing boundary group is appended to the result.
    """
    digits, _suffix, tail = m.groups()
    return num2text.run(f"ordinal-feminine {digits}") + tail


def _ordinal_fp(m):
    """Run the ``ordinal-feminine-plural`` command on the matched digits.

    The suffix that followed the digits is dropped; the text captured by the
    trailing boundary group is appended to the result.
    """
    digits, _suffix, tail = m.groups()
    return num2text.run(f"ordinal-feminine-plural {digits}") + tail


def _fraccions(m):
    """Run the ``fraction`` command on a matched ``digits/digits`` token."""
    head, fraction, tail = m.groups()
    return head + num2text.run(f"fraction {fraction}") + tail


def _hores(m):
    """Convert both sides of a ``H:MM`` token and join them with ``" i "``."""
    head, hour, minute, tail = m.groups()
    hour_text = num2text.run(hour)
    minute_text = num2text.run(minute)
    return head + hour_text + " i " + minute_text + tail


def normalize_numbers_ca(text):
    """Replace the numeric expressions in *text* with ``num2text`` output.

    The substitutions are applied one after another, each on the result of
    the previous one, in this order: dots are stripped from dot-grouped
    numbers, then comma decimals, the four ordinal forms, fractions and
    ``H:MM`` tokens are converted, and finally every remaining run of digits
    is converted.  The last pattern matches any digit run, so the more
    specific passes have to come before it.

    Args:
        text: The string to process.

    Returns:
        The string obtained after all substitutions have been applied.
    """
    # Order is significant; see the docstring.
    passes = (
        (_separador_milers_re, _esborra_separador_milers),
        (_decimal_re, _num2text),
        (_ordinal_ms_re, _ordinal_ms),
        (_ordinal_mp_re, _ordinal_mp),
        (_ordinal_fs_re, _ordinal_fs),
        (_ordinal_fp_re, _ordinal_fp),
        (_fraccions_re, _fraccions),
        (_hores_re, _hores),
        (_cardinal_re, _num2text),
    )
    for pattern, handler in passes:
        text = pattern.sub(handler, text)
    return text