radiobee-aligner / radiobee /detect_alt.py
freemt
Update slow-track for more lang pairs
7fd4e54
raw
history blame
No virus
1.83 kB
"""Detect language via polyglot and fastlid."""
# pylint: disable=
from typing import Any, Callable, List, Optional
from polyglot.text import Detector
import polyglot.detect.base
from polyglot.detect.base import UnknownLanguage
from fastlid import fastlid
from logzero import logger
# Raise the level of polyglot's detector logger to ERROR so that its
# lower-severity messages are not emitted when detect() is called.
polyglot.detect.base.logger.setLevel("ERROR")
def with_func_attrs(**attrs: Any) -> Callable:
    """Build a decorator that attaches ``attrs`` to the decorated callable.

    Each keyword argument becomes an attribute of the same name on the
    callable. The callable itself is returned (not a wrapper), so its
    identity and behavior are unchanged.
    """

    def _attach(fct: Callable) -> Callable:
        # Set every requested attribute directly on the target callable.
        for name in attrs:
            setattr(fct, name, attrs[name])
        return fct

    return _attach
# @with_func_attrs(set_languages=None)
# def detect(text: str) -> str:
def detect(text: str, set_languages: Optional[List[str]] = None) -> str:
    """Detect the language of ``text`` via polyglot, falling back to fastlid.

    polyglot's ``Detector`` is consulted first. When it succeeds, its
    ``(code[:2], confidence)`` pairs are stored on ``detect.lang_conf`` and
    the first pair's code is used as the candidate language.

    Args:
        text: The text whose language is to be detected.
        set_languages: Optional list/tuple of acceptable language codes.
            When ``None``, polyglot's answer is returned as is. Otherwise,
            if polyglot's answer is not among these codes, fastlid is asked
            to choose with ``fastlid.set_languages`` set to this value.

    Returns:
        The detected language code. ``"en"`` is the default when polyglot
        fails and no candidate languages are available.
    """
    try:
        lang_conf = [
            (elm.code[:2], elm.confidence) for elm in Detector(text).languages
        ]
        # Expose the full ranking as a function attribute for inspection.
        detect.lang_conf = lang_conf
        lang = lang_conf[0][0]
    except UnknownLanguage:
        # Prefer the first candidate language; an empty or missing
        # set_languages falls back to "en" instead of raising IndexError.
        def_lang = set_languages[0] if set_languages else "en"
        logger.warning(
            " UnknownLanguage exception: probably snippet too short, setting to %s",
            def_lang,
        )
        lang = def_lang
    except Exception as exc:
        # Best effort: any other detector failure is logged and "en" is used.
        logger.error(exc)
        lang = "en"

    # No restriction requested: trust polyglot.text.Detector.
    if set_languages is None:
        return lang

    if not isinstance(set_languages, (list, tuple)):
        # Warn only; the value is still used below, as before.
        logger.warning(
            "set_languages (%s) ought to be a list/tuple", set_languages
        )

    if lang in set_languages:
        return lang

    # polyglot's answer is outside the allowed set: let fastlid pick.
    # fastlid is configured through an attribute set on the function itself.
    fastlid.set_languages = set_languages
    lang, _ = fastlid(text)
    return lang