import unicodedata
from typing import Literal
|
|
# Closed set of labels that detect_language() can return.
Lang = Literal["vi", "en", "mix", "unknown"]


# Lowercase letters used by Vietnamese that are not plain ASCII letters.
# The literal is NFC-normalised so the set always holds single precomposed
# code points, however an editor or VCS happened to encode this file.
VI_CHARS = set(
    unicodedata.normalize(
        "NFC",
        "ăâđêôơưáàảãạấầẩẫậắằẳẵặéèẻẽẹếềểễệíìỉĩịóòỏõọốồổỗộớờởỡợúùủũụứừửữựýỳỷỹỵ",
    )
)
# Plain ASCII lowercase letters; Vietnamese spelling uses most of these too.
EN_CHARS = set("abcdefghijklmnopqrstuvwxyz")


def detect_language(text: str) -> Lang:
    """Classify *text* as Vietnamese, English, mixed, or unknown.

    The decision is made per word rather than per character, because
    Vietnamese words are themselves spelled mostly with ASCII letters. A word
    (a maximal run of letters) is Vietnamese evidence if it contains any
    letter from ``VI_CHARS``; otherwise it is English evidence if it contains
    any letter from ``EN_CHARS``.

    Args:
        text: Input in any letter case and any Unicode normalisation form.

    Returns:
        ``"mix"`` when both kinds of word occur, ``"vi"`` or ``"en"`` when only
        one kind occurs, and ``"unknown"`` when neither occurs (for example
        digits, punctuation, or an empty string).

    Note:
        Vietnamese words written without any diacritic (such as "con") cannot
        be told apart from English here and count as English evidence.
    """
    # Compose "base letter + combining mark" pairs so they match VI_CHARS.
    folded = unicodedata.normalize("NFC", text).lower()
    # Turn every non-letter into a separator, leaving runs of letters as words.
    words = "".join(ch if ch.isalpha() else " " for ch in folded).split()

    has_vi = False
    has_en = False
    for word in words:
        if not VI_CHARS.isdisjoint(word):
            has_vi = True
        elif not EN_CHARS.isdisjoint(word):
            has_en = True

    if has_vi and has_en:
        return "mix"
    if has_vi:
        return "vi"
    if has_en:
        return "en"
    return "unknown"
|
|
if __name__ == "__main__":
    # Manual demo: print the detected label for a few hand-picked inputs.
    samples = (
        "Đi tới phòng khách",
        "Turn on the lights",
        "Đi tới living room",
        "12345 !!!",
    )
    for sample in samples:
        verdict = detect_language(sample)
        print(sample, "->", verdict)
|
|