| { | |
| "total_toponyms": 66924548, | |
| "in_training_namespaces": 57593810, | |
| "with_ipa": 31113585, | |
| "with_panphon_embedding": 31113585, | |
| "panphon_coverage_pct": 54.02244616218305, | |
| "from_db_cache": 31113562, | |
| "from_precomputed": 2, | |
| "from_epitran": 21, | |
| "by_script": { | |
| "CYRILLIC": 3614762, | |
| "LATIN": 55617677, | |
| "CJK": 2973525, | |
| "ARABIC": 2098089, | |
| "HEBREW": 151960, | |
| "KATAKANA": 340555, | |
| "MALAYALAM": 68176, | |
| "HIRAGANA": 151980, | |
| "OTHER": 342642, | |
| "GEORGIAN": 105902, | |
| "GREEK": 217997, | |
| "DEVANAGARI": 166957, | |
| "ARMENIAN": 153467, | |
| "THAI": 251458, | |
| "KANNADA": 43155, | |
| "HANGUL": 393996, | |
| "GUJARATI": 21428, | |
| "BENGALI": 106896, | |
| "TAMIL": 52486, | |
| "TELUGU": 51440 | |
| }, | |
| "by_script_lang_ipa": { | |
| "LATIN:en": 8039689, | |
| "LATIN:fr": 2311837, | |
| "LATIN:nl": 2292068, | |
| "LATIN:de": 2063027, | |
| "LATIN:sv": 1715947, | |
| "LATIN:es": 1518527, | |
| "CJK:zh": 1306961, | |
| "LATIN:id": 931192, | |
| "LATIN:tr": 843744, | |
| "LATIN:it": 815130, | |
| "CYRILLIC:ru": 803734, | |
| "LATIN:pt": 711346, | |
| "LATIN:pl": 655880, | |
| "LATIN:cs": 593385, | |
| "ARABIC:fa": 576743, | |
| "LATIN:fi": 532010, | |
| "CYRILLIC:uk": 435644, | |
| "LATIN:no": 428240, | |
| "ARABIC:ar": 412306, | |
| "LATIN:ro": 375292, | |
| "KATAKANA:ja": 310410, | |
| "LATIN:da": 297142, | |
| "LATIN:ms": 285578, | |
| "LATIN:vi": 267432, | |
| "LATIN:hu": 247134, | |
| "CYRILLIC:bg": 235749, | |
| "CYRILLIC:sr": 235582, | |
| "HANGUL:ko": 228523, | |
| "THAI:th": 210310, | |
| "GREEK:el": 168827, | |
| "ARMENIAN:hy": 143819, | |
| "HEBREW:he": 127337, | |
| "LATIN:sw": 113131, | |
| "ARABIC:ur": 109688, | |
| "GEORGIAN:ka": 86021, | |
| "BENGALI:bn": 77935, | |
| "LATIN:la": 77703, | |
| "CYRILLIC:mk": 61607, | |
| "DEVANAGARI:hi": 60800, | |
| "MALAYALAM:ml": 53546, | |
| "CJK:wuu": 48883, | |
| "TAMIL:ta": 47700, | |
| "TELUGU:te": 47617, | |
| "HIRAGANA:ja": 47533, | |
| "CJK:gan": 37097, | |
| "CJK:yue": 31345, | |
| "DEVANAGARI:mr": 24452, | |
| "KANNADA:kn": 20962, | |
| "GUJARATI:gu": 20329, | |
| "LATIN:yue": 13719, | |
| "DEVANAGARI:ne": 10249, | |
| "CJK:ko": 2060, | |
| "LATIN:wuu": 254, | |
| "KATAKANA:zh": 69, | |
| "LATIN:gan": 60, | |
| "CYRILLIC:zh": 53, | |
| "OTHER:zh": 51, | |
| "OTHER:ko": 21, | |
| "HIRAGANA:yue": 15, | |
| "ARABIC:yue": 14, | |
| "HANGUL:zh": 14, | |
| "HIRAGANA:zh": 14, | |
| "ARABIC:zh": 13, | |
| "OTHER:he": 10, | |
| "GREEK:zh": 9, | |
| "THAI:yue": 7, | |
| "THAI:zh": 7, | |
| "CYRILLIC:yue": 7, | |
| "OTHER:yue": 5, | |
| "KATAKANA:ko": 4, | |
| "HEBREW:yue": 4, | |
| "THAI:wuu": 3, | |
| "CYRILLIC:ko": 3, | |
| "BENGALI:yue": 3, | |
| "DEVANAGARI:zh": 3, | |
| "KATAKANA:yue": 3, | |
| "TAMIL:yue": 2, | |
| "ARMENIAN:yue": 2, | |
| "TELUGU:zh": 2, | |
| "HANGUL:yue": 2, | |
| "CYRILLIC:wuu": 2, | |
| "GREEK:ko": 1, | |
| "OTHER:wuu": 1, | |
| "ARABIC:ko": 1, | |
| "GEORGIAN:yue": 1, | |
| "HEBREW:zh": 1, | |
| "OTHER:gan": 1, | |
| "TAMIL:zh": 1 | |
| }, | |
| "training_namespaces": [ | |
| "gn", | |
| "wd", | |
| "tgn" | |
| ], | |
| "num_workers": 62, | |
| "db_engine": "DuckDB", | |
| "ipa_backends": [ | |
| "epitran", | |
| "phonikud", | |
| "charsiu_g2p" | |
| ] | |
| } |