File size: 2,369 Bytes
68a8c29
6570b48
68a8c29
5a1315d
3689459
39897d9
a0fdec6
 
abed01c
1392687
39897d9
1392687
39897d9
 
e3d850e
 
ff7c666
67daf03
cde8835
e3ca56d
726336c
cde8835
e3d850e
21247cf
606d796
a397155
0fef655
 
 
 
d6aa39c
0fef655
947dc2d
0fef655
2f590b1
16fc4ca
b65ecd9
878ffe0
 
163a18d
1392687
431f228
878ffe0
 
1392687
 
 
 
2c50de3
 
293724f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
import langcodes

# Reference: https://huggingface.co/blog/streamlit-spaces
# Free-text input box; "english" is the value passed as the initial content.
langtext = st.text_input(
    "language lookup using https://github.com/rspeer/langcodes, see also https://r12a.github.io/app-subtags/",
    "english",
)

st.write(
    "Checking whether the tag is valid. That is, the language, script, territory, and variants (if present) are all tags that have meanings assigned by IANA."
)

# Pick the verdict text from langcodes' validity check on the raw input,
# then emit it with a single write.
validity_message = (
    f"...True! '{langtext}' parses meaningfully as a language tag according to IANA."
    if langcodes.tag_is_valid(langtext)
    else f"...False! '{langtext}' doesn't parse meaningfully as a language tag according to IANA, some of its subcomponents may be invalid or it might be a natural language description."
)
st.write(validity_message)
    

# First attempt: interpret the input directly as a language tag.
# Only the parse itself sits in the `try`, so the handler cannot mask an
# unrelated error raised while reporting.
try:
    lang = langcodes.Language.get(langtext)
except langcodes.LanguageTagError:
    # The input could not be parsed as a tag at all.
    st.write("Could not lookup code directly, attempting to search for it as a natural language string.")
    lang = None
else:
    # A tag can parse and still not name a recognised language. This script
    # treats a display name containing "unknown" as a failed direct lookup and
    # clears `lang` so the natural-language search runs instead.
    direct_name = lang.display_name()
    if "unknown" in direct_name.lower():
        st.write(f"Attempting to lookup the code directly gives us '{direct_name}', attempting to search for it as a natural language string.")
        lang = None
  


# Fallback: the direct tag lookup produced nothing usable, so search for the
# input as a language name written in natural language.
if lang is None:
    try:
        lang = langcodes.find(langtext)
    except LookupError:
        # The assignment above never happened, so `lang` is still None and the
        # `if lang is not None` report further down is skipped.
        st.write("Unable to look up language code.")
    else:
        st.write(f"natural language search found the following BCP-47 tag: {lang}")


def _alpha3_or_note(language, variant):
    """Return the alpha3 code of *language* for *variant*, or a placeholder.

    `Language.to_alpha3` raises LookupError when no alpha3 code is known for
    the language; without this guard that exception would abort the report
    partway through.
    """
    try:
        return language.to_alpha3(variant=variant)
    except LookupError:
        return "(no alpha3 code available)"


# Report what langcodes knows about the resolved language, if one was found.
if lang is not None:
    display = lang.display_name()

    st.write(f"Best-match BCP-47 tag for '{langtext}', according to the langcodes library: {lang}")
    st.write(f"Breakdown of tag components: {lang.describe()}")
    st.write(f"Display name for {lang}: {display}")
    st.write(f"Autonym for {lang}: {lang.autonym()}")

    # NOTE(review): the langcodes docs describe the B/T alpha3 variants as
    # ISO 639-2 codes, while the labels below say "ISO 639-3" -- confirm the
    # intended wording before changing the user-facing text.
    b_variant = _alpha3_or_note(lang, 'B')
    t_variant = _alpha3_or_note(lang, 'T')
    st.write(f"ISO 639-3 'alpha3' code, 'terminology' variant (deprecated): {t_variant}")
    st.write(f"ISO 639-3 'alpha3' code, 'bibliographic' variant (deprecated): {b_variant}")

    broader_tags = lang.broader_tags()
    st.write(f"Broader tags for this language, if any: {broader_tags}")
    st.write(f"Correct, standardized, BCP-47 tag for {langtext}, according to the langcodes library: {langcodes.standardize_tag(lang)}")