import streamlit as st
import langcodes

# https://huggingface.co/blog/streamlit-spaces
# Free-text query box, pre-filled with "english". The value may be either a
# language tag or a plain-language name; both cases are handled further down.
langtext = st.text_input(
  "language lookup using https://github.com/rspeer/langcodes, see also https://r12a.github.io/app-subtags/",
  "english",
)

st.write("Checking whether the tag is valid. That is, the language, script, territory, and variants (if present) are all tags that have meanings assigned by IANA.")

# Pick the verdict message first, then emit it with a single write call.
validity_verdict = (
  f"...True! '{langtext}' parses meaningfully as a language tag according to IANA."
  if langcodes.tag_is_valid(langtext)
  else f"...False! '{langtext}' doesn't parse meaningfully as a language tag according to IANA, some of its subcomponents may be invalid or it might be a natural language description."
)
st.write(validity_verdict)
    

# First attempt: interpret the input directly as a language tag.
# `lang` ends up as a Language object on success, or None when the input has
# to be retried as a natural-language name.
try:
  lang = langcodes.Language.get(langtext)
except langcodes.LanguageTagError:
  st.write("Could not lookup code directly, attempting to search for it as a natural language string.")
  lang = None
else:
  direct_name = lang.display_name()
  # A display name containing "unknown" is treated as a failed lookup even
  # though the text parsed as a tag, so discard the result and fall through.
  if "unknown" in direct_name.lower():
    st.write(f"Attempting to lookup the code directly gives us '{direct_name}', attempting to search for it as a natural language string.")
    lang = None
  

# Second attempt: the direct lookup produced nothing usable, so treat the
# input as the name of a language and search for it.
if lang is None:
  try:
    name_match = langcodes.find(langtext)
  except LookupError:
    name_match = None

  if name_match is None:
    # `lang` stays None, which the rest of the script checks for.
    st.write("Unable to look up language code.")
  else:
    lang = name_match
    st.write(f"natural language search found the following BCP-47 tag: {lang}")


#st.write(f"langcodes found the following tag: {type(found)}") # a Language object
# Report what langcodes knows about the resolved language. Nothing is shown
# when both lookup attempts above left `lang` as None.
if lang is not None:
  st.write(f"Best-match BCP-47 tag for '{langtext}', according to the langcodes library: {lang}")
  st.write(f"Breakdown of tag components: {lang.describe()}")
  st.write(f"Display name for {lang}: {lang.display_name()}")
  st.write(f"Autonym for {lang}: {lang.autonym()}")

  # Language.to_alpha3() is documented to raise LookupError when no
  # three-letter code is known for the language. Catch it so the remaining
  # lines of the report are still rendered instead of the app aborting here.
  try:
    t_variant = lang.to_alpha3(variant='T')
    b_variant = lang.to_alpha3(variant='B')
  except LookupError:
    st.write(f"No three-letter 'alpha3' code is known for {lang}.")
  else:
    # NOTE(review): the langcodes docs describe these B/T codes as ISO 639-2;
    # the "ISO 639-3" wording below is kept as-is -- confirm the intended label.
    st.write(f"ISO 639-3 'alpha3' code, 'terminology' variant (deprecated): {t_variant}")
    st.write(f"ISO 639-3 'alpha3' code, 'bibliographic' variant (deprecated): {b_variant}")

  broader_tags = lang.broader_tags()
  st.write(f"Broader tags for this language, if any: {broader_tags}")
  st.write(f"Correct, standardized, BCP-47 tag for {langtext}, according to the langcodes library: {langcodes.standardize_tag(lang)}")