cdleong committed on
Commit
54d3963
1 Parent(s): 741cd0d

minor fix on "Python"

Browse files
Files changed (1) hide show
  1. app.py +47 -26
app.py CHANGED
@@ -5,15 +5,29 @@ import urllib
5
  import requests
6
 
7
  # FEATURE: get wikipedia codes, e.g. from https://en.wikipedia.org/wiki/List_of_Wikipedias or https://meta.wikimedia.org/wiki/List_of_Wikipedias, some of which are nonstandard. Then output f"{code}.wikipedia.org"
 
8
  # TODO: fix 'knh', it has an empty ISO section. Turns out some languages only have 639-3
9
  # TODO: add in some nice things from https://docs.streamlit.io/library/cheatsheet like error codes and status messages.
10
  # TODO: add in vachan search even if lang not found
11
  # TODO: results from glottolog even if none from others
 
12
  things_to_test = [
13
  "knh", # deprecated code on ISO
14
  "khn", # only has 639-3 on ISO
15
  "xxx", # no such code on ISO or glottolog
16
  "Chinese", # Vachan struggles.
 
 
 
 
 
 
 
 
 
 
 
 
17
  ]
18
 
19
 
@@ -76,26 +90,26 @@ def main():
76
  return
77
 
78
  if langtext.lower() == "python":
79
- st.success("[Python is the best language!(https://www.python.org/)")
80
  return
81
 
82
  # TODO: st.code() for these "lookup in progress" outputs.
83
- st.write("* Checking whether the tag is valid. That is, the language, script, territory, and variants (if present) are all tags that have meanings assigned by IANA.")
84
 
85
  if langcodes.tag_is_valid(langtext):
86
- st.write(f"* ...True! '{langtext}' parses meaningfully as a language tag according to IANA.")
87
  else:
88
- st.write(f"* ...False! '{langtext}' doesn't parse meaningfully as a language tag according to IANA, some of its subcomponents may be invalid or it might be a natural language description.")
89
 
90
 
91
  try:
92
  lang = langcodes.Language.get(langtext)
93
  # st.write(f"{lang} is the BCP-47 tag.")
94
  if "unknown" in lang.display_name().lower():
95
- st.write(f"* Attempting to lookup the code directly gives us '{lang.display_name()}', attempting to search for it as a natural language string.")
96
  lang = None
97
  except langcodes.LanguageTagError as e:
98
- st.write(f"* Could not lookup code directly, attempting to search for it as a natural language string.")
99
  lang = None
100
 
101
 
@@ -104,16 +118,19 @@ def main():
104
  try:
105
  found = langcodes.find(langtext)
106
  lang = found
107
- st.write(f"* Natural language search found the following BCP-47 tag: {lang}")
108
  except LookupError as e:
109
- st.write("## Result: failure!")
110
- st.write(f"Unable to look up language code. But all hope is not lost...")
111
  st.write(f"* You can also try https://r12a.github.io/app-subtags/")
112
  st.write(f"* Or possibly https://glottolog.org/glottolog?search={urllib.parse.quote(langtext)}")
113
  lang = None
114
 
115
 
 
116
 
 
 
117
 
118
  #st.write(f"langcodes found the following tag: {type(found)}") # a Language object
119
  if lang is not None:
@@ -121,12 +138,12 @@ def main():
121
  b_variant = lang.to_alpha3(variant='B')
122
  t_variant = lang.to_alpha3(variant='T')
123
  broader_tags = lang.broader_tags()
124
- results_from_vachan = try_searching_vachan_engine(langtext)
125
  standardized_tag = langcodes.standardize_tag(lang)
126
- languoid_id = try_retrieving_glottolog_id(langtext)
127
 
128
 
129
- st.write(f"## Results: probably use '{standardized_tag}'")
130
  # TODO: make a results dictionary so it's easy to copy-paste?
131
  st.write(f"Best-match BCP-47 tag for '{langtext}', according to the langcodes library: {lang}")
132
  st.write(f"Breakdown of tag components:")
@@ -143,15 +160,8 @@ def main():
143
 
144
  st.write(f"### Language Subtag Search Tool")
145
  st.write(f"Try also: https://r12a.github.io/app-subtags/?lookup={lang}, which will likely have links to Ethnologue, Wikipedia, and Character usage. You can also try searching for '{langtext}' there!")
146
-
147
- st.write(f"### Glottolog")
148
- if languoid_id:
149
- st.write(f"**Glottolog Languoid ID:** Searching for '{langtext}' on Glottolog returns the following 'languoid ID': [{languoid_id}](https://glottolog.org/resource/languoid/id/{languoid_id})")
150
- st.write(f"https://glottolog.org/glottolog?search={t_variant} may be also of interest, with links to various resources including WALS, Wikidata, Odin, and OLAC. ")
151
- if t_variant != b_variant:
152
- st.write(f"If that doesn't work, try https://glottolog.org/glottolog?search={b_variant}, or put in a [custom search query](https://glottolog.org/glottolog)")
153
- st.write(f"https://glottolog.org/glottolog?search={urllib.parse.quote(langtext)} may pull up something as well.")
154
-
155
  st.write("### Older / Related Codes")
156
 
157
  st.write(f"ISO 639-3 'alpha3' code, 'terminology' or 'T' variant (deprecated): {t_variant}")
@@ -172,11 +182,22 @@ def main():
172
  elif b_obsolete_codes:
173
  st.write(f"Obsolete codes from previous ISO-639 iterations, pulled from https://iso639-3.sil.org/code/{b_variant}:")
174
  st.write(b_obsolete_codes)
175
-
176
-
177
- if results_from_vachan:
178
- st.write("### Other potential matches, from [Vachan Engine](https://github.com/Bridgeconn/vachan-api/tree/version-2) (experimental)")
179
- st.write(results_from_vachan)
 
 
 
 
 
 
 
 
 
 
 
180
 
181
 
182
  if __name__ == "__main__":
5
  import requests
6
 
7
  # FEATURE: get wikipedia codes, e.g. from https://en.wikipedia.org/wiki/List_of_Wikipedias or https://meta.wikimedia.org/wiki/List_of_Wikipedias, some of which are nonstandard. Then output f"{code}.wikipedia.org"
8
+ # Big TODO: collate all the results into a big dictionary? Then display that. Reduces if statements?
9
  # TODO: fix 'knh', it has an empty ISO section. Turns out some languages only have 639-3
10
  # TODO: add in some nice things from https://docs.streamlit.io/library/cheatsheet like error codes and status messages.
11
  # TODO: add in vachan search even if lang not found
12
  # TODO: results from glottolog even if none from others
13
+ # TODO: check glottolog results to see if they find anything!
14
  things_to_test = [
15
  "knh", # deprecated code on ISO
16
  "khn", # only has 639-3 on ISO
17
  "xxx", # no such code on ISO or glottolog
18
  "Chinese", # Vachan struggles.
19
+ "Mandarin", # Vachan struggles.
20
+ "zh-CN",
21
+ "Chinese",
22
+ "zh-Latn-pinyin",
23
+ "en-Latn-US",
24
+ "en",
25
+ "English",
26
+ "fr-CA",
27
+ "French (Canada)",
28
+ "français",
29
+ "法语",
30
+ "", # empty string
31
  ]
32
 
33
 
90
  return
91
 
92
  if langtext.lower() == "python":
93
+ st.success("[Python is the best language!](https://www.python.org/)")
94
  return
95
 
96
  # TODO: st.code() for these "lookup in progress" outputs.
97
+ st.info("* Checking whether the tag is valid. That is, the language, script, territory, and variants (if present) are all tags that have meanings assigned by IANA.")
98
 
99
  if langcodes.tag_is_valid(langtext):
100
+ st.info(f"* ...True! '{langtext}' parses meaningfully as a language tag according to IANA.")
101
  else:
102
+ st.info(f"* ...False! '{langtext}' doesn't parse meaningfully as a language tag according to IANA, some of its subcomponents may be invalid or it might be a natural language description.")
103
 
104
 
105
  try:
106
  lang = langcodes.Language.get(langtext)
107
  # st.write(f"{lang} is the BCP-47 tag.")
108
  if "unknown" in lang.display_name().lower():
109
+ st.info(f"* Attempting to lookup the code directly gives us '{lang.display_name()}', attempting to search for it as a natural language string.")
110
  lang = None
111
  except langcodes.LanguageTagError as e:
112
+ st.info(f"* Could not lookup code directly, attempting to search for it as a natural language string.")
113
  lang = None
114
 
115
 
118
  try:
119
  found = langcodes.find(langtext)
120
  lang = found
121
+ st.success(f"* Natural language search found the following BCP-47 tag: {lang}")
122
  except LookupError as e:
123
+ st.error("## Result: failure!")
124
+ st.error(f"Unable to look up language code. But all hope is not lost...")
125
  st.write(f"* You can also try https://r12a.github.io/app-subtags/")
126
  st.write(f"* Or possibly https://glottolog.org/glottolog?search={urllib.parse.quote(langtext)}")
127
  lang = None
128
 
129
 
130
+
131
 
132
+ t_variant = None
133
+
134
 
135
  #st.write(f"langcodes found the following tag: {type(found)}") # a Language object
136
  if lang is not None:
138
  b_variant = lang.to_alpha3(variant='B')
139
  t_variant = lang.to_alpha3(variant='T')
140
  broader_tags = lang.broader_tags()
141
+
142
  standardized_tag = langcodes.standardize_tag(lang)
143
+
144
 
145
 
146
+ st.write(f"## BCP-47 Results: probably use '{standardized_tag}'")
147
  # TODO: make a results dictionary so it's easy to copy-paste?
148
  st.write(f"Best-match BCP-47 tag for '{langtext}', according to the langcodes library: {lang}")
149
  st.write(f"Breakdown of tag components:")
160
 
161
  st.write(f"### Language Subtag Search Tool")
162
  st.write(f"Try also: https://r12a.github.io/app-subtags/?lookup={lang}, which will likely have links to Ethnologue, Wikipedia, and Character usage. You can also try searching for '{langtext}' there!")
163
+
164
+
 
 
 
 
 
 
 
165
  st.write("### Older / Related Codes")
166
 
167
  st.write(f"ISO 639-3 'alpha3' code, 'terminology' or 'T' variant (deprecated): {t_variant}")
182
  elif b_obsolete_codes:
183
  st.write(f"Obsolete codes from previous ISO-639 iterations, pulled from https://iso639-3.sil.org/code/{b_variant}:")
184
  st.write(b_obsolete_codes)
185
+
186
+ st.write(f"### Glottolog")
187
+ languoid_id = try_retrieving_glottolog_id(langtext)
188
+ if languoid_id:
189
+ st.write(f"**Glottolog Languoid ID:** Searching for '{langtext}' on Glottolog returns the following 'languoid ID': [{languoid_id}](https://glottolog.org/resource/languoid/id/{languoid_id})")
190
+ # FIXME: fix this to display something if there's an ISO code to try
191
+ if t_variant:
192
+ st.write(f"https://glottolog.org/glottolog?search={t_variant} may be also of interest, with links to various resources including WALS, Wikidata, Odin, and OLAC. ")
193
+ if t_variant != b_variant:
194
+ st.write(f"If that doesn't work, try https://glottolog.org/glottolog?search={b_variant}, or put in a [custom search query](https://glottolog.org/glottolog)")
195
+ st.write(f"https://glottolog.org/glottolog?search={urllib.parse.quote(langtext)} may pull up something as well.")
196
+
197
+ results_from_vachan = try_searching_vachan_engine(langtext)
198
+ if results_from_vachan:
199
+ st.write("### Other potential matches, from [Vachan Engine](https://github.com/Bridgeconn/vachan-api/tree/version-2) (experimental)")
200
+ st.write(results_from_vachan)
201
 
202
 
203
  if __name__ == "__main__":