cdleong committed on
Commit
c95a8ea
1 Parent(s): 145304e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -15
app.py CHANGED
@@ -1,32 +1,85 @@
1
  import streamlit as st
2
  import epitran
3
-
 
4
  # TODO: reverse transliterate?
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
if __name__ == "__main__":
    # Collect the two halves of the Epitran code from the user: the language
    # code first, then the script code. Each value is echoed back.
    language_label = "Three-letter ISO-639-3 (https://iso639-3.sil.org/) language code"
    language_part = st.text_input(label=language_label, value="swa")
    st.write(f"iso code is {language_part}")

    script_label = "ISO 15924 (https://unicode.org/iso15924/iso15924-codes.html) script code, e.g. 'Latn' for Latin script, 'Hans' for Chinese script, etc."
    script_part = st.text_input(label=script_label, value="Latn")
    st.write(f"iso code is {script_part}")

    text_to_convert = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    # Epitran is instantiated with "<language>-<script>", e.g. "swa-Latn".
    epitran_code = f"{language_part}-{script_part}"
    st.write(f"Combined code: {epitran_code}")
    st.info("attempting to instantiate epitran transliterator for your language/script")
    transliterator = epitran.Epitran(epitran_code)

    st.info(f"transliterating `{text_to_convert}`\n\tusing {transliterator}...")
    st.success(transliterator.transliterate(text_to_convert))
 
30
 
31
 
32
 
 
1
  import streamlit as st
2
  import epitran
3
+ import langcodes
4
+ from langcodes import LanguageTagError
5
  # TODO: reverse transliterate?
6
 
7
+
8
def get_lang_description_from_mapping_name(string_to_check):
    """Return a human-readable description for an epitran mapping name.

    The mapping name is parsed as a language tag with ``langcodes``. If the
    parser rejects a trailing subtag, the last ``-``-separated piece is
    dropped and the shorter tag is tried again.

    Args:
        string_to_check: Mapping name such as ``"swa-Latn"``.

    Returns:
        ``"Generic Latin Script"`` for the special name ``"generic-Latn"``;
        otherwise the ``key: value`` pairs from ``Language.describe()`` joined
        with ``", "``. Returns ``None`` when the name is missing, shorter than
        two characters, or cannot be parsed.
    """
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"

    # Guard against None/empty input as well as names too short to be a tag.
    if not string_to_check or len(string_to_check) < 2:
        return None

    try:
        lang = langcodes.get(string_to_check)
        if not lang:
            return None
        return ", ".join(
            f"{key}: {value}" for key, value in lang.describe().items()
        )
    except LanguageTagError as e:
        message = str(e)
        if "out of place" in message or "must be followed by something" in message:
            # Errors seen for epitran mapping names with extra suffixes:
            #   This extlang subtag, 'red', is out of place. Expected
            #     territory, variant, extension, or end of string.
            #   This script subtag, 'east', is out of place. Expected
            #     territory, variant, extension, or end of string.
            #   The subtag 'p' must be followed by something
            # Drop the last subtag and retry. Each retry shortens the string,
            # so the recursion ends at the length guard above.
            shortened = "-".join(string_to_check.split("-")[:-1])
            return get_lang_description_from_mapping_name(shortened)

        # Any other parse failure is reported and treated as "no description".
        print("*****")
        print(e)
        return None
42
+
43
+
44
def get_valid_epitran_mappings_list():
    """List the mapping names available in the installed epitran package.

    Looks in the ``data/map`` directory under the first entry of
    ``epitran.__path__`` and collects the stem (file name without its final
    suffix) of every file whose name contains a dot.

    Returns:
        A sorted list of unique mapping names.
    """
    # Imported locally so the function does not rely on a module-level import.
    from pathlib import Path

    map_path = Path(epitran.__path__[0]) / "data" / "map"
    # glob() order depends on the filesystem; sort so the UI list is stable.
    return sorted({map_file.stem for map_file in map_path.glob("*.*")})
49
+
50
+
51
if __name__ == "__main__":
    # Offer only the mappings returned by get_valid_epitran_mappings_list()
    # instead of asking the user to type language and script codes by hand.
    valid_epitran_mappings = get_valid_epitran_mappings_list()
    selected_mapping = st.selectbox(
        "Which language/script pair would you like to use?",
        valid_epitran_mappings,
    )
    st.write(f"You selected {selected_mapping}")

    # Show a human-readable description of the selection when one is available.
    description = get_lang_description_from_mapping_name(selected_mapping)
    if description:
        st.write(f"Language details: {description}")

    input_text = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    st.info("attempting to instantiate epitran transliterator for your language/script")
    epi = epitran.Epitran(selected_mapping)

    st.info(f"transliterating `{input_text}`\n\tusing {epi}...")
    transliteration = epi.transliterate(input_text)

    st.success(transliteration)
82
+
83
 
84
 
85