cointegrated committed on
Commit
1cfde26
1 Parent(s): d8067a5

The first version of the app

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +1 -1
  3. app.py +43 -0
  4. myv_translit.py +268 -0
  5. test_translit.py +23 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Myv Translit
3
  emoji: 📈
4
  colorFrom: gray
5
  colorTo: green
 
1
  ---
2
+ title: Erzya Translit
3
  emoji: 📈
4
  colorFrom: gray
5
  colorTo: green
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from myv_translit import lat2cyr, cyr2lat
4
+
5
+
6
def transliterator(input_text, direction_to_latn=1, joint_acute=True, not_first_e_with_hacek=False, not_soft_l_after_vowels=True):
    """Transliterate ``input_text`` in the direction selected by the caller.

    Args:
        input_text: Text to convert.
        direction_to_latn: Truthy selects ``cyr2lat`` (Cyrillic to Latin);
            falsy selects ``lat2cyr`` (Latin to Cyrillic).
        joint_acute: Forwarded unchanged to the converter.
        not_first_e_with_hacek: Negated and forwarded as
            ``first_e_with_hacek``.
        not_soft_l_after_vowels: Negated and forwarded as
            ``soft_l_after_vowels``.

    Returns:
        Whatever the selected converter returns for ``input_text``.
    """
    # The "not_*" parameters are inverted forms of the converter options,
    # so they are flipped right at the call site.
    convert = cyr2lat if direction_to_latn else lat2cyr
    return convert(
        input_text,
        joint_acute=joint_acute,
        first_e_with_hacek=not not_first_e_with_hacek,
        soft_l_after_vowels=not not_soft_l_after_vowels,
    )
14
+
15
+
16
# Descriptive text for the app (in Russian); it credits Mikhail Potapov's
# algorithm and links to its sources. Fixed the misspelling of "эрзянского".
article = """
Это автоматический транслитератор между кириллицей и латиницей для эрзянского языка.

В основе - алгоритм Михаила Потапова:
- https://github.com/potapoff271083/automatic_translation_latin_to_cyrillic
- http://valks.erzja.info/2020/04/30/эрзянский-алфавит/
"""
23
+
24
# Choices for the direction radio button. The Radio below is created with
# type="index", and `transliterator` treats a truthy value as
# "Cyrillic -> Latin", which corresponds to the second entry here.
directions = ['lat -> кир', 'кир -> lat']


# The input components are listed in the same order as the positional
# parameters of `transliterator`.
interface = gr.Interface(
    fn=transliterator,
    inputs=[
        gr.Textbox(label="Text", lines=2, placeholder='text to transliterate'),
        gr.Radio(choices=directions, type="index", interactive=True, value=directions[0]),
        gr.Checkbox(value=True, label='L + ́ -> Ĺ'),
        gr.Checkbox(value=False, label='ěrzä -> erzä'),
        gr.Checkbox(value=False, label='peĺks -> pelks'),
    ],
    outputs="text",
    title='Эрзянь транслитератор',
    article=article,
)


if __name__ == '__main__':
    interface.launch()
myv_translit.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Ordered Cyrillic -> Latin rewrite rules. Each rule is a dict with the keys
# 'find_what', 'replacer' and 're' (True when 'find_what' is a regex).
# The order matters: the plain letter substitutions run first, and the regex
# rules then operate on the already-Latin text.
_cyr2lat = [
    # Literal substitutions, (Cyrillic, Latin). The soft sign becomes a
    # bare combining acute accent (U+0301); the hard sign is dropped.
    {'find_what': cyr, 'replacer': lat, 're': False}
    for cyr, lat in (
        ('А', 'A'), ('а', 'a'),
        ('О', 'O'), ('о', 'o'),
        ('У', 'U'), ('у', 'u'),
        ('Ы', 'Y'), ('ы', 'y'),
        ('И', 'I'), ('и', 'i'),
        ('Е', 'E'), ('е', 'e'),
        ('Б', 'B'), ('б', 'b'),
        ('В', 'V'), ('в', 'v'),
        ('Г', 'G'), ('г', 'g'),
        ('Д', 'D'), ('д', 'd'),
        ('З', 'Z'), ('з', 'z'),
        ('К', 'K'), ('к', 'k'),
        ('Л', 'L'), ('л', 'l'),
        ('М', 'M'), ('м', 'm'),
        ('Н', 'N'), ('н', 'n'),
        ('П', 'P'), ('п', 'p'),
        ('Р', 'R'), ('р', 'r'),
        ('С', 'S'), ('с', 's'),
        ('Т', 'T'), ('т', 't'),
        ('Ф', 'F'), ('ф', 'f'),
        ('Х', 'H'), ('х', 'h'),
        ('Ц', 'C'), ('ц', 'c'),
        ('Ч', 'Č'), ('ч', 'č'),
        ('Ш', 'Š'), ('ш', 'š'),
        ('Ж', 'Ž'), ('ж', 'ž'),
        ('Щ', 'Čš'), ('щ', 'čš'),
        ('Ь', '\u0301'), ('ь', '\u0301'),
        ('Й', 'J'), ('й', 'j'),
        ('Ъ', ''), ('ъ', ''),
        ('Э', 'Ě'), ('э', 'ě'),
        ('Я', 'Ä'), ('я', 'ä'),
        ('Ю', 'Ü'), ('ю', 'ü'),
        ('Ё', 'Ö'), ('ё', 'ö'),
    )
] + [
    # Regex rules that spell the iotated vowels ö/ü/ä as jo/ju/ja.
    {'find_what': pattern, 'replacer': replacement, 're': True}
    for pattern, replacement in (
        # The vowel forms a whole word on its own.
        (r'\bö\b', 'jo'),
        (r'\bÖ\b', 'Jo'),
        (r'\bü\b', 'ju'),
        (r'\bÜ\b', 'Ju'),
        (r'\bä\b', 'ja'),
        (r'\bÄ\b', 'Ja'),
        # Word-initial vowel followed by a lowercase letter.
        (r'(\bö)([a-zöäüšžčě])', r'jo\2'),
        (r'(\bä)([a-zöäüšžčě])', r'ja\2'),
        (r'(\bü)([a-zöäüšžčě])', r'ju\2'),
        (r'(\bÖ)([a-zöäüšžčě])', r'Jo\2'),
        (r'(\bÄ)([a-zöäüšžčě])', r'Ja\2'),
        (r'(\bÜ)([a-zöäüšžčě])', r'Ju\2'),
        # Word-initial vowel followed by an uppercase letter.
        (r'(\bö)([A-ZÖÄÜŠŽČĚ])', r'jo\2'),
        (r'(\bä)([A-ZÖÄÜŠŽČĚ])', r'ja\2'),
        (r'(\bü)([A-ZÖÄÜŠŽČĚ])', r'ju\2'),
        (r'(\bÖ)([A-ZÖÄÜŠŽČĚ])', r'JO\2'),
        (r'(\bÄ)([A-ZÖÄÜŠŽČĚ])', r'JA\2'),
        (r'(\bÜ)([A-ZÖÄÜŠŽČĚ])', r'JU\2'),
        # The vowel directly follows another vowel.
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(ä)', r'\1ja'),
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(Ä)', r'\1JA'),
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(ö)', r'\1jo'),
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(Ö)', r'\1JO'),
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(ü)', r'\1ju'),
        (r'([aouiěyeöüäAOUIĚYEÖÜÄ])(Ü)', r'\1JU'),
    )
]
96
+
97
# Rules that merge "base letter + combining acute (U+0301)" into a single
# precomposed character. The code points are spelled out because the two
# forms look identical in most editors. T and D have no precomposed acute
# form in this table, so they map to the caron letters instead.
_cyr2lat_joint_acutes = [
    {'find_what': base + '\u0301', 'replacer': composed, 're': False}
    for base, composed in (
        ('s', '\u015b'),  # ś
        ('z', '\u017a'),  # ź
        ('c', '\u0107'),  # ć
        ('n', '\u0144'),  # ń
        ('r', '\u0155'),  # ŕ
        ('t', '\u0165'),  # ť
        ('d', '\u010f'),  # ď
        ('l', '\u013a'),  # ĺ
        ('S', '\u015a'),  # Ś
        ('Z', '\u0179'),  # Ź
        ('C', '\u0106'),  # Ć
        ('N', '\u0143'),  # Ń
        ('T', '\u0164'),  # Ť
        ('D', '\u010e'),  # Ď
        ('L', '\u0139'),  # Ĺ
        ('R', '\u0154'),  # Ŕ
    )
]
115
+
116
# Regex rules that drop the hacek from an "ě"/"Ě" standing at a word
# boundary (start of a word), leaving a plain "e"/"E".
_cyr2lat_first_e = [
    {'find_what': r'\b' + with_hacek, 'replacer': plain, 're': True}
    for with_hacek, plain in (('Ě', 'E'), ('ě', 'e'))
]
120
+
121
# Regex rules that replace a soft L with a plain L when it directly follows
# one of the vowels y, i, ě, e (either case). Each pattern accepts both
# spellings of the soft L: the precomposed character (joint acute) or
# "L" + U+0301 (disjoint acute).
_cyr2lat_soft_l_after_vowels = [
    {
        'find_what': '([yiěeYIĚE])(' + joint + '|' + plain + '\u0301)',
        'replacer': '\\1' + plain,
        're': True,
    }
    for joint, plain in (('\u0139', 'L'), ('\u013a', 'l'))
]
126
+
127
# Ordered Latin -> Cyrillic rewrite rules, in the same dict format as the
# Cyrillic -> Latin tables ('find_what', 'replacer', 're').
#
# The order matters:
#   1. precomposed acute/caron consonants are split into base + U+0301;
#   2. letters are substituted one by one (the "čš" digraph is handled
#      before "š" and "č" so it is not broken apart);
#   3. the resulting "й" + vowel pairs are folded into я / ё / ю.
#
# The regex rules of the Cyrillic -> Latin table cannot simply be inverted
# (swapping a pattern with its replacement does not give a valid rule), so
# step 3 covers the ja / jo / ju spellings instead.
# todo: introduce Ъ when appropriate
_lat2cyr = [
    # Step 1: precomposed letter -> base letter + combining acute.
    {'find_what': composed, 'replacer': base + '\u0301', 're': False}
    for composed, base in (
        ('\u0154', 'R'),  # Ŕ
        ('\u0139', 'L'),  # Ĺ
        ('\u010e', 'D'),  # Ď
        ('\u0164', 'T'),  # Ť
        ('\u0143', 'N'),  # Ń
        ('\u0106', 'C'),  # Ć
        ('\u0179', 'Z'),  # Ź
        ('\u015a', 'S'),  # Ś
        ('\u013a', 'l'),  # ĺ
        ('\u010f', 'd'),  # ď
        ('\u0165', 't'),  # ť
        ('\u0155', 'r'),  # ŕ
        ('\u0144', 'n'),  # ń
        ('\u0107', 'c'),  # ć
        ('\u017a', 'z'),  # ź
        ('\u015b', 's'),  # ś
    )
] + [
    # Step 2: literal substitutions, (Latin, Cyrillic).
    {'find_what': lat, 'replacer': cyr, 're': False}
    for lat, cyr in (
        ('ö', 'ё'), ('Ö', 'Ё'),
        ('ü', 'ю'), ('Ü', 'Ю'),
        ('ä', 'я'), ('Ä', 'Я'),
        ('ě', 'э'), ('Ě', 'Э'),
        ('j', 'й'), ('J', 'Й'),
        # Combining acute -> soft sign.
        # NOTE: the second rule can never fire, because the first one has
        # already replaced every U+0301; an uppercase soft sign is therefore
        # never produced.
        ('\u0301', 'ь'), ('\u0301', 'Ь'),
        ('čš', 'щ'), ('Čš', 'Щ'),
        ('ž', 'ж'), ('Ž', 'Ж'),
        ('š', 'ш'), ('Š', 'Ш'),
        ('č', 'ч'), ('Č', 'Ч'),
        ('c', 'ц'), ('C', 'Ц'),
        ('h', 'х'), ('H', 'Х'),
        ('f', 'ф'), ('F', 'Ф'),
        ('t', 'т'), ('T', 'Т'),
        ('s', 'с'), ('S', 'С'),
        ('r', 'р'), ('R', 'Р'),
        ('p', 'п'), ('P', 'П'),
        ('n', 'н'), ('N', 'Н'),
        ('m', 'м'), ('M', 'М'),
        ('l', 'л'), ('L', 'Л'),
        ('k', 'к'), ('K', 'К'),
        ('z', 'з'), ('Z', 'З'),
        ('d', 'д'), ('D', 'Д'),
        ('g', 'г'), ('G', 'Г'),
        ('v', 'в'), ('V', 'В'),
        ('b', 'б'), ('B', 'Б'),
        ('e', 'е'), ('E', 'Е'),
        ('i', 'и'), ('I', 'И'),
        ('y', 'ы'), ('Y', 'Ы'),
        ('u', 'у'), ('U', 'У'),
        ('o', 'о'), ('O', 'О'),
        ('a', 'а'), ('A', 'А'),
    )
] + [
    # Step 3: ya, yo, yu. These patterns are Cyrillic, because they run
    # after step 2. The "yo" rules match й + о; they previously matched
    # й + е, which left "jo" as "йо" and turned "je" into "ё".
    {'find_what': pattern, 'replacer': cyr, 're': True}
    for pattern, cyr in (
        ('Й[Аа]', 'Я'),
        ('й[Аа]', 'я'),
        ('Й[Оо]', 'Ё'),
        ('й[Оо]', 'ё'),
        ('Й[Уу]', 'Ю'),
        ('й[Уу]', 'ю'),
    )
]
243
+
244
+
245
def transliterate_with_rules(text, rules):
    """Apply every rule in ``rules`` to ``text``, in order, and return the result.

    Each rule is a mapping with the keys ``'find_what'`` and ``'replacer'``.
    When the rule's ``'re'`` entry is truthy, ``'find_what'`` is used as a
    regular expression with ``re.sub``; otherwise (including when ``'re'``
    is absent) a literal ``str.replace`` is performed. Each rule sees the
    output of the previous one.
    """
    result = text
    for rule in rules:
        is_regex = rule.get('re')
        pattern, substitute = rule['find_what'], rule['replacer']
        if not is_regex:
            result = result.replace(pattern, substitute)
            continue
        result = re.sub(pattern, substitute, result)
    return result
252
+
253
+
254
def cyr2lat(text, joint_acute=True, first_e_with_hacek=True, soft_l_after_vowels=True):
    """Transliterate Cyrillic ``text`` to the Latin script.

    The base ``_cyr2lat`` rules always run first; the optional passes below
    are then applied in a fixed order.

    Args:
        text: Text to transliterate.
        joint_acute: When truthy, apply ``_cyr2lat_joint_acutes``.
        first_e_with_hacek: When falsy, apply ``_cyr2lat_first_e``.
        soft_l_after_vowels: When falsy, apply
            ``_cyr2lat_soft_l_after_vowels``.

    Returns:
        The transliterated string.
    """
    # todo: support all the optional settings
    optional_passes = (
        (joint_acute, _cyr2lat_joint_acutes),
        (not first_e_with_hacek, _cyr2lat_first_e),
        (not soft_l_after_vowels, _cyr2lat_soft_l_after_vowels),
    )
    result = transliterate_with_rules(text, _cyr2lat)
    for enabled, extra_rules in optional_passes:
        if enabled:
            result = transliterate_with_rules(result, extra_rules)
    return result
264
+
265
+
266
def lat2cyr(text, joint_acute=True, first_e_with_hacek=True, soft_l_after_vowels=True):
    """Transliterate Latin ``text`` to Cyrillic using the ``_lat2cyr`` rules.

    The ``joint_acute``, ``first_e_with_hacek`` and ``soft_l_after_vowels``
    flags are accepted so the signature mirrors ``cyr2lat``, but they are
    currently ignored.
    """
    # todo: support all the optional settings
    cyrillic = transliterate_with_rules(text, _lat2cyr)
    return cyrillic
test_translit.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from myv_translit import cyr2lat
2
+
3
+
4
def test_join_acute():
    """The soft sign yields one precomposed letter, or base + U+0301 when joint_acute is off."""
    word = 'кель'
    # The expected values use explicit code points because the joint and
    # disjoint spellings look identical on screen.
    joint = cyr2lat(word)
    assert joint == 'ke\u013a'
    disjoint = cyr2lat(word, joint_acute=False)
    assert disjoint == 'kel\u0301'
    assert len(joint) == 3
    assert len(disjoint) == 4
9
+
10
+
11
def test_first_e():
    """A word-initial "э" keeps its hacek by default and loses it when the flag is off."""
    word = 'эрзя'
    with_hacek = cyr2lat(word)
    assert with_hacek == 'ěrzä'
    without_hacek = cyr2lat(word, first_e_with_hacek=False)
    assert without_hacek == 'erzä'
14
+
15
+
16
def test_soft_l():
    """A soft L after "e" stays soft by default and becomes plain when the flag is off."""
    word = 'пелькс'
    soft = cyr2lat(word)
    assert soft == 'pe\u013aks'  # precomposed l-acute (joint_acute defaults to True)
    plain = cyr2lat(word, soft_l_after_vowels=False)
    assert plain == 'pelks'
19
+
20
+
21
+ # todo: test on a larger corpus
22
+ # todo: test cyclical consistency
23
+