TheComputerMan commited on
Commit
e1ce12a
1 Parent(s): 1fe6b04

Upload papercup_features.py

Browse files
Files changed (1) hide show
  1. papercup_features.py +637 -0
papercup_features.py ADDED
@@ -0,0 +1,637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Derived from an open-source resource provided by Papercup Technologies Limited
2
+ # Resource-Author: Marlene Staib
3
+ # Modified by Florian Lux, 2021
4
+
5
+ def generate_feature_lookup():
6
+ return {
7
+ '~': {'symbol_type': 'silence'},
8
+ '#': {'symbol_type': 'end of sentence'},
9
+ '?': {'symbol_type': 'questionmark'},
10
+ '!': {'symbol_type': 'exclamationmark'},
11
+ '.': {'symbol_type': 'fullstop'},
12
+ 'ɜ': {
13
+ 'symbol_type' : 'phoneme',
14
+ 'vowel_consonant' : 'vowel',
15
+ 'VUV' : 'voiced',
16
+ 'vowel_frontness' : 'central',
17
+ 'vowel_openness' : 'open-mid',
18
+ 'vowel_roundedness': 'unrounded',
19
+ },
20
+ 'ɫ': {
21
+ 'symbol_type' : 'phoneme',
22
+ 'vowel_consonant' : 'consonant',
23
+ 'VUV' : 'voiced',
24
+ 'consonant_place' : 'alveolar',
25
+ 'consonant_manner': 'lateral-approximant',
26
+ },
27
+ 'ə': {
28
+ 'symbol_type' : 'phoneme',
29
+ 'vowel_consonant' : 'vowel',
30
+ 'VUV' : 'voiced',
31
+ 'vowel_frontness' : 'central',
32
+ 'vowel_openness' : 'mid',
33
+ 'vowel_roundedness': 'unrounded',
34
+ },
35
+ 'ɚ': {
36
+ 'symbol_type' : 'phoneme',
37
+ 'vowel_consonant' : 'vowel',
38
+ 'VUV' : 'voiced',
39
+ 'vowel_frontness' : 'central',
40
+ 'vowel_openness' : 'mid',
41
+ 'vowel_roundedness': 'unrounded',
42
+ },
43
+ 'a': {
44
+ 'symbol_type' : 'phoneme',
45
+ 'vowel_consonant' : 'vowel',
46
+ 'VUV' : 'voiced',
47
+ 'vowel_frontness' : 'front',
48
+ 'vowel_openness' : 'open',
49
+ 'vowel_roundedness': 'unrounded',
50
+ },
51
+ 'ð': {
52
+ 'symbol_type' : 'phoneme',
53
+ 'vowel_consonant' : 'consonant',
54
+ 'VUV' : 'voiced',
55
+ 'consonant_place' : 'dental',
56
+ 'consonant_manner': 'fricative'
57
+ },
58
+ 'ɛ': {
59
+ 'symbol_type' : 'phoneme',
60
+ 'vowel_consonant' : 'vowel',
61
+ 'VUV' : 'voiced',
62
+ 'vowel_frontness' : 'front',
63
+ 'vowel_openness' : 'open-mid',
64
+ 'vowel_roundedness': 'unrounded',
65
+ },
66
+ 'ɪ': {
67
+ 'symbol_type' : 'phoneme',
68
+ 'vowel_consonant' : 'vowel',
69
+ 'VUV' : 'voiced',
70
+ 'vowel_frontness' : 'front_central',
71
+ 'vowel_openness' : 'close_close-mid',
72
+ 'vowel_roundedness': 'unrounded',
73
+ },
74
+ 'ᵻ': {
75
+ 'symbol_type' : 'phoneme',
76
+ 'vowel_consonant' : 'vowel',
77
+ 'VUV' : 'voiced',
78
+ 'vowel_frontness' : 'central',
79
+ 'vowel_openness' : 'close',
80
+ 'vowel_roundedness': 'unrounded',
81
+ },
82
+ 'ŋ': {
83
+ 'symbol_type' : 'phoneme',
84
+ 'vowel_consonant' : 'consonant',
85
+ 'VUV' : 'voiced',
86
+ 'consonant_place' : 'velar',
87
+ 'consonant_manner': 'nasal'
88
+ },
89
+ 'ɔ': {
90
+ 'symbol_type' : 'phoneme',
91
+ 'vowel_consonant' : 'vowel',
92
+ 'VUV' : 'voiced',
93
+ 'vowel_frontness' : 'back',
94
+ 'vowel_openness' : 'open-mid',
95
+ 'vowel_roundedness': 'rounded',
96
+ },
97
+ 'ɒ': {
98
+ 'symbol_type' : 'phoneme',
99
+ 'vowel_consonant' : 'vowel',
100
+ 'VUV' : 'voiced',
101
+ 'vowel_frontness' : 'back',
102
+ 'vowel_openness' : 'open',
103
+ 'vowel_roundedness': 'rounded',
104
+ },
105
+ 'ɾ': {
106
+ 'symbol_type' : 'phoneme',
107
+ 'vowel_consonant' : 'consonant',
108
+ 'VUV' : 'voiced',
109
+ 'consonant_place' : 'alveolar',
110
+ 'consonant_manner': 'tap'
111
+ },
112
+ 'ʃ': {
113
+ 'symbol_type' : 'phoneme',
114
+ 'vowel_consonant' : 'consonant',
115
+ 'VUV' : 'unvoiced',
116
+ 'consonant_place' : 'postalveolar',
117
+ 'consonant_manner': 'fricative'
118
+ },
119
+ 'θ': {
120
+ 'symbol_type' : 'phoneme',
121
+ 'vowel_consonant' : 'consonant',
122
+ 'VUV' : 'unvoiced',
123
+ 'consonant_place' : 'dental',
124
+ 'consonant_manner': 'fricative'
125
+ },
126
+ 'ʊ': {
127
+ 'symbol_type' : 'phoneme',
128
+ 'vowel_consonant' : 'vowel',
129
+ 'VUV' : 'voiced',
130
+ 'vowel_frontness' : 'central_back',
131
+ 'vowel_openness' : 'close_close-mid',
132
+ 'vowel_roundedness': 'unrounded'
133
+ },
134
+ 'ʌ': {
135
+ 'symbol_type' : 'phoneme',
136
+ 'vowel_consonant' : 'vowel',
137
+ 'VUV' : 'voiced',
138
+ 'vowel_frontness' : 'back',
139
+ 'vowel_openness' : 'open-mid',
140
+ 'vowel_roundedness': 'unrounded'
141
+ },
142
+ 'ʒ': {
143
+ 'symbol_type' : 'phoneme',
144
+ 'vowel_consonant' : 'consonant',
145
+ 'VUV' : 'voiced',
146
+ 'consonant_place' : 'postalveolar',
147
+ 'consonant_manner': 'fricative'
148
+ },
149
+ 'æ': {
150
+ 'symbol_type' : 'phoneme',
151
+ 'vowel_consonant' : 'vowel',
152
+ 'VUV' : 'voiced',
153
+ 'vowel_frontness' : 'front',
154
+ 'vowel_openness' : 'open-mid_open',
155
+ 'vowel_roundedness': 'unrounded'
156
+ },
157
+ 'b': {
158
+ 'symbol_type' : 'phoneme',
159
+ 'vowel_consonant' : 'consonant',
160
+ 'VUV' : 'voiced',
161
+ 'consonant_place' : 'bilabial',
162
+ 'consonant_manner': 'stop'
163
+ },
164
+ 'ʔ': {
165
+ 'symbol_type' : 'phoneme',
166
+ 'vowel_consonant' : 'consonant',
167
+ 'VUV' : 'unvoiced',
168
+ 'consonant_place' : 'glottal',
169
+ 'consonant_manner': 'stop'
170
+ },
171
+ 'd': {
172
+ 'symbol_type' : 'phoneme',
173
+ 'vowel_consonant' : 'consonant',
174
+ 'VUV' : 'voiced',
175
+ 'consonant_place' : 'alveolar',
176
+ 'consonant_manner': 'stop'
177
+ },
178
+ 'e': {
179
+ 'symbol_type' : 'phoneme',
180
+ 'vowel_consonant' : 'vowel',
181
+ 'VUV' : 'voiced',
182
+ 'vowel_frontness' : 'front',
183
+ 'vowel_openness' : 'close-mid',
184
+ 'vowel_roundedness': 'unrounded'
185
+ },
186
+ 'f': {
187
+ 'symbol_type' : 'phoneme',
188
+ 'vowel_consonant' : 'consonant',
189
+ 'VUV' : 'unvoiced',
190
+ 'consonant_place' : 'labiodental',
191
+ 'consonant_manner': 'fricative'
192
+ },
193
+ 'g': {
194
+ 'symbol_type' : 'phoneme',
195
+ 'vowel_consonant' : 'consonant',
196
+ 'VUV' : 'voiced',
197
+ 'consonant_place' : 'velar',
198
+ 'consonant_manner': 'stop'
199
+ },
200
+ 'h': {
201
+ 'symbol_type' : 'phoneme',
202
+ 'vowel_consonant' : 'consonant',
203
+ 'VUV' : 'unvoiced',
204
+ 'consonant_place' : 'glottal',
205
+ 'consonant_manner': 'fricative'
206
+ },
207
+ 'i': {
208
+ 'symbol_type' : 'phoneme',
209
+ 'vowel_consonant' : 'vowel',
210
+ 'VUV' : 'voiced',
211
+ 'vowel_frontness' : 'front',
212
+ 'vowel_openness' : 'close',
213
+ 'vowel_roundedness': 'unrounded'
214
+ },
215
+ 'j': {
216
+ 'symbol_type' : 'phoneme',
217
+ 'vowel_consonant' : 'consonant',
218
+ 'VUV' : 'voiced',
219
+ 'consonant_place' : 'palatal',
220
+ 'consonant_manner': 'approximant'
221
+ },
222
+ 'k': {
223
+ 'symbol_type' : 'phoneme',
224
+ 'vowel_consonant' : 'consonant',
225
+ 'VUV' : 'unvoiced',
226
+ 'consonant_place' : 'velar',
227
+ 'consonant_manner': 'stop'
228
+ },
229
+ 'l': {
230
+ 'symbol_type' : 'phoneme',
231
+ 'vowel_consonant' : 'consonant',
232
+ 'VUV' : 'voiced',
233
+ 'consonant_place' : 'alveolar',
234
+ 'consonant_manner': 'lateral-approximant'
235
+ },
236
+ 'm': {
237
+ 'symbol_type' : 'phoneme',
238
+ 'vowel_consonant' : 'consonant',
239
+ 'VUV' : 'voiced',
240
+ 'consonant_place' : 'bilabial',
241
+ 'consonant_manner': 'nasal'
242
+ },
243
+ 'n': {
244
+ 'symbol_type' : 'phoneme',
245
+ 'vowel_consonant' : 'consonant',
246
+ 'VUV' : 'voiced',
247
+ 'consonant_place' : 'alveolar',
248
+ 'consonant_manner': 'nasal'
249
+ },
250
+ 'ɳ': {
251
+ 'symbol_type' : 'phoneme',
252
+ 'vowel_consonant' : 'consonant',
253
+ 'VUV' : 'voiced',
254
+ 'consonant_place' : 'palatal',
255
+ 'consonant_manner': 'nasal'
256
+ },
257
+ 'o': {
258
+ 'symbol_type' : 'phoneme',
259
+ 'vowel_consonant' : 'vowel',
260
+ 'VUV' : 'voiced',
261
+ 'vowel_frontness' : 'back',
262
+ 'vowel_openness' : 'close-mid',
263
+ 'vowel_roundedness': 'rounded'
264
+ },
265
+ 'p': {
266
+ 'symbol_type' : 'phoneme',
267
+ 'vowel_consonant' : 'consonant',
268
+ 'VUV' : 'unvoiced',
269
+ 'consonant_place' : 'bilabial',
270
+ 'consonant_manner': 'stop'
271
+ },
272
+ 'ɡ': {
273
+ 'symbol_type' : 'phoneme',
274
+ 'vowel_consonant' : 'consonant',
275
+ 'VUV' : 'voiced',
276
+ 'consonant_place' : 'velar',
277
+ 'consonant_manner': 'stop'
278
+ },
279
+ 'ɹ': {
280
+ 'symbol_type' : 'phoneme',
281
+ 'vowel_consonant' : 'consonant',
282
+ 'VUV' : 'voiced',
283
+ 'consonant_place' : 'alveolar',
284
+ 'consonant_manner': 'approximant'
285
+ },
286
+ 'r': {
287
+ 'symbol_type' : 'phoneme',
288
+ 'vowel_consonant' : 'consonant',
289
+ 'VUV' : 'voiced',
290
+ 'consonant_place' : 'alveolar',
291
+ 'consonant_manner': 'trill'
292
+ },
293
+ 's': {
294
+ 'symbol_type' : 'phoneme',
295
+ 'vowel_consonant' : 'consonant',
296
+ 'VUV' : 'unvoiced',
297
+ 'consonant_place' : 'alveolar',
298
+ 'consonant_manner': 'fricative'
299
+ },
300
+ 't': {
301
+ 'symbol_type' : 'phoneme',
302
+ 'vowel_consonant' : 'consonant',
303
+ 'VUV' : 'unvoiced',
304
+ 'consonant_place' : 'alveolar',
305
+ 'consonant_manner': 'stop'
306
+ },
307
+ 'u': {
308
+ 'symbol_type' : 'phoneme',
309
+ 'vowel_consonant' : 'vowel',
310
+ 'VUV' : 'voiced',
311
+ 'vowel_frontness' : 'back',
312
+ 'vowel_openness' : 'close',
313
+ 'vowel_roundedness': 'rounded',
314
+ },
315
+ 'v': {
316
+ 'symbol_type' : 'phoneme',
317
+ 'vowel_consonant' : 'consonant',
318
+ 'VUV' : 'voiced',
319
+ 'consonant_place' : 'labiodental',
320
+ 'consonant_manner': 'fricative'
321
+ },
322
+ 'w': {
323
+ 'symbol_type' : 'phoneme',
324
+ 'vowel_consonant' : 'consonant',
325
+ 'VUV' : 'voiced',
326
+ 'consonant_place' : 'labial-velar',
327
+ 'consonant_manner': 'approximant'
328
+ },
329
+ 'x': {
330
+ 'symbol_type' : 'phoneme',
331
+ 'vowel_consonant' : 'consonant',
332
+ 'VUV' : 'unvoiced',
333
+ 'consonant_place' : 'velar',
334
+ 'consonant_manner': 'fricative'
335
+ },
336
+ 'z': {
337
+ 'symbol_type' : 'phoneme',
338
+ 'vowel_consonant' : 'consonant',
339
+ 'VUV' : 'voiced',
340
+ 'consonant_place' : 'alveolar',
341
+ 'consonant_manner': 'fricative'
342
+ },
343
+ 'ʀ': {
344
+ 'symbol_type' : 'phoneme',
345
+ 'vowel_consonant' : 'consonant',
346
+ 'VUV' : 'voiced',
347
+ 'consonant_place' : 'uvular',
348
+ 'consonant_manner': 'trill'
349
+ },
350
+ 'ø': {
351
+ 'symbol_type' : 'phoneme',
352
+ 'vowel_consonant' : 'vowel',
353
+ 'VUV' : 'voiced',
354
+ 'vowel_frontness' : 'front',
355
+ 'vowel_openness' : 'close-mid',
356
+ 'vowel_roundedness': 'rounded'
357
+ },
358
+ 'ç': {
359
+ 'symbol_type' : 'phoneme',
360
+ 'vowel_consonant' : 'consonant',
361
+ 'VUV' : 'unvoiced',
362
+ 'consonant_place' : 'palatal',
363
+ 'consonant_manner': 'fricative'
364
+ },
365
+ 'ɐ': {
366
+ 'symbol_type' : 'phoneme',
367
+ 'vowel_consonant' : 'vowel',
368
+ 'VUV' : 'voiced',
369
+ 'vowel_frontness' : 'central',
370
+ 'vowel_openness' : 'open',
371
+ 'vowel_roundedness': 'unrounded'
372
+ },
373
+ 'œ': {
374
+ 'symbol_type' : 'phoneme',
375
+ 'vowel_consonant' : 'vowel',
376
+ 'VUV' : 'voiced',
377
+ 'vowel_frontness' : 'front',
378
+ 'vowel_openness' : 'open-mid',
379
+ 'vowel_roundedness': 'rounded'
380
+ },
381
+ 'y': {
382
+ 'symbol_type' : 'phoneme',
383
+ 'vowel_consonant' : 'vowel',
384
+ 'VUV' : 'voiced',
385
+ 'vowel_frontness' : 'front',
386
+ 'vowel_openness' : 'close',
387
+ 'vowel_roundedness': 'rounded'
388
+ },
389
+ 'ʏ': {
390
+ 'symbol_type' : 'phoneme',
391
+ 'vowel_consonant' : 'vowel',
392
+ 'VUV' : 'voiced',
393
+ 'vowel_frontness' : 'front_central',
394
+ 'vowel_openness' : 'close_close-mid',
395
+ 'vowel_roundedness': 'rounded'
396
+ },
397
+ 'ɑ': {
398
+ 'symbol_type' : 'phoneme',
399
+ 'vowel_consonant' : 'vowel',
400
+ 'VUV' : 'voiced',
401
+ 'vowel_frontness' : 'back',
402
+ 'vowel_openness' : 'open',
403
+ 'vowel_roundedness': 'unrounded'
404
+ },
405
+ 'c': {
406
+ 'symbol_type' : 'phoneme',
407
+ 'vowel_consonant' : 'consonant',
408
+ 'VUV' : 'unvoiced',
409
+ 'consonant_place' : 'palatal',
410
+ 'consonant_manner': 'stop'
411
+ },
412
+ 'ɲ': {
413
+ 'symbol_type' : 'phoneme',
414
+ 'vowel_consonant' : 'consonant',
415
+ 'VUV' : 'voiced',
416
+ 'consonant_place' : 'palatal',
417
+ 'consonant_manner': 'nasal'
418
+ },
419
+ 'ɣ': {
420
+ 'symbol_type' : 'phoneme',
421
+ 'vowel_consonant' : 'consonant',
422
+ 'VUV' : 'voiced',
423
+ 'consonant_place' : 'velar',
424
+ 'consonant_manner': 'fricative'
425
+ },
426
+ 'ʎ': {
427
+ 'symbol_type' : 'phoneme',
428
+ 'vowel_consonant' : 'consonant',
429
+ 'VUV' : 'voiced',
430
+ 'consonant_place' : 'palatal',
431
+ 'consonant_manner': 'lateral-approximant'
432
+ },
433
+ 'β': {
434
+ 'symbol_type' : 'phoneme',
435
+ 'vowel_consonant' : 'consonant',
436
+ 'VUV' : 'voiced',
437
+ 'consonant_place' : 'bilabial',
438
+ 'consonant_manner': 'fricative'
439
+ },
440
+ 'ʝ': {
441
+ 'symbol_type' : 'phoneme',
442
+ 'vowel_consonant' : 'consonant',
443
+ 'VUV' : 'voiced',
444
+ 'consonant_place' : 'palatal',
445
+ 'consonant_manner': 'fricative'
446
+ },
447
+ 'ɟ': {
448
+ 'symbol_type' : 'phoneme',
449
+ 'vowel_consonant' : 'consonant',
450
+ 'VUV' : 'voiced',
451
+ 'consonant_place' : 'palatal',
452
+ 'consonant_manner': 'stop'
453
+ },
454
+ 'q': {
455
+ 'symbol_type' : 'phoneme',
456
+ 'vowel_consonant' : 'consonant',
457
+ 'VUV' : 'unvoiced',
458
+ 'consonant_place' : 'uvular',
459
+ 'consonant_manner': 'stop'
460
+ },
461
+ 'ɕ': {
462
+ 'symbol_type' : 'phoneme',
463
+ 'vowel_consonant' : 'consonant',
464
+ 'VUV' : 'unvoiced',
465
+ 'consonant_place' : 'alveolopalatal',
466
+ 'consonant_manner': 'fricative'
467
+ },
468
+ 'ʲ': {
469
+ 'symbol_type' : 'phoneme',
470
+ 'vowel_consonant' : 'consonant',
471
+ 'VUV' : 'voiced',
472
+ 'consonant_place' : 'palatal',
473
+ 'consonant_manner': 'approximant'
474
+ },
475
+ 'ɭ': {
476
+ 'symbol_type' : 'phoneme',
477
+ 'vowel_consonant' : 'consonant',
478
+ 'VUV' : 'voiced',
479
+ 'consonant_place' : 'palatal', # should be retroflex, but palatal should be close enough
480
+ 'consonant_manner': 'lateral-approximant'
481
+ },
482
+ 'ɵ': {
483
+ 'symbol_type' : 'phoneme',
484
+ 'vowel_consonant' : 'vowel',
485
+ 'VUV' : 'voiced',
486
+ 'vowel_frontness' : 'central',
487
+ 'vowel_openness' : 'open-mid',
488
+ 'vowel_roundedness': 'rounded'
489
+ },
490
+ 'ʑ': {
491
+ 'symbol_type' : 'phoneme',
492
+ 'vowel_consonant' : 'consonant',
493
+ 'VUV' : 'voiced',
494
+ 'consonant_place' : 'alveolopalatal',
495
+ 'consonant_manner': 'fricative'
496
+ },
497
+ 'ʋ': {
498
+ 'symbol_type' : 'phoneme',
499
+ 'vowel_consonant' : 'consonant',
500
+ 'VUV' : 'voiced',
501
+ 'consonant_place' : 'labiodental',
502
+ 'consonant_manner': 'approximant'
503
+ },
504
+ 'ʁ': {
505
+ 'symbol_type' : 'phoneme',
506
+ 'vowel_consonant' : 'consonant',
507
+ 'VUV' : 'voiced',
508
+ 'consonant_place' : 'uvular',
509
+ 'consonant_manner': 'fricative'
510
+ },
511
+ 'ɨ': {
512
+ 'symbol_type' : 'phoneme',
513
+ 'vowel_consonant' : 'vowel',
514
+ 'VUV' : 'voiced',
515
+ 'vowel_frontness' : 'central',
516
+ 'vowel_openness' : 'close',
517
+ 'vowel_roundedness': 'unrounded'
518
+ },
519
+ 'ʂ': {
520
+ 'symbol_type' : 'phoneme',
521
+ 'vowel_consonant' : 'consonant',
522
+ 'VUV' : 'unvoiced',
523
+ 'consonant_place' : 'palatal', # should be retroflex, but palatal should be close enough
524
+ 'consonant_manner': 'fricative'
525
+ },
526
+ 'ɬ': {
527
+ 'symbol_type' : 'phoneme',
528
+ 'vowel_consonant' : 'consonant',
529
+ 'VUV' : 'unvoiced',
530
+ 'consonant_place' : 'alveolar', # should be noted it's also lateral, but should be close enough
531
+ 'consonant_manner': 'fricative'
532
+ },
533
+ } # REMEMBER to also add the phonemes added here to the ID lookup table in the TextFrontend as the new highest ID
534
+
535
+
536
+ def generate_feature_table():
537
+ ipa_to_phonemefeats = generate_feature_lookup()
538
+
539
+ feat_types = set()
540
+ for ipa in ipa_to_phonemefeats:
541
+ if len(ipa) == 1:
542
+ [feat_types.add(feat) for feat in ipa_to_phonemefeats[ipa].keys()]
543
+
544
+ feat_to_val_set = dict()
545
+ for feat in feat_types:
546
+ feat_to_val_set[feat] = set()
547
+ for ipa in ipa_to_phonemefeats:
548
+ if len(ipa) == 1:
549
+ for feat in ipa_to_phonemefeats[ipa]:
550
+ feat_to_val_set[feat].add(ipa_to_phonemefeats[ipa][feat])
551
+
552
+ # print(feat_to_val_set)
553
+
554
+ value_list = set()
555
+ for val_set in [feat_to_val_set[feat] for feat in feat_to_val_set]:
556
+ for value in val_set:
557
+ value_list.add(value)
558
+ # print("{")
559
+ # for index, value in enumerate(list(value_list)):
560
+ # print('"{}":{},'.format(value,index))
561
+ # print("}")
562
+
563
+ value_to_index = {
564
+ "dental" : 0,
565
+ "postalveolar" : 1,
566
+ "mid" : 2,
567
+ "close-mid" : 3,
568
+ "vowel" : 4,
569
+ "silence" : 5,
570
+ "consonant" : 6,
571
+ "close" : 7,
572
+ "velar" : 8,
573
+ "stop" : 9,
574
+ "palatal" : 10,
575
+ "nasal" : 11,
576
+ "glottal" : 12,
577
+ "central" : 13,
578
+ "back" : 14,
579
+ "approximant" : 15,
580
+ "uvular" : 16,
581
+ "open-mid" : 17,
582
+ "front_central" : 18,
583
+ "front" : 19,
584
+ "end of sentence" : 20,
585
+ "labiodental" : 21,
586
+ "close_close-mid" : 22,
587
+ "labial-velar" : 23,
588
+ "unvoiced" : 24,
589
+ "central_back" : 25,
590
+ "trill" : 26,
591
+ "rounded" : 27,
592
+ "open-mid_open" : 28,
593
+ "tap" : 29,
594
+ "alveolar" : 30,
595
+ "bilabial" : 31,
596
+ "phoneme" : 32,
597
+ "open" : 33,
598
+ "fricative" : 34,
599
+ "unrounded" : 35,
600
+ "lateral-approximant": 36,
601
+ "voiced" : 37,
602
+ "questionmark" : 38,
603
+ "exclamationmark" : 39,
604
+ "fullstop" : 40,
605
+ "alveolopalatal" : 41
606
+ }
607
+
608
+ phone_to_vector = dict()
609
+ for ipa in ipa_to_phonemefeats:
610
+ if len(ipa) == 1:
611
+ phone_to_vector[ipa] = [0] * sum([len(values) for values in [feat_to_val_set[feat] for feat in feat_to_val_set]])
612
+ for feat in ipa_to_phonemefeats[ipa]:
613
+ if ipa_to_phonemefeats[ipa][feat] in value_to_index:
614
+ phone_to_vector[ipa][value_to_index[ipa_to_phonemefeats[ipa][feat]]] = 1
615
+
616
+ for feat in feat_to_val_set:
617
+ for value in feat_to_val_set[feat]:
618
+ if value not in value_to_index:
619
+ print(f"Unknown feature value in featureset! {value}")
620
+
621
+ # print(f"{sum([len(values) for values in [feat_to_val_set[feat] for feat in feat_to_val_set]])} should be 42")
622
+
623
+ return phone_to_vector
624
+
625
+
626
+ def generate_phone_to_id_lookup():
627
+ ipa_to_phonemefeats = generate_feature_lookup()
628
+ count = 0
629
+ phone_to_id = dict()
630
+ for key in sorted(list(ipa_to_phonemefeats)): # careful: non-deterministic
631
+ phone_to_id[key] = count
632
+ count += 1
633
+ return phone_to_id
634
+
635
+
636
+ if __name__ == '__main__':
637
+ print(generate_phone_to_id_lookup())