# Derived from an open-source resource provided by Papercup Technologies Limited # Resource-Author: Marlene Staib # Modified by Florian Lux, 2021 def generate_feature_lookup(): return { '~': {'symbol_type': 'silence'}, '#': {'symbol_type': 'end of sentence'}, '?': {'symbol_type': 'questionmark'}, '!': {'symbol_type': 'exclamationmark'}, '.': {'symbol_type': 'fullstop'}, 'ɜ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'unrounded', }, 'ɫ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'lateral-approximant', }, 'ə': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'mid', 'vowel_roundedness': 'unrounded', }, 'ɚ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'mid', 'vowel_roundedness': 'unrounded', }, 'a': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'open', 'vowel_roundedness': 'unrounded', }, 'ð': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'dental', 'consonant_manner': 'fricative' }, 'ɛ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'unrounded', }, 'ɪ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front_central', 'vowel_openness' : 'close_close-mid', 'vowel_roundedness': 'unrounded', }, 'ᵻ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'close', 'vowel_roundedness': 'unrounded', }, 'ŋ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'velar', 'consonant_manner': 'nasal' }, 'ɔ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'rounded', }, 'ɒ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'open', 'vowel_roundedness': 'rounded', }, 'ɾ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'tap' }, 'ʃ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'postalveolar', 'consonant_manner': 'fricative' }, 'θ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'dental', 'consonant_manner': 'fricative' }, 'ʊ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central_back', 'vowel_openness' : 'close_close-mid', 'vowel_roundedness': 'unrounded' }, 'ʌ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'unrounded' }, 'ʒ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'postalveolar', 'consonant_manner': 'fricative' }, 'æ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'open-mid_open', 'vowel_roundedness': 'unrounded' }, 'b': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'bilabial', 'consonant_manner': 'stop' }, 'ʔ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'glottal', 'consonant_manner': 'stop' }, 'd': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'stop' }, 'e': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'close-mid', 'vowel_roundedness': 'unrounded' }, 'f': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'labiodental', 'consonant_manner': 'fricative' }, 'g': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'velar', 'consonant_manner': 'stop' }, 'h': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'glottal', 'consonant_manner': 'fricative' }, 'i': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'close', 'vowel_roundedness': 'unrounded' }, 'j': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'approximant' }, 'k': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'velar', 'consonant_manner': 'stop' }, 'l': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'lateral-approximant' }, 'm': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'bilabial', 'consonant_manner': 'nasal' }, 'n': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'nasal' }, 'ɳ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'nasal' }, 'o': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'close-mid', 'vowel_roundedness': 'rounded' }, 'p': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'bilabial', 'consonant_manner': 'stop' }, 'ɡ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'velar', 'consonant_manner': 'stop' }, 'ɹ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'approximant' }, 'r': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'trill' }, 's': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'fricative' }, 't': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'stop' }, 'u': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'close', 'vowel_roundedness': 'rounded', }, 'v': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'labiodental', 'consonant_manner': 'fricative' }, 'w': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'labial-velar', 'consonant_manner': 'approximant' }, 'x': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'velar', 'consonant_manner': 'fricative' }, 'z': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolar', 'consonant_manner': 'fricative' }, 'ʀ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'uvular', 'consonant_manner': 'trill' }, 'ø': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'close-mid', 'vowel_roundedness': 'rounded' }, 'ç': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'palatal', 'consonant_manner': 'fricative' }, 'ɐ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'open', 'vowel_roundedness': 'unrounded' }, 'œ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'rounded' }, 'y': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front', 'vowel_openness' : 'close', 'vowel_roundedness': 'rounded' }, 'ʏ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'front_central', 'vowel_openness' : 'close_close-mid', 'vowel_roundedness': 'rounded' }, 'ɑ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'back', 'vowel_openness' : 'open', 'vowel_roundedness': 'unrounded' }, 'c': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'palatal', 'consonant_manner': 'stop' }, 'ɲ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'nasal' }, 'ɣ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'velar', 'consonant_manner': 'fricative' }, 'ʎ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'lateral-approximant' }, 'β': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'bilabial', 'consonant_manner': 'fricative' }, 'ʝ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'fricative' }, 'ɟ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'stop' }, 'q': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'uvular', 'consonant_manner': 'stop' }, 'ɕ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'alveolopalatal', 'consonant_manner': 'fricative' }, 'ʲ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', 'consonant_manner': 'approximant' }, 'ɭ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'palatal', # should be retroflex, but palatal should be close enough 'consonant_manner': 'lateral-approximant' }, 'ɵ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'open-mid', 'vowel_roundedness': 'rounded' }, 'ʑ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'alveolopalatal', 'consonant_manner': 'fricative' }, 'ʋ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'labiodental', 'consonant_manner': 'approximant' }, 'ʁ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'voiced', 'consonant_place' : 'uvular', 'consonant_manner': 'fricative' }, 'ɨ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'vowel', 'VUV' : 'voiced', 'vowel_frontness' : 'central', 'vowel_openness' : 'close', 'vowel_roundedness': 'unrounded' }, 'ʂ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'palatal', # should be retroflex, but palatal should be close enough 'consonant_manner': 'fricative' }, 'ɬ': { 'symbol_type' : 'phoneme', 'vowel_consonant' : 'consonant', 'VUV' : 'unvoiced', 'consonant_place' : 'alveolar', # should be noted it's also lateral, but should be close enough 'consonant_manner': 'fricative' }, } # REMEMBER to also add the phonemes added here to the ID lookup table in the TextFrontend as the new highest ID def generate_feature_table(): ipa_to_phonemefeats = generate_feature_lookup() feat_types = set() for ipa in ipa_to_phonemefeats: if len(ipa) == 1: [feat_types.add(feat) for feat in ipa_to_phonemefeats[ipa].keys()] feat_to_val_set = dict() for feat in feat_types: feat_to_val_set[feat] = set() for ipa in ipa_to_phonemefeats: if len(ipa) == 1: for feat in ipa_to_phonemefeats[ipa]: feat_to_val_set[feat].add(ipa_to_phonemefeats[ipa][feat]) # print(feat_to_val_set) value_list = set() for val_set in [feat_to_val_set[feat] for feat in feat_to_val_set]: for value in val_set: value_list.add(value) # print("{") # for index, value in enumerate(list(value_list)): # print('"{}":{},'.format(value,index)) # print("}") value_to_index = { "dental" : 0, "postalveolar" : 1, "mid" : 2, "close-mid" : 3, "vowel" : 4, "silence" : 5, "consonant" : 6, "close" : 7, "velar" : 8, "stop" : 9, "palatal" : 10, "nasal" : 11, "glottal" : 12, "central" : 13, "back" : 14, "approximant" : 15, "uvular" : 16, "open-mid" : 17, "front_central" : 18, "front" : 19, "end of sentence" : 20, "labiodental" : 21, "close_close-mid" : 22, "labial-velar" : 23, "unvoiced" : 24, "central_back" : 25, "trill" : 26, "rounded" : 27, "open-mid_open" : 28, "tap" : 29, "alveolar" : 30, "bilabial" : 31, "phoneme" : 32, "open" : 33, "fricative" : 34, "unrounded" : 35, "lateral-approximant": 36, "voiced" : 37, "questionmark" : 38, "exclamationmark" : 39, "fullstop" : 40, "alveolopalatal" : 41 } phone_to_vector = dict() for ipa in ipa_to_phonemefeats: if len(ipa) == 1: phone_to_vector[ipa] = [0] * sum([len(values) for values in [feat_to_val_set[feat] for feat in feat_to_val_set]]) for feat in ipa_to_phonemefeats[ipa]: if ipa_to_phonemefeats[ipa][feat] in value_to_index: phone_to_vector[ipa][value_to_index[ipa_to_phonemefeats[ipa][feat]]] = 1 for feat in feat_to_val_set: for value in feat_to_val_set[feat]: if value not in value_to_index: print(f"Unknown feature value in featureset! {value}") # print(f"{sum([len(values) for values in [feat_to_val_set[feat] for feat in feat_to_val_set]])} should be 42") return phone_to_vector def generate_phone_to_id_lookup(): ipa_to_phonemefeats = generate_feature_lookup() count = 0 phone_to_id = dict() for key in sorted(list(ipa_to_phonemefeats)): # careful: non-deterministic phone_to_id[key] = count count += 1 return phone_to_id if __name__ == '__main__': print(generate_phone_to_id_lookup())