Spaces:
Sleeping
Sleeping
| import Levenshtein | |
# Lookup table from stress-free, upper-case ARPABET symbols to IPA strings.
# Some values are more than one character long (diphthongs, affricates and
# the long vowels written with a length mark).
ARPABET_TO_IPA = {
    "AA": "ɑ",   # as in "father"
    "AE": "æ",   # as in "cat"
    "AH": "ʌ",   # as in "strut"
    "AO": "ɔ",   # as in "thought"
    "AW": "aʊ",  # as in "now"
    "AY": "aɪ",  # as in "my"
    "B": "b",
    "CH": "tʃ",
    "D": "d",
    "DH": "ð",
    "EH": "ɛ",   # as in "bed"
    "ER": "ɝ",   # as in "bird" (rhotic); a non-rhotic mapping could use ɜː
    "EY": "eɪ",  # as in "face"
    "F": "f",
    "G": "ɡ",
    "HH": "h",
    "IH": "ɪ",   # as in "sit"
    "IY": "iː",  # as in "seat"; written with the length mark
    "JH": "dʒ",
    "K": "k",
    "L": "l",
    "M": "m",
    "N": "n",
    "NG": "ŋ",
    "OW": "oʊ",  # as in "goat"
    "OY": "ɔɪ",  # as in "boy"
    "P": "p",
    "R": "ɹ",
    "S": "s",
    "SH": "ʃ",
    "T": "t",
    "TH": "θ",
    "UH": "ʊ",   # as in "foot"
    "UW": "uː",  # as in "goose"; written with the length mark
    "V": "v",
    "W": "w",
    "Y": "j",
    "Z": "z",
    "ZH": "ʒ",
}
def arpabet_to_ipa_seq(arpabet_seq):
    """Translate a sequence of ARPABET symbols into IPA symbols.

    Any trailing ``0``/``1``/``2`` characters (stress markers, e.g.
    ``UW1`` -> ``UW``) are stripped and the symbol is upper-cased before it
    is looked up in ``ARPABET_TO_IPA``. A symbol with no table entry is
    passed through exactly as it was given.

    Args:
        arpabet_seq: Iterable of ARPABET symbol strings.

    Returns:
        list: One output item per input symbol, in the same order.
    """
    ipa_symbols = []
    for symbol in arpabet_seq:
        lookup_key = symbol.rstrip("012").upper()
        # Fall back to the untouched input symbol when there is no mapping.
        ipa_symbols.append(ARPABET_TO_IPA.get(lookup_key, symbol))
    return ipa_symbols
def levenshtein_similarity_score(seq1, seq2):
    """
    Return the Levenshtein similarity of two symbol sequences as a percentage.

    Each sequence is concatenated into one string and the two strings are
    compared character by character, so a multi-character symbol such as
    "tʃ" counts once per character rather than once per symbol.

    Args:
        seq1: Iterable of strings (for example IPA symbols).
        seq2: Iterable of strings to compare against ``seq1``.

    Returns:
        int: ``(1 - distance / longer_length) * 100`` rounded down. 100 means
        the joined strings are identical; two empty inputs also score 100.
    """
    str1 = "".join(seq1)
    str2 = "".join(seq2)
    max_len = max(len(str1), len(str2))
    if max_len == 0:
        # Both inputs are empty: identical, and avoids dividing by zero.
        return 100
    distance = Levenshtein.distance(str1, str2)
    # Exact integer arithmetic. The float form int((1 - d / n) * 100) loses a
    # point to round-off: 1 - 4 / 5 is 0.19999999999999996, giving 19, not 20.
    return (max_len - distance) * 100 // max_len
if __name__ == "__main__":
    # Manual smoke test: score a learner's phoneme sequence against the
    # reference sequence and print the similarity percentage.
    # To try the converter instead, call:
    #   arpabet_to_ipa_seq(['ah', 'l', 'ow', 'ay', 'd', 'ow', 'n', 't', 'r', 'ih'])
    user_ipa = ["ʌ", "ð", "ɛ", "ɹ"]
    corrected_ipa = ["ð", "ɛ", "ɹ"]
    print(levenshtein_similarity_score(seq1=corrected_ipa, seq2=user_ipa))