|
|
|
|
|
/**
 * IPA renderings of the ARPABET vowel symbols.
 *
 * Keys are the vowel symbol followed by its digit suffix (0, 1 or 2).
 * Most vowels share one IPA form across all three suffixes; AH and ER are
 * the exceptions and vary by suffix.
 */
const vowels = {
  AA0: "ɑː", AA1: "ɑː", AA2: "ɑː",
  AE0: "æ", AE1: "æ", AE2: "æ",
  AH0: "ə", AH1: "ʌ", AH2: "ə",
  AO0: "ɔ", AO1: "ɔ", AO2: "ɔ",
  AW0: "aʊ", AW1: "aʊ", AW2: "aʊ",
  AY0: "aɪ", AY1: "aɪ", AY2: "aɪ",
  EH0: "ɛ", EH1: "ɛ", EH2: "ɛ",
  ER0: "ɝ", ER1: "ɝː", ER2: "ɝː",
  EY0: "eɪ", EY1: "eɪ", EY2: "eɪ",
  IH0: "ɪ", IH1: "ɪ", IH2: "ɪ",
  IY0: "iː", IY1: "iː", IY2: "iː",
  OW0: "oʊ", OW1: "oʊ", OW2: "oʊ",
  OY0: "ɔɪ", OY1: "ɔɪ", OY2: "ɔɪ",
  UH0: "ʊ", UH1: "ʊ", UH2: "ʊ",
  UW0: "uː", UW1: "uː", UW2: "uː",
};
|
|
|
|
/**
 * IPA renderings of the ARPABET consonant symbols (no digit suffix).
 */
const consonants = {
  B: "b", CH: "tʃ", D: "d", DH: "ð",
  F: "f", G: "g", HH: "h", JH: "dʒ",
  K: "k", L: "l", M: "m", N: "n",
  NG: "ŋ", P: "p", R: "r", S: "s",
  SH: "ʃ", T: "t", TH: "θ", V: "v",
  W: "w", Y: "j", Z: "z", ZH: "ʒ",
};
|
|
|
|
|
|
/**
 * Where the stress mark is placed when syllables are rendered to a string.
 *
 * - SIMPLIFIED_VOWEL_ALIGNED: mark is written directly before the nucleus.
 * - STANDARD: mark is written before the whole syllable.
 * - NONE: any other value, including this one, produces no stress marks.
 *
 * Every value equals its key, so callers that cannot import this object can
 * pass the plain string (e.g. "STANDARD") instead.
 */
const AccentMode = Object.freeze({
  SIMPLIFIED_VOWEL_ALIGNED: "SIMPLIFIED_VOWEL_ALIGNED",
  // Was misspelled "STANDARd", which made the literal "STANDARD" unmatched.
  STANDARD: "STANDARD",
  NONE: "NONE",
});
|
|
|
|
/**
 * Single ARPABET -> IPA lookup combining the vowel and consonant tables.
 * Vowel entries are copied first, then consonant entries.
 */
const arpa_to_ipa_lookup_tables = Object.assign({}, vowels, consonants);
|
|
|
|
/**
 * One syllable of a converted word.
 *
 * The field names are historical: `ontop` holds the IPA text placed before
 * the nucleus and `coder` the IPA text placed after it.
 */
class Syllable {
  /**
   * @param {string} ontop - IPA text preceding the nucleus.
   * @param {?string} nucleus - IPA vowel, or null when there is none.
   * @param {string} coder - IPA text following the nucleus.
   * @param {number} accent - Stress digit parsed from the vowel, or -1.
   * @param {string[]} [ontop_arpa=[]] - ARPABET symbols that produced
   *   `ontop`. Defaults to an empty array so that code reading `.length`
   *   or slicing it never sees `undefined`.
   */
  constructor(ontop, nucleus, coder, accent, ontop_arpa = []) {
    this.ontop = ontop;
    this.ontop_arpa = ontop_arpa;
    this.nucleus = nucleus;
    this.coder = coder;
    this.accent = accent;
  }

  /** Logs the syllable's IPA parts and accent to the console. */
  display() {
    console.log(`Ontop: ${this.ontop} Nucleus: ${this.nucleus}, Coder: ${this.coder}, Accent: ${this.accent}`);
  }
}
|
|
|
|
|
|
/**
 * Concatenated ARPABET consonant sequences that splitCodaOnset looks up
 * when deciding whether a run of consonants stays together in front of the
 * following vowel. Each row below is one space-separated group; the groups
 * are flattened into a single array of strings in the order written.
 */
const consonantClusters = [
  "PL PR TR BR KR GR DR GL FL BL KL",
  "TN DN PN GN BM DM PM GM TM",
  "SL SW SHL SHR VL VR ZL ZR THL THR",
  "FTH VTH ZTH",
  "FY KY MY NY HY BY PY LY",
  "KW DW",
  "SPR STR SKR SPL STL SKL SHT SPT STK SPN",
].flatMap((group) => group.split(" "));
|
|
|
|
|
|
/**
 * Divides the consonants found between two vowels into the part that closes
 * the previous syllable and the part that opens the next one.
 *
 * @param {string[]} consonants - ARPABET consonant symbols, in order.
 * @param {?string} [pre_nucleus=null] - IPA nucleus of the previous syllable.
 * @param {?string} [post_nucleus=null] - IPA nucleus of the next syllable
 *   (accepted but not consulted).
 * @returns {Array<string[]>} `[coda, onset]`. When nothing is split off, the
 *   onset is the very array that was passed in.
 */
function splitCodaOnset(consonants, pre_nucleus = null, post_nucleus = null) {
  const count = consonants.length;
  if (count === 0) {
    return [[], []];
  }
  if (count === 1) {
    return [[], consonants];
  }

  const joined = consonants.join("");

  // "DM"/"DN" after a schwa or "æ" is always divided, even though both
  // sequences are listed as clusters.
  const forcedSplit =
    (joined === "DM" || joined === "DN") &&
    (pre_nucleus === "ə" || pre_nucleus === "æ");

  if (!forcedSplit && consonantClusters.includes(joined)) {
    // A known cluster goes entirely to the following syllable.
    return [[], consonants];
  }

  // By default only the first consonant closes the previous syllable.
  let splitAt = 1;
  if (joined === "RDV") {
    splitAt = 2;
  } else if (count > 3) {
    // For four or more consonants, give the previous syllable a second
    // consonant unless everything after the first one is a known cluster.
    const tail = consonants.slice(1).join("");
    if (!consonantClusters.includes(tail)) {
      splitAt = 2;
    }
  }

  return [consonants.slice(0, splitAt), consonants.slice(splitAt)];
}
|
|
|
|
|
|
/**
 * Converts ARPABET text to an IPA string.
 *
 * Words are separated by tab characters and the phonemes inside a word by
 * single spaces. The punctuation marks `,` `.` `?` `!` are treated as
 * separate tokens wherever they occur and are copied to the output.
 *
 * Each converted word is followed by one space (the last word included);
 * a space that ends up directly before a punctuation mark is removed.
 *
 * @param {string} arpa_text - ARPABET input.
 * @param {string} [accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED] - One of
 *   the AccentMode values; controls stress-mark placement.
 * @returns {string} The IPA rendering.
 */
function arpa_to_ipa(arpa_text, accent_mode = AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
  const punctuation = [",", ".", "?", "!"];

  // Isolate punctuation on BOTH sides. Separating it only from the text
  // before it left input such as "A, B" with the chunk ", B", whose comma
  // was then rejected as an invalid phoneme and lost.
  const chunks = arpa_text.replace(/[,.?!]/g, "\t$&\t").split("\t");

  const ipa_texts = [];
  for (const chunk of chunks) {
    const word = chunk.trim();
    if (word === "") {
      continue;
    }
    if (punctuation.includes(word)) {
      ipa_texts.push(word);
      continue;
    }
    const syllables = arpa_to_ipa_with_syllables(word);
    ipa_texts.push(syallablesToString(syllables, accent_mode), " ");
  }

  // Drop the word-separating space that precedes a punctuation mark.
  return ipa_texts.join("").replace(/ ([.,?!])/g, "$1");
}
|
|
|
|
/**
 * Translates a list of ARPABET symbols to one concatenated IPA string.
 *
 * Symbols missing from the lookup table are reported on the console and
 * contribute nothing to the result.
 *
 * @param {string[]} phonemes - ARPABET symbols, exactly as keyed in the table.
 * @returns {string} The IPA symbols joined without separators.
 */
function arpas_symbol_to_ipa(phonemes) {
  const pieces = [];
  for (const symbol of phonemes) {
    const ipa = arpa_to_ipa_lookup_tables[symbol];
    if (ipa === undefined) {
      console.log(`Invalid Arpabet phoneme: ${symbol}`);
      continue;
    }
    pieces.push(ipa);
  }
  return pieces.join("");
}
|
|
|
|
|
|
/**
 * Converts one ARPABET word into a list of Syllable objects.
 *
 * The word is upper-cased and split on single spaces. Every vowel starts a
 * new syllable whose `ontop` is the run of consonants seen since the previous
 * vowel; consonants after the last vowel are appended to the last syllable's
 * `coder`. Consonant runs between two vowels are then divided between the
 * neighbouring syllables with splitCodaOnset.
 *
 * A vowel may be written without its stress digit ("AA"); it is looked up as
 * if it carried "0" and gets accent -1. Unknown symbols are reported on the
 * console and skipped.
 *
 * @param {string} arpa - Space-separated ARPABET symbols of a single word.
 * @returns {Syllable[]} The syllables in order. Empty when no symbol was
 *   recognised; a word without any vowel yields one syllable whose nucleus
 *   is null.
 */
function arpa_to_ipa_with_syllables(arpa) {
  const phonemes = arpa.toUpperCase().split(' ');
  const syllables = [];

  // Consonants collected since the last vowel, as IPA text and as symbols.
  let pendingIpa = "";
  let pendingArpa = [];

  for (const phoneme of phonemes) {
    // Resolve the table key once so the vowel test below uses the same key
    // as the lookup. Testing the raw symbol made digit-less vowels count as
    // consonants, and they were later discarded as invalid.
    let key = phoneme;
    if (arpa_to_ipa_lookup_tables[key] === undefined) {
      key = `${phoneme}0`;
    }
    const ipaSymbol = arpa_to_ipa_lookup_tables[key];
    if (ipaSymbol === undefined) {
      console.log(`Invalid Arpabet phoneme: ${phoneme}`);
      continue;
    }

    if (key in vowels) {
      // The accent is the digit actually written on the symbol, if any.
      const lastChar = phoneme.slice(-1);
      const accent = /^[0-9]$/.test(lastChar) ? Number.parseInt(lastChar, 10) : -1;
      syllables.push(new Syllable(pendingIpa, ipaSymbol, "", accent, pendingArpa));
      pendingIpa = "";
      pendingArpa = [];
    } else {
      pendingIpa += ipaSymbol;
      pendingArpa.push(phoneme);
    }
  }

  if (pendingIpa !== "") {
    if (syllables.length > 0) {
      // Trailing consonants close the last syllable.
      syllables[syllables.length - 1].coder += pendingIpa;
    } else {
      // No vowel at all: keep the consonants instead of throwing.
      syllables.push(new Syllable(pendingIpa, null, "", -1, pendingArpa));
    }
  }

  if (syllables.length === 0) {
    return syllables;
  }

  // Divide each inter-vowel consonant run between the two syllables around it.
  for (let i = 1; i < syllables.length; i++) {
    const [codaArpa, onsetArpa] = splitCodaOnset(
      syllables[i].ontop_arpa,
      syllables[i - 1].nucleus,
      syllables[i].nucleus
    );
    const coder = arpas_symbol_to_ipa(codaArpa);
    const onset = arpas_symbol_to_ipa(onsetArpa);
    syllables[i - 1].coder = coder;
    syllables[i].ontop = onset;
  }

  // In a final syllable whose accent is below 1, drop the trailing length
  // mark: from "iː" only when nothing follows the vowel, from "ɝː" always.
  const last = syllables[syllables.length - 1];
  if (last.nucleus != null && last.accent < 1) {
    if (last.nucleus.endsWith("iː") && last.coder === "") {
      last.nucleus = last.nucleus.slice(0, -1);
    } else if (last.nucleus.endsWith("ɝː")) {
      last.nucleus = last.nucleus.slice(0, -1);
    }
  }

  return syllables;
}
|
|
|
|
/**
 * Renders syllables as one IPA string.
 *
 * Accent 1 is written as "ˈ" and accent 2 as "ˌ"; any other accent writes
 * no mark. With AccentMode.STANDARD the mark goes in front of the whole
 * syllable, with AccentMode.SIMPLIFIED_VOWEL_ALIGNED directly in front of
 * the nucleus, and with any other mode it is left out.
 *
 * @param {Array<{ontop: string, nucleus: ?string, coder: string, accent: number}>} syllables
 * @param {string} [accent_mode=AccentMode.SIMPLIFIED_VOWEL_ALIGNED]
 * @returns {string} The concatenated syllables, without separators.
 */
function syallablesToString(syllables, accent_mode = AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
  const stressMarks = new Map([
    [1, "ˈ"],
    [2, "ˌ"],
  ]);

  const rendered = [];
  for (const { ontop, nucleus, coder, accent } of syllables) {
    const vowel = nucleus ?? "";
    const mark = stressMarks.get(accent) ?? "";

    if (accent_mode === AccentMode.STANDARD) {
      rendered.push(mark + ontop + vowel + coder);
    } else if (accent_mode === AccentMode.SIMPLIFIED_VOWEL_ALIGNED) {
      rendered.push(ontop + mark + vowel + coder);
    } else {
      rendered.push(ontop + vowel + coder);
    }
  }

  return rendered.join("");
}
|
|
|
|
export { arpa_to_ipa }; |