DebasishDhal99
commited on
Commit
•
4ed0334
1
Parent(s):
adb1b0c
Debugging the pronunciation of words ending with a punctuation mark
Browse files- turkish.py +28 -1
turkish.py
CHANGED
@@ -40,9 +40,33 @@ def turkish_letter_to_eng(letter):
|
|
40 |
else:
|
41 |
return letter
|
42 |
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def turkish_word_to_latin(word):
|
45 |
assert type(word)==str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
word = check_special_comb(word)
|
47 |
|
48 |
if word.endswith("ı"):
|
@@ -54,6 +78,9 @@ def turkish_word_to_latin(word):
|
|
54 |
|
55 |
word = ''.join([turkish_letter_to_eng(letter) for letter in word])
|
56 |
word = cyrillic_to_eng(word)
|
|
|
|
|
|
|
57 |
return word
|
58 |
|
59 |
|
|
|
40 |
else:
|
41 |
return letter
|
42 |
|
43 |
+
def check_punc(word):
    """Report whether *word* ends with punctuation and how many marks trail it.

    The pronunciation of some Turkish characters changes when they are at the
    end of the string, and trailing punctuation marks (".", "!", ...) would
    otherwise hide that word-final position from the caller.

    Args:
        word: The token to inspect.

    Returns:
        A tuple ``(ending_punc, how_many_punc_end)``. ``ending_punc`` is True
        when the last character of ``word`` is in ``string.punctuation``.
        ``how_many_punc_end`` is the length of the unbroken run of
        punctuation characters at the end of ``word`` ("deniz!!!" -> 3).
    """
    punc_chars = frozenset(string.punctuation)

    # Walk backwards and stop at the first non-punctuation character, so that
    # marks inside the word ("a.b.", "don't") are not counted as trailing and
    # a word made only of punctuation is counted in full.
    how_many_punc_end = 0
    for char in reversed(word):
        if char not in punc_chars:
            break
        how_many_punc_end += 1

    # Derive the flag from the count so the two values can never disagree.
    return how_many_punc_end > 0, how_many_punc_end
|
57 |
+
|
58 |
+
|
59 |
def turkish_word_to_latin(word):
|
60 |
assert type(word)==str
|
61 |
+
|
62 |
+
last_letter = ""
|
63 |
+
|
64 |
+
ending_punc, how_many_punc_end = check_punc(word) #If a word ends with a puncutation letter. "deniz." will be True. How many puncutation chars. "deniz!!!" = 3
|
65 |
+
|
66 |
+
if ending_punc:
|
67 |
+
last_letter = word[-1*how_many_punc_end:]
|
68 |
+
word = word[:-1*how_many_punc_end]
|
69 |
+
|
70 |
word = check_special_comb(word)
|
71 |
|
72 |
if word.endswith("ı"):
|
|
|
78 |
|
79 |
word = ''.join([turkish_letter_to_eng(letter) for letter in word])
|
80 |
word = cyrillic_to_eng(word)
|
81 |
+
|
82 |
+
if last_letter: #If a punctuation mark is there in the original word, it'll be added here.
|
83 |
+
word = word + last_letter
|
84 |
return word
|
85 |
|
86 |
|