DebasishDhal99
committed on
Commit
•
718e9ec
1
Parent(s):
36e9b92
Debugging Turkish end char pronunciation
Browse files- turkish.py +9 -3
turkish.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# import nltk
|
2 |
# nltk.download('punkt')
|
3 |
# from nltk.tokenize import word_tokenize
|
|
|
4 |
|
5 |
special_combs = {"c" : "ј", "C" : "Ј"} #These are Serbian J characters, they will be later converted to Latin J.
|
6 |
|
@@ -44,15 +45,20 @@ def check_punc(word): #The pronunciation of some Turkish chars change if they're
|
|
44 |
punc_list = list(string.punctuation)
|
45 |
|
46 |
ending_punc = False
|
47 |
-
|
|
|
|
|
48 |
for punc in punc_list:
|
49 |
if word.endswith(punc):
|
50 |
ending_punc = True
|
51 |
-
|
52 |
-
|
53 |
for i in range(1,len(word)):
|
54 |
if word[-i] in punc_list:
|
55 |
how_many_punc_end += 1
|
|
|
|
|
|
|
56 |
return ending_punc, how_many_punc_end
|
57 |
|
58 |
|
|
|
1 |
# import nltk
|
2 |
# nltk.download('punkt')
|
3 |
# from nltk.tokenize import word_tokenize
|
4 |
+
import string
|
5 |
|
6 |
special_combs = {"c" : "ј", "C" : "Ј"} #These are Serbian J characters, they will be later converted to Latin J.
|
7 |
|
|
|
45 |
punc_list = list(string.punctuation)
|
46 |
|
47 |
ending_punc = False
|
48 |
+
how_many_punc_end = 0
|
49 |
+
#Modify it so that even if there is more than one punctuation mark at the end of the word, it will be detected
|
50 |
+
|
51 |
for punc in punc_list:
|
52 |
if word.endswith(punc):
|
53 |
ending_punc = True
|
54 |
+
break
|
55 |
+
|
56 |
for i in range(1,len(word)):
|
57 |
if word[-i] in punc_list:
|
58 |
how_many_punc_end += 1
|
59 |
+
|
60 |
+
if word[-i] not in punc_list:
|
61 |
+
break
|
62 |
return ending_punc, how_many_punc_end
|
63 |
|
64 |
|