File size: 2,504 Bytes
23b1fe2
 
08016ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import re

# Context-sensitive letter groups, rewritten before the single-letter table.
# The entries are applied one after another, in insertion order, as plain
# substring replacements over the whole word, so the order is significant:
#   * "este" comes before "Este": the output "Yeste" itself contains "este"
#     and would otherwise be rewritten a second time, giving "Yyeste";
#   * "Che"/"Chi" come before "Ch", and "Sc" before "Ce"/"Ci", so the longer
#     or earlier-listed group claims its letters first.
# The soft c/g groups map to non-Latin placeholder characters (Cyrillic, Odia,
# Devanagari) that cyrillic_equiv_dict turns back into Latin afterwards --
# presumably so the intermediate "c" -> "k" and "j" -> "zh" rules of
# romanian_dict leave them alone.
special_combs = {
    "este": "yeste", "Este": "Yeste",
    "El": "Yel",
    "Che": "Ke", "che": "ke",
    "Chi": "Ki", "chi": "ki",
    "Ghe": "ଗେ", "ghe": "गे",
    "Ghi": "ଗି", "ghi": "गी",
    "Ch": "h", "ch": "h",
    "Sc": "Sk", "sc": "sk",
    "Ce": "Чe", "ce": "чe",
    "Ci": "Чi", "ci": "чi",
    "Ge": "ଜେ", "ge": "जे",
    "Gi": "ଜି", "gi": "जी",
}

# Single-letter respellings: Romanian diacritics plus j, q and c mapped to an
# ASCII approximation of their sound. Upper- and lower-case forms are listed
# separately because matching is case-sensitive.
romanian_dict = {
    "ă": "aw", "Ă": "Aw",
    "â": "u", "Â": "U",
    "î": "u", "Î": "U",
    "j": "zh", "J": "Zh",
    "q": "k", "Q": "K",
    # s/t with comma below -- the standard Romanian letters
    # (U+0219 / U+0218 and U+021B / U+021A).
    "\u0219": "sh", "\u0218": "Sh",
    "\u021b": "ts", "\u021a": "Ts",
    # s/t with cedilla (U+015F / U+015E and U+0163 / U+0162): visually
    # near-identical legacy code points still common in Romanian text;
    # without these entries such letters would pass through unchanged.
    "\u015f": "sh", "\u015e": "Sh",
    "\u0163": "ts", "\u0162": "Ts",
    "c": "k", "C": "K",
}

# Final-pass table: turns the non-Latin placeholder characters that
# special_combs emits (Cyrillic che, Odia and Devanagari syllables) into
# their Latin spelling.
cyrillic_equiv_dict = {
    # Cyrillic che
    "ч": "ch",
    "Ч": "Ch",
    # hard g before i / e
    "ଗି": "Gi",
    "गी": "gi",
    "ଗେ": "Ge",
    "गे": "ge",
    # soft g before i / e
    "ଜି": "Ji",
    "जी": "ji",
    "ଜେ": "Je",
    "जे": "je",
}

# Lowercase "x" with a vowel on each side. Lookarounds keep the neighbouring
# vowels out of the match, so occurrences that share a vowel (e.g. "axaxa")
# are all found instead of only every other one.
_INTERVOCALIC_X = re.compile(r"(?<=[aeiouAEIOU])x(?=[aeiouAEIOU])")


def romanian_position_conditional_replace(word):
    """Apply the respelling rules that depend on a letter's position in *word*.

    Rules, in order:
      * a one-character word is returned unchanged;
      * a leading "y"/"Y" becomes "i"/"I";
      * a leading lowercase "x" becomes "ks";
      * a lowercase "x" between two unaccented vowels becomes "gz";
      * every remaining "x" becomes "ks" and every "X" becomes "Ks"
        (uppercase "X" never takes the "gz" form).

    Args:
        word: A single token (no whitespace splitting is done here).

    Returns:
        The token with the positional rules applied; "" stays "".
    """
    if len(word) == 1:
        return word

    # Only one of these can apply: each rewrites the first character.
    if word.startswith("y"):
        word = "i" + word[1:]
    elif word.startswith("Y"):
        word = "I" + word[1:]
    elif word.startswith("x"):  # at the beginning of a word, x = ks
        word = "ks" + word[1:]

    # x between vowels = gz
    word = _INTERVOCALIC_X.sub("gz", word)

    # Anything still left is the default case: x = ks.
    return word.replace("x", "ks").replace("X", "Ks")

def check_special_comb(word):
    """Rewrite every letter group listed in ``special_combs`` inside *word*.

    The table is walked in insertion order and each entry is applied as a
    plain, case-sensitive substring replacement over the whole word, so text
    produced by an earlier entry is visible to the entries that follow.
    """
    for letters, substitute in special_combs.items():
        word = word.replace(letters, substitute)
    return word

def romanian_replace(word):
    """Respell the single letters listed in ``romanian_dict`` inside *word*.

    Each table entry is applied in turn to the whole word; matching is
    case-sensitive and characters without an entry are kept as they are.
    """
    for letter, respelling in romanian_dict.items():
        word = word.replace(letter, respelling)
    return word

def cyrillic_replace(word):
    """Swap each key of ``cyrillic_equiv_dict`` found in *word* for its value.

    Entries are applied one after another over the whole word; a key that
    does not occur leaves the word untouched.
    """
    for placeholder, latin in cyrillic_equiv_dict.items():
        word = word.replace(placeholder, latin)
    return word

def romanian_word_to_latin(word):
    """Run one token through the four respelling passes and return the result.

    The passes run in a fixed order: positional rules, letter groups,
    single letters, then the placeholder-to-Latin conversion.
    """
    passes = (
        romanian_position_conditional_replace,
        check_special_comb,
        romanian_replace,
        cyrillic_replace,
    )
    for apply_pass in passes:
        word = apply_pass(word)
    return word

def romanian_sentence_to_latin(text):
    """Respell *text* token by token.

    The text is split on single space characters only, so runs of spaces
    yield empty tokens and are preserved when the converted tokens are joined
    back with single spaces. Other whitespace and punctuation stay attached
    to their neighbouring token.
    """
    return " ".join(romanian_word_to_latin(piece) for piece in text.split(" "))