File size: 2,337 Bytes
e50fe35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
# 
#  Copyright (c) 2013-present, Anoop Kunchukuttan
#  All rights reserved.
#  
#  This source code is licensed under the MIT license found in the
#  LICENSE file in the root directory of this source tree.
# 

# Program to transliterate Latin-script acronyms into Indic languages
#
# @author Anoop Kunchukuttan 
#

from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
import string
import random

class LatinToIndicAcronymTransliterator(object):
    """Spell out Latin-script acronyms letter by letter in Indic scripts.

    Each lowercase Latin letter is first replaced by a Devanagari string
    (e.g. ``'b'`` -> ``'बी'``) using ``LATIN_TO_DEVANAGARI_TRANSTABLE``; the
    Devanagari text is then handed to ``UnicodeIndicTransliterator`` to reach
    the requested target language.

    All members are class-level constants or static methods; the class is
    used as a namespace and is not meant to hold instance state.
    """

    # Translation table usable with ``str.translate``. ``str.maketrans``
    # converts the single-character keys into Unicode ordinals, so the
    # resulting mapping is keyed by ``ord(letter)``.
    LATIN_TO_DEVANAGARI_TRANSTABLE = str.maketrans({
        'a':'ए',
        'b':'बी',
        'c':'सी',
        'd':'डी',
        'e':'ई',
        'f':'एफ',
        'g':'जी',
        'h':'एच',
        'i':'आई',
        'j':'जे',
        'k':'के',
        'l':'एल',
        'm':'एम',
        'n':'एन',
        'o':'ओ',
        'p':'पी',
        'q':'क्यू',
        'r':'आर',
        's':'एस',
        't':'टी',
        'u':'यू',
        'v':'वी',
        'w':'डब्ल्यू',
        'x':'एक्स',
        'y':'वाय',
        'z':'जेड',
    })

    # The 26 lowercase ASCII letters, in order; the sampling population for
    # generate_latin_acronyms().
    LATIN_ALPHABET = list(string.ascii_lowercase)

    @staticmethod
    def get_transtable():
        """Return the Latin-letter -> Devanagari translation table.

        The returned object is the shared class-level table (not a copy), in
        the form produced by ``str.maketrans``: a dict keyed by the Unicode
        ordinal of each lowercase Latin letter.
        """
        return LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE

    @staticmethod
    def transliterate(w,lang):
        """Transliterate the Latin acronym ``w`` into language ``lang``.

        ``w`` is lowercased, so the input case does not matter. Characters
        that have no entry in the translation table (digits, punctuation,
        non-ASCII letters, ...) are left unchanged by ``str.translate``.

        :param w: acronym written in Latin script
        :param lang: target language code, forwarded as the last argument of
            ``UnicodeIndicTransliterator.transliterate``
        :return: whatever ``UnicodeIndicTransliterator.transliterate``
            returns for the Devanagari spelling (presumably the acronym in
            the script of ``lang`` -- confirm against the indicnlp docs)
        """
        devanagari = w.lower().translate(
            LatinToIndicAcronymTransliterator.LATIN_TO_DEVANAGARI_TRANSTABLE)
        # 'hi' is passed as the second argument: presumably the source
        # language code (Hindi) matching the Devanagari intermediate text.
        return UnicodeIndicTransliterator.transliterate(devanagari,'hi',lang)

    @staticmethod
    def generate_latin_acronyms(num_acronyms, min_len=2, max_len=6, strategy='random'):
        """Generate Latin acronyms in lower case.

        With the ``'random'`` strategy, each acronym gets a length drawn by
        ``random.randint(min_len, max_len)`` (both bounds inclusive) and its
        letters are drawn with replacement from ``LATIN_ALPHABET`` using
        ``random.choices``. Results depend on the state of the module-level
        ``random`` generator, so seed it for reproducible output.

        :param num_acronyms: number of acronyms to generate
        :param min_len: minimum acronym length (inclusive)
        :param max_len: maximum acronym length (inclusive)
        :param strategy: sampling strategy; only ``'random'`` is supported
        :return: list of ``num_acronyms`` lowercase strings
        :raises ValueError: if ``strategy`` is not supported, or (from
            ``random.randint``) if ``min_len`` is greater than ``max_len``
        """
        # Fail loudly on an unknown strategy instead of silently returning a
        # list of None values.
        if strategy != 'random':
            raise ValueError(
                "unsupported strategy {!r}; only 'random' is available".format(strategy))

        alphabet = LatinToIndicAcronymTransliterator.LATIN_ALPHABET
        # randint() is evaluated before choices() for every acronym, which
        # keeps the sequence of random draws stable for a given seed.
        return [
            ''.join(random.choices(alphabet, k=random.randint(min_len, max_len)))
            for _ in range(num_acronyms)
        ]