File size: 3,103 Bytes
c9f9492
 
 
836b0d2
 
c9f9492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
836b0d2
c9f9492
 
 
 
 
 
 
abc6394
 
 
 
 
 
 
 
 
c9f9492
 
 
 
 
 
 
 
 
 
836b0d2
 
 
 
 
 
 
 
c9f9492
 
 
 
 
f5e28c7
 
 
c9f9492
 
 
 
 
836b0d2
 
c9f9492
 
f5e28c7
c9f9492
dd969eb
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Words that mark an open ("wh-") question; matched against a sentence's
# first token when classifying questions.
opened_question_adverbs = [
    "how",
    "when",
    "where",
    "why",
    "how much",
    "how many",
    "how often",
    "how long",
    "what",
    "which",
    "who",
    "whose",
    "whom",
]

# Time words that get moved to the beginning of ASL gloss sentences.
time_words = [
    "yesterday",
    "today",
    "tomorrow",
    "now",
    "before",
    "after",
    "morning",
    "afternoon",
    "evening",
    "night",
]

# ASL glossing rules implemented in functions
def gloss_word(word):
    """Return the gloss form of *word*: the same text in upper case."""
    upper_cased = word.upper()
    return upper_cased

def handle_fingerspelling(word):
    """Return *word* fingerspelled: upper-cased letters joined by hyphens."""
    letters = word.upper()
    # A string is already an iterable of characters, so it can be joined directly.
    return '-'.join(letters)

def handle_lexicalized_fingerspelling(word):
    """Return *word* upper-cased and prefixed with ``#``."""
    return "#{}".format(word.upper())

def handle_repetition(word, count):
    """Return the gloss for a sign repeated *count* times.

    The upper-cased word is followed by one ``+`` for every repetition after
    the first. A *count* of 1 or less yields just the upper-cased word.
    """
    if count > 1:
        return "{}{}".format(word.upper(), '+' * (count - 1))
    return word.upper()

def handle_role_shift(sentence):
    """Return *sentence* prefixed with the ``rs`` marker and a space."""
    return "rs {}".format(sentence)

def handle_indexing(token, index):
    """Return *token* upper-cased and prefixed with ``ix_<index>``."""
    referent = token.upper()
    return "ix_{} {}".format(index, referent)

def gloss_sentence(doc):
    """Gloss every token of *doc* and join the results with single spaces.

    Args:
        doc: Iterable of tokens exposing a ``text`` attribute
            (presumably a spaCy ``Doc`` -- confirm against the caller).

    Returns:
        str: The glossed words in their original order, space-separated.
    """
    return " ".join(gloss_word(token.text) for token in doc)

def add_time_indicator(gloss_sentence_):
    """Move the first time word of a gloss sentence to the front.

    Args:
        gloss_sentence_: The gloss sentence, either as a whitespace-separated
            string or as an iterable of tokens (objects with a ``text``
            attribute, or plain strings).

    Returns:
        A string that starts with the upper-cased time word, followed by the
        remaining words joined by single spaces. When no word is listed in
        ``time_words``, ``gloss_sentence_`` itself is returned unchanged.
    """
    if isinstance(gloss_sentence_, str):
        words = gloss_sentence_.split()
    else:
        # Token sequences: use each token's text, or the item itself if it
        # has no ``text`` attribute.
        words = [str(getattr(token, "text", token)) for token in gloss_sentence_]

    for position, word in enumerate(words):
        if word.lower() in time_words:
            # Remove the word by position rather than with str.replace, so
            # other words that merely contain it (e.g. "NOW" in "KNOW")
            # are left intact.
            remaining = words[:position] + words[position + 1:]
            return " ".join([word.upper(), *remaining])
    return gloss_sentence_

# skip stop_words
def skip_stop_words(word):
    """Return ``''`` for the articles "the" and "a", otherwise *word* itself.

    The comparison is case-insensitive; any other word is returned unchanged.
    """
    lowered = word.lower()
    return '' if lowered in ('the', 'a') else word

def question_type(doc):
    """Classify *doc* as a wh-question, a yes/no question, or neither.

    Args:
        doc: Indexable sequence of tokens exposing a ``text`` attribute
            (presumably a spaCy ``Doc`` -- confirm against the caller).

    Returns:
        ``"wh-question"`` when the last token is ``?`` and the first token,
        lower-cased, is listed in ``opened_question_adverbs``;
        ``"yes-no-question"`` for any other sentence ending in ``?``;
        ``None`` when the sentence does not end in ``?`` or *doc* is empty.
    """
    try:
        if doc[-1].text != '?':
            return None
        opener = doc[0].text.lower()
        return "wh-question" if opener in opened_question_adverbs else "yes-no-question"
    except IndexError:
        # Indexing an empty doc raises IndexError; treat it as "not a question".
        return None

def process_sentence(doc):
    """Convert a parsed sentence into an ASL gloss string.

    Each token is glossed from its lower-cased lemma:

    * proper nouns keep their surface text, upper-cased;
    * "i"/"me" and "you" become indexed pronouns via ``handle_indexing``;
    * lemmas found in the classifier table become their classifier;
    * lemmas found in the verb table become ``BE``;
    * everything else is upper-cased via ``gloss_word``.

    Words removed by ``skip_stop_words`` are left out. The first gloss that
    is a time word is then moved to the front, and a question marker is
    prepended when ``question_type`` recognises a question.

    Args:
        doc: Sequence of tokens exposing ``text``, ``lemma_`` and ``pos_``
            (presumably a spaCy ``Doc`` -- confirm against the caller).

    Returns:
        str: The glosses joined by single spaces.
    """
    # Non-manual markers prepended for each question type.
    nms = {
        "wh-question": "wh-q",
        "yes-no-question": "y/n-q",
    }
    classifiers = {
        "car": "CL:3",
        "person": "CL:1",
    }
    basic_verbs = {
        "is": "BE",
        "am": "BE",
        "are": "BE",
        "was": "BE",
        "were": "BE",
        "be": "BE",
    }

    glossed_sentence = []
    for token in doc:
        word = token.lemma_.lower()

        if token.pos_ == "PROPN":
            glossed_word = token.text.upper()
        elif word in ("i", "me"):
            glossed_word = handle_indexing("I", 1)
        elif word == "you":
            glossed_word = handle_indexing("YOU", 2)
        elif word in classifiers:
            glossed_word = classifiers[word]
        elif word in basic_verbs:
            glossed_word = basic_verbs[word]
        else:
            glossed_word = gloss_word(word)

        glossed_word = skip_stop_words(glossed_word)
        # Skipped stop words come back as ''. Appending them would leave
        # leading or doubled spaces in the joined sentence.
        if glossed_word:
            glossed_sentence.append(glossed_word)

    # Move only the first time word to the front of the sentence.
    for position, gloss in enumerate(glossed_sentence):
        if gloss.lower() in time_words:
            glossed_sentence.insert(0, glossed_sentence.pop(position))
            break

    type_doc = question_type(doc)
    if type_doc is not None:
        glossed_sentence.insert(0, nms[type_doc])

    return " ".join(glossed_sentence)