# Lookup tables shared by the parsers below.  Keys are the raw code fragments
# found inside a morphology string; values are the labels placed in the result.
_PROBLEM = 'PROBLEM'

_CASES = {
    'NOM': 'Nominative',
    'GEN': 'Genitive',
    'DAT': 'Dative',
    'ACC': 'Accusative',
    'ABL': 'Ablative',
    'VOC': 'Vocative',
    'LOC': 'Locative',
}

# NOTE: the label for 'X' is 'Infinitive' for every part of speech; it is kept
# as-is because it is part of the emitted output.
_NUMBERS = {
    'S': 'Singular',
    'P': 'Plural',
    'X': 'Infinitive',
}

_GENDERS = {
    'M': 'Masculine',
    'F': 'Feminine',
    'N': 'Neuter',
    'X': 'Unknown',
    'C': 'Common',
}

_COMPARISONS = {
    'POS': 'Positive',
    'COMP': 'Comparative',
    'SUPER': 'Superlative',
    'X': 'Unknown',
}

# Tenses identified by an exact four-letter code at morph[3:7].
_TENSES = {
    'PRES': 'Present',
    'IMPF': 'Imperfect',
    'PLUP': 'Pluperfect',
    'PERF': 'Perfect',
}

_MOODS = {
    'SUB': 'Subjunctive',
    'IND': 'Indicative',
    'IMP': 'Imperative',
    'INF': 'Infinitive',
}

_PERSONS = {
    '0': 'Undeclined',
    '1': 'First',
    '2': 'Second',
    '3': 'Third',
}

# NOTE: 'Present Active' contains a space, unlike its sibling labels; it is
# reproduced unchanged because it is part of the emitted output.
_PARTICIPLE_TYPES = {
    'FUTPPL': 'FutureActive',
    'FUTACTIVEPPL': 'FutureActive',
    'PRESPPL': 'Present Active',
    'PRESACTIVEPPL': 'Present Active',
    'PERFPPL': 'PerfectActive',
    'PERFACTIVEPPL': 'PerfectActive',
    'FUTPASSIVEPPL': 'FuturePassive',
    'PERFPASSIVEPPL': 'PerfectPassive',
}

_PREPOSITION_CASES = {
    'ABL': 'Ablative',
    'ACC': 'Accusative',
}

# Second character accepted for a finite-verb code.  '9' is not accepted, so a
# code such as "V99XXX0X" is not dispatched to the verb parser.
_VERB_SECOND_CHARS = "012345678"

# For each part of speech, the result fields joined (in order) by
# morph_to_string.
_STRING_FIELDS = {
    'Verb': ('tense', 'voice', 'mood', 'person', 'number'),
    'Participle': ('case', 'number', 'gender', 'participle_type'),
    'Noun': ('case', 'number', 'gender'),
    'Adjective': ('case', 'number', 'gender', 'comparison'),
    'Pronoun': ('case', 'number', 'gender'),
    'Preposition': ('case',),
    'Adverb': ('comparison',),
    'Supine': ('case', 'number', 'gender'),
}

# Parts of speech that map to a fixed string with no further fields.
_FIXED_STRINGS = {
    'Enclitic': 'Enclitic',
    'Other': 'OTHER',
    'Conj': 'Conjunction',
    'Interjection': 'Interjection',
}


def _consume_case(morph, offset, allow_undeclined=True):
    """Read a case code at *offset*; return ``(label, next_offset)``.

    A recognised three-letter code advances the offset by 3.  When
    *allow_undeclined* is true, a single 'X' means 'Undeclined' and advances
    by 1.  Anything else yields 'PROBLEM' and advances by 3.
    """
    label = _CASES.get(morph[offset:offset + 3])
    if label is not None:
        return label, offset + 3
    # Slice (not index) so a too-short code yields 'PROBLEM', not IndexError.
    if allow_undeclined and morph[offset:offset + 1] == 'X':
        return 'Undeclined', offset + 1
    return _PROBLEM, offset + 3


def _consume_declension(morph, offset, allow_undeclined=True):
    """Read case, number and gender starting at *offset*.

    Returns ``(case, number, gender, next_offset)`` where *next_offset* is the
    position just after the gender character.
    """
    v_case, offset = _consume_case(morph, offset, allow_undeclined)
    number = _NUMBERS.get(morph[offset:offset + 1], _PROBLEM)
    offset += 1
    gender = _GENDERS.get(morph[offset:offset + 1], _PROBLEM)
    return v_case, number, gender, offset + 1


def parse_verb_morphology(morph):
    """Parse a finite-verb morphology code into a dict of labels.

    The tense is read from position 3, followed by an optional voice
    ("ACTIVE"/"PASSIVE"), a three-letter mood, a person digit and a number
    letter.  Any field that cannot be recognised (including fields missing
    because the code is too short) is reported as 'PROBLEM'.

    Returns a dict with keys 'pos', 'tense', 'voice', 'mood', 'person' and
    'number'.
    """
    offset = 7

    # Tense consumption
    tense_code = morph[3:7]
    if tense_code in _TENSES:
        tense = _TENSES[tense_code]
    elif tense_code == "FUTP" and morph[3:13] != 'FUTPASSIVE':
        tense = "FuturePerfect"
    elif morph[3:6] == 'FUT':
        # Plain future is only three letters long (also covers "FUTPASSIVE").
        offset = 6
        tense = "Future"
    elif morph == "V99XXX0X":
        # The remaining fields of this code do not follow the regular layout
        # and therefore come out as 'PROBLEM'.
        tense = "Undeclined"
    else:
        tense = _PROBLEM

    # Voice consumption
    if morph[offset:offset + 6] == "ACTIVE":
        voice = 'Active'
        offset += 6
    elif morph[offset:offset + 7] == "PASSIVE":
        voice = 'Passive'
        offset += 7
    elif morph[offset:offset + 3] in _MOODS:
        # Deponent verbs - no voice marker; the mood follows immediately and
        # the verb is reported as active.
        voice = 'Active'
    else:
        voice = _PROBLEM

    # Mood consumption
    mood = _MOODS.get(morph[offset:offset + 3], _PROBLEM)
    offset += 3

    # Person consumption
    person = _PERSONS.get(morph[offset:offset + 1], _PROBLEM)
    offset += 1

    # Number consumption
    number = _NUMBERS.get(morph[offset:offset + 1], _PROBLEM)

    return {
        'pos': "Verb",
        'tense': tense,
        'voice': voice,
        'mood': mood,
        'person': person,
        'number': number
    }


def parse_verb_participle_morphology(morph):
    """Parse a participle code: case, number, gender from position 6, then type.

    Unlike the other declined parsers, a single 'X' is not accepted as an
    undeclined case here.  The participle type must match the remainder of
    the code exactly.

    Returns a dict with keys 'pos', 'case', 'number', 'gender' and
    'participle_type'.
    """
    v_case, number, gender, offset = _consume_declension(
        morph, 6, allow_undeclined=False)
    participle_type = _PARTICIPLE_TYPES.get(morph[offset:], _PROBLEM)
    return {
        'pos': "Participle",
        'case': v_case,
        'number': number,
        'gender': gender,
        'participle_type': participle_type
    }


def parse_noun_morphology(morph):
    """Parse a noun code: case, number and gender starting at position 3.

    Returns a dict with keys 'pos', 'case', 'number' and 'gender'.
    """
    v_case, number, gender, _ = _consume_declension(morph, 3)
    return {
        'pos': "Noun",
        'case': v_case,
        'number': number,
        'gender': gender,
    }


def parse_adjective_morphology(morph):
    """Parse an adjective code: case, number, gender from position 5, then comparison.

    The comparison must match the remainder of the code exactly; otherwise it
    is reported as 'PROBLEM' and the offending code is printed.

    Returns a dict with keys 'pos', 'case', 'number', 'gender' and
    'comparison'.
    """
    v_case, number, gender, offset = _consume_declension(morph, 5)

    # Comparison consumption
    comparison = _COMPARISONS.get(morph[offset:])
    if comparison is None:
        comparison = _PROBLEM
        print(morph)

    return {
        'pos': "Adjective",
        'case': v_case,
        'number': number,
        'gender': gender,
        'comparison': comparison
    }


def parse_pronoun_morphology(morph):
    """Parse a pronoun code such as ``PRON31NOMPM``.

    Case, number and gender are read starting at position 6.

    Returns a dict with keys 'pos', 'case', 'number' and 'gender'.
    """
    v_case, number, gender, _ = _consume_declension(morph, 6)
    return {
        'pos': "Pronoun",
        'case': v_case,
        'number': number,
        'gender': gender,
    }


def parse_preposition_morphology(morph):
    """Parse a preposition code; everything from position 4 is the governed case.

    Only 'ABL' and 'ACC' are recognised.  Anything else is reported as
    'PROBLEM' and the unrecognised remainder is printed.

    Returns a dict with keys 'pos' and 'case'.
    """
    remainder = morph[4:]
    v_case = _PREPOSITION_CASES.get(remainder)
    if v_case is None:
        print(remainder)
        v_case = _PROBLEM

    return {
        'pos': "Preposition",
        'case': v_case
    }


def parse_adverb_morphology(morph):
    """Parse an adverb code; everything from position 3 is the comparison.

    An empty remainder is treated as 'Positive'.  An unrecognised remainder
    is reported as 'PROBLEM' and the offending code is printed.

    Returns a dict with keys 'pos' and 'comparison'.
    """
    remainder = morph[3:]
    if len(remainder) < 1:
        comparison = "Positive"
    elif remainder in _COMPARISONS:
        comparison = _COMPARISONS[remainder]
    else:
        comparison = _PROBLEM
        print(morph)

    return {
        'pos': "Adverb",
        'comparison': comparison
    }


def parse_supine_morphology(morph):
    """Parse a supine code: case, number and gender starting at position 8.

    Returns a dict with keys 'pos', 'case', 'number' and 'gender'.
    """
    v_case, number, gender, _ = _consume_declension(morph, 8)
    return {
        'pos': "Supine",
        'case': v_case,
        'number': number,
        'gender': gender,
    }


def parse_morphology(morph):
    """Dispatch a morphology code to the parser for its part of speech.

    Returns ``{}`` for codes shorter than two characters, ``None`` when the
    code matches no known part of speech, and otherwise a dict that always
    contains a 'pos' key.
    """
    if len(morph) < 2:
        return {}

    # Participle (must be tested before the generic verb rule)
    if morph[0:4] == "VPAR":
        return parse_verb_participle_morphology(morph)

    # Adjective
    if morph[0:3] == 'ADJ':
        return parse_adjective_morphology(morph)

    # Adverb
    if morph[0:3] == 'ADV':
        return parse_adverb_morphology(morph)

    # Verb
    if morph[0] == 'V' and morph[1] in _VERB_SECOND_CHARS:
        return parse_verb_morphology(morph)

    # Noun
    if morph[0] == 'N':
        return parse_noun_morphology(morph)

    # Pronoun
    if morph[0:4] == 'PRON':
        return parse_pronoun_morphology(morph)

    # Preposition
    if morph[0:4] == 'PREP':
        return parse_preposition_morphology(morph)

    # Supine
    if morph[0:6] == 'SUPINE':
        return parse_supine_morphology(morph)

    if morph[0:6] == 'INTERJ':
        return {'pos': 'Interjection'}

    if morph in ['TACKON', 'PREFIX', 'SUFFIX']:
        return {'pos': 'Enclitic'}

    if morph in ['OTHER', 'CONJ']:
        return {'pos': morph.capitalize()}

    # Unrecognised code: morph_to_string turns None into "".
    return None


def morph_to_string(morph):
    """Flatten a parsed morphology dict into an underscore-joined string.

    Returns ``""`` for ``None`` or an empty dict, and ``None`` when the 'pos'
    value is not one this function knows how to render.
    """
    if not morph:
        return ""

    pos = morph['pos']
    if pos in _FIXED_STRINGS:
        return _FIXED_STRINGS[pos]

    field_names = _STRING_FIELDS.get(pos)
    if field_names is None:
        return None
    return "_".join([pos] + [morph[name] for name in field_names])


def simplify_form(morph):
    """Parse a raw morphology code and return its flattened string form."""
    return morph_to_string(parse_morphology(morph))