Spaces:

a-v-bely
/

spanish-task-generator

Sleeping

File size: 14,366 Bytes

import json
from typing import Optional

# Character inventories consulted by the noun/adjective spelling rules below.
unstressed_vocals = tuple('aieou')
stressed_vocals = tuple('áíéóú')
all_vocals = (*unstressed_vocals, *stressed_vocals)
# Final vowels after which pluralize_noun appends a bare 's'.
vocals_s = (*unstressed_vocals, 'á', 'é', 'ó')
all_consonants = tuple('bcdfghjklmnñpqrstvwxzy')
# Final consonants after which pluralize_noun appends 'es'.
consonants_es = tuple('lrndzjsxh')
# Every remaining consonant (checked by the "append 's'" rule of pluralize_noun).
consonants_s = tuple(letter for letter in all_consonants if letter not in consonants_es)

# Lookup tables are read once, at import time; the paths are relative to the
# current working directory.
with open('language_data/inflexions.json', encoding='utf-8') as f:
    VERB_INFLECTIONS = json.load(f)
with open('language_data/irregular_verbs_list.json', encoding='utf-8') as f:
    ALL_IRREGULAR_VERBS = json.load(f)
with open('language_data/irregular_verbs.json', encoding='utf-8') as f:
    IRREGULAR_MODELS = json.load(f)


def multi_replace(tk, seq1=stressed_vocals, seq2=unstressed_vocals):
    """Replace every item of ``seq1`` found in ``tk`` by its counterpart in ``seq2``.

    The two sequences are aligned by index.  With the default arguments the
    acute accent is stripped from the vowels á, í, é, ó, ú.

    :param tk: string to process
    :param seq1: substrings to search for
    :param seq2: replacements, index-aligned with ``seq1``
    :return: ``tk`` after all replacements were applied in order
    """
    for position, old in enumerate(seq1):
        tk = tk.replace(old, seq2[position])
    return tk


def pluralize_noun(lemma):
    """Build the plural of a Spanish noun from its singular lemma.

    Only the spelling of ``lemma`` is inspected (final letters and the position
    of accented vowels).  The rules are tried in order; the first one that
    matches decides the ending, then the written accent is adjusted:

    * a lemma ending in ``n``/``s`` whose last vowel is accented loses its
      accents (``canción`` -> ``canciones``);
    * a lemma ending in ``n`` with three or more vowels, none accented, gets an
      accent on the third vowel from the end of the plural
      (``origen`` -> ``orígenes``, ``gravamen`` -> ``gravámenes``).

    :param lemma: singular noun
    :return: the plural form, or ``None`` when no rule matches
    """
    lemma_vowels = [char for char in lemma if char in all_vocals]
    # One-letter (or empty) lemmas have no penultimate character; ``None`` is
    # a member of neither letter tuple, so the checks below simply fail
    # instead of raising IndexError.
    penultimate = lemma[-2] if len(lemma) >= 2 else None

    if lemma.endswith(vocals_s):
        plural = lemma + 's'
    elif lemma.endswith(('í', 'ú')):
        plural = lemma + ('es' if lemma == 'sí' else 's')
    elif lemma.endswith(tuple('lrndzj')) and len(lemma_vowels) >= 3 \
            and lemma_vowels[-3] in stressed_vocals:
        # Accent on the third vowel from the end: the form is kept unchanged.
        plural = lemma
    elif lemma.endswith(('s', 'x')) and len(lemma_vowels) > 1 \
            and lemma_vowels[-1] in unstressed_vocals:
        # Several vowels and an unaccented last one: the form is kept unchanged.
        plural = lemma
    elif (penultimate in all_vocals and lemma.endswith('y')) \
            or lemma.endswith(consonants_es) or lemma.endswith('ch'):
        plural = lemma[:-1] + 'ces' if lemma.endswith('z') else lemma + 'es'
    elif (penultimate in all_consonants and lemma[-1] in all_consonants) \
            or (penultimate in all_vocals and lemma.endswith(consonants_s)):
        # Hard-coded exceptions are returned as they are, without the accent
        # adjustments below.
        if lemma == 'club':
            return 'clubes'
        if lemma == 'álbum':
            return 'álbumes'
        plural = lemma if lemma.endswith(('st', 'zt')) else lemma + 's'
    else:
        return None

    if lemma.endswith(('n', 's')) and lemma_vowels and lemma_vowels[-1] in stressed_vocals:
        plural = multi_replace(plural)
    elif lemma.endswith('n') and len(lemma_vowels) >= 3 \
            and all(vowel in unstressed_vocals for vowel in lemma_vowels):
        accented = {'a': 'á', 'e': 'é', 'i': 'í', 'o': 'ó', 'u': 'ú'}
        vowel_positions = [idx for idx, char in enumerate(plural) if char in all_vocals]
        # Accent exactly one character: the third vowel from the end.  A plain
        # str.replace would also rewrite earlier occurrences of that vowel.
        target = vowel_positions[-3]
        plural = plural[:target] + accented[plural[target]] + plural[target + 1:]
    return plural


def pluralize_adjective(lemma, target_tags):
    """Inflect an adjective lemma according to ``Gender`` / ``Number`` tags.

    * Lemmas ending in ``e`` always get ``s`` appended, whatever the tags.
    * Lemmas ending in a consonant always get a plural-looking ending:
      ``z`` -> ``ces``; ``n``/``s`` with an accented vowel -> accents removed
      and ``es`` (``Gender == 'Masc'``) or ``as`` (any other gender) appended;
      otherwise ``es``.
    * Lemmas ending in any other vowel are inflected from the tags:
      masculine singular keeps the lemma, masculine plural appends ``s``,
      feminine singular ends in ``a``, feminine plural swaps the final letter
      for ``as``.

    :param lemma: adjective lemma
    :param target_tags: mapping read for the keys ``'Gender'`` and ``'Number'``
    :return: the inflected form, or ``None`` when a vowel-final lemma is
        requested with a tag combination not listed above
    """
    if lemma.endswith('e'):
        return lemma + 's'

    if not lemma.endswith(all_vocals):  # consonant-final lemma
        if lemma.endswith('z'):
            return lemma[:-1] + 'ces'
        if lemma.endswith(('n', 's')) and any(vowel in lemma for vowel in stressed_vocals):
            ending = 'es' if target_tags.get('Gender') == 'Masc' else 'as'
            return multi_replace(tk=lemma + ending)
        return lemma + 'es'

    # From here on the lemma ends in a vowel other than 'e'.
    number = target_tags.get('Number')
    gender = target_tags.get('Gender')
    if number == 'Sing' and gender == 'Masc':
        return lemma
    if number == 'Plur' and gender == 'Masc':
        return lemma + 's'
    if number == 'Sing' and gender == 'Fem':
        if lemma.endswith('a'):
            return lemma
        if lemma.endswith('o'):
            # Swap the masculine ending (bueno -> buena) rather than
            # appending to it, mirroring the feminine plural branch below.
            return lemma[:-1] + 'a'
        return lemma + 'a'
    if number == 'Plur' and gender == 'Fem':
        return lemma[:-1] + 'as'
    return None


def pronouns_and_determinants(lemma, target_tags):
    """Inflect a pronoun or determiner lemma from ``Gender`` / ``Number`` tags.

    * vowel-final lemma: ``s`` is appended for ``Number == 'Plur'``, the lemma
      is returned unchanged for ``'Sing'``;
    * lemma ending in ``l``: ``lo`` / ``los`` / ``la`` / ``las`` is appended
      for masculine/feminine singular/plural;
    * any other lemma: ``es`` is appended.

    :param lemma: pronoun or determiner lemma
    :param target_tags: mapping read for the keys ``'Gender'`` and ``'Number'``
    :return: the inflected form, or ``None`` when the tags do not select one
    """
    if lemma.endswith(all_vocals):
        number = target_tags.get('Number')
        if number == 'Plur':
            return lemma + 's'
        if number == 'Sing':
            return lemma
        return None

    if lemma.endswith('l'):
        number = target_tags.get('Number')
        gender = target_tags.get('Gender')
        if number not in ('Sing', 'Plur') or gender not in ('Masc', 'Fem'):
            return None
        suffix = 'lo' if gender == 'Masc' else 'la'
        if number == 'Plur':
            suffix += 's'
        return lemma + suffix

    return lemma + 'es'


def inflect_noun_adj_pron_det(lemma: str, target_pos: str, target_tags: dict) -> Optional[str]:
    """Dispatch a nominal lemma to the inflection rule for its part of speech.

    :param lemma: lemma to inflect
    :param target_pos: part-of-speech tag; ``'NOUN'``, ``'PROPN'``, ``'ADJ'``,
        ``'DET'`` and ``'PRON'`` are handled
    :param target_tags: mapping of morphological tags; nouns are driven by
        ``'Number'`` here, the other parts of speech pass the mapping on
    :return: the inflected form, or ``None`` when the part of speech or the
        tags are not handled, or when a rule fails on the given lemma
    """
    try:
        if target_pos in ('NOUN', 'PROPN'):
            number = target_tags.get('Number')
            if number == 'Sing':
                return lemma
            if number == 'Plur':
                return pluralize_noun(lemma=lemma)
            return None
        if target_pos == 'ADJ':
            return pluralize_adjective(lemma=lemma, target_tags=target_tags)
        if target_pos in ('DET', 'PRON'):
            return pronouns_and_determinants(lemma=lemma, target_tags=target_tags)
    except (KeyError, IndexError):
        # The rule functions index into the lemma and into lookup tables; a
        # lemma they cannot process is reported as "no form", not as a crash.
        return None
    return None


def add_inflection(lemma, stem, mood, conjugation, tense, person_number, inflections=VERB_INFLECTIONS):
    """Attach the regular ending for the requested form to a verb.

    The ending is looked up in ``inflections``:

    * ``mood == 'Cnd'``: ``inflections['Cnd'][person_number]``, added to the
      whole ``lemma``;
    * ``mood == 'Imp'``: ``inflections['Imp'][conjugation][person_number]``,
      added to ``stem``;
    * ``tense == 'Fut'``: ``inflections['Ind']['Fut'][person_number]``, added
      to the whole ``lemma`` (whatever ``mood`` is);
    * otherwise: ``inflections[mood][tense][conjugation][person_number]``,
      added to ``stem``.

    :param lemma: infinitive
    :param stem: infinitive without its two-letter ending
    :param mood: mood key
    :param conjugation: conjugation class as int or str; ``0`` / ``'0'`` means
        "unknown" and yields ``None``
    :param tense: tense key
    :param person_number: key such as person + number concatenated
    :param inflections: nested table of endings
    :return: the inflected form, or ``None`` for an unknown conjugation class
    :raises KeyError: when the table has no entry for the requested keys
    """
    if conjugation in ('0', 0):
        return None
    conjugation = str(conjugation)
    if mood == 'Cnd':
        return lemma + inflections[mood][person_number]
    if mood == 'Imp':
        return stem + inflections[mood][conjugation][person_number]
    if tense == 'Fut':
        return lemma + inflections['Ind'][tense][person_number]

    inflection = stem + inflections[mood][tense][conjugation][person_number]
    # Spelling change g -> j before 'o' (coger -> cojo).  It only concerns
    # -er/-ir verbs: first-conjugation verbs keep the 'g' (pagar -> pago).
    # NOTE(review): forms such as "tengo" would be rewritten as well if they
    # reach this point -- confirm they come from the exception tables instead.
    if conjugation != '1' and inflection.endswith('go'):
        inflection = inflection[:-2] + 'jo'
    return inflection


def irregular(lemma, stem, conjugation, mood, tense, person_number, irregular_models=IRREGULAR_MODELS):
    """Conjugate ``lemma`` with the irregular-verb models, if one applies.

    The model table is ``irregular_models[mood]`` for ``'Imp'`` / ``'Cnd'`` and
    ``irregular_models[mood][tense]`` for ``'Ind'`` / ``'Sub'`` with a truthy
    tense.  Each key of the table has the shape ``index--inside[--replacement]``
    and maps to an entry with ``'verbs'`` and ``'affected_tags'``.  The first
    model that lists both ``lemma`` and ``person_number`` is applied according
    to its numeric index.

    :return: ``0`` when mood/tense select no table; the inflected form when a
        model produced one; ``None`` when no model applied
    """
    infinitive_ending = lemma[-2:]
    if mood in ('Imp', 'Cnd'):
        model_table = irregular_models[mood]
    elif mood in ('Ind', 'Sub') and tense:
        model_table = irregular_models[mood][tense]
    else:
        return 0

    def conjugate_with(new_stem):
        # Regular endings attached to a (possibly altered) stem.
        return add_inflection(lemma=new_stem + infinitive_ending, stem=new_stem, mood=mood,
                              conjugation=conjugation, tense=tense, person_number=person_number)

    for model, entry in model_table.items():
        verbs = entry['verbs']
        affected_tags = entry['affected_tags']
        if lemma not in verbs or person_number not in affected_tags:
            continue

        index, inside, *replacement = model.split('--')
        replacement = replacement[0] if len(replacement) == 1 else []
        model_id = int(index)

        if model_id in (1, 11):
            # Working on the reversed stem makes the substitution hit the
            # occurrence closest to the end of the stem.
            altered = stem[::-1].replace(inside, replacement[::-1], 1)[::-1]
            if lemma == 'jugar':
                altered = 'jueg'
            return conjugate_with(altered)

        if model_id == 7:
            altered = stem[::-1].replace(inside, replacement[::-1], 1)[::-1]
            form = conjugate_with(altered)
            if (tense == 'Past' and mood == 'Ind') or (tense == 'Imp' and mood == 'Sub'):
                # Order matters: each replacement sees the result of the previous one.
                form = form.replace('ují', 'uje')
                form = form.replace('ujió', 'ujo')
                form = form.replace('uji', 'uj')
                form = form.replace('ujié', 'ujé')
                form = form.replace('jm', 'jim')
                form = form.replace('js', 'jis')
            return form

        if model_id == 9:
            if mood == 'Sub' or (mood == 'Ind' and tense in ('Pres', 'Past')):
                form = conjugate_with(stem + 'y')
                return form.replace('uyi', 'uy').replace('uyi', 'uy')
            # Any other mood/tense: keep looking at the remaining models.
            continue

        if model_id == 8:
            return conjugate_with(stem).replace('ñi', 'ñ').replace('lli', 'll')

        if model_id == 10:
            # Here 'verbs' maps each lemma to its ready-made forms.
            return verbs.get(lemma).get(person_number)

    return None


def conjugate_final_form(lemma, stem, conjugation, mood, tense, person_number):
    """Return the finite form of a verb, trying the irregular models first.

    Only the part of ``lemma`` after its last underscore is used.  A verb that
    is not listed in ``ALL_IRREGULAR_VERBS`` gets the regular endings; a listed
    verb gets them too when no irregular model applies to the requested form.

    :return: the inflected form, or ``None`` when it cannot be built
    """
    lemma = lemma.rpartition('_')[2]

    if lemma not in ALL_IRREGULAR_VERBS:
        return add_inflection(lemma=lemma, stem=stem, conjugation=conjugation,
                              mood=mood, tense=tense, person_number=person_number)

    irregular_form = irregular(lemma=lemma, stem=stem, conjugation=conjugation,
                               mood=mood, tense=tense, person_number=person_number)
    if irregular_form is None:
        # No model covers this form: fall back to the regular endings.
        return add_inflection(lemma=lemma, stem=stem, conjugation=conjugation,
                              mood=mood, tense=tense, person_number=person_number)
    if irregular_form == 0:
        # irregular() reports an unusable mood/tense combination with 0.
        return None
    return irregular_form


def past_participle(lemma, conjugation, gender_number, irregular_model=IRREGULAR_MODELS):
    """Build the past participle of ``lemma`` with a gender/number ending.

    Verbs listed under ``irregular_model['Participle']['10--super--exception']``
    use the stored form minus its last character as base; every other verb
    uses the lemma without its last two letters plus ``ad`` (conjugation ``1``)
    or ``id`` (anything else).

    :param gender_number: ``'MascSing'``, ``'MascPlur'`` or ``'FemSing'``;
        any other value selects the ending ``as``
    :return: the participle, with any ``iid`` sequence collapsed to ``id``
    """
    exceptions = irregular_model['Participle']['10--super--exception']['verbs']
    if lemma in exceptions:
        base = exceptions.get(lemma)[:-1]
    else:
        base = lemma[:-2] + ('ad' if conjugation == 1 else 'id')

    if gender_number == 'MascSing':
        ending = 'o'
    elif gender_number == 'MascPlur':
        ending = 'os'
    elif gender_number == 'FemSing':
        ending = 'a'
    else:
        ending = 'as'

    return (base + ending).replace('iid', 'id')


def conjugate_complex_tenses(verb_lemma, verb_conjugation, haber_mood, haber_tense, haber_person_number):
    """Build a compound tense: a finite form of ``haber`` plus the participle.

    The participle is always requested in its ``'MascSing'`` form.

    :param verb_lemma: infinitive of the main verb
    :param verb_conjugation: conjugation class of the main verb
    :param haber_mood: mood of the auxiliary
    :param haber_tense: tense of the auxiliary
    :param haber_person_number: person/number key of the auxiliary
    :return: ``'<auxiliary> <participle>'``, or ``None`` when the auxiliary
        cannot be conjugated for the requested mood/tense/person
    """
    verb_past_participle = past_participle(lemma=verb_lemma, conjugation=verb_conjugation, gender_number='MascSing')
    aux_verb = conjugate_final_form(lemma='haber', stem='hab', conjugation=2,
                                    mood=haber_mood, tense=haber_tense, person_number=haber_person_number)
    if aux_verb is None:
        # Without this guard the f-string would yield the text "None <participle>".
        return None
    return f'{aux_verb} {verb_past_participle}'


def gerund_simple(lemma, stem, conjugation):
    """Attach the regular gerund ending to ``stem``.

    ``yendo`` is used when the conjugation is not ``1`` and either the stem
    ends in a vowel or the verb is ``ir``; otherwise ``ando`` for conjugation
    ``1`` and ``iendo`` for everything else.
    """
    needs_y = (stem.endswith(all_vocals) or lemma == 'ir') and conjugation != 1
    if needs_y:
        suffix = 'yendo'
    elif conjugation == 1:
        suffix = 'ando'
    else:
        suffix = 'iendo'
    return stem + suffix


def gerund(lemma, irregular_models=IRREGULAR_MODELS['Gerund']):
    """Return the gerund of ``lemma``, honouring the irregular gerund models.

    The conjugation class comes from the last two letters of the lemma
    (``ar`` -> 1, ``er`` -> 2, ``ir``/``ír`` -> 3, anything else -> 0).  Verbs
    outside ``ALL_IRREGULAR_VERBS`` get the regular gerund.  For the others,
    the first model whose ``'verbs'`` contains the lemma decides; its key has
    the shape ``index--inside[--replacement]``.  If no model lists the verb,
    the regular gerund is returned.
    """
    stem = lemma[:-2]
    conjugation = {'ar': 1, 'er': 2, 'ir': 3, 'ír': 3}.get(lemma[-2:], 0)

    if lemma not in ALL_IRREGULAR_VERBS:
        return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation)

    for model, entry in irregular_models.items():
        if lemma not in entry['verbs']:
            continue

        index, inside, *replacement = model.split('--')
        replacement = replacement[0] if len(replacement) == 1 else []
        model_id = int(index)

        if model_id == 10:
            # Ready-made gerund stored in the table.
            return entry['verbs'][lemma]
        if model_id == 8:
            regular = gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation)
            return regular.replace('ñi', 'ñ').replace('lli', 'll')
        if model_id != 9:
            # Working on the reversed stem makes the substitution hit the
            # occurrence closest to the end of the stem.
            stem = stem[::-1].replace(inside, replacement[::-1], 1)[::-1]
        return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation).replace('iyendo', 'iendo')

    return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation)


def verb_inflexions(lemma, target_pos, target_tags):
    """Produce the verb form described by ``target_tags``.

    Only the part of ``lemma`` after its last underscore is used.  The form is
    chosen by ``target_tags['VerbForm']``: ``'Inf'`` (the lemma itself),
    ``'Ger'``, ``'Fin'``, ``'Part'`` or ``'Compuesto'``.

    :param lemma: infinitive, optionally prefixed with ``something_``
    :param target_pos: must be ``'VERB'``, ``'AUX'``, ``'ADJ'`` or ``'phrase'``
    :param target_tags: mapping read for ``'VerbForm'``, ``'Mood'``,
        ``'Tense'``, ``'Person'``, ``'Number'`` and ``'Gender'``
    :return: the requested form, or ``None`` for any other part of speech or
        verb form
    """
    lemma = lemma.rpartition('_')[2]
    stem = lemma[:-2]
    conjugation = {'ar': 1, 'er': 2, 'ir': 3, 'ír': 3}.get(lemma[-2:], 0)

    number = str(target_tags.get('Number'))
    person_number = str(target_tags.get('Person')) + number
    gender_number = str(target_tags.get('Gender')) + number
    verb_form = target_tags.get('VerbForm')
    mood = target_tags.get('Mood')
    tense = target_tags.get('Tense')

    if target_pos not in ('VERB', 'AUX', 'ADJ', 'phrase'):
        return None
    if verb_form == 'Inf':
        return lemma
    if verb_form == 'Ger':
        return gerund(lemma=lemma)
    if verb_form == 'Fin':
        return conjugate_final_form(lemma=lemma, stem=stem, conjugation=conjugation,
                                    mood=mood, tense=tense, person_number=person_number)
    if verb_form == 'Part':
        return past_participle(lemma=lemma, conjugation=conjugation, gender_number=gender_number)
    if verb_form == 'Compuesto':
        return conjugate_complex_tenses(verb_lemma=lemma, verb_conjugation=conjugation, haber_mood=mood,
                                        haber_tense=tense, haber_person_number=person_number)
    return None


def inflect(lemma, target_pos, target_tags):
    """Entry point: inflect ``lemma`` for the given part of speech and tags.

    Verbal targets (``'VERB'``, an ``'ADJ'`` with ``VerbForm == 'Part'``, a
    ``'phrase'`` with ``VerbForm == 'Compuesto'``) go to ``verb_inflexions``;
    ``'NOUN'``, ``'PROPN'``, ``'ADJ'``, ``'DET'`` and ``'PRON'`` go to
    ``inflect_noun_adj_pron_det``.

    :return: the inflected form, or ``None`` for any other part of speech
    """
    verb_form = target_tags.get('VerbForm')
    # NOTE(review): 'AUX' is accepted by verb_inflexions but is not routed to
    # it from here -- confirm whether that is intentional.
    is_verbal = target_pos == 'VERB' \
        or (target_pos, verb_form) in (('ADJ', 'Part'), ('phrase', 'Compuesto'))
    if is_verbal:
        return verb_inflexions(lemma=lemma, target_pos=target_pos, target_tags=target_tags)
    if target_pos in ('NOUN', 'PROPN', 'ADJ', 'DET', 'PRON'):
        return inflect_noun_adj_pron_det(lemma=lemma, target_pos=target_pos, target_tags=target_tags)
    return None