"""Rule-based inflection of Spanish nouns, adjectives, pronouns, determiners and verbs.

The public entry point is :func:`inflect`. Importing this module reads three
JSON resources from ``language_data/`` (resolved against the current working
directory) and keeps them in module-level constants.
"""
import json
from typing import Optional

# Load data and define globals
unstressed_vocals = tuple('aieou')
stressed_vocals = tuple('áíéóú')  # same order as unstressed_vocals
all_vocals = unstressed_vocals + stressed_vocals
# Final vowels after which a noun simply takes "-s" ('í'/'ú' are handled apart).
vocals_s = unstressed_vocals + ('á', 'é', 'ó',)
all_consonants = tuple('bcdfghjklmnñpqrstvwxzy')
# Final consonants after which a noun takes "-es".
consonants_es = tuple('lrndzjsxh')
# Every other final consonant: the noun takes "-s".
consonants_s = tuple(cons for cons in all_consonants if cons not in consonants_es)

# Maps each plain vowel to its acute-accented counterpart.
_ACUTE_ACCENT = dict(zip(unstressed_vocals, stressed_vocals))

with open('language_data/inflexions.json', 'r', encoding='utf-8') as f:
    VERB_INFLECTIONS = json.load(f)
with open('language_data/irregular_verbs_list.json', 'r', encoding='utf-8') as f:
    ALL_IRREGULAR_VERBS = json.load(f)
with open('language_data/irregular_verbs.json', 'r', encoding='utf-8') as f:
    IRREGULAR_MODELS = json.load(f)


def _replace_last(text, old, new):
    """Return *text* with its last occurrence of *old* replaced by *new*.

    Implemented by reversing all three strings so that ``str.replace`` with
    ``count=1`` hits the right-most match. ``old`` must be reversed as well,
    otherwise multi-letter patterns never match the reversed text.
    """
    return text[::-1].replace(old[::-1], new[::-1], 1)[::-1]


def _conjugation_class(lemma):
    """Return 1, 2 or 3 for an -ar, -er or -ir/-ír infinitive, and 0 otherwise."""
    ending = lemma[-2:]
    if ending == 'ar':
        return 1
    if ending == 'er':
        return 2
    if ending in ('ir', 'ír'):
        return 3
    return 0


def _base_lemma(lemma):
    """Return the part of *lemma* after its last underscore (the lemma itself if none)."""
    return lemma.split('_')[-1]


def multi_replace(tk, seq1=stressed_vocals, seq2=unstressed_vocals):
    """Replace every item of *seq1* found in *tk* by the item of *seq2* at the same index.

    With the default arguments this strips acute accents from vowels.
    """
    for old, new in zip(seq1, seq2):
        tk = tk.replace(old, new)
    return tk


def pluralize_noun(lemma):
    """Return the plural of the singular noun *lemma*, or ``None`` if no rule applies.

    The ending is chosen from the final letters of the lemma; afterwards the
    written accent is dropped or added where the extra syllable requires it.
    Empty lemmas yield ``None``.
    """
    if not lemma:
        return None
    current_vocals = [char for char in lemma if char in all_vocals]
    # '' is in neither tuple, so one-letter lemmas fail these tests instead of
    # raising IndexError.
    penultimate = lemma[-2] if len(lemma) >= 2 else ''

    if lemma.endswith(vocals_s):
        plural = lemma + 's'
    elif lemma.endswith(('í', 'ú')):
        plural = lemma + ('es' if lemma == 'sí' else 's')
    elif (lemma.endswith(tuple('lrndzj')) and len(current_vocals) >= 3
          and current_vocals[-3] in stressed_vocals):
        # Accent already on the third vowel from the end: left unchanged.
        plural = lemma
    elif (lemma.endswith(('s', 'x')) and len(current_vocals) > 1
          and current_vocals[-1] in unstressed_vocals):
        # Final -s/-x after an unaccented last vowel: invariable.
        plural = lemma
    elif ((penultimate in all_vocals and lemma.endswith('y'))
          or lemma.endswith(consonants_es) or lemma.endswith('ch')):
        plural = lemma[:-1] + 'ces' if lemma.endswith('z') else lemma + 'es'
    elif ((lemma[-1] in all_consonants and penultimate in all_consonants)
          or (penultimate in all_vocals and lemma.endswith(consonants_s))):
        if lemma == 'club':
            return 'clubes'
        if lemma == 'álbum':
            return 'álbumes'
        plural = lemma if lemma.endswith(('st', 'zt')) else lemma + 's'
    else:
        return None

    if (lemma.endswith(('n', 's')) and current_vocals
            and current_vocals[-1] in stressed_vocals):
        # "canción" -> "canciones": the added syllable makes the accent unnecessary.
        plural = multi_replace(plural)
    elif (lemma.endswith('n') and len(current_vocals) >= 3
          and all(vocal in unstressed_vocals for vocal in current_vocals)):
        # "examen" -> "exámenes": accent the third vowel from the end, and only
        # that one (a plain str.replace would hit every equal vowel).
        vowel_positions = [i for i, char in enumerate(plural) if char in all_vocals]
        pos = vowel_positions[-3]
        accented = _ACUTE_ACCENT.get(plural[pos], plural[pos])
        plural = plural[:pos] + accented + plural[pos + 1:]
    return plural


def pluralize_adjective(lemma, target_tags):
    """Inflect the adjective *lemma* for the ``Number``/``Gender`` in *target_tags*.

    Lemmas ending in "-e" or in a consonant are returned unchanged when the
    singular is requested and pluralised otherwise. Other vowel-final lemmas
    are inflected for both gender and number. Returns ``None`` when the tags
    do not select any rule.
    """
    number = target_tags.get('Number')
    gender = target_tags.get('Gender')
    ends_in_vowel = lemma.endswith(all_vocals)

    if lemma.endswith('e') or not ends_in_vowel:
        if number == 'Sing':
            # A singular request must not yield a plural form.
            return lemma
        if lemma.endswith('e'):
            return lemma + 's'
        # Consonant-final lemma.
        if lemma.endswith('z'):
            return lemma[:-1] + 'ces'
        if lemma.endswith(('n', 's')) and any(v in lemma for v in stressed_vocals):
            ending = 'es' if gender == 'Masc' else 'as'
            return multi_replace(tk=(lemma + ending))
        return lemma + 'es'

    # Lemma ends in a vowel other than "e".
    if number == 'Sing' and gender == 'Masc':
        return lemma
    if number == 'Plur' and gender == 'Masc':
        return lemma + 's'
    if number == 'Sing' and gender == 'Fem':
        if lemma.endswith('o'):
            return lemma[:-1] + 'a'  # "bueno" -> "buena", not "buenoa"
        return lemma  # "-a" and other vowel endings do not change
    if number == 'Plur' and gender == 'Fem':
        if lemma.endswith(('o', 'a')):
            return lemma[:-1] + 'as'
        return lemma + 's'  # same form as the masculine plural
    return None


def pronouns_and_determinants(lemma, target_tags):
    """Inflect a pronoun or determiner *lemma* for ``Number``/``Gender``.

    Vowel-final lemmas only vary in number; lemmas ending in "l" take
    "-lo/-los/-la/-las"; any other consonant-final lemma takes "-es".
    Returns ``None`` when no rule matches the given tags.
    """
    number = target_tags.get('Number')
    gender = target_tags.get('Gender')
    ends_in_vowel = lemma.endswith(all_vocals)

    if ends_in_vowel and number == 'Plur':
        return lemma + 's'
    if ends_in_vowel and number == 'Sing':
        return lemma
    if lemma.endswith('l'):
        suffix = {
            ('Sing', 'Masc'): 'lo',
            ('Plur', 'Masc'): 'los',
            ('Sing', 'Fem'): 'la',
            ('Plur', 'Fem'): 'las',
        }.get((number, gender))
        return None if suffix is None else lemma + suffix
    if not ends_in_vowel:
        return lemma + 'es'
    return None


def inflect_noun_adj_pron_det(lemma: str, target_pos: str, target_tags: dict) -> Optional[str]:
    """Dispatch nominal inflection by part of speech.

    Returns the inflected form, or ``None`` when the part of speech or the
    tags are not handled or a lookup raises ``KeyError``.
    """
    try:
        if target_pos in ('NOUN', 'PROPN'):
            number = target_tags.get('Number')
            if number == 'Sing':
                return lemma
            if number == 'Plur':
                return pluralize_noun(lemma=lemma)
        elif target_pos == 'ADJ':
            return pluralize_adjective(lemma=lemma, target_tags=target_tags)
        elif target_pos in ('DET', 'PRON'):
            return pronouns_and_determinants(lemma=lemma, target_tags=target_tags)
    except KeyError:
        return None
    return None


def add_inflection(lemma, stem, mood, conjugation, tense, person_number, inflections=VERB_INFLECTIONS):
    """Attach the regular ending for the requested form.

    Conditional and future endings are appended to *lemma* (the infinitive);
    all other endings are appended to *stem*. Returns ``None`` when
    *conjugation* is 0, i.e. the lemma is not an -ar/-er/-ir infinitive.
    """
    if conjugation in ('0', 0):
        return None
    conjugation = str(conjugation)
    if mood == 'Cnd':
        return lemma + inflections[mood][person_number]
    if mood == 'Imp':
        return stem + inflections[mood][conjugation][person_number]
    if tense == 'Fut':
        return lemma + inflections['Ind'][tense][person_number]
    inflection = stem + inflections[mood][tense][conjugation][person_number]
    # Spelling change of -ger/-gir verbs ("protego" -> "protejo"). First
    # conjugation verbs keep the "g" ("pago"), so they are excluded.
    # NOTE(review): irregular -er/-ir forms ending in "-go" would also be
    # rewritten here if they are built through this function - confirm
    # against the irregular-verb data.
    if conjugation != '1' and inflection.endswith('go'):
        inflection = inflection[:-2] + 'jo'
    return inflection


def irregular(lemma, stem, conjugation, mood, tense, person_number, irregular_models=IRREGULAR_MODELS):
    """Conjugate *lemma* with the irregular model that covers it, if any.

    Model keys have the shape ``"<index>--<inside>[--<replacement>]"``; the
    numeric index selects the handling below.

    Returns the inflected form, ``None`` when no model lists this lemma for
    *person_number* (the caller then conjugates regularly), or ``0`` when
    mood/tense do not select a model table.
    """
    conj = lemma[-2:]
    if mood in ('Imp', 'Cnd'):
        suppletive_models = irregular_models[mood]
    elif mood in ('Ind', 'Sub') and tense:
        suppletive_models = irregular_models[mood][tense]
    else:
        return 0

    def _regular(with_stem):
        # Regular endings on top of an (optionally altered) stem.
        return add_inflection(lemma=with_stem + conj, stem=with_stem, mood=mood,
                              conjugation=conjugation, tense=tense,
                              person_number=person_number)

    for model, verbs_and_tags in suppletive_models.items():
        verbs = verbs_and_tags['verbs']
        affected_tags = verbs_and_tags['affected_tags']
        if lemma not in verbs or person_number not in affected_tags:
            continue
        index, inside, *replacement = model.split('--')
        replacement = replacement[0] if len(replacement) == 1 else []
        index = int(index)
        if index in (1, 11):
            # Change inside the stem (last occurrence of `inside`).
            stem = _replace_last(stem, inside, replacement)
            stem = 'jueg' if lemma == 'jugar' else stem
            return _regular(stem)
        if index == 7:
            stem = _replace_last(stem, inside, replacement)
            inflection = _regular(stem)
            if (tense == 'Past' and mood == 'Ind') or (tense == 'Imp' and mood == 'Sub'):
                # Adjust the regular endings after a stem ending in "uj".
                inflection = inflection.replace('ují', 'uje').replace('ujió', 'ujo') \
                    .replace('uji', 'uj').replace('ujié', 'ujé') \
                    .replace('jm', 'jim').replace('js', 'jis')
            return inflection
        if index == 9:
            if (tense in ('Pres', 'Past') and mood == 'Ind') or mood == 'Sub':
                stem = stem + 'y'
            inflection = _regular(stem)
            # Applied twice, as in the original implementation.
            return inflection.replace('uyi', 'uy').replace('uyi', 'uy')
        if index == 8:
            # Stems in "ñ"/"ll" absorb the "i" of the ending.
            return _regular(stem).replace('ñi', 'ñ').replace('lli', 'll')
        if index == 10:
            # Full forms are stored per verb and per person/number.
            return verbs.get(lemma).get(person_number)
    return None


def conjugate_final_form(lemma, stem, conjugation, mood, tense, person_number):
    """Return the finite form of *lemma*, trying irregular models first.

    Returns ``None`` when an irregular verb is requested without a usable
    mood/tense, or when the lemma has no recognised conjugation class.
    """
    lemma = _base_lemma(lemma)
    if lemma in ALL_IRREGULAR_VERBS:
        inflection = irregular(lemma=lemma, stem=stem, conjugation=conjugation, mood=mood,
                               tense=tense, person_number=person_number)
        if inflection is None:
            # Irregular verb, but regular in this particular form.
            inflection = add_inflection(lemma=lemma, stem=stem, conjugation=conjugation,
                                        mood=mood, tense=tense, person_number=person_number)
        elif inflection == 0:
            return None
    else:
        inflection = add_inflection(lemma=lemma, stem=stem, conjugation=conjugation,
                                    mood=mood, tense=tense, person_number=person_number)
    return inflection


def past_participle(lemma, conjugation, gender_number, irregular_model=IRREGULAR_MODELS):
    """Return the past participle of *lemma* agreeing with *gender_number*.

    *gender_number* is the concatenation of the Gender and Number tags
    (``'MascSing'``, ``'MascPlur'``, ``'FemSing'``); any other value takes
    the "-as" ending.
    """
    model = irregular_model['Participle']['10--super--exception']['verbs']
    if lemma in model:
        # Stored irregular participle with its last character stripped.
        stem = model.get(lemma)[:-1]
        suffix = ''
    else:
        stem = lemma[:-2]
        suffix = 'ad' if conjugation == 1 else 'id'
    if gender_number == 'MascSing':
        ending = 'o'
    elif gender_number == 'MascPlur':
        ending = 'os'
    elif gender_number == 'FemSing':
        ending = 'a'
    else:
        ending = 'as'
    participle = stem + suffix + ending
    return participle.replace('iid', 'id')


def conjugate_complex_tenses(verb_lemma, verb_conjugation, haber_mood, haber_tense, haber_person_number):
    """Return "<form of haber> <masculine singular participle>".

    Returns ``None`` when the auxiliary cannot be conjugated, rather than a
    string that starts with the text "None".
    """
    verb_past_participle = past_participle(lemma=verb_lemma, conjugation=verb_conjugation,
                                           gender_number='MascSing')
    aux_verb = conjugate_final_form(lemma='haber', stem='hab', conjugation=2, mood=haber_mood,
                                    tense=haber_tense, person_number=haber_person_number)
    if aux_verb is None:
        return None
    return f'{aux_verb} {verb_past_participle}'


def gerund_simple(lemma, stem, conjugation):
    """Return the regular gerund built on *stem*.

    "-ando" for the first conjugation; otherwise "-yendo" after a vowel-final
    stem (and for "ir"), else "-iendo".
    """
    if conjugation == 1:
        return stem + 'ando'
    if stem.endswith(all_vocals) or lemma == 'ir':
        return stem + 'yendo'
    return stem + 'iendo'


def gerund(lemma, irregular_models=IRREGULAR_MODELS['Gerund']):
    """Return the gerund of *lemma*, applying an irregular model when one lists it."""
    stem = lemma[:-2]
    conjugation = _conjugation_class(lemma)
    if lemma not in ALL_IRREGULAR_VERBS:
        return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation)
    for model, verbs in irregular_models.items():
        if lemma not in verbs['verbs']:
            continue
        index, inside, *replacement = model.split('--')
        replacement = replacement[0] if len(replacement) == 1 else []
        index = int(index)
        if index == 9:
            return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation) \
                .replace('iyendo', 'iendo')
        if index == 8:
            return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation) \
                .replace('ñi', 'ñ').replace('lli', 'll')
        if index == 10:
            # The full gerund is stored for this verb.
            return verbs['verbs'][lemma]
        # Any other model: change inside the stem, then regular ending.
        stem = _replace_last(stem, inside, replacement)
        return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation) \
            .replace('iyendo', 'iendo')
    # Irregular verb whose gerund is regular.
    return gerund_simple(lemma=lemma, stem=stem, conjugation=conjugation)


def verb_inflexions(lemma, target_pos, target_tags):
    """Produce the verb form selected by ``VerbForm`` (and related tags) in *target_tags*.

    Handles ``Inf``, ``Ger``, ``Fin``, ``Part`` and ``Compuesto``; returns
    ``None`` for any other verb form or part of speech.
    """
    lemma = _base_lemma(lemma)
    stem = lemma[:-2]
    conjugation = _conjugation_class(lemma)
    # Missing tags become the text "None" inside these composite keys.
    person_number = str(target_tags.get('Person')) + str(target_tags.get('Number'))
    gender_number = str(target_tags.get('Gender')) + str(target_tags.get('Number'))
    verb_form = target_tags.get('VerbForm')
    mood = target_tags.get('Mood')
    tense = target_tags.get('Tense')

    if target_pos not in ('VERB', 'AUX', 'ADJ', 'phrase'):
        return None
    if verb_form == 'Inf':
        return lemma
    if verb_form == 'Ger':
        return gerund(lemma=lemma)
    if verb_form == 'Fin':
        return conjugate_final_form(lemma=lemma, stem=stem, conjugation=conjugation, mood=mood,
                                    tense=tense, person_number=person_number)
    if verb_form == 'Part':
        return past_participle(lemma=lemma, conjugation=conjugation, gender_number=gender_number)
    if verb_form == 'Compuesto':
        return conjugate_complex_tenses(verb_lemma=lemma, verb_conjugation=conjugation,
                                        haber_mood=mood, haber_tense=tense,
                                        haber_person_number=person_number)
    return None


def inflect(lemma, target_pos, target_tags):
    """Inflect *lemma* as *target_pos* with the morphological *target_tags*.

    Verbs, auxiliaries, participial adjectives and compound-tense phrases go
    through :func:`verb_inflexions`; nouns, adjectives, determiners and
    pronouns through :func:`inflect_noun_adj_pron_det`. Returns ``None`` for
    any other part of speech.
    """
    verb_form = target_tags.get('VerbForm')
    is_verbal = (
        # 'AUX' is accepted by verb_inflexions, so it is dispatched here too.
        target_pos in ('VERB', 'AUX')
        or (target_pos == 'ADJ' and verb_form == 'Part')
        or (target_pos == 'phrase' and verb_form == 'Compuesto')
    )
    if is_verbal:
        return verb_inflexions(lemma=lemma, target_pos=target_pos, target_tags=target_tags)
    if target_pos in ('NOUN', 'PROPN', 'ADJ', 'DET', 'PRON'):
        return inflect_noun_adj_pron_det(lemma=lemma, target_pos=target_pos,
                                         target_tags=target_tags)
    return None