# Building Dynamic Wordlists from WordNet as a fallback

I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) as a guide for building word lists with NLTK's `WordNet`. This may be a way to avoid building custom lists by hand, so I want to test it out.

# Collects (category, word) rows for one group of seed words.
def _wordnet_rows(seeds, seed_text, threshold, pos_kwargs):
    """Return (category, word) pairs for synsets of *seeds* that match the seed definition.

    Args:
        seeds: Words to look up; spaces are converted to underscores for WordNet.
        seed_text: The seed definition, already passed through process_text.
        threshold: A synset is kept only when its similarity is strictly above this.
        pos_kwargs: Extra keyword arguments for wordnet.synsets (empty or {"pos": ...}).
    """
    rows = []
    for seed in seeds:
        for syn in wordnet.synsets(seed.replace(" ", "_"), **pos_kwargs):
            if check_sim(seed_text, process_text(syn.definition())) > threshold:
                category = syn.name().split(".")[0].replace("_", " ")
                # Copy before extending: for English, NLTK's Synset.lemmas() returns the
                # synset's own internal list and the reader caches synsets, so an in-place
                # extend would leak hyponym lemmas into every later lookup of this synset.
                lemmas = list(syn.lemmas())
                for hypo in syn.hyponyms():
                    lemmas.extend(hypo.lemmas())
                rows.extend((category, lemma.name().replace("_", " ")) for lemma in lemmas)
    return rows


# Builds a dataframe dynamically from WordNet using NLTK.
def wordnet_df(word,POS=False,seed_definition=None):
    """Build a Categories/Words dataframe for *word*, its synonyms and its antonyms.

    A synset contributes its lemmas and the lemmas of its direct hyponyms when its
    definition is similar enough to the seed definition (above .7 for the word
    itself, above .6 for its synonyms and antonyms).

    Args:
        word: The word or phrase to expand.
        POS: One of 'NOUN', 'VERB', 'ADJ', 'ADV' to restrict the lookup; any other
            value (including the default False) searches every part of speech.
        seed_definition: Definition text to compare against. When None, the
            available definitions are printed and the user is prompted to pick one.

    Returns:
        A pandas DataFrame with columns "Categories" and "Words", duplicates removed.

    Raises:
        ValueError: If no seed definition is given and WordNet has no definition
            for the word, or if the typed choice is not an integer.
        IndexError: If the typed choice is outside the listed definitions.
    """
    pos_options = ['NOUN','VERB','ADJ','ADV']
    pos_kwargs = {"pos": getattr(wordnet, POS)} if POS in pos_options else {}
    synonyms, antonyms = syn_ant(word,POS,False)
    #WordNet hates spaces so you have to remove them
    m_word = word.replace(" ", "_")

    #Allow the user to pick a seed definition if it is not provided directly to the function.
    if seed_definition is None:
        seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, **pos_kwargs)]
        if not seed_definitions:
            # Fail before prompting: there would be nothing to choose from.
            raise ValueError(f"No WordNet definitions found for {word!r}.")
        for d, definition in enumerate(seed_definitions):
            print(f"{d}: {definition}")
        choice = int(input("Which of the definitions above most aligns to your selection?"))
        seed_definition = seed_definitions[choice]

    # The processed seed text is the same for every comparison, so compute it once.
    seed_text = process_text(seed_definition)
    rows = _wordnet_rows([m_word], seed_text, .7, pos_kwargs)
    rows += _wordnet_rows(synonyms, seed_text, .6, pos_kwargs)
    rows += _wordnet_rows(antonyms, seed_text, .6, pos_kwargs)

    df = pd.DataFrame(rows, columns=["Categories", "Words"])
    return df.drop_duplicates().reset_index(drop=True)

# Building Dynamic Wordlists from WordNet as a fallback

I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) as a guide for building word lists with NLTK's `WordNet`. This may be a way to avoid building custom lists by hand, so I want to test it out.

# Building Dynamic Wordlists from WordNet as a fallback

I am using an article from [GeeksforGeeks](https://www.geeksforgeeks.org/get-synonymsantonyms-nltk-wordnet-python/amp/) as a guide for building word lists with NLTK's `WordNet`. This may be a way to avoid building custom lists by hand, so I want to test it out.

In [1]:
#Import necessary libraries.
import re, nltk, pandas as pd, numpy as np, ssl
from nltk.corpus import wordnet
import spacy
nlp = spacy.load("en_core_web_lg")

In [2]:
#If an error is thrown that the corpus "omw-1.4" isn't discoverable you can use this code. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
# NOTE(review): this swaps the ssl module's default HTTPS context factory for the
# unverified one, so HTTPS requests relying on that default skip certificate
# verification for the rest of the session, not only the NLTK download below.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # This Python build has no ssl._create_unverified_context; leave the default in place.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context
    
# Fetch the "omw-1.4" corpus named in the error message above.
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/nbutters/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [15]:
# Collect the lemma names of every "tiny" synset plus its direct hyponyms,
# then look up every synset those lemma names belong to.
tiny_synsets = wordnet.synsets("tiny")
print(tiny_synsets)
new_list = []
for syn in tiny_synsets:
    # Copy before extending: for English, NLTK's Synset.lemmas() returns the synset's
    # own internal list and synsets are cached, so extending it in place would
    # permanently add hyponym lemmas to the synset.
    cur_lemmas = list(syn.lemmas())
    # A separate name is used for the outer list so rebinding hypos here
    # no longer shadows the sequence being iterated.
    hypos = syn.hyponyms()
    for hypo in hypos:
        cur_lemmas.extend(hypo.lemmas())
    new_list.extend(lemma.name() for lemma in cur_lemmas)
syns = []
for lemma in new_list:
    syns.extend(wordnet.synsets(lemma))
print(len(syns),syns)

[Synset('bantam.s.01')]
16 [Synset('bantam.n.01'), Synset('bantam.s.01'), Synset('diminutive.n.01'), Synset('bantam.s.01'), Synset('lilliputian.n.01'), Synset('lilliputian.n.02'), Synset('lilliputian.a.01'), Synset('bantam.s.01'), Synset('fiddling.s.01'), Synset('dwarf.n.01'), Synset('bantam.s.01'), Synset('petite.n.01'), Synset('bantam.s.01'), Synset('bantam.s.01'), Synset('flyspeck.n.01'), Synset('bantam.s.01')]


In [None]:
#Here I define a few test sentences from the Duct-Tape-Pipeline.
upt1 = "I like movies starring black actors."
upt2 = "I am a black trans-woman."
upt3 = "Native Americans deserve to have their land back."
upt4 = "This movie was filmed in Iraq."

In [16]:
# A simple function to pull synonyms and antonyms using spacy's POS
def syn_ant(word,POS=False,human=True):
    """Return the WordNet synonyms and antonyms of *word*.

    Args:
        word: The word or phrase to look up; spaces are converted to underscores
            because WordNet stores compounds that way.
        POS: One of 'NOUN', 'VERB', 'ADJ', 'ADV' (the spaCy-style tag names this
            function accepts) to restrict the lookup; any other value, including
            the default False, searches every part of speech.
        human: When True, underscores in the returned names are replaced by spaces.

    Returns:
        A tuple (synonyms, antonyms) of de-duplicated lists, in no particular order.
    """
    pos_options = ['NOUN','VERB','ADJ','ADV']
    # A single lookup covers both cases: the pos filter is passed only when valid.
    pos_kwargs = {"pos": getattr(wordnet, POS)} if POS in pos_options else {}

    def _render(name):
        return name.replace("_", " ") if human else name

    synonyms = set()
    antonyms = set()
    #WordNet hates spaces so you have to remove them
    for syn in wordnet.synsets(word.replace(" ", "_"), **pos_kwargs):
        for lemma in syn.lemmas():
            synonyms.add(_render(lemma.name()))
            antonyms.update(_render(ant.name()) for ant in lemma.antonyms())
    return list(synonyms), list(antonyms)

In [22]:
x, q = syn_ant("man")
print(x,q)

['man', "gentleman's gentleman", 'Isle of Man', 'Man', 'humanity', 'human', 'piece', 'valet de chambre', 'mankind', 'humans', 'military personnel', 'adult male', 'homo', 'human race', 'valet', 'humankind', 'military man', 'human being', 'serviceman', 'world', 'gentleman', 'human beings'] ['woman', 'civilian']


In [None]:
doc1 = nlp(upt1)
doc2 = nlp(upt2)
doc3 = nlp(upt3)
doc4 = nlp(upt4)

In [None]:
syn_ant(doc3[0].text,doc3[0].pos_)

In [None]:
#NLTK's WordNet joins compound words with "_"; syn_ant now converts spaces to underscores to handle this.
syn_ant("Native_American", "NOUN")

In [None]:
syn_ant("Papua_New_Guinea")

In [None]:
syn_ant("hate")

In [None]:
russian = wordnet.synset('mother.n.01')
print(russian.hyponyms())
# Flatten the lemma names of every direct hyponym into one list, showing underscores as spaces.
hypos = [
    re.sub("_"," ",lemma.name())
    for child in russian.hyponyms()
    for lemma in child.lemmas()
]
hypos

In [None]:
hyper_list = wordnet.synset('woman.n.01')
print(hyper_list.hypernyms())
# Flatten the lemma names of every direct hypernym into one list, showing underscores as spaces.
hypers = [
    re.sub("_"," ",lemma.name())
    for ancestor in hyper_list.hypernyms()
    for lemma in ancestor.lemmas()
]
hypers

In [None]:
hyper_list = wordnet.synset('man.n.01')
print(hyper_list.hypernyms())
# Flatten the lemma names of every direct hypernym into one list, showing underscores as spaces.
hypers = [
    re.sub("_"," ",lemma.name())
    for ancestor in hyper_list.hypernyms()
    for lemma in ancestor.lemmas()
]
hypers

In [None]:
parent = wordnet.synset('male.n.02')
print(parent.hyponyms())
# Flatten the lemma names of every direct hyponym into one list, showing underscores as spaces.
hypos = [
    re.sub("_"," ",lemma.name())
    for child in parent.hyponyms()
    for lemma in child.lemmas()
]
hypos

In [None]:
hypo2 = [[re.sub("_"," ",lemma.name()) for lemma in hyponym.lemmas()] for hyponym in parent.hyponyms()]
hypo2

In [None]:
syn_ant("white supremacist","NOUN",human=False)

In [None]:
# Builds a list dynamically from WordNet using NLTK.
def wordnet_list(word,POS=False):
    """Return *word* plus every lemma reachable from it, its synonyms and its antonyms.

    For each seed (the word itself, then its synonyms and antonyms from syn_ant),
    every matching synset and each of its direct hyponyms contributes its lemma
    names, with underscores shown as spaces.

    Args:
        word: The word or phrase to expand.
        POS: One of 'NOUN', 'VERB', 'ADJ', 'ADV' to restrict the lookup; any other
            value (including the default False) searches every part of speech.

    Returns:
        A de-duplicated list of words, in no particular order; always contains *word*.
    """
    pos_options = ['NOUN','VERB','ADJ','ADV']
    # The pos filter is passed only when valid, so one loop serves both cases.
    pos_kwargs = {"pos": getattr(wordnet, POS)} if POS in pos_options else {}
    synonyms, antonyms = syn_ant(word,POS,False)

    base = set()
    for seed in [word, *synonyms, *antonyms]:
        #WordNet hates spaces so you have to remove them
        for syn in wordnet.synsets(seed.replace(" ", "_"), **pos_kwargs):
            base.add(syn)
            base.update(syn.hyponyms())

    final = {word}
    for syn in base:
        final.update(lemma.name().replace("_", " ") for lemma in syn.lemmas())
    return list(final)

In [None]:
wordnet_list("white supremacist", "NOUN")

In [None]:
words = wordnet_list("girl", "NOUN")
print(f"The length of the list is {len(words)}.")

In [None]:
text = "The girl was brought to the front of the class."
test_doc = nlp(text)

In [None]:
df = pd.DataFrame()
df["Words"] = words

# Substitute each candidate word into the test sentence.
df["Sentences"] = df.Words.apply(lambda x: text.replace("girl",x))

# Parse the reference word once instead of re-running the pipeline for every row.
girl_doc = nlp("girl")
# NOTE(review): only the first token of each candidate is compared, so multi-word
# entries are scored on their first word alone; confirm this is intended.
df["Similarity"] = df.Words.apply(lambda x: girl_doc.similarity(nlp(x)[0]))

In [None]:
df.sort_values(by='Similarity', ascending=False)

In [None]:
df2 = df[df.Similarity > 0].reset_index()

In [None]:
df2

In [None]:
minimum = df2.Similarity.min()
text2 = df2.loc[df2['Similarity'] == minimum, 'Words'].iloc[0]
text2

In [None]:
maximum = df2[df2.Words != "girl"].Similarity.max()
text3 = df2.loc[df2['Similarity'] == maximum, 'Words'].iloc[0]
text3

In [None]:
# Build the mask from df2 itself: df2 was re-indexed after filtering, so a mask
# taken from df would line up with the wrong rows.
df3 = df2[df2.Similarity > .5].reset_index()

In [None]:
homo = wordnet.synsets('gay')

In [None]:
for syn in homo:
    print(syn.lemmas())

In [None]:
mother = wordnet.synsets('homo')
cats = []
words = []
for syn in mother:
    lemmas = syn.lemmas()
    for lemma in lemmas:
        ll = lemma.name()
        print(ll)
        cats.append(syn.name().split(".")[0])
        words.append(ll)
        
print(cats,words)
print(len(cats),len(words))
df = {"Categories":cats, "Words":words}
df = pd.DataFrame(df)

In [None]:
df

In [None]:
def process_text(text):
    """Lower-case *text* and return its lemmas joined by single spaces.

    Stop words, punctuation and tokens whose lemma is the '-PRON-' placeholder
    are left out.
    """
    kept = [
        tok.lemma_
        for tok in nlp(text.lower())
        if not (tok.is_stop or tok.is_punct or tok.lemma_ == '-PRON-')
    ]
    return " ".join(kept)

In [None]:
def clean_definition(syn):
    """Return a spaCy doc of a synset's definition with stop words removed.

    Removing stop words is meant to improve document-level similarity when
    telling definitions apart.

    Args:
        syn: Either a synset name such as "mother.n.01" or an NLTK Synset object.

    Returns:
        The result of nlp() applied to the space-joined lemmas of the definition's
        non-stop-word tokens.

    Raises:
        TypeError: If *syn* is neither a string nor a Synset.
    """
    if isinstance(syn, str):
        text = wordnet.synset(syn).definition()
    elif isinstance(syn, nltk.corpus.reader.wordnet.Synset):
        text = syn.definition()
    else:
        # Previously this fell through and failed later with an unbound local variable.
        raise TypeError(
            f"clean_definition expects a synset name or Synset, got {type(syn).__name__}"
        )
    return nlp(" ".join(token.lemma_ for token in nlp(text) if not token.is_stop))

def check_sim(a,b):
    """Return the similarity between *a* and *b*.

    Each argument may be a plain string or an object that already provides a
    ``similarity`` method (such as a parsed spaCy doc). Strings are parsed with
    nlp() independently, so mixing one string with one parsed object works too.
    """
    if isinstance(a, str):
        a = nlp(a)
    if isinstance(b, str):
        b = nlp(b)
    return a.similarity(b)

In [None]:
# Collects (category, word) rows for one group of seed words.
def _wordnet_rows(seeds, seed_text, threshold, pos_kwargs):
    """Return (category, word) pairs for synsets of *seeds* that match the seed definition.

    Args:
        seeds: Words to look up; spaces are converted to underscores for WordNet.
        seed_text: The seed definition, already passed through process_text.
        threshold: A synset is kept only when its similarity is strictly above this.
        pos_kwargs: Extra keyword arguments for wordnet.synsets (empty or {"pos": ...}).
    """
    rows = []
    for seed in seeds:
        for syn in wordnet.synsets(seed.replace(" ", "_"), **pos_kwargs):
            if check_sim(seed_text, process_text(syn.definition())) > threshold:
                category = syn.name().split(".")[0].replace("_", " ")
                # Copy before extending: for English, NLTK's Synset.lemmas() returns the
                # synset's own internal list and the reader caches synsets, so an in-place
                # extend would leak hyponym lemmas into every later lookup of this synset.
                lemmas = list(syn.lemmas())
                for hypo in syn.hyponyms():
                    lemmas.extend(hypo.lemmas())
                rows.extend((category, lemma.name().replace("_", " ")) for lemma in lemmas)
    return rows


# Builds a dataframe dynamically from WordNet using NLTK.
def wordnet_df(word,POS=False,seed_definition=None):
    """Build a Categories/Words dataframe for *word*, its synonyms and its antonyms.

    A synset contributes its lemmas and the lemmas of its direct hyponyms when its
    definition is similar enough to the seed definition (above .7 for the word
    itself, above .6 for its synonyms and antonyms).

    Args:
        word: The word or phrase to expand.
        POS: One of 'NOUN', 'VERB', 'ADJ', 'ADV' to restrict the lookup; any other
            value (including the default False) searches every part of speech.
        seed_definition: Definition text to compare against. When None, the
            available definitions are printed and the user is prompted to pick one.

    Returns:
        A pandas DataFrame with columns "Categories" and "Words", duplicates removed.

    Raises:
        ValueError: If no seed definition is given and WordNet has no definition
            for the word, or if the typed choice is not an integer.
        IndexError: If the typed choice is outside the listed definitions.
    """
    pos_options = ['NOUN','VERB','ADJ','ADV']
    pos_kwargs = {"pos": getattr(wordnet, POS)} if POS in pos_options else {}
    synonyms, antonyms = syn_ant(word,POS,False)
    #WordNet hates spaces so you have to remove them
    m_word = word.replace(" ", "_")

    #Allow the user to pick a seed definition if it is not provided directly to the function.
    if seed_definition is None:
        seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, **pos_kwargs)]
        if not seed_definitions:
            # Fail before prompting: there would be nothing to choose from.
            raise ValueError(f"No WordNet definitions found for {word!r}.")
        for d, definition in enumerate(seed_definitions):
            print(f"{d}: {definition}")
        choice = int(input("Which of the definitions above most aligns to your selection?"))
        seed_definition = seed_definitions[choice]

    # The processed seed text is the same for every comparison, so compute it once.
    seed_text = process_text(seed_definition)
    rows = _wordnet_rows([m_word], seed_text, .7, pos_kwargs)
    rows += _wordnet_rows(synonyms, seed_text, .6, pos_kwargs)
    rows += _wordnet_rows(antonyms, seed_text, .6, pos_kwargs)

    df = pd.DataFrame(rows, columns=["Categories", "Words"])
    return df.drop_duplicates().reset_index(drop=True)

In [None]:
df_mother = wordnet_df("gay")
df_mother

In [None]:
len(df_mother)

In [None]:
test = wordnet.synsets("mother",wordnet.NOUN)

In [None]:
test

In [None]:
test1 = wordnet.synsets('father',wordnet.NOUN)
testx = wordnet.synset("mother.n.01")
# The reference definition is the same on every iteration, so clean it once up front.
test_def = clean_definition(testx)
for syn in test1:
    definition = clean_definition(syn)
    print(test_def)
    print(syn, definition, check_sim(process_text(test_def.text),process_text(definition.text)))

In [None]:
test = "colonizer.n.01"

In [None]:
test2 = "mother.n.01"

In [None]:
mother = nlp(wordnet.synset("black.n.05").definition())
print(mother)
colony = nlp(wordnet.synset("white.n.01").definition())
print(colony)
print(mother.similarity(colony))

In [None]:
mother_processed = nlp(process_text(mother.text))
colony_processed = nlp(process_text(colony.text))

In [None]:
print(mother_processed.similarity(colony_processed))

In [None]:
a = clean_definition(test)

b = clean_definition(test2)

a.similarity(b)

In [None]:
a_p = nlp(process_text(a.text))
b_p = nlp(process_text(b.text))
a_p.similarity(b_p)

In [None]:
check_sim(a,b)

In [None]:
test3 = wordnet.synset("white_supremacist.n.01")
c = clean_definition(test3)
a.similarity(c)

In [None]:
def get_parallel(word, seed_definition, QA=False):
    """Find "parallel" synsets: hyponyms of *word*'s hypernyms that resemble a seed definition.

    Every synset of *word* contributes its direct hypernyms, and every hyponym of
    those hypernyms is scored against the processed seed definition. The cut-off
    adapts to how many candidates score at least .75:

        fewer than 3  -> keep scores >= .5
        3 to 9        -> keep scores >= .66
        10 to 19      -> keep scores >= .8
        20 or more    -> keep scores >= .9

    Args:
        word: The word to look up, in WordNet form (underscores for spaces).
        seed_definition: Definition text the candidates are compared against.
        QA: When True, also print and return the hypernyms for inspection.

    Returns:
        A list of (synset, similarity) tuples, or (hypernyms, that list) when QA is True.
    """
    cleaned = nlp(process_text(seed_definition))

    hypers = []
    for syn in wordnet.synsets(word):
        hypers.extend(syn.hypernyms())

    #hypers = list(set([syn for syn in hypers if cleaned.similarity(nlp(process_text(syn.definition()))) >=.5]))

    # Score each distinct candidate once; parsing the definition is the costly step.
    scores = {}
    for hyper in hypers:
        for syn in hyper.hyponyms():
            if syn not in scores:
                scores[syn] = cleaned.similarity(nlp(process_text(syn.definition())))

    strong = sum(1 for sim in scores.values() if sim >= .75)
    print(strong)
    # The largest bucket is tested last-resort style via else, so the 20+ case is
    # reachable (it used to sit behind the ">= 10" branch and never ran).
    if strong < 3:
        threshold = .5
    elif strong < 10:
        threshold = .66
    elif strong < 20:
        threshold = .8
    else:
        threshold = .9
    hypos = [(syn, sim) for syn, sim in scores.items() if sim >= threshold]

    if QA:
        print(hypers)
        print(hypos)
        return hypers, hypos
    else:
        return hypos

# Builds a dataframe dynamically from WordNet using NLTK.
def wordnet_parallel_df(word,POS=False,seed_definition=None):
    """Build a Categories/Words dataframe from the synsets returned by get_parallel.

    Each synset returned by get_parallel contributes its own lemmas and the
    lemmas of its direct hyponyms, all filed under the synset's name.

    Args:
        word: The word or phrase to expand.
        POS: One of 'NOUN', 'VERB', 'ADJ', 'ADV'; only used to narrow the list of
            definitions offered when the user has to pick a seed definition.
        seed_definition: Definition text to compare against. When None, the
            available definitions are printed and the user is prompted to pick one.

    Returns:
        A pandas DataFrame with columns "Categories" and "Words", duplicates removed.

    Raises:
        ValueError: If no seed definition is given and WordNet has no definition
            for the word, or if the typed choice is not an integer.
        IndexError: If the typed choice is outside the listed definitions.
    """
    pos_options = ['NOUN','VERB','ADJ','ADV']
    #WordNet hates spaces so you have to remove them
    m_word = word.replace(" ", "_")

    #Allow the user to pick a seed definition if it is not provided directly to the function.
    if seed_definition is None:
        pos_kwargs = {"pos": getattr(wordnet, POS)} if POS in pos_options else {}
        seed_definitions = [syn.definition() for syn in wordnet.synsets(m_word, **pos_kwargs)]
        if not seed_definitions:
            # Fail before prompting: there would be nothing to choose from.
            raise ValueError(f"No WordNet definitions found for {word!r}.")
        for d, definition in enumerate(seed_definitions):
            print(f"{d}: {definition}")
        choice = int(input("Which of the definitions above most aligns to your selection?"))
        seed_definition = seed_definitions[choice]

    rows = []
    for syn, _sim in get_parallel(m_word,seed_definition):
        category = syn.name().split(".")[0].replace("_", " ")
        # Copy before extending: for English, NLTK's Synset.lemmas() returns the
        # synset's own internal list and synsets are cached, so an in-place extend
        # would leak hyponym lemmas into every later lookup of this synset.
        lemmas = list(syn.lemmas())
        for hypo in syn.hyponyms():
            lemmas.extend(hypo.lemmas())
        rows.extend((category, lemma.name().replace("_", " ")) for lemma in lemmas)

    df = pd.DataFrame(rows, columns=["Categories", "Words"])
    return df.drop_duplicates().reset_index(drop=True)

In [None]:
gay_root = wordnet.synsets("gay")

In [None]:
gay = wordnet.synset("gay.s.06").definition()
print(gay)
hypers, hypos1 = get_parallel("gay",gay,True)

In [None]:
len(hypos1)

In [None]:
for root in gay_root:
    print(root, root.definition())

In [None]:
wordnet.synsets("chinese")

In [None]:
chinese = wordnet.synset("chinese.a.01").definition()
hypers, hypos = get_parallel("chinese",chinese,True)

In [None]:
lemmas = []
for hypo in hypos1:
    lemmas.extend([re.sub("_"," ",lemma.name()) for lemma in hypo[0].lemmas()])
lemmas

In [None]:
len(lemmas)

In [None]:
df = wordnet_parallel_df("gay",seed_definition=gay)
df.head()

In [None]:
len(df)

In [None]:
df_grouped = df.groupby('Categories').count()

In [None]:
df_grouped.head()

In [None]:
tiny = wordnet.synsets("tiny", wordnet.ADJ)

In [None]:
tiny

In [None]:
tiny[0].lemmas()

In [None]:
new_alt = []
for lemma in tiny[0].lemmas():
    new_alt.extend(wordnet.synsets(lemma.name()))
new_alt

In [None]:
new_alt2 = list(set(new_alt))

In [None]:
for alt in new_alt2:
    print(alt,alt.hypernyms())