"""Gradio demo that lists words close to (or far from) a set of input words.

Word vectors are loaded once at import time from a word2vec-format text file,
and a small web UI is launched that queries them through ``on_submit``.
"""
import gradio as gr
import gradio.inputs as grinputs
import gradio.outputs as groutputs
from gensim.models import KeyedVectors
from gensim.parsing import preprocessing

# Filters applied, in order, by ``preprocessing.preprocess_string``.
# ``remove_stopwords`` runs a second time after the ``strip_*`` filters --
# presumably to drop stopwords that only become recognisable once punctuation
# and digits are gone (NOTE(review): confirm the duplicate is intentional).
filters = [
    preprocessing.remove_stopwords,
    preprocessing.strip_tags,
    preprocessing.strip_punctuation,
    preprocessing.strip_numeric,
    preprocessing.strip_multiple_whitespaces,
    preprocessing.strip_non_alphanum,
    preprocessing.strip_short,
    preprocessing.remove_stopwords,
    preprocessing.lower_to_unicode,
]

# Number of neighbours requested from the model in each mode.
CLOSE_TOPN = 50
FAR_TOPN = 10000


def parse_text(text):
    """Split the raw textbox content into a list of query words.

    Spaces are removed, ``;`` is treated like ``,`` and the text is split on
    commas. Empty tokens (empty input, trailing or doubled separators) are
    dropped so they are never sent to the model.

    Args:
        text: Raw user input, e.g. ``"king, queen; prince"``. May be empty
            or ``None``.

    Returns:
        A list of non-empty word strings, in input order.
    """
    if not text:
        return []
    tokens = text.replace(" ", "").replace(";", ",").split(",")
    return [token for token in tokens if token]


def clean_words(words):
    """Normalise and de-duplicate a list of ``(word, score)`` pairs.

    Each word is run through ``filters``; the first resulting token is kept.
    A word is skipped when preprocessing leaves nothing, or when it starts or
    ends with a word that was already kept (this also removes exact
    duplicates and simple prefixed/suffixed variants).

    Args:
        words: Iterable of ``(word, score)`` pairs, as returned by
            ``KeyedVectors.most_similar``.

    Returns:
        A dict mapping each kept word to its score rounded to two decimals,
        in the order the words were first seen.
    """
    clean_dict = {}
    for raw_word, score in words:
        tokens = preprocessing.preprocess_string(raw_word, filters=filters)
        if not tokens:
            # Nothing left after filtering (stopword, number, too short...).
            continue
        word = tokens[0]
        # str.startswith/endswith accept a tuple; an empty tuple never matches.
        kept = tuple(clean_dict)
        if word.startswith(kept) or word.endswith(kept):
            continue
        # Cast to a built-in float so the displayed value does not depend on
        # how the NumPy scalar type prints itself.
        clean_dict[word] = round(float(score), 2)
    return clean_dict


path = "cc.en.300.vec"
# Alternative vector file (French): "cc.fr.300.vec"
# Only the first 100000 vectors of the file are loaded.
m = KeyedVectors.load_word2vec_format(path, limit=100000)


def on_submit(text, mode):
    """Handle a UI submission and return the formatted result string.

    Args:
        text: Comma- (or semicolon-) separated input words.
        mode: ``'Close'`` returns the ``CLOSE_TOPN`` nearest words; any other
            value returns the ``FAR_TOPN`` nearest words in reverse order
            (least similar of those first).

    Returns:
        A string of ``'word': score`` pairs separated by ``", "``, or an
        explanatory message when the input is empty or contains words that
        are not in the model vocabulary.
    """
    print(f'{mode} mode')
    positive = parse_text(text)
    if not positive:
        return 'Please enter at least one word.'

    # most_similar raises on out-of-vocabulary words; report them instead.
    unknown = [word for word in positive if word not in m]
    if unknown:
        return 'Unknown word(s), not in the model vocabulary: ' + ', '.join(unknown)

    if mode == 'Close':
        words = m.most_similar(positive=positive, topn=CLOSE_TOPN)
    else:
        words = m.most_similar(positive=positive, topn=FAR_TOPN)[::-1]
    # Strip the surrounding braces of the dict repr for display.
    return str(clean_words(words))[1:-1]


iface = gr.Interface(
    fn=on_submit,
    inputs=[
        grinputs.Textbox(
            placeholder='word1, word2, word3, ...',
            label="Input words (comma separated). Returns words that are close (or far) from the input words.",
        ),
        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
    ],
    outputs=[
        groutputs.Textbox(label='Information'),
    ],
    allow_screenshot=False,
)
iface.launch()