File size: 1,840 Bytes
7048611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56ad37d
 
f5fc7cb
7048611
 
 
 
 
434f036
7048611
 
 
 
 
 
 
56ad37d
7048611
 
 
930ad25
7048611
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
import gradio.inputs as grinputs
import gradio.outputs as groutputs

from gensim.models import KeyedVectors
from gensim.parsing import preprocessing

# Ordered preprocessing steps passed as `filters=` to
# preprocessing.preprocess_string() in clean_words(); they are listed in the
# order given here.
# NOTE(review): remove_stopwords appears twice; presumably the second pass is
# meant to catch stop words exposed once punctuation/digits are stripped —
# confirm it is intentional.
# NOTE(review): lower_to_unicode is the last step; if stop-word matching is
# case-sensitive, capitalised stop words (e.g. "The") would survive both
# remove_stopwords passes — confirm against the gensim documentation.
filters = [
    preprocessing.remove_stopwords,
    preprocessing.strip_tags,
    preprocessing.strip_punctuation,
    preprocessing.strip_numeric,
    preprocessing.strip_multiple_whitespaces,
    preprocessing.strip_non_alphanum,
    preprocessing.strip_short,
    preprocessing.remove_stopwords,
    preprocessing.lower_to_unicode,
]

def parse_text(text):
    """Split a user-supplied word list into individual words.

    Words may be separated by ',' or ';'. Every space character is removed
    (so spaces inside an entry are collapsed, e.g. "new york" -> "newyork"),
    remaining surrounding whitespace such as tabs or newlines is stripped,
    and empty entries (from a trailing or doubled separator, or from blank
    input) are dropped so that no empty string is returned as a word.

    Args:
        text: Raw text, e.g. "cat, dog; bird".

    Returns:
        A list of non-empty word strings, in input order.
    """
    normalized = text.replace(" ", "").replace(";", ",")
    # Strip each piece and skip blanks: "a,,b," must yield ["a", "b"], not
    # ["a", "", "b", ""].
    pieces = (piece.strip() for piece in normalized.split(","))
    return [piece for piece in pieces if piece]

def clean_words(words):
    """Normalise (word, score) pairs and drop near-duplicate words.

    Each word is run through preprocessing.preprocess_string() with the
    module-level `filters`; only the first resulting token is considered.
    A token is kept only if it neither starts with nor ends with a token
    that has already been kept.

    Args:
        words: Iterable of (word, score) pairs.

    Returns:
        A dict mapping each kept token to its score rounded to 2 decimals,
        in the order the tokens were first accepted.
    """
    kept = {}
    for raw_word, score in words:
        tokens = preprocessing.preprocess_string(raw_word, filters=filters)
        if not tokens:
            # Preprocessing removed the word entirely; nothing to record.
            continue
        candidate = tokens[0]
        # startswith/endswith accept a tuple of options; an empty tuple
        # matches nothing, so the very first candidate is always accepted.
        already_kept = tuple(kept)
        if candidate.startswith(already_kept) or candidate.endswith(already_kept):
            continue
        kept[candidate] = round(score, 2)
    return kept

# Word-vector file read with KeyedVectors.load_word2vec_format()
# (presumably the English fastText Common Crawl vectors — confirm).
path = "cc.en.300.vec"
# Alternative vector file (presumably French), currently disabled:
# path = "cc.fr.300.vec"
# The model is loaded once at import time and shared by on_submit().
# NOTE(review): `limit` is understood to cap how many vectors are read from
# the file — confirm against the gensim documentation.
m = KeyedVectors.load_word2vec_format(path, limit = 100000)

def on_submit(text, mode):
    """Return words close to (or far from) the words typed by the user.

    Args:
        text: Raw textbox content; split into words by parse_text().
        mode: 'Close' selects the 50 most similar words. Any other value
            (including no selection) takes the 10000 most similar words in
            reverse order, i.e. least similar of those first.

    Returns:
        The cleaned {word: score} mapping rendered as a string without the
        surrounding braces, or a short explanatory message when the input
        contains no usable words or words missing from the loaded vectors.
    """
    print('{} mode'.format(mode))
    # Ignore empty tokens (trailing comma, blank input) so they are never
    # looked up as words; `text or ""` also covers a missing value.
    positive = [word for word in parse_text(text or "") if word]
    if not positive:
        return "Please enter at least one word."

    # Looking up a word that is absent from the loaded vectors raises inside
    # most_similar; report it to the user instead of failing.
    unknown = [word for word in positive if word not in m]
    if unknown:
        return "Unknown word(s): {}".format(", ".join(unknown))

    if mode == 'Close':
        words = m.most_similar(positive=positive, topn=50)
    else:
        words = m.most_similar(positive=positive, topn=10000)[::-1]
    # str(dict)[1:-1] drops the enclosing "{" and "}".
    return str(clean_words(words))[1:-1]

# Gradio UI: a textbox for the input words and a radio button for the mode,
# both passed to on_submit(); the returned string is shown in a textbox.
# Label typos fixed ("coma" -> "comma", "Close of Far" -> "Close or Far").
iface = gr.Interface(
    fn=on_submit,
    inputs=[
        grinputs.Textbox(
            placeholder='word1, word2, word3, ...',
            label="Input words (comma separated). Returns words that are close (or far) from the input words.",
        ),
        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
    ],
    outputs=[
        groutputs.Textbox(label='Information'),
    ],
    allow_screenshot=False,
)
# Started at import time, exactly as before (no __main__ guard added).
iface.launch()