File size: 1,853 Bytes
c5c0a56
 
1f0ded3
 
 
 
 
c5c0a56
 
1f0ded3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Setup (run once in a shell, not as Python): python -m spacy download en_core_web_sm

import sensegram
from wsd import WSD
from gensim.models import KeyedVectors

# Model files: paths (relative to the working directory) of the sense-vector
# and word-vector files that are loaded below.
sense_vectors_fpath = "./best_sense_gram_model/best_model.sense_vectors"
word_vectors_fpath = "./best_sense_gram_model/best_model.word_vectors"

# Model parameters: module-level settings read by wsd_method, which forwards
# them to sv.get_senses(...) and WSD(...).
max_context_words  = 3  # forwarded to WSD as max_context_words
context_window_size = 5  # forwarded to WSD as window
ignore_case = True  # forwarded to both sv.get_senses and WSD
lang = "ar" # language code forwarded to WSD; original note: to filter out stopwords

# Model loading ... takes some time
# Both loads run once, at import time, before the demo is built.
# binary=False: the files are read as text rather than binary word2vec files.
sv = sensegram.SenseGram.load_word2vec_format(sense_vectors_fpath, binary=False)
# NOTE(review): unicode_errors="ignore" is passed for the word vectors only —
# presumably to skip undecodable bytes; confirm whether the sense vectors
# need the same treatment.
wv = KeyedVectors.load_word2vec_format(word_vectors_fpath, binary=False, unicode_errors="ignore")

# Takes a word and its context and returns the model's results as text.
def wsd_method(word, context):
    """Describe the senses of ``word`` and disambiguate it in ``context``.

    The report is plain text made of three parts, in this order:

    1. the value returned by ``sv.get_senses`` for ``word`` (iterated below
       as ``(sense_id, probability)`` pairs);
    2. for every sense, its id, a separator line, and the entries returned
       by ``sv.wv.most_similar`` for that sense, one per line together with
       the similarity score;
    3. the string form of ``WSD.disambiguate(context, word)``.

    Args:
        word: Target word looked up in the sense vectors.
        context: Context passed to the disambiguator together with ``word``.

    Returns:
        The whole report as a single string.
    """
    # Look the senses up once and reuse the result for both the summary line
    # and the per-sense sections (the lookup used to be performed twice).
    senses = sv.get_senses(word, ignore_case=ignore_case)

    # Collect the pieces and join once instead of growing a string with +=.
    parts = ["Probabilities of the senses:\n{}\n\n".format(senses)]
    for sense_id, _prob in senses:
        parts.append(sense_id)
        parts.append("\n" + "=" * 20 + "\n")
        parts.extend(
            "{} {:f}\n".format(related_id, sim)
            for related_id, sim in sv.wv.most_similar(sense_id)
        )
        parts.append("\n")

    # Disambiguate the word in the given context.
    wsd_model = WSD(sv, wv, window=context_window_size, lang=lang,
                    max_context_words=max_context_words, ignore_case=ignore_case)
    parts.append(str(wsd_model.disambiguate(context, word)))
    return "".join(parts)
  
import gradio as gr
# Launching the live demo: a two-field form whose values are handed to
# wsd_method as (word, context); the returned report is shown as text.

# Placeholder reads "the word".
word_box = gr.Textbox(lines=1, placeholder="الكلمة")
# Placeholder reads "the context".
context_box = gr.Textbox(lines=2, placeholder="السياق")

demo = gr.Interface(
    fn=wsd_method,
    inputs=[word_box, context_box],
    outputs="text",
    # Title: "Word sense disambiguation".
    title="فـك الالتباس الدلالي",
    # Description: "Please enter the word, then the context, then press the
    # submit button; to view the full output, please scroll down."
    description="فضلًا أدخل الكلمة ثم السياق ثم اضغط على زر إرسال، ولاستعراض المخرجات كاملة يرجى استخدام زر التمرير لأسفل.",
)
demo.launch()