ibn-sidah-team
committed on
Commit
•
1297681
1
Parent(s):
813036e
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Environment setup.
#
# NOTE: these are shell commands, not Python. They are kept here as comments
# for reference only; left as bare statements they make this module fail with
# a SyntaxError before anything else runs.
#
#   python -m venv .senseenv
#
#   # Activate the virtual environment
#   source .senseenv/bin/activate
#
#   # Deactivate the virtual environment. `deactivate` is a shell function
#   # defined by the activate script; there is no bin/deactivate file to source.
#   deactivate
#
#   pip install -r requirements.txt
#   pip install faiss-cpu
#
#   # In the requirements.txt but not installed correctly so we have to use pip command
#   #!pip install gensim==3.8.1
#   python -m spacy download en_core_web_sm
#
#   # Install gradio for user interface
#   pip install gradio
|
15 |
+
|
16 |
+
import sensegram
|
17 |
+
from wsd import WSD
|
18 |
+
from gensim.models import KeyedVectors
|
19 |
+
|
20 |
+
# Model files: paths to the sense vectors and the word vectors, both read below with binary=False
|
21 |
+
sense_vectors_fpath = "./best_sense_gram_model/best_model.sense_vectors"
|
22 |
+
word_vectors_fpath = "./best_sense_gram_model/best_model.word_vectors"
|
23 |
+
|
24 |
+
# Model parameters: module-level settings read by wsd_method (passed to sv.get_senses and to the WSD constructor)
|
25 |
+
max_context_words = 3
|
26 |
+
context_window_size = 5
|
27 |
+
ignore_case = True
|
28 |
+
lang = "ar" # language code passed to WSD as lang=; to filter out stopwords
|
29 |
+
|
30 |
+
# Model loading happens once at import time ... takes some time; sv and wv are then reused by every wsd_method call
|
31 |
+
sv = sensegram.SenseGram.load_word2vec_format(sense_vectors_fpath, binary=False)
|
32 |
+
wv = KeyedVectors.load_word2vec_format(word_vectors_fpath, binary=False, unicode_errors="ignore")
|
33 |
+
|
34 |
+
# Takes a word and its context and returns the results of the model as text.
|
35 |
+
def wsd_method(word, context):
    """Build a text report on the senses of ``word`` and disambiguate it in ``context``.

    Args:
        word: Target word to look up in the sense-vector model ``sv``.
        context: Text in which ``word`` should be disambiguated.

    Returns:
        A single string made of three parts: the senses of ``word`` as returned
        by ``sv.get_senses``, then for every sense its identifier followed by
        the entries of ``sv.wv.most_similar`` with their similarity scores, and
        finally the string form of ``WSD.disambiguate(context, word)``.
    """
    # Look the senses up once and reuse the result for both the summary line
    # and the per-sense listing, instead of querying the model twice.
    senses = sv.get_senses(word, ignore_case=ignore_case)

    # Collect the pieces and join at the end rather than growing a string
    # with repeated "+=".
    parts = ["Probabilities of the senses:\n{}\n\n".format(senses)]
    for sense_id, _prob in senses:
        parts.append(sense_id)
        parts.append("\n" + "=" * 20 + "\n")
        parts.extend(
            "{} {:f}\n".format(rsense_id, sim)
            for rsense_id, sim in sv.wv.most_similar(sense_id)
        )
        parts.append("\n")

    # Disambiguate a word in a context
    wsd_model = WSD(sv, wv, window=context_window_size, lang=lang,
                    max_context_words=max_context_words, ignore_case=ignore_case)
    parts.append(str(wsd_model.disambiguate(context, word)))
    return "".join(parts)
|
49 |
+
|
50 |
+
import gradio as gr
|
51 |
+
# Input widgets for the live demo: a one-line box for the target word and a
# two-line box for the surrounding context.
word_input = gr.Textbox(lines=1, placeholder="الكلمة")
context_input = gr.Textbox(lines=2, placeholder="السياق")

# Wire the widgets to wsd_method and show its report as plain text.
demo = gr.Interface(
    fn=wsd_method,
    inputs=[word_input, context_input],
    outputs="text",
    title="فـك الالتباس الدلالي",
    description="فضلًا أدخل الكلمة ثم السياق ثم اضغط على زر إرسال، ولاستعراض المخرجات كاملة يرجى استخدام زر التمرير لأسفل.",
)

# Launching the live demo
demo.launch()
|
60 |
+
|