a-v-bely committed on
Commit 1156b6f
1 Parent(s): 736e3e5

Update (summarization & pos combinations)

pages/4_📝_Онлайн-тест.py CHANGED
@@ -43,7 +43,7 @@ if st.session_state.get('-ONLINE_TEST_READY-') and st.session_state.get('-LOGGED
                                  use_container_width=True)
         COMMENTS = ONLINE_TEST.text_area(label='**Прокомментировать**',
                                          placeholder='Напишите комментарий')
-        SUBMIT = ONLINE_TEST.form_submit_button('READY')
+        SUBMIT = ONLINE_TEST.form_submit_button('ГОТОВО')
         if SUBMIT:
             points = test_mark = 'Teacher'
             appropriate_tasks = BAD_DISTRACTORS_AND_ANSWERS_temp["Задание уместно"].values.tolist()
requirements.txt CHANGED
@@ -11,4 +11,5 @@ argon2-cffi>=21.3.0
 cryptography>=42.0.3
 transformers>=4.37.2
 streamlit-extras>=0.4.0
+bert-extractive-summarizer>=0.10.1
 es_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/es_core_news_lg-3.7.0/es_core_news_lg-3.7.0-py3-none-any.whl
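
The new bert-extractive-summarizer dependency provides the Summarizer class that esp_constants.py wires up below. A minimal usage sketch (the text is a placeholder; num_sentences mirrors how the workflows call it):

    from summarizer import Summarizer

    model = Summarizer()  # loads a pretrained BERT-family checkpoint on first use
    body = "First sentence. Second sentence. Third sentence."
    summary = model(body, num_sentences=2)  # extractive summary: selected sentences joined into one string
    print(summary)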
utilities/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/utilities/__pycache__/utils.cpython-310.pyc and b/utilities/__pycache__/utils.cpython-310.pyc differ
 
utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc CHANGED
Binary files a/utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc and b/utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc differ
 
utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc CHANGED
Binary files a/utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc and b/utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc differ
 
utilities_database/__pycache__/user_database_utils.cpython-310.pyc CHANGED
Binary files a/utilities_database/__pycache__/user_database_utils.cpython-310.pyc and b/utilities_database/__pycache__/user_database_utils.cpython-310.pyc differ
 
utilities_database/__pycache__/user_database_widgets.cpython-310.pyc CHANGED
Binary files a/utilities_database/__pycache__/user_database_widgets.cpython-310.pyc and b/utilities_database/__pycache__/user_database_widgets.cpython-310.pyc differ
 
utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc CHANGED
Binary files a/utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc and b/utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc differ
 
utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc CHANGED
Binary files a/utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc and b/utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc differ
 
utilities_language_bert/esp_main_workflow_bert.py CHANGED
@@ -11,11 +11,13 @@ from utilities_language_general.esp_utils import prepare_tasks
 from utilities_language_general.esp_constants import load_bert
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import utilities_language_general.esp_constants as esp_constants
+from utilities_language_general.esp_constants import summarization
 from utilities_language_general.esp_utils import prepare_target_words
 from utilities_language_general.esp_utils import compute_frequency_dict
 from utilities_language_general.esp_constants import BAD_USER_TARGET_WORDS
 
 
+
 def main_workflow(
         file: UploadedFile or None,
         text: str,
@@ -137,6 +139,22 @@ def main_workflow(
     logs.update(label="Запускаем процесс генерации заданий!", state='running')
     progress.progress(20)
 
+    # Define summary length
+    text_length = len(current_text_sentences)
+    if text_length <= 15:
+        summary_length = text_length
+    elif text_length <= 25:
+        summary_length = 15
+    else:
+        n = (text_length - 20) // 5
+        summary_length = 15 + 2 * n
+    round_summary_length = summary_length - (summary_length % -10)
+
+    # Get summary. May choose between round_summary_length and summary_length
+    SUMMARY = summarization(current_text, num_sentences=round_summary_length)
+    logs.success('Нашли интересные предложения. Пригодятся!')
+    progress.progress(25)
+
     for sentence in workflow:
         sentence.lemmatize_sentence()
 
@@ -149,7 +167,8 @@ def main_workflow(
         sentence.search_target_words(target_words_automatic_mode=tw_mode_automatic_mode,
                                      target_minimum=target_minimum,
                                      user_target_words=USER_TARGET_WORDS,
-                                     frequency_dict=FREQ_DICT)
+                                     frequency_dict=FREQ_DICT,
+                                     summary=SUMMARY)
         progress.progress(int(30 + (j * (20 / len(workflow)))))
     progress.progress(50)
     DUPLICATE_TARGET_WORDS = defaultdict(list)
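
The rounding line above is easy to misread: in Python, summary_length % -10 is always in the range (-10, 0], so subtracting it rounds summary_length up to the next multiple of 10. A standalone check of the heuristic with a few hypothetical sentence counts:

    for text_length in (12, 22, 42):
        if text_length <= 15:
            summary_length = text_length
        elif text_length <= 25:
            summary_length = 15
        else:
            summary_length = 15 + 2 * ((text_length - 20) // 5)
        round_summary_length = summary_length - (summary_length % -10)
        print(text_length, summary_length, round_summary_length)
    # prints: 12 12 20 / 22 15 20 / 42 23 30  (e.g. 23 - (23 % -10) == 23 - (-7) == 30)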
utilities_language_bert/esp_sentence_bert.py CHANGED
@@ -48,7 +48,7 @@ class SENTENCE:
                 self.sentence_phrases.append(self.sentence_lemma_pos[i][1])
                 previous_was_phrase = False
 
-    def search_target_words_automatically(self, target_minimum: set, frequency_dict: dict = None):
+    def search_target_words_automatically(self, target_minimum: set, frequency_dict: dict = None, summary: list = None):
         for token in self.sentence_phrases:
             if isinstance(token, list):  # if token is a phrase
                 original_token1 = token[1]['original_token1']
@@ -79,7 +79,8 @@ class SENTENCE:
                     'tags': tags,
                     'position_in_sentence': self.original.find(original_token1.text),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': 0
+                    'frequency_in_text': 0,
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
             else:  # if token is just a spacy.nlp token
@@ -98,10 +99,11 @@ class SENTENCE:
                     'position_in_sentence': self.original.find(token.text),
                     'not_named_entity': True if token.ent_type == 0 else False,
                     'frequency_in_text': frequency_dict.get(token.lemma_, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
-    def search_user_target_words(self, user_target_words: set = None, frequency_dict: dict = None):
+    def search_user_target_words(self, user_target_words: set = None, frequency_dict: dict = None, summary: list = None):
         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
@@ -137,19 +139,20 @@ class SENTENCE:
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1)
+                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
     def search_target_words(self, target_words_automatic_mode: bool, target_minimum,
                             user_target_words: set = None,
-                            frequency_dict: dict = None):
+                            frequency_dict: dict = None, summary: list = None):
         if target_words_automatic_mode:
             self.search_target_words_automatically(target_minimum=target_minimum,
-                                                   frequency_dict=frequency_dict)
+                                                   frequency_dict=frequency_dict, summary=summary)
         else:
             self.search_user_target_words(user_target_words=user_target_words,
-                                          frequency_dict=frequency_dict)
+                                          frequency_dict=frequency_dict, summary=summary)
 
     def filter_target_words(self, target_words_automatic_mode):
         c_position = 0
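
In the new 'in_summary' field, self.original is the full sentence text, so the flag records whether the sentence survived summarization. With summary as a list of sentences the check is element membership; with the single string that Summarizer returns by default it degrades to substring containment, which still matches whole sentences. A tiny illustration with made-up sentences:

    sentence_original = 'El gato duerme en el sofá.'
    summary_as_list = ['El gato duerme en el sofá.', 'Llueve mucho.']
    summary_as_text = 'El gato duerme en el sofá. Llueve mucho.'

    print(sentence_original in summary_as_list)  # True: exact element membership
    print(sentence_original in summary_as_text)  # True: substring containment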
utilities_language_general/__pycache__/esp_constants.cpython-310.pyc CHANGED
Binary files a/utilities_language_general/__pycache__/esp_constants.cpython-310.pyc and b/utilities_language_general/__pycache__/esp_constants.cpython-310.pyc differ
 
utilities_language_general/__pycache__/esp_utils.cpython-310.pyc CHANGED
Binary files a/utilities_language_general/__pycache__/esp_utils.cpython-310.pyc and b/utilities_language_general/__pycache__/esp_utils.cpython-310.pyc differ
 
utilities_language_general/__pycache__/morphology.cpython-310.pyc CHANGED
Binary files a/utilities_language_general/__pycache__/morphology.cpython-310.pyc and b/utilities_language_general/__pycache__/morphology.cpython-310.pyc differ
 
utilities_language_general/esp_constants.py CHANGED
@@ -3,6 +3,7 @@ import spacy
 import gensim
 import streamlit as st
 from transformers import pipeline
+from summarizer import Summarizer
 
 
 @st.cache_resource
@@ -25,8 +26,12 @@ def load_bert():
     _pipeline = pipeline(task="fill-mask", model="a-v-white/bert-base-spanish-wwm-cased-finetuned-literature-pro")
     return _pipeline
 
+@st.cache_resource
+def load_summarizer():
+    return Summarizer()
 
 nlp = load_spacy()
+summarization = load_summarizer()
 w2v_model_1_path = r'model1.gz'
 w2v_model_2_path = r'model2.gz'
 
@@ -57,7 +62,32 @@ with open('language_data/phrases.json', 'r', encoding='utf-8') as f:
 with open('language_data/fix_irregular_lemma.json', 'r', encoding='utf-8') as f:
     FIX_LEMMA = json.load(f)
 
-SIMILARITY_VALUES = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
-SIMILARITY_VALUES_bert = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
-
 BAD_USER_TARGET_WORDS = []
+
+
+COMBINE_POS = {
+    'simple': {
+        'A1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'A2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'B1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'B2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'AUX_ADJ': ['PRON_VERB'],
+               'PRON_VERB': ['AUX_ADJ'], 'ADP': ['SCONJ', 'ADV']},
+        'C1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+        'C2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+    },
+    'phrase': {
+        'A1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'A2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'B1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+        'B2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'AUX_ADJ': ['PRON_VERB'],
+               'PRON_VERB': ['AUX_ADJ'], 'ADP': ['SCONJ', 'ADV']},
+        'C1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+        'C2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'],
+               'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'], 'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+    },
+}
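
Alongside dropping the all-1.0 SIMILARITY_VALUES thresholds, the commit introduces COMBINE_POS: per-level tables of POS tags that may stand in for each other. esp_utils.py accepts a distractor only when each tag lists the other, so compatibility must hold in both directions. A quick check against the table above (assuming the repo layout, so the import resolves):

    from utilities_language_general.esp_constants import COMBINE_POS

    table = COMBINE_POS['simple']['B2']
    pos, distractor_pos = 'VERB', 'AUX'
    ok = (table.get(pos) is not None and table.get(distractor_pos) is not None
          and distractor_pos in table[pos] and pos in table[distractor_pos])
    print(ok)  # True: at B2, VERB and AUX list each other
    print('ADJ' in COMBINE_POS['simple']['A1'])  # False: ADJ/NOUN swaps only open up at C1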
utilities_language_general/esp_utils.py CHANGED
@@ -2,8 +2,7 @@ from nltk import edit_distance
 from utilities.utils import answer_letter
 from utilities_language_general.esp_constants import nlp
 from utilities_language_general.esp_constants import FIX_LEMMA
-from utilities_language_general.esp_constants import SIMILARITY_VALUES
-from utilities_language_general.esp_constants import SIMILARITY_VALUES_bert
+from utilities_language_general.esp_constants import COMBINE_POS
 
 
 def prepare_target_words(target_words):
@@ -111,6 +110,7 @@ def check_token_bert(token, current_minimum: set = None, check_allowed_pos: set
 def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None, global_distractors: set,
                                distractor_minimum: set, level_name: str, max_num_distractors: int,
                                max_length_ratio=5, min_edit_distance_ratio=0.5):
+
     distractors = []
     query = lemma if '_' in lemma else f'{lemma}_{pos}'
     lemma = '_'.join(lemma.split('_')[::2])
@@ -124,15 +124,16 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
     query_vector = model.get_mean_vector(query_parts)
     candidates = model.similar_by_vector(query_vector, topn=max_num_distractors + 100)
     for candidate in candidates:
-        if candidate[0].count('_') == 1:
+        if candidate[0].count('_') == 1 and pos != 'phrase':
             distractor_lemma, distractor_pos = candidate[0].split('_')
             distractor_similarity = candidate[1]
             candidate_gender = get_tags(distractor_lemma).get('Gender')
             length_ratio = abs(len(lemma) - len(distractor_lemma))
-            condition = ((distractor_pos == pos
-                          or (distractor_pos in ('VERB', 'ADJ', 'phrase') and pos in ('VERB', 'ADJ', 'phrase')))
+            condition = ((distractor_pos == pos
+                          or (COMBINE_POS['simple'][level_name].get(pos) is not None
+                              and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
+                              and distractor_pos in COMBINE_POS['simple'][level_name][pos]
+                              and pos in COMBINE_POS['simple'][level_name][distractor_pos]))
                          and distractor_lemma != lemma
-                         and distractor_similarity < SIMILARITY_VALUES[level_name]
                          and candidate_gender == gender
                          and length_ratio <= max_length_ratio
                          and distractor_lemma not in global_distractors
@@ -150,16 +151,18 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
         if candidate[0].count('_') > 3 or pos in ('NOUN', 'ADJ', 'NUM'):
             continue
         d1_lemma, d1_pos, d2_lemma, d2_pos = candidate[0].split('_')
+        d_pos = f'{d1_pos}_{d2_pos}'
         distractor_lemma = f'{d1_lemma}_{d2_lemma}'
        distractor_similarity = candidate[1]
         condition = (((d1_pos == pos or d2_pos == pos)
+                      or (COMBINE_POS['phrase'][level_name].get(d_pos) is not None
+                          and COMBINE_POS['phrase'][level_name].get(pos) is not None
+                          and d_pos in COMBINE_POS['phrase'][level_name][pos]
+                          and pos in COMBINE_POS['phrase'][level_name][d_pos])
                       or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                           and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
                       or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                           and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP')))
                      and candidate[0] != lemma
                      and distractor_lemma != lemma
-                     and distractor_similarity < SIMILARITY_VALUES[level_name]
                      and distractor_lemma not in global_distractors)
         if condition:
             if distractor_minimum is not None:
@@ -202,11 +205,11 @@ def get_distractors_from_model_bert(model, text_with_masked_task: str, lemma: st
             distractor_similarity = candidate_distractor[1]
             candidate_gender = get_tags(distractor_lemma).get('Gender')
             length_ratio = abs(len(lemma) - len(distractor_lemma))
-            if (((distractor_pos == pos)
-                 or (pos in ('VERB', 'ADJ', 'phrase') and distractor_pos in ('VERB', 'ADJ', 'phrase')))
+            if ((distractor_pos == pos
+                 or (COMBINE_POS['simple'][level_name].get(pos) is not None
+                     and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
+                     and distractor_pos in COMBINE_POS['simple'][level_name][pos]
+                     and pos in COMBINE_POS['simple'][level_name][distractor_pos]))
                 and distractor_lemma != lemma
                 and (len(_distractors) < max_num_distractors + 100)
-                and (distractor_similarity < SIMILARITY_VALUES_bert[level_name])
                 and (candidate_gender == gender)
                 and (length_ratio <= max_length_ratio)  # May be changed if case of phrases
                 and (distractor_lemma not in global_distractors)
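
The bidirectional COMBINE_POS test now appears three times in this file. A small helper would keep the copies in sync; a sketch of such a factoring (the name pos_combinable is hypothetical, not part of the commit):

    from utilities_language_general.esp_constants import COMBINE_POS

    def pos_combinable(pos: str, other_pos: str, level_name: str, kind: str = 'simple') -> bool:
        # True when the tags match outright, or when each tag lists the other
        # as interchangeable for this level in COMBINE_POS[kind]
        if pos == other_pos:
            return True
        table = COMBINE_POS[kind][level_name]
        return (table.get(pos) is not None and table.get(other_pos) is not None
                and other_pos in table[pos] and pos in table[other_pos])

    # e.g. condition = (pos_combinable(pos, distractor_pos, level_name)
    #                   and distractor_lemma != lemma and ...)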
utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc CHANGED
Binary files a/utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc and b/utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc differ
 
utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc CHANGED
Binary files a/utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc and b/utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc differ
 
utilities_language_w2v/esp_main_workflow_w2v.py CHANGED
@@ -11,6 +11,7 @@ from utilities_language_general.esp_constants import load_w2v
 from utilities_language_general.esp_utils import prepare_tasks
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import utilities_language_general.esp_constants as esp_constants
+from utilities_language_general.esp_constants import summarization
 from utilities_language_general.esp_constants import w2v_model_1_path
 from utilities_language_general.esp_constants import w2v_model_2_path
 from utilities_language_general.esp_utils import prepare_target_words
@@ -146,6 +147,22 @@ def main_workflow(
     logs.update(label="Запускаем процесс генерации заданий!", state='running')
     progress.progress(20)
 
+    # Define summary length
+    text_length = len(current_text_sentences)
+    if text_length <= 15:
+        summary_length = text_length
+    elif text_length <= 25:
+        summary_length = 15
+    else:
+        n = (text_length - 20) // 5
+        summary_length = 15 + 2 * n
+    round_summary_length = summary_length - (summary_length % -10)
+
+    # Get summary. May choose between round_summary_length and summary_length
+    SUMMARY = summarization(current_text, num_sentences=round_summary_length)
+    logs.success('Нашли интересные предложения. Пригодятся!')
+    progress.progress(25)
+
     for sentence in workflow:
         sentence.lemmatize_sentence()
 
@@ -159,7 +176,8 @@ def main_workflow(
             target_words_automatic_mode=tw_mode_automatic_mode,
             target_minimum=target_minimum,
             user_target_words=USER_TARGET_WORDS,
-            frequency_dict=FREQ_DICT)
+            frequency_dict=FREQ_DICT,
+            summary=SUMMARY)
         progress.progress(int(30 + (j * (30 / len(workflow)))))
     progress.progress(60)
     DUPLICATE_TARGET_WORDS = defaultdict(list)
@@ -217,7 +235,12 @@ def main_workflow(
         NUMBER_TASKS = 10
     else:
         NUMBER_TASKS = len(RESULT_TASKS)
-    RESULT_TASKS = sample(RESULT_TASKS, NUMBER_TASKS)
+    RESULT_TASKS_in_summary = [task for task in RESULT_TASKS if task.in_summary]
+    RESULT_TASKS_not_in_summary = [task for task in RESULT_TASKS if not task.in_summary]
+    if len(RESULT_TASKS_in_summary) >= NUMBER_TASKS:
+        RESULT_TASKS = RESULT_TASKS_in_summary
+    else:
+        RESULT_TASKS = RESULT_TASKS_in_summary + sample(RESULT_TASKS_not_in_summary, NUMBER_TASKS - len(RESULT_TASKS_in_summary))
     RESULT_TASKS = sorted(RESULT_TASKS, key=lambda t: (t.sentence_number, t.position_in_sentence))
 
     for task in RESULT_TASKS:
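
The task-selection change keeps every task whose sentence made the summary and only tops up with a random sample of the rest. Note the two groups must be materialized as lists before len() and sample() can apply, which is why the diff above uses list comprehensions rather than bare filter() iterators. A self-contained sketch with dummy task objects:

    from random import sample
    from dataclasses import dataclass

    @dataclass
    class Task:  # stand-in for the real task objects
        name: str
        in_summary: bool

    tasks = [Task('t1', True), Task('t2', False), Task('t3', True), Task('t4', False)]
    number_tasks = 3

    in_summary = [t for t in tasks if t.in_summary]
    rest = [t for t in tasks if not t.in_summary]
    if len(in_summary) >= number_tasks:
        chosen = in_summary
    else:
        chosen = in_summary + sample(rest, number_tasks - len(in_summary))
    print([t.name for t in chosen])  # always t1 and t3, plus one of t2/t4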
utilities_language_w2v/esp_sentence_w2v.py CHANGED
@@ -47,7 +47,7 @@ class SENTENCE:
                 self.sentence_phrases.append(self.sentence_lemma_pos[i][1])
                 previous_was_phrase = False
 
-    def search_target_words_automatically(self, model, target_minimum: set, frequency_dict: dict = None):
+    def search_target_words_automatically(self, model, target_minimum: set, frequency_dict: dict = None, summary: list = None):
         for token in self.sentence_phrases:
             if isinstance(token, list):  # if token is a phrase
                 original_token1 = token[1]['original_token1']
@@ -76,7 +76,8 @@ class SENTENCE:
                     'tags': tags,
                     'position_in_sentence': self.original.find(original_token1.text),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': 0
+                    'frequency_in_text': 0,
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
             else:  # if token is just a spacy.nlp token
@@ -94,10 +95,11 @@ class SENTENCE:
                     'position_in_sentence': self.original.find(token.text),
                     'not_named_entity': True if token.ent_type == 0 else False,
                     'frequency_in_text': frequency_dict.get(token.lemma_, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
-    def search_user_target_words(self, model, user_target_words: set = None, frequency_dict: dict = None):
+    def search_user_target_words(self, model, user_target_words: set = None, frequency_dict: dict = None, summary: list = None):
         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
@@ -132,7 +134,8 @@ class SENTENCE:
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1)
+                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1),
+                    'in_summary': self.original in summary
                 }
                 if not (model.has_index_for(user_target_word_lemma)
                         or model.has_index_for(f'{user_target_word_lemma}_{user_target_word_pos}')):
@@ -140,14 +143,14 @@ class SENTENCE:
                 else:
                     self.target_words.append(target_word)
 
-    def search_target_words(self, model, target_words_automatic_mode: bool, target_minimum,
+    def search_target_words(self, model, target_words_automatic_mode: bool, target_minimum, summary: list = None,
                             user_target_words: set = None, frequency_dict: dict = None):
         if target_words_automatic_mode:
             self.search_target_words_automatically(model=model, target_minimum=target_minimum,
-                                                   frequency_dict=frequency_dict)
+                                                   frequency_dict=frequency_dict, summary=summary)
         else:
             self.search_user_target_words(model=model, user_target_words=user_target_words,
-                                          frequency_dict=frequency_dict)
+                                          frequency_dict=frequency_dict, summary=summary)
 
     def attach_distractors_to_target_word(self, model, global_distractors, distractor_minimum, level_name,
                                           max_frequency, logs, progress):
utilities_option_menu/__pycache__/option_menu.cpython-310.pyc CHANGED
Binary files a/utilities_option_menu/__pycache__/option_menu.cpython-310.pyc and b/utilities_option_menu/__pycache__/option_menu.cpython-310.pyc differ
 
utilities_ui/__pycache__/custom_download_button.cpython-310.pyc CHANGED
Binary files a/utilities_ui/__pycache__/custom_download_button.cpython-310.pyc and b/utilities_ui/__pycache__/custom_download_button.cpython-310.pyc differ