a-v-bely commited on
Commit
41e198b
1 Parent(s): 08d25e9

Update backend

Browse files
utilities_database/user_database_widgets.py CHANGED
@@ -123,7 +123,7 @@ class LogIn:
123
  email_sign_up=email_sign_up)
124
 
125
  user_name_sign_up = st.text_input("Имя пользователя *",
126
- placeholder='Введите имя пользователя')
127
  unique_user_name_check = db_utils.check_unique_usr(user_log_in_database=db,
128
  user_name_sign_up=user_name_sign_up)
129
 
@@ -206,7 +206,7 @@ class LogIn:
206
  new_passwd = st.text_input("Новый пароль", placeholder='Введите новый пароль',
207
  type='password')
208
 
209
- new_passwd_1 = st.text_input("Повторите новый пароль", placeholder='Введите повторите пароль',
210
  type='password')
211
 
212
  reset_passwd_submit_button = st.form_submit_button(label='Изменить пароль')
 
123
  email_sign_up=email_sign_up)
124
 
125
  user_name_sign_up = st.text_input("Имя пользователя *",
126
+ placeholder='Введите имя пользователя (латинские буквы и символы)')
127
  unique_user_name_check = db_utils.check_unique_usr(user_log_in_database=db,
128
  user_name_sign_up=user_name_sign_up)
129
 
 
206
  new_passwd = st.text_input("Новый пароль", placeholder='Введите новый пароль',
207
  type='password')
208
 
209
+ new_passwd_1 = st.text_input("Повторите новый пароль", placeholder='Повторите пароль',
210
  type='password')
211
 
212
  reset_passwd_submit_button = st.form_submit_button(label='Изменить пароль')
utilities_language_bert/esp_main_workflow_bert.py CHANGED
@@ -20,9 +20,8 @@ def main_workflow(
20
  file: UploadedFile or None,
21
  text: str,
22
  logs: ST_WIDGETS,
23
- logs_d: ST_WIDGETS,
24
  progress: st_progress,
25
- progress_s: st_progress,
26
  level: str,
27
  tw_mode_automatic_mode: str,
28
  target_words: str,
@@ -37,26 +36,26 @@ def main_workflow(
37
  :param file: user's file to generate tasks in
38
  :param text: user's text input to generate tasks in
39
  :param logs: widget to output logs to
40
- :param logs_d: show how many distractors already processed
41
  :param progress: progress bar
42
- :param progress_s: sentences progress bar
43
  :param target_words: how target words are chosen: by user or automatically
44
  :param tw_mode_automatic_mode:
45
  :param level: user's specification of CEFR level of text
46
  :param num_distractors: how many distractors does the user want the task to contain
47
  :param save_name: user specifies name to save file in cloud
48
- :param global_bad_target_words:global_bad_target_words
49
  :return: Dictionary with output data: filename, amount_mode, text_with_gaps, tasks_as_list, correct_answers,
50
  student_out, teacher_out, total_out, original_text
51
  """
52
  # Clear bad target_words each time
53
- global_bad_target_words = []
 
54
 
55
  # Define main global variables
56
- logs.write()
57
  GLOBAL_DISTRACTORS = set()
58
  MAX_FREQUENCY = 0
59
 
 
60
  mask_filler = load_bert()
61
 
62
  # Get input text
@@ -84,11 +83,12 @@ def main_workflow(
84
 
85
  # Text preprocessing
86
  original_text = current_text
87
- current_text = current_text.replace('.', '. ').replace('. . .', '...').replace(' ', ' ').replace('…', '...') \
88
- .replace('\u2026', '...').replace('—', '-').replace('\u2014', '-').replace('–', '-').replace('-\n', '') \
89
- .replace('\n', '%^&*')
 
90
  current_text_sentences = [sent.text.strip() for sent in esp_constants.nlp(current_text).sents]
91
- logs.success('Получили Ваш текст!')
92
  progress.progress(10)
93
 
94
  # Compute frequency dict
@@ -100,7 +100,7 @@ def main_workflow(
100
  if j < len(FREQ_DICT) * _frequency_barrier_percent:
101
  MAX_FREQUENCY = tp[1]
102
  MAX_FREQUENCY = 3 if MAX_FREQUENCY < 3 else MAX_FREQUENCY
103
- logs.success("Посчитали немного статистики!")
104
  progress.progress(15)
105
 
106
  # Choose necessary language minimum according to user's input
@@ -134,7 +134,7 @@ def main_workflow(
134
  # Start generation process
135
  workflow = [SENTENCE(original=sent.strip(), n_sentence=num, max_num_distractors=num_distractors)
136
  for num, sent in enumerate(current_text_sentences)]
137
- logs.success("Запускаем процесс генерации заданий!")
138
  progress.progress(20)
139
 
140
  for sentence in workflow:
@@ -142,7 +142,7 @@ def main_workflow(
142
 
143
  for sentence in workflow:
144
  sentence.bind_phrases()
145
- logs.success("Подготовили предложения для дальнейшей работы!")
146
  progress.progress(30)
147
 
148
  for j, sentence in enumerate(workflow):
@@ -151,7 +151,7 @@ def main_workflow(
151
  user_target_words=USER_TARGET_WORDS,
152
  frequency_dict=FREQ_DICT)
153
  progress.progress(int(30 + (j * (20 / len(workflow)))))
154
- progress_s.progress(50)
155
  DUPLICATE_TARGET_WORDS = defaultdict(list)
156
  for sentence in workflow:
157
  for target_word in sentence.target_words:
@@ -164,8 +164,8 @@ def main_workflow(
164
  if target_word not in RESULT_TW:
165
  global_bad_target_words.append(target_word['original_text'])
166
  sentence.target_words.remove(target_word)
167
- progress_s.progress(55)
168
- logs.success('Выбрали слова-пропуски!')
169
 
170
  for sentence in workflow:
171
  for i, target_word in enumerate(sentence.target_words):
@@ -176,7 +176,7 @@ def main_workflow(
176
 
177
  for sentence in workflow:
178
  sentence.filter_target_words(target_words_automatic_mode=tw_mode_automatic_mode)
179
- progress_s.progress(60)
180
 
181
  RESULT_TASKS = []
182
  for sentence in workflow:
@@ -189,21 +189,21 @@ def main_workflow(
189
  global_distractors=GLOBAL_DISTRACTORS,
190
  distractor_minimum=distractor_minimum,
191
  max_frequency=MAX_FREQUENCY)
192
- logs_d.success(
193
- f'Обработали {num}/{len(RESULT_TASKS)} целевых слов!')
194
- logs_d.success(
195
- f'Обработали {len(RESULT_TASKS)}/{len(RESULT_TASKS)} целевых слов!')
196
- progress_s.progress(65)
197
- logs.success('Подобрали неправильные варианты!')
198
 
199
  for task in RESULT_TASKS:
200
  task.inflect_distractors()
201
- progress_s.progress(70)
202
- logs.success('Просклоняли и проспрягали неправильные варианты!')
203
 
204
  for task in RESULT_TASKS:
205
  task.sample_distractors(num_distractors=num_distractors)
206
- progress_s.progress(75)
207
  RESULT_TASKS = list(filter(lambda t: not t.bad_target_word, RESULT_TASKS))
208
 
209
  for task in RESULT_TASKS[::-1]:
@@ -226,8 +226,8 @@ def main_workflow(
226
 
227
  for task in RESULT_TASKS:
228
  task.compile_task(max_num_distractors=num_distractors)
229
- progress_s.progress(85)
230
- logs.success('Отобрали лучшие задания!')
231
 
232
  TEXT_WITH_GAPS = []
233
  VARIANTS = []
@@ -241,9 +241,6 @@ def main_workflow(
241
  TEXT_WITH_GAPS.append(sentence)
242
  del RESULT_TASKS
243
 
244
- logs.success('Сейчас все будет готово!')
245
- progress_s.progress(90)
246
-
247
  TEXT_WITH_GAPS = ' '.join([sentence for sentence in TEXT_WITH_GAPS]).replace('%^&*', '\n')
248
  PREPARED_TASKS = prepare_tasks(VARIANTS)
249
  STUDENT_OUT = f'{TEXT_WITH_GAPS}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_STUDENT"]}'
@@ -251,8 +248,8 @@ def main_workflow(
251
  f'{PREPARED_TASKS["KEYS_ONLY"]}'
252
  TOTAL_OUT = f'{original_text}\n\n{"$" * 70}\n\n{STUDENT_OUT}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_TEACHER"]}' \
253
  f'\n\n{"$" * 70}\n\n{PREPARED_TASKS["KEYS_ONLY"]}'
254
- logs.success('Сейчас все будет готово!')
255
- progress_s.progress(90)
256
  save_name = save_name if save_name != '' else f'{str(datetime.datetime.now())[:-7]}_{original_text[:20]}'
257
  out = {
258
  'name': save_name,
 
20
  file: UploadedFile or None,
21
  text: str,
22
  logs: ST_WIDGETS,
 
23
  progress: st_progress,
24
+ progress_d: st_progress,
25
  level: str,
26
  tw_mode_automatic_mode: str,
27
  target_words: str,
 
36
  :param file: user's file to generate tasks in
37
  :param text: user's text input to generate tasks in
38
  :param logs: widget to output logs to
 
39
  :param progress: progress bar
40
+ :param progress_d: sentences progress bar
41
  :param target_words: how target words are chosen: by user or automatically
42
  :param tw_mode_automatic_mode:
43
  :param level: user's specification of CEFR level of text
44
  :param num_distractors: how many distractors does the user want the task to contain
45
  :param save_name: user specifies name to save file in cloud
46
+ :param global_bad_target_words: global bad target words
47
  :return: Dictionary with output data: filename, amount_mode, text_with_gaps, tasks_as_list, correct_answers,
48
  student_out, teacher_out, total_out, original_text
49
  """
50
  # Clear bad target_words each time
51
+ if global_bad_target_words:
52
+ global_bad_target_words = []
53
 
54
  # Define main global variables
 
55
  GLOBAL_DISTRACTORS = set()
56
  MAX_FREQUENCY = 0
57
 
58
+ logs.update(label='Загружаем языковые модели и другие данные', state='running')
59
  mask_filler = load_bert()
60
 
61
  # Get input text
 
83
 
84
  # Text preprocessing
85
  original_text = current_text
86
+ current_text = (current_text.replace('.', '. ').replace('. . .', '...')
87
+ .replace(' ', ' ').replace('…', '...').replace('\u2026', '...')
88
+ .replace('—', '-').replace('\u2014', '-').replace('–', '-')
89
+ .replace('-\n', '').replace('\n', '%^&*'))
90
  current_text_sentences = [sent.text.strip() for sent in esp_constants.nlp(current_text).sents]
91
+ logs.update(label='Получили Ваш текст!', state='running')
92
  progress.progress(10)
93
 
94
  # Compute frequency dict
 
100
  if j < len(FREQ_DICT) * _frequency_barrier_percent:
101
  MAX_FREQUENCY = tp[1]
102
  MAX_FREQUENCY = 3 if MAX_FREQUENCY < 3 else MAX_FREQUENCY
103
+ logs.update(label="Посчитали немного статистики!", state='running')
104
  progress.progress(15)
105
 
106
  # Choose necessary language minimum according to user's input
 
134
  # Start generation process
135
  workflow = [SENTENCE(original=sent.strip(), n_sentence=num, max_num_distractors=num_distractors)
136
  for num, sent in enumerate(current_text_sentences)]
137
+ logs.update(label="Запускаем процесс генерации заданий!", state='running')
138
  progress.progress(20)
139
 
140
  for sentence in workflow:
 
142
 
143
  for sentence in workflow:
144
  sentence.bind_phrases()
145
+ logs.update(label="Подготовили предложения для дальнейшей работы!", state='running')
146
  progress.progress(30)
147
 
148
  for j, sentence in enumerate(workflow):
 
151
  user_target_words=USER_TARGET_WORDS,
152
  frequency_dict=FREQ_DICT)
153
  progress.progress(int(30 + (j * (20 / len(workflow)))))
154
+ progress.progress(50)
155
  DUPLICATE_TARGET_WORDS = defaultdict(list)
156
  for sentence in workflow:
157
  for target_word in sentence.target_words:
 
164
  if target_word not in RESULT_TW:
165
  global_bad_target_words.append(target_word['original_text'])
166
  sentence.target_words.remove(target_word)
167
+ progress.progress(55)
168
+ logs.update(label='Выбрали слова-пропуски!', state='running')
169
 
170
  for sentence in workflow:
171
  for i, target_word in enumerate(sentence.target_words):
 
176
 
177
  for sentence in workflow:
178
  sentence.filter_target_words(target_words_automatic_mode=tw_mode_automatic_mode)
179
+ progress.progress(60)
180
 
181
  RESULT_TASKS = []
182
  for sentence in workflow:
 
189
  global_distractors=GLOBAL_DISTRACTORS,
190
  distractor_minimum=distractor_minimum,
191
  max_frequency=MAX_FREQUENCY)
192
+ progress_d.progress(num / len(RESULT_TASKS))
193
+ logs.update(label=f'Обработали {num}/{len(RESULT_TASKS)} целевых слов!', state='running')
194
+ logs.update(label=f'Обработали {len(RESULT_TASKS)}/{len(RESULT_TASKS)} целевых слов!', state='running')
195
+ progress_d.progress(100)
196
+ progress.progress(70)
197
+ logs.update(label='Подобрали неправильные варианты!', state='running')
198
 
199
  for task in RESULT_TASKS:
200
  task.inflect_distractors()
201
+ progress.progress(80)
202
+ logs.update(label='Просклоняли и проспрягали неправильные варианты!', state='running')
203
 
204
  for task in RESULT_TASKS:
205
  task.sample_distractors(num_distractors=num_distractors)
206
+ progress.progress(85)
207
  RESULT_TASKS = list(filter(lambda t: not t.bad_target_word, RESULT_TASKS))
208
 
209
  for task in RESULT_TASKS[::-1]:
 
226
 
227
  for task in RESULT_TASKS:
228
  task.compile_task(max_num_distractors=num_distractors)
229
+ progress.progress(90)
230
+ logs.update(label='Отобрали лучшие задания!', state='running')
231
 
232
  TEXT_WITH_GAPS = []
233
  VARIANTS = []
 
241
  TEXT_WITH_GAPS.append(sentence)
242
  del RESULT_TASKS
243
 
 
 
 
244
  TEXT_WITH_GAPS = ' '.join([sentence for sentence in TEXT_WITH_GAPS]).replace('%^&*', '\n')
245
  PREPARED_TASKS = prepare_tasks(VARIANTS)
246
  STUDENT_OUT = f'{TEXT_WITH_GAPS}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_STUDENT"]}'
 
248
  f'{PREPARED_TASKS["KEYS_ONLY"]}'
249
  TOTAL_OUT = f'{original_text}\n\n{"$" * 70}\n\n{STUDENT_OUT}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_TEACHER"]}' \
250
  f'\n\n{"$" * 70}\n\n{PREPARED_TASKS["KEYS_ONLY"]}'
251
+ logs.update(label='Сейчас все будет готово!', state='running')
252
+ progress.progress(95)
253
  save_name = save_name if save_name != '' else f'{str(datetime.datetime.now())[:-7]}_{original_text[:20]}'
254
  out = {
255
  'name': save_name,
utilities_language_bert/esp_sentence_bert.py CHANGED
@@ -208,7 +208,7 @@ class TASK:
208
  self.bad_target_word = True
209
  self.distractors = None
210
  else:
211
- self.distractors = [d[0] for i, d in enumerate(distractors_sentence) if i < 15]
212
  self.distractors_number = len(distractors_sentence) if distractors_sentence is not None else 0
213
 
214
  def inflect_distractors(self):
@@ -238,8 +238,7 @@ class TASK:
238
  def sample_distractors(self, num_distractors):
239
  if not self.bad_target_word:
240
  num_distractors = min(self.distractors_number, num_distractors) if num_distractors >= 4 else num_distractors
241
- self.inflected_distractors = sample(self.inflected_distractors[:min(self.distractors_number, 10)],
242
- num_distractors)
243
 
244
  def compile_task(self, max_num_distractors):
245
  len_distractors = len(self.inflected_distractors)
@@ -248,7 +247,8 @@ class TASK:
248
  letters = (f'({letter})' for letter in string.ascii_lowercase[:len_variants + 1])
249
  try:
250
  distractors = sample(self.inflected_distractors, len_variants) + [self.original_text, ]
251
- except ValueError:
 
252
  distractors = self.inflected_distractors + [self.original_text, ]
253
  tmp_vars = [f'{item[0]} {item[1].replace("_", " ")}'.lower()
254
  for item in zip(letters, sorted(distractors, key=lambda _: random()))]
 
208
  self.bad_target_word = True
209
  self.distractors = None
210
  else:
211
+ self.distractors = [d[0] for i, d in enumerate(distractors_sentence) if i < 30]
212
  self.distractors_number = len(distractors_sentence) if distractors_sentence is not None else 0
213
 
214
  def inflect_distractors(self):
 
238
  def sample_distractors(self, num_distractors):
239
  if not self.bad_target_word:
240
  num_distractors = min(self.distractors_number, num_distractors) if num_distractors >= 4 else num_distractors
241
+ self.inflected_distractors = sample(self.inflected_distractors, num_distractors)
 
242
 
243
  def compile_task(self, max_num_distractors):
244
  len_distractors = len(self.inflected_distractors)
 
247
  letters = (f'({letter})' for letter in string.ascii_lowercase[:len_variants + 1])
248
  try:
249
  distractors = sample(self.inflected_distractors, len_variants) + [self.original_text, ]
250
+ except ValueError as e:
251
+ print(f'{e}\n{len_distractors=}\n{len_variants=}')
252
  distractors = self.inflected_distractors + [self.original_text, ]
253
  tmp_vars = [f'{item[0]} {item[1].replace("_", " ")}'.lower()
254
  for item in zip(letters, sorted(distractors, key=lambda _: random()))]
utilities_language_general/esp_constants.py CHANGED
@@ -7,19 +7,23 @@ from transformers import pipeline
7
 
8
  @st.cache_resource
9
  def load_w2v(model_path):
10
- _w2v_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
 
11
  return _w2v_model
12
 
13
 
14
  @st.cache_resource
15
  def load_spacy():
16
- _nlp = spacy.load('es_core_news_lg')
 
17
  return _nlp
18
 
19
 
20
  @st.cache_resource
21
  def load_bert():
22
- return pipeline("fill-mask", model="a-v-white/bert-base-spanish-wwm-cased-finetuned-literature-pro")
 
 
23
 
24
 
25
  nlp = load_spacy()
 
7
 
8
  @st.cache_resource
9
  def load_w2v(model_path):
10
+ with st.spinner('Загружаю языковую модель'):
11
+ _w2v_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
12
  return _w2v_model
13
 
14
 
15
  @st.cache_resource
16
  def load_spacy():
17
+ with st.spinner('Загружаю морфо-синтаксический парсер'):
18
+ _nlp = spacy.load('es_core_news_lg')
19
  return _nlp
20
 
21
 
22
  @st.cache_resource
23
  def load_bert():
24
+ with st.spinner('Загружаю языковую модель'):
25
+ _pipeline = pipeline(task="fill-mask", model="a-v-white/bert-base-spanish-wwm-cased-finetuned-literature-pro")
26
+ return _pipeline
27
 
28
 
29
  nlp = load_spacy()
utilities_language_general/esp_utils.py CHANGED
@@ -132,7 +132,6 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
132
  condition = ((distractor_pos == pos
133
  or (distractor_pos in ('VERB', 'ADJ', 'phrase') and pos in ('VERB', 'ADJ', 'phrase')))
134
  and distractor_lemma != lemma
135
- and len(distractors) < 100
136
  and distractor_similarity < SIMILARITY_VALUES[level_name]
137
  and candidate_gender == gender
138
  and length_ratio <= max_length_ratio
@@ -160,7 +159,6 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
160
  and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP')))
161
  and candidate[0] != lemma
162
  and distractor_lemma != lemma
163
- and len(distractors) < 100
164
  and distractor_similarity < SIMILARITY_VALUES[level_name]
165
  and distractor_lemma not in global_distractors)
166
  if condition:
@@ -173,10 +171,10 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
173
  distractors.append((candidate[0], distractor_similarity))
174
  global_distractors.add(distractor_lemma)
175
  max_num_distractors = min(4, max_num_distractors) if max_num_distractors >= 4 else max_num_distractors
176
- if len(distractors) >= max_num_distractors:
177
- return distractors
178
- else:
179
  return None
 
 
180
 
181
 
182
  def get_distractors_from_model_bert(model, text_with_masked_task: str, lemma: str, pos: str, gender: str or None,
@@ -207,7 +205,7 @@ def get_distractors_from_model_bert(model, text_with_masked_task: str, lemma: st
207
  if (((distractor_pos == pos)
208
  or (pos in ('VERB', 'ADJ', 'phrase') and distractor_pos in ('VERB', 'ADJ', 'phrase')))
209
  and distractor_lemma != lemma
210
- and (len(_distractors) < max_num_distractors+10)
211
  and (distractor_similarity < SIMILARITY_VALUES_bert[level_name])
212
  and (candidate_gender == gender)
213
  and (length_ratio <= max_length_ratio) # May be changed if case of phrases
 
132
  condition = ((distractor_pos == pos
133
  or (distractor_pos in ('VERB', 'ADJ', 'phrase') and pos in ('VERB', 'ADJ', 'phrase')))
134
  and distractor_lemma != lemma
 
135
  and distractor_similarity < SIMILARITY_VALUES[level_name]
136
  and candidate_gender == gender
137
  and length_ratio <= max_length_ratio
 
159
  and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP')))
160
  and candidate[0] != lemma
161
  and distractor_lemma != lemma
 
162
  and distractor_similarity < SIMILARITY_VALUES[level_name]
163
  and distractor_lemma not in global_distractors)
164
  if condition:
 
171
  distractors.append((candidate[0], distractor_similarity))
172
  global_distractors.add(distractor_lemma)
173
  max_num_distractors = min(4, max_num_distractors) if max_num_distractors >= 4 else max_num_distractors
174
+ if len(distractors) < max_num_distractors:
 
 
175
  return None
176
+ else:
177
+ return distractors
178
 
179
 
180
  def get_distractors_from_model_bert(model, text_with_masked_task: str, lemma: str, pos: str, gender: str or None,
 
205
  if (((distractor_pos == pos)
206
  or (pos in ('VERB', 'ADJ', 'phrase') and distractor_pos in ('VERB', 'ADJ', 'phrase')))
207
  and distractor_lemma != lemma
208
+ and (len(_distractors) < max_num_distractors+100)
209
  and (distractor_similarity < SIMILARITY_VALUES_bert[level_name])
210
  and (candidate_gender == gender)
211
  and (length_ratio <= max_length_ratio) # May be changed if case of phrases
utilities_language_w2v/esp_main_workflow_w2v.py CHANGED
@@ -22,10 +22,8 @@ def main_workflow(
22
  file: UploadedFile or None,
23
  text: str,
24
  logs: ST_WIDGETS,
25
- logs_d: ST_WIDGETS,
26
  progress: st_progress,
27
  progress_d: st_progress,
28
- progress_s: st_progress,
29
  level: str,
30
  tw_mode_automatic_mode: str,
31
  target_words: str,
@@ -41,10 +39,8 @@ def main_workflow(
41
  :param file: user's file to generate tasks in
42
  :param text: user's text input to generate tasks in
43
  :param logs: widget to output logs to
44
- :param logs_d: show how many distractors already processed
45
  :param progress: progress bar
46
  :param progress_d: distractors progress bar
47
- :param progress_s: sentences progress bar
48
  :param target_words: how target words are chosen: by user or automatically
49
  :param tw_mode_automatic_mode:
50
  :param level: user's specification of CEFR level of text
@@ -55,11 +51,12 @@ def main_workflow(
55
  :return: Dictionary with output data: filename, amount_mode, text_with_gaps, tasks_as_list, correct_answers,
56
  student_out, teacher_out, total_out, original_text
57
  """
 
58
  # Clear bad target_words each time
59
- global_bad_target_words = []
 
60
 
61
  # Define main global variables
62
- logs.write()
63
  GLOBAL_DISTRACTORS = set()
64
  MAX_FREQUENCY = 0
65
 
@@ -88,11 +85,12 @@ def main_workflow(
88
 
89
  # Text preprocessing
90
  original_text = current_text
91
- current_text = current_text.replace('.', '. ').replace('. . .', '...').replace(' ', ' ').replace('…', '...') \
92
- .replace('\u2026', '...').replace('—', '-').replace('\u2014', '-').replace('–', '-').replace('-\n', '') \
93
- .replace('\n', '%^&*')
 
94
  current_text_sentences = [sent.text.strip() for sent in esp_constants.nlp(current_text).sents]
95
- logs.success('Получили Ваш текст!')
96
  progress.progress(10)
97
 
98
  # Compute frequency dict
@@ -104,7 +102,7 @@ def main_workflow(
104
  if j < len(FREQ_DICT) * _frequency_barrier_percent:
105
  MAX_FREQUENCY = tp[1]
106
  MAX_FREQUENCY = 3 if MAX_FREQUENCY < 3 else MAX_FREQUENCY
107
- logs.success("Посчитали немного статистики!")
108
  progress.progress(15)
109
 
110
  # Choose necessary language minimum according to user's input
@@ -136,7 +134,7 @@ def main_workflow(
136
  st.stop()
137
 
138
  # Define which model is used for distractor generation
139
-
140
  if model_name == 'Модель-1':
141
  mask_filler = load_w2v(w2v_model_1_path)
142
  else:
@@ -145,7 +143,7 @@ def main_workflow(
145
  # Start generation process
146
  workflow = [SENTENCE(original=sent.strip(), n_sentence=num, max_num_distractors=num_distractors)
147
  for num, sent in enumerate(current_text_sentences)]
148
- logs.success("Запускаем процесс генерации заданий!")
149
  progress.progress(20)
150
 
151
  for sentence in workflow:
@@ -153,7 +151,7 @@ def main_workflow(
153
 
154
  for sentence in workflow:
155
  sentence.bind_phrases()
156
- logs.success("Подготовили предложения для дальнейшей работы!")
157
  progress.progress(30)
158
 
159
  for j, sentence in enumerate(workflow):
@@ -163,7 +161,7 @@ def main_workflow(
163
  user_target_words=USER_TARGET_WORDS,
164
  frequency_dict=FREQ_DICT)
165
  progress.progress(int(30 + (j * (30 / len(workflow)))))
166
- progress_s.progress(60)
167
  DUPLICATE_TARGET_WORDS = defaultdict(list)
168
  for sentence in workflow:
169
  for target_word in sentence.target_words:
@@ -176,8 +174,8 @@ def main_workflow(
176
  if target_word not in RESULT_TW:
177
  global_bad_target_words.append(target_word['original_text'])
178
  sentence.target_words.remove(target_word)
179
- progress_s.progress(65)
180
- logs.success('Выбрали слова-пропуски!')
181
 
182
  for sentence in workflow:
183
  sentence.attach_distractors_to_target_word(model=mask_filler,
@@ -185,22 +183,21 @@ def main_workflow(
185
  distractor_minimum=distractor_minimum,
186
  level_name=level,
187
  max_frequency=MAX_FREQUENCY,
188
- progress=progress_d,
189
- logs=logs_d)
190
- progress_s.progress(70)
191
- logs.success('Подобрали неправильные варианты!')
192
  for sentence in workflow:
193
  sentence.inflect_distractors()
194
- progress_s.progress(80)
195
- logs.success('Просклоняли и проспрягали неправильные варианты!')
196
 
197
  for sentence in workflow:
198
  sentence.filter_target_words(target_words_automatic_mode=tw_mode_automatic_mode)
199
 
200
  for sentence in workflow:
201
  sentence.sample_distractors(num_distractors=num_distractors)
202
- progress_s.progress(90)
203
- logs.success('Отобрали лучшие задания!')
204
 
205
  RESULT_TASKS = []
206
  for sentence in workflow:
@@ -244,8 +241,8 @@ def main_workflow(
244
  f'{PREPARED_TASKS["KEYS_ONLY"]}'
245
  TOTAL_OUT = f'{original_text}\n\n{"$" * 70}\n\n{STUDENT_OUT}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_TEACHER"]}' \
246
  f'\n\n{"$" * 70}\n\n{PREPARED_TASKS["KEYS_ONLY"]}'
247
- logs.success('Сейчас все будет готово!')
248
- progress_s.progress(90)
249
  save_name = save_name if save_name != '' else f'{str(datetime.datetime.now())[:-7]}_{original_text[:20]}'
250
  out = {
251
  'name': save_name,
 
22
  file: UploadedFile or None,
23
  text: str,
24
  logs: ST_WIDGETS,
 
25
  progress: st_progress,
26
  progress_d: st_progress,
 
27
  level: str,
28
  tw_mode_automatic_mode: str,
29
  target_words: str,
 
39
  :param file: user's file to generate tasks in
40
  :param text: user's text input to generate tasks in
41
  :param logs: widget to output logs to
 
42
  :param progress: progress bar
43
  :param progress_d: distractors progress bar
 
44
  :param target_words: how target words are chosen: by user or automatically
45
  :param tw_mode_automatic_mode:
46
  :param level: user's specification of CEFR level of text
 
51
  :return: Dictionary with output data: filename, amount_mode, text_with_gaps, tasks_as_list, correct_answers,
52
  student_out, teacher_out, total_out, original_text
53
  """
54
+
55
  # Clear bad target_words each time
56
+ if global_bad_target_words:
57
+ global_bad_target_words = []
58
 
59
  # Define main global variables
 
60
  GLOBAL_DISTRACTORS = set()
61
  MAX_FREQUENCY = 0
62
 
 
85
 
86
  # Text preprocessing
87
  original_text = current_text
88
+ current_text = (current_text.replace('.', '. ').replace('. . .', '...')
89
+ .replace(' ', ' ').replace('…', '...').replace('\u2026', '...')
90
+ .replace('—', '-').replace('\u2014', '-').replace('–', '-')
91
+ .replace('-\n', '').replace('\n', '%^&*'))
92
  current_text_sentences = [sent.text.strip() for sent in esp_constants.nlp(current_text).sents]
93
+ logs.update(label='Получили Ваш текст!', state='running')
94
  progress.progress(10)
95
 
96
  # Compute frequency dict
 
102
  if j < len(FREQ_DICT) * _frequency_barrier_percent:
103
  MAX_FREQUENCY = tp[1]
104
  MAX_FREQUENCY = 3 if MAX_FREQUENCY < 3 else MAX_FREQUENCY
105
+ logs.update(label="Посчитали немного статистики!", state='running')
106
  progress.progress(15)
107
 
108
  # Choose necessary language minimum according to user's input
 
134
  st.stop()
135
 
136
  # Define which model is used for distractor generation
137
+ logs.update(label='Загружаем языковые модели и другие данные', state='running')
138
  if model_name == 'Модель-1':
139
  mask_filler = load_w2v(w2v_model_1_path)
140
  else:
 
143
  # Start generation process
144
  workflow = [SENTENCE(original=sent.strip(), n_sentence=num, max_num_distractors=num_distractors)
145
  for num, sent in enumerate(current_text_sentences)]
146
+ logs.update(label="Запускаем процесс генерации заданий!", state='running')
147
  progress.progress(20)
148
 
149
  for sentence in workflow:
 
151
 
152
  for sentence in workflow:
153
  sentence.bind_phrases()
154
+ logs.update(label="Подготовили предложения для дальнейшей работы!", state='running')
155
  progress.progress(30)
156
 
157
  for j, sentence in enumerate(workflow):
 
161
  user_target_words=USER_TARGET_WORDS,
162
  frequency_dict=FREQ_DICT)
163
  progress.progress(int(30 + (j * (30 / len(workflow)))))
164
+ progress.progress(60)
165
  DUPLICATE_TARGET_WORDS = defaultdict(list)
166
  for sentence in workflow:
167
  for target_word in sentence.target_words:
 
174
  if target_word not in RESULT_TW:
175
  global_bad_target_words.append(target_word['original_text'])
176
  sentence.target_words.remove(target_word)
177
+ progress.progress(65)
178
+ logs.update(label='Выбрали слова-пропуски!', state='running')
179
 
180
  for sentence in workflow:
181
  sentence.attach_distractors_to_target_word(model=mask_filler,
 
183
  distractor_minimum=distractor_minimum,
184
  level_name=level,
185
  max_frequency=MAX_FREQUENCY,
186
+ logs=logs, progress=progress_d)
187
+ progress.progress(70)
188
+ logs.update(label='Подобрали неправильные варианты!', state='running')
 
189
  for sentence in workflow:
190
  sentence.inflect_distractors()
191
+ progress.progress(80)
192
+ logs.update(label='Просклоняли и проспрягали неправильные варианты!', state='running')
193
 
194
  for sentence in workflow:
195
  sentence.filter_target_words(target_words_automatic_mode=tw_mode_automatic_mode)
196
 
197
  for sentence in workflow:
198
  sentence.sample_distractors(num_distractors=num_distractors)
199
+ progress.progress(90)
200
+ logs.update(label='Отобрали лучшие задания!', state='running')
201
 
202
  RESULT_TASKS = []
203
  for sentence in workflow:
 
241
  f'{PREPARED_TASKS["KEYS_ONLY"]}'
242
  TOTAL_OUT = f'{original_text}\n\n{"$" * 70}\n\n{STUDENT_OUT}\n\n{"=" * 70}\n\n{PREPARED_TASKS["TASKS_TEACHER"]}' \
243
  f'\n\n{"$" * 70}\n\n{PREPARED_TASKS["KEYS_ONLY"]}'
244
+ logs.update(label='Сейчас все будет готово!', state='running')
245
+ progress.progress(90)
246
  save_name = save_name if save_name != '' else f'{str(datetime.datetime.now())[:-7]}_{original_text[:20]}'
247
  out = {
248
  'name': save_name,
utilities_language_w2v/esp_sentence_w2v.py CHANGED
@@ -150,7 +150,7 @@ class SENTENCE:
150
  frequency_dict=frequency_dict)
151
 
152
  def attach_distractors_to_target_word(self, model, global_distractors, distractor_minimum, level_name,
153
- max_frequency, progress, logs):
154
  n_target_words = len(self.target_words)
155
  bad_target_words = []
156
  for i, target_word in enumerate(self.target_words):
@@ -165,13 +165,14 @@ class SENTENCE:
165
  target_word['distractors'] = distractors
166
  target_word['distractors_number'] = len(distractors) if distractors is not None else 0
167
  progress.progress(i / n_target_words)
168
- logs.success(f'Обработали {i}/{n_target_words} слов в {self.n_sentence + 1}-м предложении')
 
 
169
  for btw in bad_target_words:
170
  BAD_USER_TARGET_WORDS.append(btw['original_text'])
171
  self.target_words.remove(btw)
172
- progress.progress(100)
173
- logs.success(
174
- f'Обработали {n_target_words}/{n_target_words} слов в {self.n_sentence + 1}-м предложении')
175
 
176
  def inflect_distractors(self):
177
  bad_target_words = []
@@ -184,7 +185,7 @@ class SENTENCE:
184
  inflected = inflect(lemma=distractor_lemma, target_pos=target_word['pos'],
185
  target_tags=target_word['tags'])
186
  else:
187
- continue
188
  else:
189
  inflected = inflect(lemma=distractor_lemma, target_pos=target_word['pos'],
190
  target_tags=target_word['tags'])
@@ -217,8 +218,7 @@ class SENTENCE:
217
  len_inflected_distractors = len(target_word['inflected_distractors'])
218
  num_distractors = min(len_inflected_distractors, num_distractors) \
219
  if num_distractors >= 4 else num_distractors
220
- target_word['inflected_distractors'] = sample(target_word['inflected_distractors'][:min(
221
- len_inflected_distractors, 10)], num_distractors)
222
 
223
 
224
  class TASK:
@@ -240,12 +240,13 @@ class TASK:
240
 
241
  def compile_task(self, max_num_distractors):
242
  len_distractors = len(self.inflected_distractors)
243
- len_variants = min(len_distractors, max_num_distractors) if max_num_distractors > 4 \
244
  else max_num_distractors
245
  letters = (f'({letter})' for letter in string.ascii_lowercase[:len_variants + 1])
246
  try:
247
  distractors = sample(self.inflected_distractors, len_variants) + [self.original_text, ]
248
- except ValueError:
 
249
  distractors = self.inflected_distractors + [self.original_text, ]
250
  self.variants.append(
251
  (self.original_text, [f'{item[0]} {item[1].replace("_", " ").lower()}'.lower()
 
150
  frequency_dict=frequency_dict)
151
 
152
  def attach_distractors_to_target_word(self, model, global_distractors, distractor_minimum, level_name,
153
+ max_frequency, logs, progress):
154
  n_target_words = len(self.target_words)
155
  bad_target_words = []
156
  for i, target_word in enumerate(self.target_words):
 
165
  target_word['distractors'] = distractors
166
  target_word['distractors_number'] = len(distractors) if distractors is not None else 0
167
  progress.progress(i / n_target_words)
168
+ logs.update(label=f'Обработали {i}/{n_target_words} слов в {self.n_sentence + 1}-м предложении',
169
+ state='running')
170
+ progress.progress(100)
171
  for btw in bad_target_words:
172
  BAD_USER_TARGET_WORDS.append(btw['original_text'])
173
  self.target_words.remove(btw)
174
+ logs.update(label=f'Обработали {n_target_words}/{n_target_words} слов в {self.n_sentence + 1}-м предложении',
175
+ state='running')
 
176
 
177
  def inflect_distractors(self):
178
  bad_target_words = []
 
185
  inflected = inflect(lemma=distractor_lemma, target_pos=target_word['pos'],
186
  target_tags=target_word['tags'])
187
  else:
188
+ continue # TODO
189
  else:
190
  inflected = inflect(lemma=distractor_lemma, target_pos=target_word['pos'],
191
  target_tags=target_word['tags'])
 
218
  len_inflected_distractors = len(target_word['inflected_distractors'])
219
  num_distractors = min(len_inflected_distractors, num_distractors) \
220
  if num_distractors >= 4 else num_distractors
221
+ target_word['inflected_distractors'] = sample(target_word['inflected_distractors'], num_distractors)
 
222
 
223
 
224
  class TASK:
 
240
 
241
  def compile_task(self, max_num_distractors):
242
  len_distractors = len(self.inflected_distractors)
243
+ len_variants = min(len_distractors, max_num_distractors) if max_num_distractors >= 4 \
244
  else max_num_distractors
245
  letters = (f'({letter})' for letter in string.ascii_lowercase[:len_variants + 1])
246
  try:
247
  distractors = sample(self.inflected_distractors, len_variants) + [self.original_text, ]
248
+ except ValueError as e:
249
+ print(f'{e}\n{len_distractors=}\n{len_variants=}')
250
  distractors = self.inflected_distractors + [self.original_text, ]
251
  self.variants.append(
252
  (self.original_text, [f'{item[0]} {item[1].replace("_", " ").lower()}'.lower()
utilities_ui/custom_download_button.py CHANGED
@@ -4,7 +4,7 @@ import uuid
4
  import base64
5
  import streamlit as st
6
  from typing import Optional, Union
7
- from streamlit.elements.button import DownloadButtonDataType
8
 
9
  DownloadButtonDataType = Union[DownloadButtonDataType, "pd.DataFrame", "Styler"]
10
 
 
4
  import base64
5
  import streamlit as st
6
  from typing import Optional, Union
7
+ from streamlit.elements.widgets.button import DownloadButtonDataType
8
 
9
  DownloadButtonDataType = Union[DownloadButtonDataType, "pd.DataFrame", "Styler"]
10