a-v-bely committed
Commit 94004b3 • 1 Parent(s): f49c9b7

distractor classification for bert
Files changed:
- language_data/model3_with_wn_catboost_classifier.pickle  +3 -0
- language_data/model3_with_wn_minmaxscaler.pickle  +3 -0
- utilities_cookies/cookie_manager.py  +2 -4
- utilities_cookies/encrypted_cookie_manager.py  +1 -3
- utilities_database/user_database_utils.py  +7 -11
- utilities_database/user_database_widgets.py  +1 -2
- utilities_language_bert/esp_main_workflow_bert.py  +23 -46
- utilities_language_bert/esp_sentence_bert.py  +5 -4
- utilities_language_general/esp_constants.py  +0 -1
- utilities_language_general/esp_utils.py  +5 -6
- utilities_language_w2v/esp_main_workflow_w2v.py  +1 -5
language_data/model3_with_wn_catboost_classifier.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27b12b7d7c7aa81da02aba229941ffef9e51879be6673c4f389bea10cd1a2db
+size 2425245
language_data/model3_with_wn_minmaxscaler.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c415fb5e8b4258876b11043f43593fde8026456202629c5280cc59a1a5c5351b
+size 1404
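Both pickles are committed as Git LFS pointer files, so the diff shows only pointer metadata; the binaries themselves (by their names, a CatBoost classifier and a MinMaxScaler) live in LFS. A minimal sketch of how the pair might be deserialized, assuming the paths above and a (scaler, classifier) return shape; the Space's actual loader is load_classifiers in esp_constants:

# Hypothetical loader sketch; the real logic lives in
# utilities_language_general.esp_constants.load_classifiers.
from pickle import load

def load_model3(prefix: str = 'language_data/model3_with_wn') -> tuple:
    with open(f'{prefix}_minmaxscaler.pickle', 'rb') as f:
        scaler = load(f)      # feature scaler fitted at training time
    with open(f'{prefix}_catboost_classifier.pickle', 'rb') as f:
        classifier = load(f)  # CatBoost model expecting scaled features
    return scaler, classifier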
utilities_cookies/cookie_manager.py
CHANGED
@@ -1,10 +1,8 @@
 import streamlit as st
 from pathlib import Path
-from typing import Mapping
-from datetime import datetime
-from datetime import timedelta
 from urllib.parse import unquote
-from …
+from datetime import datetime, timedelta
+from typing import Mapping, MutableMapping
 from streamlit.components.v1 import components

utilities_cookies/encrypted_cookie_manager.py
CHANGED
@@ -1,10 +1,8 @@
 import os
 import base64
 import streamlit as st
-from typing import Tuple
-from typing import Optional
+from typing import Tuple, Optional, MutableMapping
 from cryptography import fernet
-from typing import MutableMapping
 from cryptography.fernet import Fernet
 from cryptography.hazmat.primitives import hashes
 from utilities_cookies.cookie_manager import CookieManager
utilities_database/user_database_utils.py
CHANGED
@@ -1,9 +1,9 @@
-import re
-import json
-import secrets
 import pandas as pd
 import streamlit as st
+from json import loads
+from re import search, compile
 from trycourier import Courier
+from secrets import token_urlsafe
 from argon2 import PasswordHasher
 from argon2.exceptions import VerifyMismatchError

@@ -37,7 +37,7 @@ def check_valid_name(name_sign_up: str) -> bool:
     name_regex_eng = r'^[A-Za-z_]\w *'
     name_regex_rus = r'^[А-Яа-я_][А-Яа-я0-9_] *'

-    if …
+    if search(name_regex_eng, name_sign_up) or search(name_regex_rus, name_sign_up):
         return True
     return False

@@ -46,12 +46,8 @@ def check_valid_email(email_sign_up: str) -> bool:
     """
     Checks if the user entered a valid email while creating the account.
     """
-    regex = …
+    regex = compile(r'([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+')
     return True
-
-    # if re.fullmatch(regex, email_sign_up):
-    #     return True
-    # return False


 def check_unique_email(user_log_in_database, email_sign_up: str) -> bool:
@@ -133,7 +129,7 @@ def generate_random_passwd() -> str:
     Generates a random password to be sent in email.
     """
     password_length = 10
-    return …
+    return token_urlsafe(password_length)


 def send_passwd_in_email(auth_token: str, user_name_forgot_passwd: str, email_forgot_passwd: str, company_name: str,…
@@ -266,5 +262,5 @@ def load_users_particular_task(user_task_database, load_mode, creator_name, save…
         .eq('save_name', save_name)\
         .eq('save_type', load_mode)\
         .eq('cefr_level',cefr_level).execute().data[0]['generated_result']
-    return_data = …
+    return_data = loads(return_data.replace("'", '"'), strict=False)
     return return_data
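Taken together, these rewrites swap module-level imports (re, json, secrets) for direct function imports and fill in the previously truncated bodies. Roughly how the new helpers behave (illustrative calls, not part of the commit; note that check_valid_email still returns True unconditionally, since the compiled regex is never applied):

# Illustrative behaviour of the new helper imports (not from the repo).
from re import search, compile
from json import loads
from secrets import token_urlsafe

name_regex_eng = r'^[A-Za-z_]\w *'
print(bool(search(name_regex_eng, 'maria_92')))  # True for Latin names

email_re = compile(r'([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+')
print(bool(email_re.fullmatch('user@example.com')))  # True

print(token_urlsafe(10))  # ~14-char URL-safe random password

# Single-quoted pseudo-JSON from the database is coerced into valid JSON:
print(loads("{'tasks': 3}".replace("'", '"'), strict=False))  # {'tasks': 3}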
utilities_database/user_database_widgets.py
CHANGED
@@ -3,7 +3,6 @@ from datetime import datetime
 from supabase import create_client, Client
 from utilities_option_menu.option_menu import option_menu
 import utilities_database.user_database_utils as db_utils
-from utilities_database.user_database_utils import check_usr_pass
 from utilities_cookies.encrypted_cookie_manager import EncryptedCookieManager

 DB_URL = st.secrets['SUPABASE_URL']
@@ -91,7 +90,7 @@ class LogIn:
             login_submit_button = st.form_submit_button(label='Войти')

             if login_submit_button:
-                authenticate_user_check = check_usr_pass(user_log_in_database=user_login_table,
+                authenticate_user_check = db_utils.check_usr_pass(user_log_in_database=user_login_table,
                                                          user_name=user_name,
                                                          password=password)

utilities_language_bert/esp_main_workflow_bert.py
CHANGED
@@ -1,29 +1,20 @@
 import datetime
 from io import StringIO
+from typing import Union
 from random import sample
 from collections import defaultdict
-from streamlit import progress as st_progress
-from streamlit.elements import WIDGETS as ST_WIDGETS
-from utilities_language_general.esp_constants import st
-from utilities_language_bert.esp_sentence_bert import TASK
-from utilities_language_bert.esp_sentence_bert import SENTENCE
-from utilities_language_general.esp_utils import prepare_tasks
-from utilities_language_general.esp_constants import load_bert
 from streamlit.runtime.uploaded_file_manager import UploadedFile
-
-from utilities_language_general.…
-from utilities_language_general.…
-from utilities_language_general.esp_utils import compute_frequency_dict
-from utilities_language_general.esp_constants import BAD_USER_TARGET_WORDS
-
+from utilities_language_bert.esp_sentence_bert import TASK, SENTENCE
+from utilities_language_general.esp_utils import prepare_tasks, prepare_target_words, compute_frequency_dict
+from utilities_language_general.esp_constants import st, load_bert, load_classifiers, nlp, summarization, BAD_USER_TARGET_WORDS, MINIMUM_SETS


 def main_workflow(
-        file: UploadedFile…
+        file: Union[UploadedFile, None],
         text: str,
-        logs…
-        progress…
-        progress_d…
+        logs,
+        progress,
+        progress_d,
         level: str,
         tw_mode_automatic_mode: str,
        target_words: str,
@@ -58,6 +49,7 @@
     MAX_FREQUENCY = 0

     logs.update(label='Загружаем языковые модели и другие данные', state='running')
+    pos_dict, scaler, classifier = load_classifiers('model3')
     mask_filler = load_bert()

     # Get input text
@@ -67,15 +59,15 @@
     elif text != '':
         current_text = text
     else:
-        …
+        st.warning('Вы и текст не вставили, и файл не выбрали 😢')
         current_text = ''
-        …
+        st.stop()

     # Process target words
     if tw_mode_automatic_mode == 'Самостоятельно':
         if target_words == '':
-            …
-            …
+            st.warning('Вы не ввели целевые слова')
+            st.stop()
         # Cannot make up paradigm, so only USER_TARGET_WORDS is used
         USER_TARGET_WORDS = prepare_target_words(target_words)
         tw_mode_automatic_mode = False
@@ -89,7 +81,7 @@
                     .replace(' ', ' ').replace('…', '...').replace('…', '...')
                     .replace('—', '-').replace('\u2014', '-').replace('—', '-')
                     .replace('-\n', '').replace('\n', '%^&*'))
-    current_text_sentences = [sent.text.strip() for sent in …
+    current_text_sentences = [sent.text.strip() for sent in nlp(current_text).sents]
     logs.update(label='Получили Ваш текст!', state='running')
     progress.progress(10)

@@ -106,27 +98,8 @@
     progress.progress(15)

     # Choose necessary language minimum according to user's input
-    if level == 'A1':
-        target_minimum = esp_constants.a1_target_set
-        distractor_minimum = esp_constants.a1_distractor_set
-    elif level == 'A2':
-        target_minimum = esp_constants.a2_target_set
-        distractor_minimum = esp_constants.a2_distractor_set
-    elif level == 'B1':
-        target_minimum = esp_constants.b1_target_set
-        distractor_minimum = esp_constants.b1_distractor_set
-    elif level == 'B2':
-        target_minimum = esp_constants.b2_target_set
-        distractor_minimum = esp_constants.b2_distractor_set
-    elif level == 'C1':
-        target_minimum = esp_constants.c1_target_set
-        distractor_minimum = esp_constants.c1_distractor_set
-    elif level == 'C2':
-        target_minimum = esp_constants.c2_target_set
-        distractor_minimum = esp_constants.c2_distractor_set
-    elif level == 'Без уровня':
-        target_minimum = None
-        distractor_minimum = None
+    if level:
+        target_minimum, distractor_minimum = MINIMUM_SETS[level]
     else:
         target_minimum = None
         distractor_minimum = None
@@ -204,7 +177,11 @@
         RESULT_TASKS.append(task)

     for num, task in enumerate(RESULT_TASKS):
-        task.attach_distractors_to_target_word(model=mask_filler, …
+        task.attach_distractors_to_target_word(model=mask_filler,
+                                               scaler=scaler,
+                                               classifier=classifier,
+                                               pos_dict=pos_dict,
+                                               level_name=level,
                                                global_distractors=GLOBAL_DISTRACTORS,
                                                distractor_minimum=distractor_minimum,
                                                max_frequency=MAX_FREQUENCY)
@@ -240,8 +217,8 @@
         NUMBER_TASKS = 10
     else:
         NUMBER_TASKS = len(RESULT_TASKS)
-    RESULT_TASKS_in_summary = filter(lambda task: task.in_summary, RESULT_TASKS)
-    RESULT_TASTS_not_in_summary = filter(lambda task: not task.in_summary, RESULT_TASKS)
+    RESULT_TASKS_in_summary = list(filter(lambda task: task.in_summary, RESULT_TASKS))
+    RESULT_TASTS_not_in_summary = list(filter(lambda task: not task.in_summary, RESULT_TASKS))
     if len(RESULT_TASKS_in_summary) >= NUMBER_TASKS:
         RESULT_TASKS = RESULT_TASKS_in_summary
     else:
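The main workflow changes: classifier loading is added next to the BERT loader, empty-input handling now warns and stops, and the 21-line CEFR if/elif ladder collapses into a MINIMUM_SETS lookup. MINIMUM_SETS itself is defined outside this diff; a plausible shape, with placeholder sets standing in for the real vocabulary minimums:

# Assumed shape of MINIMUM_SETS (its definition is not in this commit);
# placeholder sets stand in for the real CEFR vocabulary minimums.
a1_target_set, a1_distractor_set = {'casa', 'comer'}, {'perro', 'beber'}
MINIMUM_SETS = {
    'A1': (a1_target_set, a1_distractor_set),
    # ... 'A2' through 'C2' follow the same pattern ...
    'Без уровня': (None, None),
}

target_minimum, distractor_minimum = MINIMUM_SETS['A1']  # replaces the old elif chain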
utilities_language_bert/esp_sentence_bert.py
CHANGED
@@ -43,6 +43,7 @@ class SENTENCE:
             if not previous_was_phrase:
                 self.sentence_phrases.append(self.sentence_lemma_pos[i][1])
                 previous_was_phrase = False
+        self.sentence_phrases.append(self.sentence_lemma_pos[-1][1])

     def search_target_words_automatically(self, target_minimum: set, frequency_dict: dict = None, summary:list=None):
         for token in self.sentence_phrases:
@@ -188,11 +189,11 @@ class TASK:
     def __repr__(self):
         return '\n'.join([f'{key}\t=\t{value}' for key, value in self.__dict__.items()])

-    def attach_distractors_to_target_word(self, model, …
-                                          … level_name, max_frequency):
+    def attach_distractors_to_target_word(self, model, scaler, classifier, pos_dict,
+                                          global_distractors, distractor_minimum, level_name, max_frequency):
         pos = self.pos[0] if self.pos[0] == 'phrase' else self.pos[1]
-        distractors_sentence = get_distractors_from_model_bert(model=model, …
-                                                               … gender=self.gender,
+        distractors_sentence = get_distractors_from_model_bert(model=model, scaler=scaler, classifier=classifier, pos_dict=pos_dict,
+                                                               level_name=level_name, lemma=self.lemma, pos=pos, gender=self.gender,
                                                                text_with_masked_task=self.masked_sentence,
                                                                global_distractors=global_distractors,
                                                                distractor_minimum=distractor_minimum,
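The one added line in SENTENCE fixes a fencepost bug: the pairwise loop over sentence_lemma_pos never emitted the final element, silently dropping the last token of every sentence. A reduced sketch of the pattern (the pair structure here is assumed from the surrounding lines):

# Reduced sketch of the fencepost fix; tuple contents are illustrative.
sentence_lemma_pos = [('el', 'el_DET'), ('gato', 'gato_NOUN'), ('negro', 'negro_ADJ')]
sentence_phrases = []
for i in range(len(sentence_lemma_pos) - 1):
    # ... phrase detection over the pair (i, i + 1) elided ...
    sentence_phrases.append(sentence_lemma_pos[i][1])
sentence_phrases.append(sentence_lemma_pos[-1][1])  # the line this commit adds
print(sentence_phrases)  # all three items, not just the first two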
utilities_language_general/esp_constants.py
CHANGED
@@ -2,7 +2,6 @@ import json
 import spacy
 import gensim
 import streamlit as st
-
 from pickle import load
 from transformers import pipeline
 from summarizer import Summarizer
utilities_language_general/esp_utils.py
CHANGED
@@ -192,9 +192,8 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta…
     return distractors


-def get_distractors_from_model_bert(…
-                                    global_distractors: set, distractor_minimum: set,
-                                    max_num_distractors: int, max_length_ratio=5, min_edit_distance_ratio=0.5):
+def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, level_name: str, lemma: str, pos: str, gender: str, text_with_masked_task: str,
+                                    global_distractors: set, distractor_minimum: set, max_num_distractors: int, max_length_ratio=5, min_edit_distance_ratio=0.5):
     _distractors = []
     try:
         bert_candidates = [token for token in model(text_with_masked_task, top_k=max_num_distractors + 100)]
@@ -217,9 +216,9 @@ def get_distractors_from_model_bert(doc, model, scaler, classifier, text_with_ma…
             distractor_similarity = candidate_distractor[1]
             candidate_gender = get_tags(distractor_lemma).get('Gender')
             length_ratio = abs(len(lemma) - len(distractor_lemma))
-            decision = make_decision(doc, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, …
-                                     …
-                                     …
+            decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
+                                     target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
+                                     substitute_lemma=distractor_lemma, substitute_pos=distractor_pos, bert_score=distractor_similarity)
             if ((distractor_pos == pos
                  or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
                      and distractor_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][distractor_pos]))
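This hunk carries the commit's point: BERT mask-filling candidates are now routed through the CatBoost classifier (via make_decision with model_type='bert') before the POS-compatibility checks, instead of being filtered by heuristics alone. A hedged sketch of such a scaler-plus-classifier gate; the actual feature set and threshold live inside make_decision and are assumptions here:

# Hedged sketch of a scaler + CatBoost gate, analogous in spirit to
# make_decision(model_type='bert', ...). Feature layout and the 0.5
# cut-off are assumptions, not the repository's actual code.
import numpy as np

def accept_distractor(scaler, classifier, bert_score: float,
                      same_pos: bool, length_ratio: int) -> bool:
    features = np.array([[bert_score, float(same_pos), length_ratio]])
    scaled = scaler.transform(features)              # MinMaxScaler from the pickle
    proba = classifier.predict_proba(scaled)[0][1]   # P(candidate is a good distractor)
    return proba >= 0.5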
utilities_language_w2v/esp_main_workflow_w2v.py
CHANGED
@@ -3,16 +3,12 @@ from io import StringIO
 from typing import Union
 from random import sample
 from collections import defaultdict
-from streamlit import progress as st_progress
-from streamlit.elements import WIDGETS as ST_WIDGETS
 from streamlit.runtime.uploaded_file_manager import UploadedFile
-import utilities_language_general.esp_constants as esp_constants
 from utilities_language_w2v.esp_sentence_w2v import TASK, SENTENCE
 from utilities_language_general.esp_utils import prepare_tasks, prepare_target_words, compute_frequency_dict
 from utilities_language_general.esp_constants import st, load_w2v, load_classifiers, nlp, summarization, BAD_USER_TARGET_WORDS, MINIMUM_SETS


-
 def main_workflow(
         file: Union[UploadedFile, None],
         text: str,
@@ -84,7 +80,7 @@ def main_workflow(
                     .replace(' ', ' ').replace('…', '...').replace('…', '...')
                     .replace('—', '-').replace('\u2014', '-').replace('—', '-')
                     .replace('-\n', '').replace('\n', '%^&*'))
-    current_text_sentences = [sent.text.strip() for sent in …
+    current_text_sentences = [sent.text.strip() for sent in nlp(current_text).sents]
     logs.update(label='Получили Ваш текст!', state='running')
     progress.progress(10)

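The w2v workflow receives the same cleanups as the BERT one: dead streamlit imports go away and the truncated sentence-splitting line is completed with the shared spaCy pipeline. A standalone equivalent of that restored line (assuming any installed Spanish spaCy model; the Space builds its own nlp in esp_constants):

# Standalone equivalent of the restored sentence-splitting line;
# es_core_news_sm is an assumption, the Space loads its own `nlp`.
import spacy

nlp = spacy.load('es_core_news_sm')
current_text = 'Hola. Esto es una prueba. Adiós.'
current_text_sentences = [sent.text.strip() for sent in nlp(current_text).sents]
print(current_text_sentences)  # ['Hola.', 'Esto es una prueba.', 'Adiós.']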