Spaces:
Runtime error
Runtime error
Upload utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pymorphy2
|
2 |
+
# Module-level analyzer instance; `check` takes it as the default value of its `morph` parameter.
morph = pymorphy2.MorphAnalyzer()
|
3 |
+
|
4 |
+
def counter(s: str) -> dict:
    """Count how many times each element occurs in ``s``.

    Args:
        s: A string (counted character by character); any iterable of
            hashable items is accepted as well.

    Returns:
        A plain, mutable ``dict`` mapping each distinct element to its number
        of occurrences, with keys in first-seen order.
    """
    from collections import Counter

    # Convert back to a plain dict: callers decrement the counts in place and
    # expect ordinary dict semantics (e.g. no implicit zero for missing keys).
    return dict(Counter(s))
|
11 |
+
|
12 |
+
|
13 |
+
def sweet_check(s1: str, s2: str) -> bool:
    """Tell whether ``s2`` contains a significant word that ``s1`` lacks.

    Both strings are lower-cased and split on whitespace. Stop words and
    stand-alone punctuation tokens are ignored. Words are compared exactly as
    written (no lemmatization is applied here).

    Args:
        s1: First phrase.
        s2: Second phrase.

    Returns:
        ``True`` if, after filtering, ``s2`` has at least one word that is
        absent from ``s1``; ``False`` otherwise (including when both phrases
        reduce to the same set of words).
    """
    # The backslash is escaped explicitly: a bare '\|' is an invalid escape
    # sequence that newer Python versions warn about.
    stop_punct = list(',./!@#$%^&*()_+=-<>?\\|{}[]`~/')
    stop = set(
        ["скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
         "в", "на", "для", "о", "у", "и", "с", "из"] + stop_punct)

    # split() without an argument collapses runs of whitespace, so repeated
    # spaces do not produce empty "words" that would distort the comparison.
    words_1 = set(s1.lower().split()) - stop
    words_2 = set(s2.lower().split()) - stop

    # Equal sets and "s2 is a subset of s1" both leave nothing new in s2.
    return bool(words_2 - words_1)
|
34 |
+
|
35 |
+
|
36 |
+
def check(s1: str, s2: str, debag=False, morph=morph) -> bool:
    """Compare two phrases by the lemmas of their words.

    Both phrases are lower-cased and split on whitespace; every word is
    replaced by ``morph.parse(word)[0].normal_form``; stop words and
    stand-alone punctuation tokens are then discarded.

    Args:
        s1: First phrase.
        s2: Second phrase.
        debag: When true, print the lemma lists and the final score.
        morph: Analyzer object providing ``parse(word)``; defaults to the
            module-level analyzer.

    Returns:
        * ``False`` if both phrases reduce to the same lemma set, or if ``s2``
          has no lemma that ``s1`` lacks.
        * ``True`` if ``s1`` has no lemma that ``s2`` lacks (and the sets
          differ).
        * Otherwise the lemmas unique to each side are joined with spaces and
          compared character by character: ``True`` if the two joined strings
          have equal length, or if some character of the shorter string cannot
          be matched (counting multiplicity) in the longer one; ``False`` if
          every character of the shorter string is covered by the longer one.
    """
    # The backslash is escaped explicitly: a bare '\|' is an invalid escape
    # sequence that newer Python versions warn about.
    stop_punct = list(',./!@#$%^&*()_+=-<>?\\|{}[]`~/')
    stop = set(
        ["скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
         "в", "на", "для", "о", "у", "и", "с", "из"] + stop_punct)

    # split() without an argument collapses runs of whitespace, so no empty
    # token is ever handed to the analyzer.
    lemmas_1 = [morph.parse(word)[0].normal_form for word in s1.lower().split()]
    lemmas_2 = [morph.parse(word)[0].normal_form for word in s2.lower().split()]
    set_s1 = set(lemmas_1) - stop
    set_s2 = set(lemmas_2) - stop
    if set_s1 == set_s2:
        return False

    diff_s1 = ' '.join(set_s1 - set_s2)
    diff_s2 = ' '.join(set_s2 - set_s1)
    if debag:
        print(lemmas_1)
        print(lemmas_2)

    if not diff_s1:
        return True
    if not diff_s2:
        return False

    # Pick the shorter/longer string directly. Looking them up through a dict
    # keyed by length would collapse both entries into one on equal lengths.
    if len(diff_s1) <= len(diff_s2):
        min_s, max_s = diff_s1, diff_s2
    else:
        min_s, max_s = diff_s2, diff_s1

    # +1 for each character of the shorter string that can still be matched in
    # the longer one (each occurrence is consumed once), -1 otherwise.
    available = counter(max_s)
    c = 0
    for ch in min_s:
        if available.get(ch, 0) > 0:
            c += 1
            available[ch] -= 1
        else:
            c -= 1

    # Equal-length differences are penalised, which keeps the score below 1.0.
    if len(diff_s2) == len(diff_s1):
        c -= 1

    # min_s is non-empty here (both empty cases returned above), so the
    # division is safe.
    score = c / len(min_s)
    if debag:
        print(score)
    return score < 1.0
|