Danil committed on
Commit
c82443e
1 Parent(s): c2dde68

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +80 -0
utils.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymorphy2
2
+ morph = pymorphy2.MorphAnalyzer()
3
+
4
def counter(s: str) -> dict:
    """Count how many times each element occurs in *s*.

    Args:
        s: The string (or any iterable of hashable items) to tally.

    Returns:
        A plain ``dict`` mapping each distinct element to its number of
        occurrences, with keys in first-seen order. An empty input yields
        an empty dict.
    """
    from collections import Counter

    # Counter does the tallying; convert back to a plain dict so callers
    # keep getting exactly the type the function has always returned.
    return dict(Counter(s))
11
+
12
+
13
def sweet_check(s1: str, s2: str) -> bool:
    """Report whether *s2* contains a significant token that *s1* lacks.

    Both strings are lower-cased and split on whitespace. Tokens that are
    stop words (a fixed list of Russian discount words and prepositions) or
    a single punctuation character are discarded before comparing.

    Args:
        s1: First phrase.
        s2: Second phrase.

    Returns:
        ``True`` if, after filtering, at least one token of *s2* is absent
        from *s1*; ``False`` otherwise (including when both token sets are
        equal or *s2*'s tokens are a subset of *s1*'s).
    """
    # Raw string: the backslash is a literal character here, not an escape.
    punctuation = r',./!@#$%^&*()_+=-<>?\|{}[]`~/'
    stop_tokens = {
        "скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
        "в", "на", "для", "о", "у", "и", "с", "из",
    } | set(punctuation)

    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so no empty-string tokens are produced and
    # phrases that differ only in spacing compare as equal.
    tokens_1 = set(s1.lower().split()) - stop_tokens
    tokens_2 = set(s2.lower().split()) - stop_tokens

    # Equal sets and "s2 is a subset of s1" both leave this difference empty.
    return bool(tokens_2 - tokens_1)
34
+
35
+
36
def check(s1: str, s2: str, debag=False, morph=morph) -> bool:
    """Compare two phrases by their normalized, non-stop-word tokens.

    Each phrase is lower-cased, split on whitespace, and every token is
    replaced by ``morph.parse(token)[0].normal_form``. Stop words and single
    punctuation characters are then dropped and the two token sets compared.

    Args:
        s1: First phrase.
        s2: Second phrase.
        debag: When true, print the normalized token lists and, if it is
            computed, the character-overlap score.
        morph: Analyzer whose ``parse(token)`` returns a sequence whose first
            item has a ``normal_form`` attribute. Defaults to the module-level
            analyzer.

    Returns:
        ``False`` when the filtered token sets are equal, or when *s2* has no
        token missing from *s1*. ``True`` when *s1* has no token missing from
        *s2* (and the sets differ). Otherwise the tokens unique to each side
        are joined with spaces and compared character by character:
        ``False`` only if the two joined strings have different lengths and
        every character of the shorter one can be matched, with multiplicity,
        in the longer one; ``True`` in every other case.
    """
    # Raw string: the backslash is a literal character here, not an escape.
    punctuation = r',./!@#$%^&*()_+=-<>?\|{}[]`~/'
    stop_tokens = {
        "скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
        "в", "на", "для", "о", "у", "и", "с", "из",
    } | set(punctuation)

    # split() without an argument never yields empty tokens, so stray or
    # repeated spaces cannot leak an empty "word" into the comparison.
    lemmas_1 = [morph.parse(tok)[0].normal_form for tok in s1.lower().split()]
    lemmas_2 = [morph.parse(tok)[0].normal_form for tok in s2.lower().split()]

    set_s1 = set(lemmas_1) - stop_tokens
    set_s2 = set(lemmas_2) - stop_tokens
    if set_s1 == set_s2:
        return False

    diff_s1 = ' '.join(set_s1 - set_s2)
    diff_s2 = ' '.join(set_s2 - set_s1)
    if debag:
        print(lemmas_1)
        print(lemmas_2)

    if not diff_s1:
        return True
    if not diff_s2:
        return False

    # Pick the shorter/longer string directly. Looking them up through a
    # dict keyed by length would collapse two equal-length strings into one
    # entry and end up comparing a string with itself.
    shorter, longer = sorted((diff_s1, diff_s2), key=len)

    # Multiset match: each character of the shorter string consumes one
    # occurrence from the longer string; unmatched characters are penalized.
    available = counter(longer)
    score = 0
    for ch in shorter:
        if available.get(ch, 0) > 0:
            score += 1
            available[ch] -= 1
        else:
            score -= 1

    # Equal-length differences take an extra penalty, which keeps the ratio
    # below 1.0 and therefore always yields True for that case.
    if len(diff_s1) == len(diff_s2):
        score -= 1

    ratio = score / len(shorter)
    if debag:
        print(ratio)
    return ratio < 1.0