File size: 2,405 Bytes
edec0a4
 
7e589bc
 
edec0a4
 
 
 
 
c9f7b9a
edec0a4
 
 
c9f7b9a
 
 
 
 
 
 
46f6c56
 
 
edec0a4
 
c9f7b9a
 
 
 
 
edec0a4
 
7e589bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edec0a4
 
 
 
 
72e2c10
 
 
 
edec0a4
7e589bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fe14e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import difflib
import pandas as pd
from util.search_data import *


def word_to_market_name(word):
    """Suggest market names for a partial or misspelled query.

    The names are read from the market-name column of
    ``data/market_name_utf8.csv`` on every call.

    Args:
        word: Query string to match against the market names.

    Returns:
        A list of market names. First come all names that agree with
        ``word`` character-by-character over their common length (in file
        order), then up to three of the remaining names ranked by
        ``difflib.SequenceMatcher`` similarity to ``word``, best first.
    """
    markets_df = pd.read_csv('data/market_name_utf8.csv')
    # A blank cell is parsed as NaN (a float); drop those rows, because the
    # character comparison below only works on strings.
    markets_names = markets_df['μ‹œμž₯λͺ…'].dropna()

    output = []
    scores = {}

    for name in markets_names:
        # zip() stops at the shorter string, so only the overlapping leading
        # characters are compared.
        if all(a == b for a, b in zip(name, word)):
            output.append(name)
        else:
            scores[name] = difflib.SequenceMatcher(None, word, name).ratio()

    # sorted() is stable, so names with equal scores keep their file order.
    ranked = sorted(scores, key=scores.get, reverse=True)
    output.extend(ranked[:3])

    return output


def word_to_product_name(word):
    """Suggest product names for a partial or misspelled query.

    Product names come from ``data/products.txt`` (one name per line) when
    that file exists; otherwise they are obtained from
    ``get_all_product_names()``.

    Args:
        word: Query string to match against the product names.

    Returns:
        A list of product names. First come all non-empty names that agree
        with ``word`` character-by-character over their common length (in
        source order), then up to three of the remaining names ranked by
        ``difflib.SequenceMatcher`` similarity to ``word``, best first.
    """
    if not os.path.exists("data/products.txt"):
        products = get_all_product_names()
    else:
        with open("data/products.txt", "r", encoding="utf-8") as f:
            # The last two split entries are discarded.
            # NOTE(review): presumably trailing blank lines left by whatever
            # writes this file -- confirm against the writer.
            products = f.read().split("\n")[:-2]

    output = []
    scores = {}

    for name in products:
        if not name:
            # An empty entry has no characters to compare, so it would pass
            # the prefix test for every query and be returned as a suggestion.
            continue
        # zip() stops at the shorter string, so only the overlapping leading
        # characters are compared.
        if all(a == b for a, b in zip(name, word)):
            output.append(name)
        else:
            scores[name] = difflib.SequenceMatcher(None, word, name).ratio()

    # sorted() is stable, so names with equal scores keep their source order.
    ranked = sorted(scores, key=scores.get, reverse=True)
    output.extend(ranked[:3])

    return output



def check_word(word):
    """Report whether ``word`` exactly equals a known market name.

    The names are read from the market-name column of
    ``data/market_name_utf8.csv`` on every call. When a match is found, a
    log line is printed to stdout.

    Args:
        word: Candidate market name.

    Returns:
        True if some market name equals ``word``, otherwise False.
    """
    known_names = pd.read_csv('data/market_name_utf8.csv')['μ‹œμž₯λͺ…']

    # any() stops at the first equal name, like an early return from a loop.
    is_market = any(word == candidate for candidate in known_names)
    if is_market:
        print(f"check_word, {word}")
    return is_market

def check_product(word):
    """Report whether ``word`` exactly equals a known product name.

    Product names come from ``data/products.txt`` (split on newlines, with
    the final two entries discarded) when that file exists; otherwise they
    are obtained from ``get_all_product_names()``. When a match is found, a
    log line is printed to stdout.

    Args:
        word: Candidate product name.

    Returns:
        True if some product name equals ``word``, otherwise False.
    """
    if os.path.exists("data/products.txt"):
        with open("data/products.txt", "r", encoding = "utf-8") as handle:
            raw_text = handle.read()
        known_products = raw_text.split("\n")[:-2]
    else:
        known_products = get_all_product_names()

    # any() stops at the first equal name, like an early return from a loop.
    is_product = any(word == candidate for candidate in known_products)
    if is_product:
        print(f"check_word, {word}")
    return is_product