"""Name lookup helpers: suggest or verify market and product names for a word."""

import difflib
# `os` is imported explicitly so that `os.path.exists` below does not depend on
# the star import from util.search_data happening to re-export it.
import os

import pandas as pd

from util.search_data import *

# Data sources read by the functions below.
_MARKETS_CSV = 'data/market_name_utf8.csv'
_MARKET_COLUMN = '시장명'
_PRODUCTS_TXT = "data/products.txt"

# Number of fuzzy (non-prefix) suggestions appended after the prefix matches.
_FUZZY_LIMIT = 3


def _load_market_names():
    """Return the market-name column read from the markets CSV file."""
    return pd.read_csv(_MARKETS_CSV)[_MARKET_COLUMN]


def _load_product_names():
    """Return the list of known product names.

    Reads the products text file when it exists; otherwise falls back to
    ``get_all_product_names()`` (not defined in this file -- presumably
    supplied by the ``util.search_data`` star import; verify).
    """
    if not os.path.exists(_PRODUCTS_TXT):
        return get_all_product_names()
    with open(_PRODUCTS_TXT, "r", encoding="utf-8") as f:
        # The last two newline-separated entries are discarded.
        # NOTE(review): presumably trailing blank lines left by whatever
        # writes this file -- confirm a real product is not being dropped.
        return f.read().split("\n")[:-2]


def _suggest(word, candidates):
    """Return prefix matches for *word*, then the best fuzzy matches.

    A candidate is a prefix match when it and *word* agree on every character
    position they both have, i.e. one of the two strings is a prefix of the
    other (so an empty *word* matches every candidate). Prefix matches are
    returned first, in input order. Every other candidate is scored with
    ``difflib.SequenceMatcher(...).ratio()`` and the ``_FUZZY_LIMIT``
    highest-scoring ones are appended, best first.
    """
    prefix_matches = []
    scores = {}
    for candidate in candidates:
        if candidate.startswith(word) or word.startswith(candidate):
            prefix_matches.append(candidate)
        else:
            matcher = difflib.SequenceMatcher(None, word, candidate)
            scores[candidate] = matcher.ratio()
    # sorted() is stable, so equally scored candidates keep their input order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return prefix_matches + [name for name, _ in ranked[:_FUZZY_LIMIT]]


def word_to_market_name(word):
    """Suggest market names for *word*.

    Args:
        word: The text to look up.

    Returns:
        A list with every market name that shares a prefix with *word*,
        followed by up to three of the most similar remaining market names.
    """
    return _suggest(word, _load_market_names())


def word_to_product_name(word):
    """Suggest product names for *word*.

    Args:
        word: The text to look up.

    Returns:
        A list with every product name that shares a prefix with *word*,
        followed by up to three of the most similar remaining product names.
    """
    return _suggest(word, _load_product_names())


def check_word(word):
    """Return True if *word* is exactly equal to a known market name.

    Prints a ``check_word, <word>`` line when a match is found.
    """
    if any(word == name for name in _load_market_names()):
        print(f"check_word, {word}")
        return True
    return False


def check_product(word):
    """Return True if *word* is exactly equal to a known product name.

    Prints a line when a match is found.
    """
    if any(word == name for name in _load_product_names()):
        # NOTE(review): the message tag says "check_word" although this is
        # check_product; kept unchanged in case something relies on the output.
        print(f"check_word, {word}")
        return True
    return False