Spaces:

ldhldh
/

seoul_backend

Runtime error

App Files Files Community

seoul_backend / util /preprocessing.py

ldhldh

Update util/preprocessing.py

7e589bc verified over 1 year ago

raw

history blame contribute delete

2.41 kB

	import difflib
	import pandas as pd
	from util.search_data import *


	def word_to_market_name(word):
	    """Suggest market names for a partial or misspelled query.

	    Market names are read from the ``시장명`` column of
	    ``data/market_name_utf8.csv`` on every call.

	    Args:
	        word: Query string to compare against the market names.

	    Returns:
	        A list of market names. First come all names for which the name
	        and ``word`` agree on every character of the shorter of the two
	        (i.e. one is a prefix of the other), in file order. They are
	        followed by up to three of the remaining names with the highest
	        ``difflib.SequenceMatcher`` ratio against ``word``, best first.
	    """
	    markets_df = pd.read_csv('data/market_name_utf8.csv')
	    # Blank cells are parsed as NaN (a float). Drop them and coerce the rest
	    # to str so the string operations below cannot raise TypeError.
	    markets_names = markets_df['시장명'].dropna().astype(str).tolist()

	    output = []
	    scores = {}

	    for name in markets_names:
	        # Equivalent to comparing character by character over the shorter
	        # string: one of the two must be a prefix of the other.
	        if name.startswith(word) or word.startswith(name):
	            output.append(name)
	        else:
	            scores[name] = difflib.SequenceMatcher(None, word, name).ratio()

	    # sorted() is stable, so names with equal scores keep their file order.
	    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
	    output.extend(name for name, _ in ranked[:3])

	    return output


	def word_to_product_name(word):
	    """Suggest product names for a partial or misspelled query.

	    Product names are read from ``data/products.txt`` (split on newlines).
	    When that file does not exist they are taken from
	    ``get_all_product_names()`` instead.

	    Args:
	        word: Query string to compare against the product names.

	    Returns:
	        A list of product names. First come all names for which the name
	        and ``word`` agree on every character of the shorter of the two
	        (i.e. one is a prefix of the other), in source order. They are
	        followed by up to three of the remaining names with the highest
	        ``difflib.SequenceMatcher`` ratio against ``word``, best first.
	    """
	    # Open the file directly instead of probing with os.path.exists: this
	    # module does not import ``os`` itself, so the probe only worked if the
	    # wildcard import happened to expose it.
	    try:
	        with open("data/products.txt", "r", encoding="utf-8") as f:
	            raw = f.read()
	    except FileNotFoundError:
	        products = get_all_product_names()
	    else:
	        # The last two split entries are discarded.
	        # NOTE(review): presumably the file ends with blank trailing lines;
	        # confirm against whatever writes data/products.txt.
	        products = raw.split("\n")[:-2]

	    output = []
	    scores = {}

	    for name in products:
	        # Equivalent to comparing character by character over the shorter
	        # string: one of the two must be a prefix of the other.
	        if name.startswith(word) or word.startswith(name):
	            output.append(name)
	        else:
	            scores[name] = difflib.SequenceMatcher(None, word, name).ratio()

	    # sorted() is stable, so names with equal scores keep their source order.
	    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
	    output.extend(name for name, _ in ranked[:3])

	    return output



	def check_word(word):
	    """Report whether ``word`` exactly equals a known market name.

	    Market names are read from the ``시장명`` column of
	    ``data/market_name_utf8.csv`` on every call. On a match, a
	    ``check_word, <word>`` line is printed to stdout.

	    Args:
	        word: Candidate market name.

	    Returns:
	        True if some entry of the column equals ``word``, otherwise False.
	    """
	    name_column = pd.read_csv('data/market_name_utf8.csv')['시장명']

	    # any() stops at the first equal entry, like the early return it replaces.
	    found = any(word == candidate for candidate in name_column)
	    if found:
	        print(f"check_word, {word}")
	    return found

	def check_product(word):
	    """Report whether ``word`` exactly equals a known product name.

	    Product names are read from ``data/products.txt`` (split on newlines).
	    When that file does not exist they are taken from
	    ``get_all_product_names()`` instead. On a match, a
	    ``check_product, <word>`` line is printed to stdout.

	    Args:
	        word: Candidate product name.

	    Returns:
	        True if some product name equals ``word``, otherwise False.
	    """
	    # Open the file directly instead of probing with os.path.exists: this
	    # module does not import ``os`` itself, so the probe only worked if the
	    # wildcard import happened to expose it.
	    try:
	        with open("data/products.txt", "r", encoding="utf-8") as f:
	            raw = f.read()
	    except FileNotFoundError:
	        products = get_all_product_names()
	    else:
	        # The last two split entries are discarded.
	        # NOTE(review): presumably the file ends with blank trailing lines;
	        # confirm against whatever writes data/products.txt.
	        products = raw.split("\n")[:-2]

	    found = any(word == name for name in products)
	    if found:
	        # The label names this function; it previously said "check_word",
	        # which made matches here look like market-name matches in the log.
	        print(f"check_product, {word}")
	    return found