Spaces:
Runtime error
Runtime error
Commit
·
7cfd43a
1
Parent(s):
c1ead4a
fix wrong prompt format
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ load_dotenv()
|
|
17 |
logger = logging.getLogger(__name__)
|
18 |
logger.setLevel(logging.DEBUG)
|
19 |
|
|
|
20 |
|
21 |
def plot_wordcloud( text):
|
22 |
"""
|
@@ -71,7 +72,7 @@ def do( business_id, business_name, address):
|
|
71 |
|
72 |
crawled_results = pd.DataFrame(crawled_results)
|
73 |
# logger.debug(crawled_results)
|
74 |
-
extracted_results = extract_results( crawled_results)
|
75 |
# logger.error(extracted_results['extracted_results'].columns)
|
76 |
extracted_results = extracted_results['extracted_results'][ [ 'business_id', 'business_name', 'address', 'category', 'evidence', 'phone_number', 'description', 'store_name'] ]
|
77 |
|
|
|
17 |
logger = logging.getLogger(__name__)
|
18 |
logger.setLevel(logging.DEBUG)
|
19 |
|
20 |
+
# Candidate category labels: every non-empty key of category2supercategory,
# in the mapping's own order. Empty-string keys are skipped.
classes = [x for x in category2supercategory if len(x) > 0]
|
21 |
|
22 |
def plot_wordcloud( text):
|
23 |
"""
|
|
|
72 |
|
73 |
crawled_results = pd.DataFrame(crawled_results)
|
74 |
# logger.debug(crawled_results)
|
75 |
+
extracted_results = extract_results( crawled_results, classes=classes)
|
76 |
# logger.error(extracted_results['extracted_results'].columns)
|
77 |
extracted_results = extracted_results['extracted_results'][ [ 'business_id', 'business_name', 'address', 'category', 'evidence', 'phone_number', 'description', 'store_name'] ]
|
78 |
|
sheet.py
CHANGED
@@ -99,18 +99,22 @@ def compose_analysis( client, query, search_results, classes: list, model: str =
|
|
99 |
Return
|
100 |
response: str
|
101 |
"""
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
"role": "system",
|
106 |
-
"content": f'''
|
107 |
As a helpful and rigorous retail analyst, given the provided query and a list of search results for the query,
|
108 |
-
your task is to first identify relevant information of the identical store based on store name and proxmity of address if known. After that, extract `store_name`, `address`, `description`, `category` and `phone_number` from the found relevant information, where `category` can only be
|
109 |
It's very important to omit unrelated results. Do not make up any assumption.
|
110 |
Please think step by step, and output in json format. An example output json is like {"store_name": "...", "address": "...", "description": "... products, service or highlights ...", "category": "...", "phone_number": "..."}
|
111 |
If no relevant information has been found, simply output json with empty values.
|
112 |
I'll tip you and guarantee a place in heaven you do a great job completely according to my instruction.
|
113 |
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
},
|
115 |
{
|
116 |
"role": "user",
|
@@ -150,7 +154,7 @@ def test_compose_analysis():
|
|
150 |
def compose_classication(
|
151 |
client,
|
152 |
evidence,
|
153 |
-
classes: list = ['小吃店', '日式料理(含居酒屋,串燒)', '火(鍋/爐)', '東南亞料理(不含日韓)', '海鮮熱炒', '特色餐廳(含雞、鵝、牛、羊肉)', '傳統餐廳', '燒烤', '韓式料理(含火鍋,烤肉)', '西餐廳(含美式,義式,墨式)'],
|
154 |
backup_classes: list = [ '中式', '西式'],
|
155 |
model: str = 'gpt-3.5-turbo-0125'
|
156 |
) -> str:
|
@@ -382,12 +386,13 @@ def extract_results( data: pd.DataFrame, classes: list ):
|
|
382 |
business_id = d[2]
|
383 |
business_name = d[3]
|
384 |
address = d[6]
|
|
|
385 |
query = compose_query( address, business_name)
|
386 |
try:
|
387 |
ana_res = compose_analysis( client, query = query, search_results = evidence, classes = classes)
|
388 |
ana_res = json.loads(ana_res)
|
389 |
except Exception as e:
|
390 |
-
print(f"# ANALYSIS error {e}: i = {i},
|
391 |
empty_indices.append(i)
|
392 |
continue
|
393 |
|
@@ -630,8 +635,8 @@ category2supercategory = {
|
|
630 |
"西餐廳(含美式,義式,墨式)": "西式",
|
631 |
"中式": "中式",
|
632 |
"西式": "西式",
|
633 |
-
"
|
634 |
-
"
|
635 |
"早餐": ""
|
636 |
}
|
637 |
|
@@ -647,7 +652,7 @@ supercategory2category = {
|
|
647 |
"燒烤",
|
648 |
"韓式料理(含火鍋,烤肉)"
|
649 |
],
|
650 |
-
"西式": ["西餐廳(含美式,義式,墨式)", "
|
651 |
"": ["早餐"]
|
652 |
}
|
653 |
|
@@ -671,7 +676,7 @@ if __name__=='__main__':
|
|
671 |
parser.add_argument("--combined_file_path", type=str, default="data/gpt3.5/combined_results.joblib")
|
672 |
parser.add_argument("--postprocessed_results", type=str, default="data/gpt3.5/postprocessed_results.joblib")
|
673 |
parser.add_argument("--formatted_results", type=str, default="data/gpt3.5/formatted_results.csv")
|
674 |
-
parser.add_argument("--classes", type=list, default=['小吃店', '日式料理(含居酒屋,串燒)', '火(鍋/爐)', '東南亞料理(不含日韓)', '海鮮熱炒', '特色餐廳(含雞、鵝、牛、羊肉)', '傳統餐廳', '燒烤', '韓式料理(含火鍋,烤肉)', '西餐廳(含美式,義式,墨式)'])
|
675 |
parser.add_argument("--backup_classes", type=list, default=['中式', '西式'])
|
676 |
parser.add_argument("--strategy", type=str, default='replace', choices=['replace', 'patch'])
|
677 |
parser.add_argument("--n_processes", type=int, default=4)
|
|
|
99 |
Return
|
100 |
response: str
|
101 |
"""
|
102 |
+
categories = ", ".join([ "`"+x+"`" for x in classes if x!='早餐' ])+ " or " + "`早餐`"
|
103 |
+
# print(f"categoreis: {categories}")
|
104 |
+
system_prompt = '''
|
|
|
|
|
105 |
As a helpful and rigorous retail analyst, given the provided query and a list of search results for the query,
|
106 |
+
your task is to first identify relevant information of the identical store based on store name and proxmity of address if known. After that, extract `store_name`, `address`, `description`, `category` and `phone_number` from the found relevant information, where `category` can only be `小吃店`, `日式料理(含居酒屋,串燒)`, `火(鍋/爐)`, `東南亞料理(不含日韓)`, `海鮮熱炒`, `特色餐廳(含雞、鵝、牛、羊肉)`, `傳統餐廳`, `燒烤`, `韓式料理(含火鍋,烤肉)`, `西餐廳(含美式,義式,墨式)`, `西餐廳(餐酒館、酒吧、飛鏢吧、pub、lounge bar)`, `西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)` or `早餐`.
|
107 |
It's very important to omit unrelated results. Do not make up any assumption.
|
108 |
Please think step by step, and output in json format. An example output json is like {"store_name": "...", "address": "...", "description": "... products, service or highlights ...", "category": "...", "phone_number": "..."}
|
109 |
If no relevant information has been found, simply output json with empty values.
|
110 |
I'll tip you and guarantee a place in heaven you do a great job completely according to my instruction.
|
111 |
'''
|
112 |
+
# print(f"system prompt = {system_prompt}")
|
113 |
+
chat_completion = client.chat.completions.create(
|
114 |
+
messages=[
|
115 |
+
{
|
116 |
+
"role": "system",
|
117 |
+
"content": system_prompt
|
118 |
},
|
119 |
{
|
120 |
"role": "user",
|
|
|
154 |
def compose_classication(
|
155 |
client,
|
156 |
evidence,
|
157 |
+
classes: list = ['小吃店', '日式料理(含居酒屋,串燒)', '火(鍋/爐)', '東南亞料理(不含日韓)', '海鮮熱炒', '特色餐廳(含雞、鵝、牛、羊肉)', '傳統餐廳', '燒烤', '韓式料理(含火鍋,烤肉)', '西餐廳(含美式,義式,墨式)', ],
|
158 |
backup_classes: list = [ '中式', '西式'],
|
159 |
model: str = 'gpt-3.5-turbo-0125'
|
160 |
) -> str:
|
|
|
386 |
business_id = d[2]
|
387 |
business_name = d[3]
|
388 |
address = d[6]
|
389 |
+
ana_res = None
|
390 |
query = compose_query( address, business_name)
|
391 |
try:
|
392 |
ana_res = compose_analysis( client, query = query, search_results = evidence, classes = classes)
|
393 |
ana_res = json.loads(ana_res)
|
394 |
except Exception as e:
|
395 |
+
print(f"# ANALYSIS error {e}: i = {i}, ana_res = {ana_res}")
|
396 |
empty_indices.append(i)
|
397 |
continue
|
398 |
|
|
|
635 |
"西餐廳(含美式,義式,墨式)": "西式",
|
636 |
"中式": "中式",
|
637 |
"西式": "西式",
|
638 |
+
"西餐廳(餐酒館、酒吧、飛鏢吧、pub、lounge bar)": "西式",
|
639 |
+
"西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)": "西式",
|
640 |
"早餐": ""
|
641 |
}
|
642 |
|
|
|
652 |
"燒烤",
|
653 |
"韓式料理(含火鍋,烤肉)"
|
654 |
],
|
655 |
+
"西式": ["西餐廳(含美式,義式,墨式)", "西餐廳(餐酒館、酒吧、飛鏢吧、pub、lounge bar)", "西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)"],
|
656 |
"": ["早餐"]
|
657 |
}
|
658 |
|
|
|
676 |
parser.add_argument("--combined_file_path", type=str, default="data/gpt3.5/combined_results.joblib")
|
677 |
parser.add_argument("--postprocessed_results", type=str, default="data/gpt3.5/postprocessed_results.joblib")
|
678 |
parser.add_argument("--formatted_results", type=str, default="data/gpt3.5/formatted_results.csv")
|
679 |
+
parser.add_argument("--classes", type=list, default=['小吃店', '日式料理(含居酒屋,串燒)', '火(鍋/爐)', '東南亞料理(不含日韓)', '海鮮熱炒', '特色餐廳(含雞、鵝、牛、羊肉)', '傳統餐廳', '燒烤', '韓式料理(含火鍋,烤肉)', '西餐廳(含美式,義式,墨式)', '西餐廳(餐酒館、酒吧、飛鏢吧、pub、lounge bar)', '西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)', '早餐'])
|
680 |
parser.add_argument("--backup_classes", type=list, default=['中式', '西式'])
|
681 |
parser.add_argument("--strategy", type=str, default='replace', choices=['replace', 'patch'])
|
682 |
parser.add_argument("--n_processes", type=int, default=4)
|