Spaces:
Runtime error
Runtime error
Commit
·
c1ead4a
1
Parent(s):
6023585
fix prompt mistakes
Browse files
sheet.py
CHANGED
@@ -90,7 +90,7 @@ def test_get_condensed_result():
|
|
90 |
res = get_serp(query)
|
91 |
cond_res = get_condensed_result(res)
|
92 |
|
93 |
-
def compose_analysis( client, query, search_results, model: str = 'gpt-3.5-turbo-0125'):
|
94 |
"""
|
95 |
Argument
|
96 |
query: str
|
@@ -103,9 +103,9 @@ def compose_analysis( client, query, search_results, model: str = 'gpt-3.5-turbo
|
|
103 |
messages=[
|
104 |
{
|
105 |
"role": "system",
|
106 |
-
"content": '''
|
107 |
As a helpful and rigorous retail analyst, given the provided query and a list of search results for the query,
|
108 |
-
your task is to first identify relevant information of the identical store based on store name and proximity of address if known. After that, extract `store_name`, `address`, `description`, `category` and `phone_number` from the found relevant information, where `category` can only be
|
109 |
It's very important to omit unrelated results. Do not make up any assumption.
|
110 |
Please think step by step, and output in json format. An example output json is like {"store_name": "...", "address": "...", "description": "... products, service or highlights ...", "category": "...", "phone_number": "..."}
|
111 |
If no relevant information has been found, simply output json with empty values.
|
@@ -366,7 +366,7 @@ def crawl_results_mp( data: pd.DataFrame, crawl_file_path: str, n_processes: int
|
|
366 |
print( f"total time: {time.time() - st}")
|
367 |
return crawled_results
|
368 |
|
369 |
-
def extract_results( data: pd.DataFrame ):
|
370 |
"""
|
371 |
Argument
|
372 |
data: `evidence`, `result`
|
@@ -384,7 +384,7 @@ def extract_results( data: pd.DataFrame ):
|
|
384 |
address = d[6]
|
385 |
query = compose_query( address, business_name)
|
386 |
try:
|
387 |
-
ana_res = compose_analysis( client, query = query, search_results = evidence)
|
388 |
ana_res = json.loads(ana_res)
|
389 |
except Exception as e:
|
390 |
print(f"# ANALYSIS error {e}: i = {i}, evidence = {evidence}")
|
@@ -405,7 +405,7 @@ def extract_results( data: pd.DataFrame ):
|
|
405 |
"empty_indices": empty_indices
|
406 |
}
|
407 |
|
408 |
-
def extract_results_mp( crawled_results, extracted_file_path):
|
409 |
"""
|
410 |
Argument
|
411 |
Return
|
@@ -417,7 +417,7 @@ def extract_results_mp( crawled_results, extracted_file_path):
|
|
417 |
if not os.path.exists(extracted_file_path):
|
418 |
split_data = split_dataframe( crawled_results)
|
419 |
with mp.Pool(args.n_processes) as pool:
|
420 |
-
extracted_results = pool.
|
421 |
extracted_results = merge_results( extracted_results, dataframe_columns=['extracted_results'], list_columns=['empty_indices'])
|
422 |
with open( extracted_file_path, "wb") as f:
|
423 |
joblib.dump( extracted_results, f)
|
@@ -630,7 +630,7 @@ category2supercategory = {
|
|
630 |
"西餐廳(含美式,義式,墨式)": "西式",
|
631 |
"中式": "中式",
|
632 |
"西式": "西式",
|
633 |
-
"
|
634 |
"西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)": "西式",
|
635 |
"早餐": ""
|
636 |
}
|
|
|
90 |
res = get_serp(query)
|
91 |
cond_res = get_condensed_result(res)
|
92 |
|
93 |
+
def compose_analysis( client, query, search_results, classes: list, model: str = 'gpt-3.5-turbo-0125'):
|
94 |
"""
|
95 |
Argument
|
96 |
query: str
|
|
|
103 |
messages=[
|
104 |
{
|
105 |
"role": "system",
|
106 |
+
"content": f'''
|
107 |
As a helpful and rigorous retail analyst, given the provided query and a list of search results for the query,
|
108 |
+
your task is to first identify relevant information of the identical store based on store name and proximity of address if known. After that, extract `store_name`, `address`, `description`, `category` and `phone_number` from the found relevant information, where `category` can only be {",".join("`"+x+"`" for x in classes)}.
|
109 |
It's very important to omit unrelated results. Do not make up any assumption.
|
110 |
Please think step by step, and output in json format. An example output json is like {"store_name": "...", "address": "...", "description": "... products, service or highlights ...", "category": "...", "phone_number": "..."}
|
111 |
If no relevant information has been found, simply output json with empty values.
|
|
|
366 |
print( f"total time: {time.time() - st}")
|
367 |
return crawled_results
|
368 |
|
369 |
+
def extract_results( data: pd.DataFrame, classes: list ):
|
370 |
"""
|
371 |
Argument
|
372 |
data: `evidence`, `result`
|
|
|
384 |
address = d[6]
|
385 |
query = compose_query( address, business_name)
|
386 |
try:
|
387 |
+
ana_res = compose_analysis( client, query = query, search_results = evidence, classes = classes)
|
388 |
ana_res = json.loads(ana_res)
|
389 |
except Exception as e:
|
390 |
print(f"# ANALYSIS error {e}: i = {i}, evidence = {evidence}")
|
|
|
405 |
"empty_indices": empty_indices
|
406 |
}
|
407 |
|
408 |
+
def extract_results_mp( crawled_results, extracted_file_path, classes: list):
|
409 |
"""
|
410 |
Argument
|
411 |
Return
|
|
|
417 |
if not os.path.exists(extracted_file_path):
|
418 |
split_data = split_dataframe( crawled_results)
|
419 |
with mp.Pool(args.n_processes) as pool:
|
420 |
+
extracted_results = pool.starmap( extract_results, [ (x, classes) for x in split_data])
|
421 |
extracted_results = merge_results( extracted_results, dataframe_columns=['extracted_results'], list_columns=['empty_indices'])
|
422 |
with open( extracted_file_path, "wb") as f:
|
423 |
joblib.dump( extracted_results, f)
|
|
|
630 |
"西餐廳(含美式,義式,墨式)": "西式",
|
631 |
"中式": "中式",
|
632 |
"西式": "西式",
|
633 |
+
"西餐廳(餐酒館、酒吧、飛鏢吧、pub、lounge bar)": "西式",
|
634 |
"西餐廳(土耳其、漢堡、薯條、法式、歐式、印度)": "西式",
|
635 |
"早餐": ""
|
636 |
}
|