Spaces:
Paused
Paused
Commit
•
394e2d6
1
Parent(s):
7c3dcbb
updates to category mapper
Browse files
- audits/1719415577.csv +1 -0
- category_mapper.py +15 -8
audits/1719415577.csv
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
input_word,original_dictionary_word,new_dictionary_word
|
2 |
Oatmeal Cereal,"Cereal, oat bunches","Oatmeal, NFS"
|
|
|
|
1 |
input_word,original_dictionary_word,new_dictionary_word
|
2 |
Oatmeal Cereal,"Cereal, oat bunches","Oatmeal, NFS"
|
3 |
+
Jars,"Water, bottled, plain",Non-Food Item
|
category_mapper.py
CHANGED
@@ -14,7 +14,7 @@ from db.db_utils import get_connection
|
|
14 |
load_dotenv()
|
15 |
|
16 |
api_key = os.getenv("OPENAI_API_KEY")
|
17 |
-
use_openai =
|
18 |
|
19 |
if use_openai:
|
20 |
client = OpenAI(api_key=api_key)
|
@@ -30,17 +30,19 @@ file_path = './dictionary/final_corrected_wweia_food_category_complete - final_c
|
|
30 |
spreadsheet = pd.read_csv(file_path)
|
31 |
|
32 |
def find_best_category(food_item, category, dataframe):
|
33 |
-
|
34 |
-
filtered_df = dataframe[dataframe['closest_category'] == category]
|
35 |
-
if filtered_df.empty:
|
36 |
-
|
37 |
|
38 |
descriptions = filtered_df['wweia_food_category_description'].tolist()
|
39 |
|
40 |
prompt = (
|
41 |
-
f"Given the food item '{food_item}' and the
|
|
|
42 |
f"You should respond in json format with an object that has the key `guess`, and the value is the categoy."
|
43 |
)
|
|
|
44 |
if use_openai:
|
45 |
completion = client.chat.completions.create(
|
46 |
messages=[
|
@@ -89,8 +91,8 @@ for row in tqdm(rows, desc="Processing"):
|
|
89 |
print(f"Processing '{food_item}'")
|
90 |
|
91 |
# fix the category for Breakfast Cereals
|
92 |
-
if category == 'Breakfast Cereals':
|
93 |
-
|
94 |
# elif category == 'Fast Foods':
|
95 |
# # TODO
|
96 |
# elif category == 'American Indian/Alaska Native Foods':
|
@@ -108,6 +110,11 @@ for row in tqdm(rows, desc="Processing"):
|
|
108 |
print(f"A: '{best_category}'")
|
109 |
|
110 |
if best_category:
|
|
|
|
|
|
|
|
|
|
|
111 |
if use_openai:
|
112 |
db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
|
113 |
else:
|
|
|
14 |
load_dotenv()
|
15 |
|
16 |
api_key = os.getenv("OPENAI_API_KEY")
|
17 |
+
use_openai = True
|
18 |
|
19 |
if use_openai:
|
20 |
client = OpenAI(api_key=api_key)
|
|
|
30 |
spreadsheet = pd.read_csv(file_path)
|
31 |
|
32 |
def find_best_category(food_item, category, dataframe):
|
33 |
+
filtered_df = dataframe
|
34 |
+
# filtered_df = dataframe[dataframe['closest_category'] == category]
|
35 |
+
# if filtered_df.empty:
|
36 |
+
# filtered_df = dataframe
|
37 |
|
38 |
descriptions = filtered_df['wweia_food_category_description'].tolist()
|
39 |
|
40 |
prompt = (
|
41 |
+
f"Given the food item '{food_item}' and the classification of '{category}', choose the most appropriate category from the following options:\n{descriptions}\n\n"
|
42 |
+
f"Only respond with a category from the above. Do not come up with a new category. Do not respond with 'Legumes and Legume Products'.\n\n"
|
43 |
f"You should respond in json format with an object that has the key `guess`, and the value is the categoy."
|
44 |
)
|
45 |
+
|
46 |
if use_openai:
|
47 |
completion = client.chat.completions.create(
|
48 |
messages=[
|
|
|
91 |
print(f"Processing '{food_item}'")
|
92 |
|
93 |
# fix the category for Breakfast Cereals
|
94 |
+
# if category == 'Breakfast Cereals':
|
95 |
+
# category = 'Cereal Grains and Pasta'
|
96 |
# elif category == 'Fast Foods':
|
97 |
# # TODO
|
98 |
# elif category == 'American Indian/Alaska Native Foods':
|
|
|
110 |
print(f"A: '{best_category}'")
|
111 |
|
112 |
if best_category:
|
113 |
+
# ensure that the best_category is in the spreadsheet
|
114 |
+
if best_category not in spreadsheet['wweia_food_category_description'].values:
|
115 |
+
print(f"Error: '{best_category}' not found in the spreadsheet")
|
116 |
+
continue
|
117 |
+
|
118 |
if use_openai:
|
119 |
db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
|
120 |
else:
|