beweinreich committed on
Commit
394e2d6
1 Parent(s): 7c3dcbb

updates to category mapper

Browse files
Files changed (2) hide show
  1. audits/1719415577.csv +1 -0
  2. category_mapper.py +15 -8
audits/1719415577.csv CHANGED
@@ -1,2 +1,3 @@
1
  input_word,original_dictionary_word,new_dictionary_word
2
  Oatmeal Cereal,"Cereal, oat bunches","Oatmeal, NFS"
 
 
1
  input_word,original_dictionary_word,new_dictionary_word
2
  Oatmeal Cereal,"Cereal, oat bunches","Oatmeal, NFS"
3
+ Jars,"Water, bottled, plain",Non-Food Item
category_mapper.py CHANGED
@@ -14,7 +14,7 @@ from db.db_utils import get_connection
14
  load_dotenv()
15
 
16
  api_key = os.getenv("OPENAI_API_KEY")
17
- use_openai = False
18
 
19
  if use_openai:
20
  client = OpenAI(api_key=api_key)
@@ -30,17 +30,19 @@ file_path = './dictionary/final_corrected_wweia_food_category_complete - final_c
30
  spreadsheet = pd.read_csv(file_path)
31
 
32
  def find_best_category(food_item, category, dataframe):
33
- # filtered_df = dataframe
34
- filtered_df = dataframe[dataframe['closest_category'] == category]
35
- if filtered_df.empty:
36
- filtered_df = dataframe
37
 
38
  descriptions = filtered_df['wweia_food_category_description'].tolist()
39
 
40
  prompt = (
41
- f"Given the food item '{food_item}' and the category '{category}', choose the most appropriate category from the following options:\n{descriptions}\n\n"
 
42
  f"You should respond in json format with an object that has the key `guess`, and the value is the categoy."
43
  )
 
44
  if use_openai:
45
  completion = client.chat.completions.create(
46
  messages=[
@@ -89,8 +91,8 @@ for row in tqdm(rows, desc="Processing"):
89
  print(f"Processing '{food_item}'")
90
 
91
  # fix the category for Breakfast Cereals
92
- if category == 'Breakfast Cereals':
93
- category = 'Cereal Grains and Pasta'
94
  # elif category == 'Fast Foods':
95
  # # TODO
96
  # elif category == 'American Indian/Alaska Native Foods':
@@ -108,6 +110,11 @@ for row in tqdm(rows, desc="Processing"):
108
  print(f"A: '{best_category}'")
109
 
110
  if best_category:
 
 
 
 
 
111
  if use_openai:
112
  db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
113
  else:
 
14
  load_dotenv()
15
 
16
  api_key = os.getenv("OPENAI_API_KEY")
17
+ use_openai = True
18
 
19
  if use_openai:
20
  client = OpenAI(api_key=api_key)
 
30
  spreadsheet = pd.read_csv(file_path)
31
 
32
  def find_best_category(food_item, category, dataframe):
33
+ filtered_df = dataframe
34
+ # filtered_df = dataframe[dataframe['closest_category'] == category]
35
+ # if filtered_df.empty:
36
+ # filtered_df = dataframe
37
 
38
  descriptions = filtered_df['wweia_food_category_description'].tolist()
39
 
40
  prompt = (
41
+ f"Given the food item '{food_item}' and the classification of '{category}', choose the most appropriate category from the following options:\n{descriptions}\n\n"
42
+ f"Only respond with a category from the above. Do not come up with a new category. Do not respond with 'Legumes and Legume Products'.\n\n"
43
  f"You should respond in json format with an object that has the key `guess`, and the value is the categoy."
44
  )
45
+
46
  if use_openai:
47
  completion = client.chat.completions.create(
48
  messages=[
 
91
  print(f"Processing '{food_item}'")
92
 
93
  # fix the category for Breakfast Cereals
94
+ # if category == 'Breakfast Cereals':
95
+ # category = 'Cereal Grains and Pasta'
96
  # elif category == 'Fast Foods':
97
  # # TODO
98
  # elif category == 'American Indian/Alaska Native Foods':
 
110
  print(f"A: '{best_category}'")
111
 
112
  if best_category:
113
+ # ensure that the best_category is in the spreadsheet
114
+ if best_category not in spreadsheet['wweia_food_category_description'].values:
115
+ print(f"Error: '{best_category}' not found in the spreadsheet")
116
+ continue
117
+
118
  if use_openai:
119
  db_cursor.execute('UPDATE dictionary SET wweia_category = %s WHERE fdc_id = %s', (best_category, fdc_id))
120
  else: